Coverage for src/local_deep_research/utilities/url_utils.py

1"""URL utility functions for the local deep research application."""

3from ..security.network_utils import is_private_ip

5# Re-export for backwards compatibility

6__all__ = ["normalize_url", "is_private_ip"]

def normalize_url(raw_url: str) -> str:
    """
    Normalize a URL to ensure it has a proper scheme and format.

    Surrounding whitespace is stripped first. A URL that already carries an
    ``http``/``https`` scheme (matched case-insensitively) is returned with
    the scheme lowercased, and a malformed ``http:host`` or ``http:/host``
    form gets the missing slashes inserted. A scheme-less URL (optionally
    starting with ``//``) gets ``http`` when ``is_private_ip`` reports its
    host as private and ``https`` otherwise.

    Args:
        raw_url: The raw URL string to normalize

    Returns:
        A properly formatted URL string

    Raises:
        ValueError: If ``raw_url`` is empty or contains only whitespace

    Examples:
        >>> normalize_url("localhost:11434")
        'http://localhost:11434'
        >>> normalize_url("https://example.com:11434")
        'https://example.com:11434'
        >>> normalize_url("http:example.com")
        'http://example.com'
    """
    # Falsy check first so a None argument raises ValueError rather than
    # failing on .strip().
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # Clean up the URL, then re-check: whitespace-only input is empty too.
    raw_url = raw_url.strip()
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # Explicit http/https scheme. Schemes are case-insensitive, so compare
    # lowercased and emit the canonical lowercase form.
    prefix, colon, remainder = raw_url.partition(":")
    if colon and prefix.lower() in ("http", "https"):
        scheme = prefix.lower()
        if remainder.startswith("//"):
            # Already well-formed: keep everything after the scheme as-is.
            return f"{scheme}:{remainder}"
        # Malformed "http:hostname" / "http:/hostname": drop any stray
        # leading slash and insert the proper "//" separator.
        remainder = remainder.lstrip("/")
        return f"{scheme}://{remainder}"

    # Protocol-relative URL: drop the leading // and treat as scheme-less.
    if raw_url.startswith("//"):
        raw_url = raw_url[2:]

    # At this point we should have hostname:port or just hostname
    # (possibly followed by a path).
    host = raw_url.split(":")[0].split("/")[0]

    # A bracketed IPv6 literal contains colons, so the split above only
    # yields "["; recover the full "[...]" form (brackets included).
    if host.startswith("[") and "]" in raw_url:
        host = raw_url.split("]")[0] + "]"

    # Use http for local/private addresses, https for external hosts.
    scheme = "http" if is_private_ip(host) else "https"

    return f"{scheme}://{raw_url}"

Coverage for src / local_deep_research / utilities / url_utils.py: 100%

19 statements