Coverage for src / local_deep_research / utilities / url_utils.py: 100%
19 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""URL utility functions for the local deep research application."""
3from ..security.network_utils import is_private_ip
5# Re-export for backwards compatibility
6__all__ = ["normalize_url", "is_private_ip"]
def normalize_url(raw_url: str) -> str:
    """
    Normalize a URL to ensure it has a proper scheme and format.

    Surrounding whitespace is removed first. URLs that already start with
    ``http://`` or ``https://`` are returned unchanged. URLs with a malformed
    scheme separator (``http:host`` or ``http:/host``) are repaired. Anything
    else is treated as ``host[:port][/path]`` and gets ``http`` when
    ``is_private_ip`` reports the host as local/private, ``https`` otherwise.

    Args:
        raw_url: The raw URL string to normalize

    Returns:
        A properly formatted URL string

    Raises:
        ValueError: If raw_url is empty or contains only whitespace

    Examples:
        >>> normalize_url("localhost:11434")
        'http://localhost:11434'
        >>> normalize_url("https://example.com:11434")
        'https://example.com:11434'
        >>> normalize_url("http:example.com")
        'http://example.com'
        >>> normalize_url("http:/example.com")
        'http://example.com'
    """
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # Clean up the URL before validating further, so that whitespace-only
    # input is rejected instead of producing a bare "scheme://" string.
    raw_url = raw_url.strip()
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # First check if the URL already has a proper scheme
    if raw_url.startswith(("http://", "https://")):
        return raw_url

    # Handle malformed URLs like "http:hostname" or "http:/hostname".
    # The well-formed "scheme://" case has already returned above, so any
    # leading slash left on the remainder is stray and must be dropped to
    # avoid emitting "http:///hostname" (empty authority).
    if raw_url.startswith(("http:", "https:")):
        scheme, _, rest = raw_url.partition(":")
        rest = rest.lstrip("/")
        return f"{scheme}://{rest}"

    # Handle scheme-relative URLs that start with //
    if raw_url.startswith("//"):
        raw_url = raw_url[2:]

    # At this point, we should have hostname:port or just hostname
    hostname = raw_url.split(":")[0].split("/")[0]

    # Handle IPv6 addresses in brackets: splitting on ":" above truncates
    # them, so take everything up to and including the closing bracket.
    if hostname.startswith("[") and "]" in raw_url:
        hostname = raw_url.split("]")[0] + "]"

    # Use http for local/private addresses, https for external hosts
    scheme = "http" if is_private_ip(hostname) else "https"

    return f"{scheme}://{raw_url}"