Coverage for src / local_deep_research / utilities / url_utils.py: 100%

19 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1"""URL utility functions for the local deep research application.""" 

2 

3from ..security.network_utils import is_private_ip 

4 

5# Re-export for backwards compatibility 

6__all__ = ["normalize_url", "is_private_ip"] 

7 

8 

def normalize_url(raw_url: str) -> str:
    """
    Normalize a URL to ensure it has a proper scheme and format.

    An existing ``http``/``https`` scheme (matched case-insensitively) is
    kept exactly as given. A scheme that is missing its ``//`` separator
    (``http:host`` or ``http:/host``) is repaired. When no scheme is present,
    one is chosen from the host part: ``http`` if ``is_private_ip`` reports
    the host as local/private, ``https`` otherwise.

    Args:
        raw_url: The raw URL string to normalize

    Returns:
        A properly formatted URL string

    Raises:
        ValueError: If the URL is empty, contains only whitespace, or
            consists solely of a leading ``//``.

    Examples:
        >>> normalize_url("localhost:11434")
        'http://localhost:11434'
        >>> normalize_url("https://example.com:11434")
        'https://example.com:11434'
        >>> normalize_url("http:example.com")
        'http://example.com'
    """
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # Strip first, then re-check: whitespace-only input must be rejected
    # rather than turned into a bare "scheme://".
    raw_url = raw_url.strip()
    if not raw_url:
        raise ValueError("URL cannot be empty")

    # URI schemes are case-insensitive, so match against a lowered copy
    # while still returning the caller's original text.
    lowered = raw_url.lower()

    # Already has a proper scheme: nothing to do.
    if lowered.startswith(("http://", "https://")):
        return raw_url

    # Malformed scheme such as "http:hostname" or "http:/hostname".
    # The well-formed "scheme://" case has already returned above.
    if lowered.startswith(("http:", "https:")):
        scheme, _, rest = raw_url.partition(":")
        # Drop a stray single slash so we never emit "scheme:///host".
        rest = rest.lstrip("/")
        return f"{scheme}://{rest}"

    # Scheme-relative form ("//host..."): drop the slashes and fall through.
    if raw_url.startswith("//"):
        raw_url = raw_url[2:]
        if not raw_url:
            raise ValueError("URL cannot be empty")

    # At this point we have "hostname[:port][/path]"; isolate the hostname.
    hostname = raw_url.split(":")[0].split("/")[0]

    # A bracketed IPv6 literal contains colons, so the split above only
    # yields "["; take everything up to and including the closing bracket.
    if hostname.startswith("[") and "]" in raw_url:
        hostname = raw_url.split("]")[0] + "]"

    # Use http for local/private addresses, https for external hosts.
    scheme = "http" if is_private_ip(hostname) else "https"

    return f"{scheme}://{raw_url}"
62 return f"{scheme}://{raw_url}"