Coverage for src / local_deep_research / security / url_builder.py: 48%
66 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2URL building utilities for security and application use.
4Provides centralized URL construction logic that can be reused
5throughout the application for consistent URL handling.
6"""
8import re
9from typing import Optional, Union
10from urllib.parse import urlparse
11from loguru import logger
class URLBuilderError(Exception):
    """Error signalling that a URL could not be built or failed validation."""
def normalize_bind_address(host: str) -> str:
    """
    Map a "bind to all interfaces" address to a hostname usable in a URL.

    Args:
        host: Host value taken from settings; may be a wildcard bind address.

    Returns:
        "localhost" when ``host`` is "0.0.0.0" or "::", otherwise ``host``
        unchanged.
    """
    # Wildcard bind addresses are valid for listening but not for linking.
    wildcard_addresses = ("0.0.0.0", "::")
    return "localhost" if host in wildcard_addresses else host
def build_base_url_from_settings(
    external_url: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[Union[str, int]] = None,
    fallback_base: str = "http://localhost:5000",
) -> str:
    """
    Build a base URL from application settings with ordered fallbacks.

    Resolution order:
      1. ``external_url`` if it is non-empty after stripping whitespace.
      2. ``http://<host>:<port>`` if both ``host`` and ``port`` are truthy.
      3. ``fallback_base``.

    Trailing slashes are removed from ``external_url`` and ``fallback_base``.

    Args:
        external_url: Pre-configured external URL (highest priority)
        host: Hostname/IP address (used if external_url not provided)
        port: Port number (used with host if external_url not provided);
            converted with ``int()``
        fallback_base: Final fallback URL if nothing else is available

    Returns:
        Complete base URL (e.g., "https://myapp.com" or "http://localhost:5000")

    Raises:
        URLBuilderError: If URL construction fails (e.g. ``port`` is not
            convertible to an integer). The original exception is attached
            as ``__cause__``.
    """
    try:
        # Try external URL first (highest priority)
        if external_url and external_url.strip():
            base_url = external_url.strip().rstrip("/")
            logger.debug(f"Using configured external URL: {base_url}")
            return base_url

        # Try to construct from host and port
        if host and port:
            normalized_host = normalize_bind_address(host)

            # An IPv6 literal must be wrapped in brackets inside a URL,
            # otherwise its colons are indistinguishable from the port
            # separator. Two or more colons cannot be a plain "host:port".
            if normalized_host.count(":") > 1 and not normalized_host.startswith("["):
                normalized_host = f"[{normalized_host}]"

            # Use HTTP for host/port combinations (typically internal server addresses)
            # For external URLs, users should configure external_url setting instead
            base_url = f"http://{normalized_host}:{int(port)}"  # DevSkim: ignore DS137138
            logger.debug(f"Constructed URL from host/port: {base_url}")
            return base_url

        # Final fallback
        base_url = fallback_base.rstrip("/")
        logger.debug(f"Using fallback URL: {base_url}")
        return base_url

    except Exception as e:
        # Chain explicitly so the root cause is preserved in tracebacks.
        raise URLBuilderError(f"Failed to build base URL: {e}") from e
def build_full_url(
    base_url: str,
    path: str,
    validate: bool = True,
    allowed_schemes: Optional[list] = None,
) -> str:
    """
    Build a complete URL from base URL and path.

    The two parts are joined with exactly one "/" at the junction: a leading
    "/" is added to ``path`` when missing and trailing "/" characters are
    removed from ``base_url``.

    Args:
        base_url: Base URL (e.g., "https://myapp.com")
        path: Path to append (e.g., "/research/123")
        validate: Whether to validate the resulting URL
        allowed_schemes: Passed through unchanged to
            ``validate_constructed_url`` when ``validate`` is true

    Returns:
        Complete URL (e.g., "https://myapp.com/research/123")

    Raises:
        URLBuilderError: If URL construction or validation fails. Any
            underlying exception (including a validation error) is wrapped
            and attached as ``__cause__``.
    """
    try:
        # Ensure path starts with /
        if not path.startswith("/"):
            path = f"/{path}"

        # Ensure base URL doesn't end with /
        base_url = base_url.rstrip("/")

        # Construct full URL
        full_url = f"{base_url}{path}"

        if validate:
            validate_constructed_url(full_url, allowed_schemes)

        return full_url

    except Exception as e:
        # Chain explicitly so the root cause is preserved in tracebacks.
        raise URLBuilderError(f"Failed to build full URL: {e}") from e
def validate_constructed_url(
    url: str, allowed_schemes: Optional[list] = None
) -> bool:
    """
    Validate a constructed URL.

    A URL is accepted when it is a non-empty string that parses, has a
    scheme contained in the allowed set, and has a non-empty hostname.

    Args:
        url: URL to validate
        allowed_schemes: List of allowed schemes. When omitted or empty,
            ["http", "https"] is used.

    Returns:
        True if valid

    Raises:
        URLBuilderError: If URL is invalid
    """
    if not url or not isinstance(url, str):
        raise URLBuilderError("URL must be a non-empty string")

    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
    except Exception as e:
        raise URLBuilderError(f"Failed to parse URL: {e}") from e

    # Check scheme
    if not parsed.scheme:
        raise URLBuilderError("URL must have a scheme")

    # Apply the documented default instead of silently accepting any scheme.
    schemes = allowed_schemes or ["http", "https"]
    if parsed.scheme not in schemes:
        raise URLBuilderError(
            f"URL scheme '{parsed.scheme}' not in allowed schemes: {schemes}"
        )

    # Check hostname. netloc alone is not enough: "http://:5000/x" and
    # "http://user@/x" both have a non-empty netloc but no host.
    if not hostname:
        raise URLBuilderError("URL must have a hostname")

    return True
def mask_sensitive_url(url: str) -> str:
    """
    Mask sensitive parts of a URL for secure logging.

    The following parts are replaced:
      - the password in ``user:password@host`` becomes ``***``
      - any path segment of 20 or more characters from ``[a-zA-Z0-9_-]``
        becomes ``***`` (common shape of webhook tokens)
      - a non-empty query string becomes ``?***``

    The fragment is not included in the result.

    Args:
        url: URL to mask

    Returns:
        URL with sensitive parts replaced with ***. If the URL cannot be
        processed, only the text before the first ":" is kept
        (``<scheme>://***``), or ``***`` when there is no such prefix.
    """
    try:
        parsed = urlparse(url)

        # Mask password if present. Rebuild the userinfo by position rather
        # than with str.replace(), which would also rewrite any other part
        # of the netloc that happens to contain the password text.
        netloc = parsed.netloc
        if parsed.password:
            userinfo, _, host_port = netloc.rpartition("@")
            username = userinfo.partition(":")[0]
            netloc = f"{username}:***@{host_port}"

        # Mask path tokens (common in webhooks)
        path = parsed.path
        if path:
            # Replace long alphanumeric tokens with ***
            path = re.sub(
                r"/[a-zA-Z0-9_-]{20,}",
                "/***",
                path,
            )

        # Reconstruct URL
        masked = f"{parsed.scheme}://{netloc}{path}"
        if parsed.query:
            masked += "?***"

        return masked

    except Exception:
        # If parsing fails, return a generic mask. Keep the prefix only
        # when a ":" actually delimits it; otherwise the "prefix" would be
        # the entire (possibly sensitive) input.
        if isinstance(url, str):
            scheme, delimiter, _ = url.partition(":")
            if delimiter:
                return f"{scheme}://***"
        return "***"