Coverage for src / local_deep_research / security / url_builder.py: 48%

66 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2URL building utilities for security and application use. 

3 

4Provides centralized URL construction logic that can be reused 

5throughout the application for consistent URL handling. 

6""" 

7 

8import re 

9from typing import Optional, Union 

10from urllib.parse import urlparse 

11from loguru import logger 

12 

13 

class URLBuilderError(Exception):
    """Error raised by this module when a URL cannot be built or validated."""

18 

19 

def normalize_bind_address(host: str) -> str:
    """
    Map a bind-all address to a hostname that can be used in a URL.

    Args:
        host: Host value taken from settings; may be a bind-all address

    Returns:
        "localhost" when host is "0.0.0.0" or "::", otherwise host unchanged
    """
    # Bind-all addresses tell a server to listen on every interface; they
    # are not meaningful as the target of a URL, so point at localhost.
    bind_all_addresses = ("0.0.0.0", "::")
    return "localhost" if host in bind_all_addresses else host

34 

35 

def build_base_url_from_settings(
    external_url: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[Union[str, int]] = None,
    fallback_base: str = "http://localhost:5000",
) -> str:
    """
    Build a base URL from application settings with ordered fallbacks.

    Sources are tried in priority order: the explicit external URL, then a
    URL assembled from host and port, then the fallback base.

    Args:
        external_url: Pre-configured external URL (highest priority). Used
            when it is non-empty after stripping whitespace.
        host: Hostname/IP address (used if external_url not provided).
            Bind-all addresses are mapped through normalize_bind_address and
            bare IPv6 literals are wrapped in square brackets.
        port: Port number (used with host if external_url not provided);
            converted with int().
        fallback_base: Final fallback URL if nothing else is available

    Returns:
        Base URL without trailing slashes
        (e.g., "https://myapp.com" or "http://localhost:5000")

    Raises:
        URLBuilderError: If any step of the construction raises (for example
            a port that cannot be converted to an integer)
    """
    try:
        # Try external URL first (highest priority)
        if external_url and external_url.strip():
            base_url = external_url.strip().rstrip("/")
            logger.debug(f"Using configured external URL: {base_url}")
            return base_url

        # Try to construct from host and port
        if host and port:
            normalized_host = normalize_bind_address(host)

            # A bare IPv6 literal (two or more colons) must be enclosed in
            # square brackets inside a URL, otherwise its colons are
            # indistinguishable from the port separator.
            if normalized_host.count(":") >= 2 and not normalized_host.startswith("["):
                normalized_host = f"[{normalized_host}]"

            # Use HTTP for host/port combinations (typically internal server addresses)
            # For external URLs, users should configure external_url setting instead
            base_url = f"http://{normalized_host}:{int(port)}"  # DevSkim: ignore DS137138
            logger.debug(f"Constructed URL from host/port: {base_url}")
            return base_url

        # Final fallback
        base_url = fallback_base.rstrip("/")
        logger.debug(f"Using fallback URL: {base_url}")
        return base_url

    except Exception as e:
        # Chain explicitly so the original failure is reported as the cause.
        raise URLBuilderError(f"Failed to build base URL: {e}") from e

84 

85 

def build_full_url(
    base_url: str,
    path: str,
    validate: bool = True,
    allowed_schemes: Optional[list] = None,
) -> str:
    """
    Build a complete URL from base URL and path.

    Trailing slashes are removed from the base URL and a leading slash is
    added to the path when missing, so the two are joined by a single "/".

    Args:
        base_url: Base URL (e.g., "https://myapp.com")
        path: Path to append (e.g., "/research/123")
        validate: Whether to validate the resulting URL
        allowed_schemes: List of allowed URL schemes (default: ["http", "https"])

    Returns:
        Complete URL (e.g., "https://myapp.com/research/123")

    Raises:
        URLBuilderError: If URL construction or validation fails. Validation
            errors are re-wrapped with a "Failed to build full URL" prefix.
    """
    try:
        # Ensure path starts with /
        if not path.startswith("/"):
            path = f"/{path}"

        # Ensure base URL doesn't end with /
        base_url = base_url.rstrip("/")

        # Construct full URL
        full_url = f"{base_url}{path}"

        if validate:
            # Apply the documented default here so the scheme restriction is
            # enforced even when the caller does not pass an allow-list.
            if allowed_schemes is None:
                allowed_schemes = ["http", "https"]
            validate_constructed_url(full_url, allowed_schemes)

        return full_url

    except Exception as e:
        # Chain explicitly so the original failure is reported as the cause.
        raise URLBuilderError(f"Failed to build full URL: {e}") from e

125 

126 

def validate_constructed_url(
    url: str, allowed_schemes: Optional[list] = None
) -> bool:
    """
    Validate a constructed URL.

    Args:
        url: URL to validate
        allowed_schemes: List of allowed schemes (default: ["http", "https"]).
            An explicitly passed empty list disables the scheme restriction.

    Returns:
        True if valid

    Raises:
        URLBuilderError: If the URL is empty or not a string, cannot be
            parsed, has no scheme, uses a scheme outside allowed_schemes,
            or has no hostname
    """
    if not url or not isinstance(url, str):
        raise URLBuilderError("URL must be a non-empty string")

    # Apply the documented default; previously None silently skipped the
    # scheme restriction altogether.
    if allowed_schemes is None:
        allowed_schemes = ["http", "https"]

    try:
        parsed = urlparse(url)
    except Exception as e:
        raise URLBuilderError(f"Failed to parse URL: {e}") from e

    # Check scheme
    if not parsed.scheme:
        raise URLBuilderError("URL must have a scheme")

    if allowed_schemes and parsed.scheme not in allowed_schemes:
        raise URLBuilderError(
            f"URL scheme '{parsed.scheme}' not in allowed schemes: {allowed_schemes}"
        )

    # Check hostname. netloc alone is not enough: "http://:8080/x" has a
    # non-empty netloc (":8080") but no host.
    if not parsed.hostname:
        raise URLBuilderError("URL must have a hostname")

    return True

165 

166 

def mask_sensitive_url(url: str) -> str:
    """
    Mask sensitive parts of a URL for secure logging.

    The password in the userinfo, long token-like path segments (20 or more
    characters from [a-zA-Z0-9_-] following a "/"), and the whole query
    string are replaced. The fragment is not included in the result.

    Args:
        url: URL to mask

    Returns:
        URL with sensitive parts replaced with ***. If the URL cannot be
        parsed, "<scheme>://***" is returned, or "***" when the input has
        no ":" to delimit a scheme.
    """
    try:
        parsed = urlparse(url)

        # Mask password if present. Only the password field of the userinfo
        # is rewritten; a plain str.replace on the netloc would also clobber
        # a username or hostname that happens to contain the same text.
        netloc = parsed.netloc
        if parsed.password:
            userinfo, at_sign, host_port = netloc.rpartition("@")
            username = userinfo.partition(":")[0]
            netloc = f"{username}:***{at_sign}{host_port}"

        # Mask path tokens (common in webhooks)
        path = parsed.path
        if path:
            # Replace long alphanumeric tokens with ***
            path = re.sub(
                r"/[a-zA-Z0-9_-]{20,}",
                "/***",
                path,
            )

        # Reconstruct URL
        masked = f"{parsed.scheme}://{netloc}{path}"
        if parsed.query:
            masked += "?***"

        return masked

    except Exception:
        # If parsing fails, just return a generic mask. Keep the scheme only
        # when a ":" delimits one, so the raw input is never echoed back.
        scheme, colon, _ = str(url).partition(":")
        return f"{scheme}://***" if colon else "***"