Coverage for src/local_deep_research/security/rate_limiter.py: 99%

66 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Rate limiting utility for HTTP endpoints. 

3Provides a global limiter instance that can be imported by blueprints. 

4 

5Rate limits are configurable via environment variables (LDR_SECURITY_RATE_LIMIT_*). 

6Legacy server_config.json values are honored during the deprecation period. 

7Changes require server restart to take effect. 

8 

9Note: This is designed for single-instance local deployments. For multi-worker 

10production deployments, configure Redis storage via RATELIMIT_STORAGE_URL. 

11""" 

12 

13from flask import g, request, session as flask_session 

14from flask_limiter import Limiter 

15from flask_limiter.util import get_remote_address 

16from loguru import logger 

17 

18from ..settings.env_registry import is_rate_limiting_enabled 

19from ..web.server_config import load_server_config 

20 

# Rate limit strings are read once, at import time, from the server
# configuration. One value may carry several limits separated by
# semicolons (e.g., "5000 per hour;50000 per day").
_config = load_server_config()

# Application-wide default plus the authentication endpoint limits.
DEFAULT_RATE_LIMIT = _config["rate_limit_default"]
LOGIN_RATE_LIMIT = _config["rate_limit_login"]
REGISTRATION_RATE_LIMIT = _config["rate_limit_registration"]

# Limit for endpoints that modify settings, to prevent abuse.
SETTINGS_RATE_LIMIT = _config["rate_limit_settings"]

# Upload limits are kept as two independent values, one applied per user
# and one per IP, so that an authenticated user behind a single IP is not
# capped more tightly than either limit intends.
_UPLOAD_RATE_LIMIT_USER = _config["rate_limit_upload_user"]
_UPLOAD_RATE_LIMIT_IP = _config["rate_limit_upload_ip"]

33 

34 

def get_client_ip():
    """Return the client IP address used as the default rate-limit key.

    Sources are tried in this order:

    1. The first non-empty entry of ``X-Forwarded-For`` (the left-most
       address is conventionally the originating client).
    2. ``X-Real-IP``.
    3. The direct remote address from ``get_remote_address()``.

    A header that is present but blank (for example ``" "`` or a value
    starting with a comma) is skipped and the next source is tried, so
    requests are never keyed on an empty string.

    NOTE(review): both headers are supplied by the client unless a trusted
    proxy overwrites them. On an instance reachable without such a proxy a
    caller can choose its own rate-limit bucket. Confirm the deployment
    sets or strips these headers at the proxy.
    """
    # X-Forwarded-For is a comma-separated chain: "client, proxy1, proxy2".
    forwarded_for = request.environ.get("HTTP_X_FORWARDED_FOR")
    if forwarded_for:
        for hop in forwarded_for.split(","):
            candidate = hop.strip()
            if candidate:
                return candidate

    # X-Real-IP is a single-address alternative set by some proxies.
    real_ip = request.environ.get("HTTP_X_REAL_IP")
    if real_ip:
        candidate = real_ip.strip()
        if candidate:
            return candidate

    # No usable proxy header: fall back to the direct remote address.
    return get_remote_address()

55 

56 

57# Global limiter instance - will be initialized in app_factory 

58# Rate limiting is disabled in CI unless ENABLE_RATE_LIMITING=true 

59# This allows the rate limiting test to run with rate limiting enabled 

60# 

61# Note: In-memory storage is used by default, which is suitable for single-instance 

62# deployments. For multi-instance production deployments behind a load balancer, 

63# configure Redis storage via RATELIMIT_STORAGE_URL environment variable: 

64# export RATELIMIT_STORAGE_URL="redis://localhost:6379" 

# Module-level limiter shared by all blueprints. Requests are keyed by
# get_client_ip() unless a decorator supplies its own key function.
# NOTE(review): storage_uri is hard-coded to in-memory storage here; an
# explicit constructor value may take precedence over application config,
# so confirm that the Redis override described above is actually honored.
limiter = Limiter(
    enabled=is_rate_limiting_enabled(),
    storage_uri="memory://",
    headers_enabled=True,
    default_limits=[DEFAULT_RATE_LIMIT],
    key_func=get_client_ip,
)

72 

73 

74# Shared rate limit decorators for authentication endpoints 

75# These can be imported and used directly on routes 

# Each decorator below is a shared limit with its own scope.
login_limit = limiter.shared_limit(LOGIN_RATE_LIMIT, scope="login")

registration_limit = limiter.shared_limit(
    REGISTRATION_RATE_LIMIT, scope="registration"
)

settings_limit = limiter.shared_limit(SETTINGS_RATE_LIMIT, scope="settings")

# Password changes reuse the login limit string under a separate scope.
password_change_limit = limiter.shared_limit(
    LOGIN_RATE_LIMIT, scope="password_change"
)

95 

96 

97# --------------------------------------------------------------------------- 

98# Shared helpers 

99# --------------------------------------------------------------------------- 

100 

101 

def get_current_username():
    """Return the username for the current request, if any.

    A truthy ``g.current_user`` is returned as-is. When that attribute is
    missing or falsy, the ``"username"`` entry of the Flask session is
    returned instead, which is ``None`` when the session has no such key.
    """
    current = getattr(g, "current_user", None)
    return current or flask_session.get("username")

112 

113 

114# --------------------------------------------------------------------------- 

115# API v1 rate limiting (per-user, configurable via DB setting) 

116# --------------------------------------------------------------------------- 

117 

API_RATE_LIMIT_DEFAULT = 60  # requests per minute


def _get_user_api_rate_limit():
    """Return the current user's API rate limit, cached on ``flask.g``.

    The value is read from the ``app.api_rate_limit`` setting in the user's
    database and stored on ``g._api_rate_limit`` so that later calls in the
    same request skip the lookup. ``API_RATE_LIMIT_DEFAULT`` is used when
    there is no current username, when no database session is available,
    or when reading the setting raises.

    Returns:
        The configured limit as returned by the settings manager, or
        ``API_RATE_LIMIT_DEFAULT``.
    """
    if hasattr(g, "_api_rate_limit"):
        return g._api_rate_limit

    # Imported at call time rather than at module import.
    from ..database.session_context import get_user_db_session
    from ..utilities.db_utils import get_settings_manager

    username = get_current_username()

    rate_limit = API_RATE_LIMIT_DEFAULT
    if username:
        try:
            with get_user_db_session(username) as db_session:
                if db_session:
                    sm = get_settings_manager(db_session, username)
                    rate_limit = sm.get_setting(
                        "app.api_rate_limit", API_RATE_LIMIT_DEFAULT
                    )
        except Exception:
            # Best effort: a failed lookup must not break the request, so
            # the default is kept. loguru does not honor the stdlib
            # ``exc_info=True`` keyword; ``opt(exception=True)`` is what
            # attaches the active traceback to the record.
            logger.opt(exception=True).debug(
                "Failed to read API rate limit setting"
            )

    g._api_rate_limit = rate_limit
    return rate_limit

145 

146 

def _get_api_rate_limit_string():
    """Build the "<N> per minute" limit string for the current user."""
    per_minute = _get_user_api_rate_limit()
    return f"{per_minute} per minute"

150 

151 

def _is_api_rate_limit_exempt():
    """Tell the limiter whether to skip the API limit for this request.

    Returns True when there is no current username (rejecting such
    requests is left to the auth decorator) and when the user's configured
    limit is falsy, e.g. 0, which means rate limiting is disabled.
    """
    if get_current_username():
        return not _get_user_api_rate_limit()
    return True

158 

159 

def _get_api_user_key():
    """Return the per-user bucket key for API rate limiting.

    The key is ``"api_user:"`` followed by the current username. For
    ``api_rate_limit``, requests without a username are exempted through
    ``_is_api_rate_limit_exempt`` and are expected to be rejected by
    ``api_access_control``, so the key is intended for authenticated users.

    NOTE(review): limits that reuse this key without ``exempt_when`` would
    key unauthenticated requests as ``"api_user:None"``; confirm that
    authentication runs before those limits are evaluated.
    """
    username = get_current_username()
    return f"api_user:{username}"

168 

169 

# Per-user API limit. The limit string is a callable, so it is produced
# by _get_api_rate_limit_string rather than fixed at import time.
api_rate_limit = limiter.shared_limit(
    _get_api_rate_limit_string,
    key_func=_get_api_user_key,
    exempt_when=_is_api_rate_limit_exempt,
    scope="api_v1",
)

176 

177 

178# --------------------------------------------------------------------------- 

179# File upload rate limiting (dual-keyed: per-user AND per-IP) 

180# --------------------------------------------------------------------------- 

181 

182 

def _get_upload_user_key():
    """Return the bucket key for the per-user upload limit.

    The key is ``"upload_user:<username>"`` when a username is available,
    and ``"upload_ip:<client ip>"`` otherwise.
    """
    user = get_current_username()
    return f"upload_user:{user}" if user else f"upload_ip:{get_client_ip()}"

189 

190 

# Upload limit keyed by _get_upload_user_key.
upload_rate_limit_user = limiter.shared_limit(
    _UPLOAD_RATE_LIMIT_USER,
    key_func=_get_upload_user_key,
    scope="upload_user",
)

# Upload limit that passes no key function of its own.
upload_rate_limit_ip = limiter.shared_limit(
    _UPLOAD_RATE_LIMIT_IP, scope="upload_ip"
)

201 

202 

203# --------------------------------------------------------------------------- 

204# Journal-quality data download — per-user cap on manual rebuilds. The 

205# download streams several hundred MB from upstream sources (OpenAlex S3, 

206# DOAJ CSV, predatory lists, JabRef, Institutions) and rebuilds the 

207# reference DB on disk. Authenticated-user abuse would burn bandwidth and 

208# I/O; 2 per hour is generous for legitimate use and catches accidental 

209# rapid clicks. 

210# --------------------------------------------------------------------------- 

211 

# Fixed cap of two requests per hour, keyed by _get_api_user_key.
journal_data_limit = limiter.shared_limit(
    "2 per hour", key_func=_get_api_user_key, scope="journal_data"
)

217 

218 

219# Dashboard read endpoints (/api/journals, /api/journals/user-research, 

220# /api/journals/research/<id>). Each page click/filter triggers one 

221# request, so the limit needs to be generous — 60/min per authenticated 

222# user covers interactive browsing with headroom but still blocks 

223# scripted enumeration of the ~217K-row reference DB. 

# Fixed cap of sixty requests per minute, keyed by _get_api_user_key.
journals_read_limit = limiter.shared_limit(
    "60 per minute", key_func=_get_api_user_key, scope="journals_read"
)