Coverage for src/local_deep_research/config/paths.py: 100%
76 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""
2Centralized path configuration for Local Deep Research.
3Handles database location using platformdirs for proper user data storage.
4"""
6import hashlib
7import os
8from pathlib import Path
10import platformdirs
11from loguru import logger
def get_data_directory() -> Path:
    """
    Get the appropriate data directory for storing application data.

    Uses platformdirs to get the platform-specific user data directory
    unless an explicit override is configured. This function only computes
    the path; it does not create the directory.

    Environment variable:
        LDR_DATA_DIR: Override the default data directory location.
            All subdirectories (research_outputs, cache, logs, database)
            will be created under this directory. A leading ``~`` or
            ``~user`` is expanded to the matching home directory.

    Returns:
        Path to data directory

    Raises:
        RuntimeError: If LDR_DATA_DIR starts with ``~`` and the home
            directory cannot be resolved.
    """
    # Check for explicit override via environment variable
    custom_path = os.getenv("LDR_DATA_DIR")
    if custom_path:
        # Values coming from .env files, container configs or quoted shell
        # assignments never receive shell tilde expansion; without this a
        # value such as "~/ldr" would create a literal "~" directory
        # relative to the current working directory.
        data_dir = Path(custom_path).expanduser()
        logger.debug(
            f"Using custom data directory from LDR_DATA_DIR: {data_dir}"
        )
        return data_dir

    # Use platformdirs for platform-specific user data directory
    # Windows: C:\Users\Username\AppData\Local\local-deep-research
    # macOS: ~/Library/Application Support/local-deep-research
    # Linux: ~/.local/share/local-deep-research
    data_dir = Path(platformdirs.user_data_dir("local-deep-research"))
    # Log only the directory pattern, not the full path which may contain username
    logger.debug(
        f"Using platformdirs data directory pattern: .../{data_dir.name}"
    )

    return data_dir
def get_research_outputs_directory() -> Path:
    """
    Return the folder where research outputs (reports, etc.) are kept.

    The folder is ``research_outputs`` inside the main data directory. It is
    created, together with any missing parents, when it does not exist yet.

    Returns:
        Path to research outputs directory
    """
    target = get_data_directory() / "research_outputs"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using research outputs directory: {target}")
    return target
def get_journal_data_directory() -> Path:
    """Return the folder holding downloaded journal quality data files.

    Contains openalex_sources.json.gz, doaj_journals.json, and the
    compiled journal_reference.db. Fetched on first use from
    OpenAlex and DOAJ APIs.

    The folder is ``journal_data`` inside the main data directory and is
    created (with parents) if it is missing.

    Returns:
        Path to journal data directory
    """
    target = get_data_directory() / "journal_data"
    target.mkdir(parents=True, exist_ok=True)
    return target
def get_cache_directory() -> Path:
    """
    Return the folder used for cache files (search cache, etc.).

    The folder is ``cache`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to cache directory
    """
    target = get_data_directory() / "cache"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using cache directory: {target}")
    return target
def get_logs_directory() -> Path:
    """
    Return the folder where log files are written.

    The folder is ``logs`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to logs directory
    """
    target = get_data_directory() / "logs"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using logs directory: {target}")
    return target
def get_encrypted_database_path() -> Path:
    """Return the folder that holds the encrypted databases.

    The folder is ``encrypted_databases`` inside the main data directory
    and is created (with parents) if it is missing.

    Returns:
        Path to the encrypted databases directory
    """
    target = get_data_directory() / "encrypted_databases"
    target.mkdir(parents=True, exist_ok=True)
    return target
def get_user_database_filename(username: str) -> str:
    """Build the database filename that belongs to a given user.

    Args:
        username: The username to generate a filename for

    Returns:
        The database filename (not full path) for the user, in the form
        ``ldr_user_<16 hex characters>.db``
    """
    # The name is derived from a hash of the username rather than the
    # username itself, so special characters never reach the filesystem.
    digest = hashlib.sha256(username.encode()).hexdigest()
    short_digest = digest[:16]
    return "ldr_user_" + short_digest + ".db"
def get_library_directory() -> Path:
    """
    Return the folder where library files (documents, PDFs, etc.) are kept.

    The folder is ``library`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to library directory
    """
    target = get_data_directory() / "library"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using library directory: {target}")
    return target
def get_config_directory() -> Path:
    """
    Return the folder where configuration files are kept.

    The folder is ``config`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to config directory
    """
    target = get_data_directory() / "config"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using config directory: {target}")
    return target
def get_models_directory() -> Path:
    """
    Return the folder where downloaded models are kept.

    The folder is ``models`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to models directory
    """
    target = get_data_directory() / "models"
    # exist_ok keeps repeated calls harmless once the folder is present
    target.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Using models directory: {target}")
    return target
def get_backup_directory() -> Path:
    """Return the base backup folder shared by all users.

    The folder is ``encrypted_databases/backups`` inside the main data
    directory and is created (with parents) if it is missing.

    Returns:
        Path to the base backup directory
    """
    target = get_data_directory() / "encrypted_databases" / "backups"
    target.mkdir(parents=True, exist_ok=True)
    return target
def get_user_backup_directory(username: str) -> Path:
    """Return the backup folder that belongs to a given user.

    The folder lives under the base backup directory and is named after the
    first 16 hex characters of the SHA-256 digest of the username. It is
    created if missing and its permissions are set to 0o700 on every call.

    Args:
        username: The username whose backup directory is requested

    Returns:
        Path to the user's backup directory
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    target = get_backup_directory() / digest[:16]
    target.mkdir(mode=0o700, parents=True, exist_ok=True)
    # The mode given to mkdir is filtered through the process umask, so
    # apply 0o700 explicitly afterwards.
    target.chmod(0o700)
    return target
205# Convenience functions for backward compatibility
def get_data_dir() -> str:
    """Return the data directory as a plain string.

    Kept for backward compatibility with callers that expect ``str``
    rather than ``Path``.
    """
    data_directory = get_data_directory()
    return str(data_directory)