Coverage for src/local_deep_research/config/paths.py: 100%

76 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Centralized path configuration for Local Deep Research. 

3Handles database location using platformdirs for proper user data storage. 

4""" 

5 

6import hashlib 

7import os 

8from pathlib import Path 

9 

10import platformdirs 

11from loguru import logger 

12 

13 

def get_data_directory() -> Path:
    """
    Get the appropriate data directory for storing application data.
    Uses platformdirs to get platform-specific user data directory.

    This function only computes the path; it does not create the directory.

    Environment variable:
        LDR_DATA_DIR: Override the default data directory location.
                      A leading ``~`` or ``~user`` is expanded to the
                      corresponding home directory, so values coming from
                      sources that do not perform shell expansion (.env
                      files, container or service definitions) behave as
                      expected.
                      All subdirectories (research_outputs, cache, logs,
                      database) will be created under this directory.

    Returns:
        Path to data directory
    """
    # Check for explicit override via environment variable
    custom_path = os.getenv("LDR_DATA_DIR")
    if custom_path:
        data_dir = Path(custom_path)
        try:
            # No-op unless the value starts with "~"
            data_dir = data_dir.expanduser()
        except RuntimeError:
            # Raised when the home directory cannot be resolved; keep the
            # value exactly as given rather than failing on path lookup.
            logger.warning(
                "Could not expand '~' in LDR_DATA_DIR; using the value as given"
            )
        logger.debug(
            f"Using custom data directory from LDR_DATA_DIR: {data_dir}"
        )
        return data_dir

    # Use platformdirs for platform-specific user data directory
    # Windows: C:\Users\Username\AppData\Local\local-deep-research
    # macOS: ~/Library/Application Support/local-deep-research
    # Linux: ~/.local/share/local-deep-research
    data_dir = Path(platformdirs.user_data_dir("local-deep-research"))
    # Log only the directory pattern, not the full path which may contain username
    logger.debug(
        f"Using platformdirs data directory pattern: .../{data_dir.name}"
    )

    return data_dir

47 

48 

def get_research_outputs_directory() -> Path:
    """
    Return the folder that holds research outputs (reports, etc.).

    The folder is ``research_outputs`` inside the main data directory. It is
    created, together with any missing parents, when it does not exist yet.

    Returns:
        Path to research outputs directory
    """
    target = get_data_directory() / "research_outputs"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using research outputs directory: {target}")
    return target

63 

64 

def get_journal_data_directory() -> Path:
    """Return the folder used for downloaded journal quality data files.

    Contains openalex_sources.json.gz, doaj_journals.json, and the
    compiled journal_reference.db. Fetched on first use from
    OpenAlex and DOAJ APIs.

    The folder is ``journal_data`` inside the main data directory and is
    created on demand (missing parents included).

    Returns:
        Path to journal data directory
    """
    target = get_data_directory() / "journal_data"
    target.mkdir(parents=True, exist_ok=True)
    return target

79 

80 

def get_cache_directory() -> Path:
    """
    Return the folder that holds cache files (search cache, etc.).

    The folder is ``cache`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to cache directory
    """
    target = get_data_directory() / "cache"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using cache directory: {target}")
    return target

95 

96 

def get_logs_directory() -> Path:
    """
    Return the folder that holds log files.

    The folder is ``logs`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to logs directory
    """
    target = get_data_directory() / "logs"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using logs directory: {target}")
    return target

111 

112 

def get_encrypted_database_path() -> Path:
    """Return the directory that holds the encrypted databases.

    The directory is ``encrypted_databases`` inside the main data directory
    and is created on demand (missing parents included).

    Returns:
        Path to the encrypted databases directory
    """
    target = get_data_directory() / "encrypted_databases"
    target.mkdir(parents=True, exist_ok=True)
    return target

123 

124 

def get_user_database_filename(username: str) -> str:
    """Build the database filename that belongs to a given user.

    The name embeds the first 16 hex characters of the SHA-256 digest of the
    encoded username instead of the username itself, so special characters
    in usernames never reach the filesystem.

    Args:
        username: The username to generate a filename for

    Returns:
        The database filename (not full path) for the user
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    return "ldr_user_" + digest[:16] + ".db"

137 

138 

def get_library_directory() -> Path:
    """
    Return the folder that holds library files (documents, PDFs, etc.).

    The folder is ``library`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to library directory
    """
    target = get_data_directory() / "library"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using library directory: {target}")
    return target

153 

154 

def get_config_directory() -> Path:
    """
    Return the folder that holds configuration files.

    The folder is ``config`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to config directory
    """
    target = get_data_directory() / "config"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using config directory: {target}")
    return target

169 

170 

def get_models_directory() -> Path:
    """
    Return the folder that holds downloaded models.

    The folder is ``models`` inside the main data directory. It is created,
    together with any missing parents, when it does not exist yet.

    Returns:
        Path to models directory
    """
    target = get_data_directory() / "models"
    target.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Using models directory: {target}")
    return target

185 

186 

def get_backup_directory() -> Path:
    """Return the base backup directory shared by all users.

    The directory is ``encrypted_databases/backups`` inside the main data
    directory and is created on demand (missing parents included).

    Returns:
        Path to the base backup directory
    """
    target = get_data_directory().joinpath("encrypted_databases", "backups")
    target.mkdir(parents=True, exist_ok=True)
    return target

193 

194 

def get_user_backup_directory(username: str) -> Path:
    """Return the backup directory that belongs to a given user.

    The directory lives under the base backup directory and is named after
    the first 16 hex characters of the SHA-256 digest of the encoded
    username. It is created on demand and its mode is set to 0o700.

    Args:
        username: The username whose backup directory is requested

    Returns:
        Path to the user's backup directory
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    target = get_backup_directory() / digest[:16]
    target.mkdir(mode=0o700, parents=True, exist_ok=True)
    # The mode passed to mkdir is filtered by the umask (and ignored when the
    # directory already exists), so apply 0o700 explicitly afterwards.
    target.chmod(0o700)
    return target

203 

204 

205# Convenience functions for backward compatibility 

def get_data_dir() -> str:
    """Return the data directory as a plain string (backward-compatible helper).

    Returns:
        String form of the path returned by get_data_directory()
    """
    directory = get_data_directory()
    return str(directory)