Coverage for src / local_deep_research / web_search_engines / engines / search_engine_github.py: 10%

320 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1import base64 

2import json 

3import time 

4from typing import Any, Dict, List, Optional 

5 

6from langchain_core.language_models import BaseLLM 

7from loguru import logger 

8 

9from ...config import llm_config, search_config 

10from ...security.safe_requests import safe_get 

11from ..search_engine_base import BaseSearchEngine 

12 

13 

class GitHubSearchEngine(BaseSearchEngine):
    """
    GitHub search engine implementation.
    Provides search across GitHub repositories, code, issues, and users.
    """

    def __init__(
        self,
        max_results: int = 15,
        api_key: Optional[str] = None,
        search_type: str = "repositories",
        include_readme: bool = True,
        include_issues: bool = False,
        llm: Optional[BaseLLM] = None,
        max_filtered_results: Optional[int] = None,
    ):
        """
        Initialize the GitHub search engine.

        Args:
            max_results: Maximum number of search results
            api_key: GitHub API token (can also be set in GITHUB_API_KEY env)
            search_type: Type of GitHub search ("repositories", "code", "issues", "users")
            include_readme: Whether to include README content for repositories
            include_issues: Whether to include recent issues for repositories
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
        """
        # Base class owns the LLM, filtering cap, and result cap.
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.api_key = api_key
        self.search_type = search_type
        self.include_readme = include_readme
        self.include_issues = include_issues

        # API endpoints; the search endpoint tracks the configured type.
        self.api_base = "https://api.github.com"
        self.search_endpoint = f"{self.api_base}/search/{search_type}"

        # Default headers sent with every request.
        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Local-Deep-Research-Agent",
        }

        # Attach the token when one was supplied; unauthenticated requests
        # get a much lower GitHub rate limit.
        if self.api_key:
            self.headers["Authorization"] = f"token {self.api_key}"
            logger.info("Using authenticated GitHub API requests")
        else:
            logger.warning(
                "No GitHub API key provided. Rate limits will be restricted."
            )

71 

72 def _handle_rate_limits(self, response): 

73 """Handle GitHub API rate limits by logging warnings and sleeping if necessary""" 

74 remaining = int(response.headers.get("X-RateLimit-Remaining", 60)) 

75 reset_time = int(response.headers.get("X-RateLimit-Reset", 0)) 

76 

77 if remaining < 5: 

78 current_time = time.time() 

79 wait_time = max(reset_time - current_time, 0) 

80 logger.warning( 

81 f"GitHub API rate limit almost reached. {remaining} requests remaining." 

82 ) 

83 

84 if wait_time > 0 and remaining == 0: 

85 logger.warning( 

86 f"GitHub API rate limit exceeded. Waiting {wait_time:.0f} seconds." 

87 ) 

88 time.sleep(min(wait_time, 60)) # Wait at most 60 seconds 

89 

90 def _optimize_github_query(self, query: str) -> str: 

91 """ 

92 Optimize the GitHub search query using LLM to improve search results. 

93 

94 Args: 

95 query: Original search query 

96 

97 Returns: 

98 Optimized GitHub search query 

99 """ 

100 # Get LLM from config if not already set 

101 if not self.llm: 

102 try: 

103 self.llm = llm_config.get_llm() 

104 if not self.llm: 

105 logger.warning("No LLM available for query optimization") 

106 return query 

107 except Exception: 

108 logger.exception("Error getting LLM from config") 

109 return query 

110 

111 prompt = f"""Transform this GitHub search query into an optimized version for the GitHub search API. Follow these steps: 

112 1. Strip question words (e.g., 'what', 'are', 'is'), stop words (e.g., 'and', 'as', 'of', 'on'), and redundant terms (e.g., 'repositories', 'repos', 'github') since they're implied by the search context. 

113 2. Keep only domain-specific keywords and avoid using "-related" terms. 

114 3. Add GitHub-specific filters with dynamic thresholds based on query context: 

115 - For stars: Use higher threshold (e.g., 'stars:>1000') for mainstream topics, lower (e.g., 'stars:>50') for specialized topics 

116 - For language: Detect programming language from query or omit if unclear 

117 - For search scope: Use 'in:name,description,readme' for general queries, 'in:file' for code-specific queries 

118 4. For date ranges, adapt based on query context: 

119 - For emerging: Use 'created:>2024-01-01' 

120 - For mature: Use 'pushed:>2023-01-01' 

121 - For historical research: Use 'created:2020-01-01..2024-01-01' 

122 5. For excluding results, adapt based on query: 

123 - Exclude irrelevant languages based on context 

124 - Use 'NOT' to exclude competing terms 

125 6. Ensure the output is a concise, space-separated string with no punctuation or extra text beyond keywords and filters. 

126 

127 

128 Original query: "{query}" 

129 

130 Return ONLY the optimized query, ready for GitHub's search API. Do not include explanations or additional text.""" 

131 

132 try: 

133 response = self.llm.invoke(prompt) 

134 

135 # Handle different response formats (string or object with content attribute) 

136 if hasattr(response, "content"): 

137 optimized_query = response.content.strip() 

138 else: 

139 # Handle string responses 

140 optimized_query = str(response).strip() 

141 

142 # Validate the optimized query 

143 if optimized_query and len(optimized_query) > 0: 

144 logger.info( 

145 f"LLM optimized query from '{query}' to '{optimized_query}'" 

146 ) 

147 return optimized_query 

148 else: 

149 logger.warning("LLM returned empty query, using original") 

150 return query 

151 

152 except Exception: 

153 logger.exception("Error optimizing query with LLM") 

154 return query 

155 

156 def _search_github(self, query: str) -> List[Dict[str, Any]]: 

157 """ 

158 Perform a GitHub search based on the configured search type. 

159 

160 Args: 

161 query: The search query 

162 

163 Returns: 

164 List of GitHub search result items 

165 """ 

166 results = [] 

167 

168 try: 

169 # Optimize GitHub query using LLM 

170 github_query = self._optimize_github_query(query) 

171 

172 logger.info(f"Final GitHub query: {github_query}") 

173 

174 # Construct search parameters 

175 params = { 

176 "q": github_query, 

177 "per_page": min( 

178 self.max_results, 100 

179 ), # GitHub API max is 100 per page 

180 "page": 1, 

181 } 

182 

183 # Add sort parameters based on search type 

184 if self.search_type == "repositories": 

185 params["sort"] = "stars" 

186 params["order"] = "desc" 

187 elif self.search_type == "code": 

188 params["sort"] = "indexed" 

189 params["order"] = "desc" 

190 elif self.search_type == "issues": 

191 params["sort"] = "updated" 

192 params["order"] = "desc" 

193 elif self.search_type == "users": 

194 params["sort"] = "followers" 

195 params["order"] = "desc" 

196 

197 # Apply rate limiting before request 

198 self._last_wait_time = self.rate_tracker.apply_rate_limit( 

199 self.engine_type 

200 ) 

201 

202 # Execute the API request 

203 response = safe_get( 

204 self.search_endpoint, headers=self.headers, params=params 

205 ) 

206 

207 # Check for rate limiting 

208 self._handle_rate_limits(response) 

209 

210 # Handle response with detailed logging 

211 if response.status_code == 200: 

212 data = response.json() 

213 total_count = data.get("total_count", 0) 

214 results = data.get("items", []) 

215 logger.info( 

216 f"GitHub search returned {len(results)} results (total available: {total_count})" 

217 ) 

218 

219 # Log the rate limit information 

220 rate_limit_remaining = response.headers.get( 

221 "X-RateLimit-Remaining", "unknown" 

222 ) 

223 logger.info( 

224 f"GitHub API rate limit: {rate_limit_remaining} requests remaining" 

225 ) 

226 

227 # If no results, try to provide more guidance 

228 if not results: 

229 logger.warning( 

230 "No results found. Consider these search tips:" 

231 ) 

232 logger.warning("1. Use shorter, more specific queries") 

233 logger.warning( 

234 "2. For repositories, try adding 'stars:>100' or 'language:python'" 

235 ) 

236 logger.warning( 

237 "3. For contribution opportunities, search for 'good-first-issue' or 'help-wanted'" 

238 ) 

239 else: 

240 logger.error( 

241 f"GitHub API error: {response.status_code} - {response.text}" 

242 ) 

243 

244 except Exception: 

245 logger.exception("Error searching GitHub") 

246 

247 return results 

248 

249 def _get_readme_content(self, repo_full_name: str) -> str: 

250 """ 

251 Get README content for a repository. 

252 

253 Args: 

254 repo_full_name: Full name of the repository (owner/repo) 

255 

256 Returns: 

257 Decoded README content or empty string if not found 

258 """ 

259 try: 

260 # Get README 

261 # Apply rate limiting before request 

262 self._last_wait_time = self.rate_tracker.apply_rate_limit( 

263 self.engine_type 

264 ) 

265 

266 response = safe_get( 

267 f"{self.api_base}/repos/{repo_full_name}/readme", 

268 headers=self.headers, 

269 ) 

270 

271 # Check for rate limiting 

272 self._handle_rate_limits(response) 

273 

274 if response.status_code == 200: 

275 data = response.json() 

276 content = data.get("content", "") 

277 encoding = data.get("encoding", "") 

278 

279 if encoding == "base64" and content: 

280 return base64.b64decode(content).decode( 

281 "utf-8", errors="replace" 

282 ) 

283 return content 

284 else: 

285 logger.warning( 

286 f"Could not get README for {repo_full_name}: {response.status_code}" 

287 ) 

288 return "" 

289 

290 except Exception: 

291 logger.exception(f"Error getting README for {repo_full_name}") 

292 return "" 

293 

294 def _get_recent_issues( 

295 self, repo_full_name: str, limit: int = 5 

296 ) -> List[Dict[str, Any]]: 

297 """ 

298 Get recent issues for a repository. 

299 

300 Args: 

301 repo_full_name: Full name of the repository (owner/repo) 

302 limit: Maximum number of issues to return 

303 

304 Returns: 

305 List of recent issues 

306 """ 

307 issues = [] 

308 

309 try: 

310 # Get recent issues 

311 # Apply rate limiting before request 

312 self._last_wait_time = self.rate_tracker.apply_rate_limit( 

313 self.engine_type 

314 ) 

315 

316 response = safe_get( 

317 f"{self.api_base}/repos/{repo_full_name}/issues", 

318 headers=self.headers, 

319 params={ 

320 "state": "all", 

321 "per_page": limit, 

322 "sort": "updated", 

323 "direction": "desc", 

324 }, 

325 ) 

326 

327 # Check for rate limiting 

328 self._handle_rate_limits(response) 

329 

330 if response.status_code == 200: 

331 issues = response.json() 

332 logger.info( 

333 f"Got {len(issues)} recent issues for {repo_full_name}" 

334 ) 

335 else: 

336 logger.warning( 

337 f"Could not get issues for {repo_full_name}: {response.status_code}" 

338 ) 

339 

340 except Exception: 

341 logger.exception(f"Error getting issues for {repo_full_name}") 

342 

343 return issues 

344 

345 def _get_file_content(self, file_url: str) -> str: 

346 """ 

347 Get content of a file from GitHub. 

348 

349 Args: 

350 file_url: API URL for the file 

351 

352 Returns: 

353 Decoded file content or empty string if not found 

354 """ 

355 try: 

356 # Apply rate limiting before request 

357 self._last_wait_time = self.rate_tracker.apply_rate_limit( 

358 self.engine_type 

359 ) 

360 

361 # Get file content 

362 response = safe_get(file_url, headers=self.headers) 

363 

364 # Check for rate limiting 

365 self._handle_rate_limits(response) 

366 

367 if response.status_code == 200: 

368 data = response.json() 

369 content = data.get("content", "") 

370 encoding = data.get("encoding", "") 

371 

372 if encoding == "base64" and content: 

373 return base64.b64decode(content).decode( 

374 "utf-8", errors="replace" 

375 ) 

376 return content 

377 else: 

378 logger.warning( 

379 f"Could not get file content: {response.status_code}" 

380 ) 

381 return "" 

382 

383 except Exception: 

384 logger.exception("Error getting file content") 

385 return "" 

386 

387 def _format_repository_preview( 

388 self, repo: Dict[str, Any] 

389 ) -> Dict[str, Any]: 

390 """Format repository search result as preview""" 

391 return { 

392 "id": str(repo.get("id", "")), 

393 "title": repo.get("full_name", ""), 

394 "link": repo.get("html_url", ""), 

395 "snippet": repo.get("description", "No description provided"), 

396 "stars": repo.get("stargazers_count", 0), 

397 "forks": repo.get("forks_count", 0), 

398 "language": repo.get("language", ""), 

399 "updated_at": repo.get("updated_at", ""), 

400 "created_at": repo.get("created_at", ""), 

401 "topics": repo.get("topics", []), 

402 "owner": repo.get("owner", {}).get("login", ""), 

403 "is_fork": repo.get("fork", False), 

404 "search_type": "repository", 

405 "repo_full_name": repo.get("full_name", ""), 

406 } 

407 

408 def _format_code_preview(self, code: Dict[str, Any]) -> Dict[str, Any]: 

409 """Format code search result as preview""" 

410 repo = code.get("repository", {}) 

411 return { 

412 "id": f"code_{code.get('sha', '')}", 

413 "title": f"{code.get('name', '')} in {repo.get('full_name', '')}", 

414 "link": code.get("html_url", ""), 

415 "snippet": f"Match in {code.get('path', '')}", 

416 "path": code.get("path", ""), 

417 "repo_name": repo.get("full_name", ""), 

418 "repo_url": repo.get("html_url", ""), 

419 "search_type": "code", 

420 "file_url": code.get("url", ""), 

421 } 

422 

423 def _format_issue_preview(self, issue: Dict[str, Any]) -> Dict[str, Any]: 

424 """Format issue search result as preview""" 

425 repo = ( 

426 issue.get("repository", {}) 

427 if "repository" in issue 

428 else {"full_name": ""} 

429 ) 

430 return { 

431 "id": f"issue_{issue.get('number', '')}", 

432 "title": issue.get("title", ""), 

433 "link": issue.get("html_url", ""), 

434 "snippet": ( 

435 issue.get("body", "")[:200] + "..." 

436 if len(issue.get("body", "")) > 200 

437 else issue.get("body", "") 

438 ), 

439 "state": issue.get("state", ""), 

440 "created_at": issue.get("created_at", ""), 

441 "updated_at": issue.get("updated_at", ""), 

442 "user": issue.get("user", {}).get("login", ""), 

443 "comments": issue.get("comments", 0), 

444 "search_type": "issue", 

445 "repo_name": repo.get("full_name", ""), 

446 } 

447 

448 def _format_user_preview(self, user: Dict[str, Any]) -> Dict[str, Any]: 

449 """Format user search result as preview""" 

450 return { 

451 "id": f"user_{user.get('id', '')}", 

452 "title": user.get("login", ""), 

453 "link": user.get("html_url", ""), 

454 "snippet": user.get("bio", "No bio provided"), 

455 "name": user.get("name", ""), 

456 "followers": user.get("followers", 0), 

457 "public_repos": user.get("public_repos", 0), 

458 "location": user.get("location", ""), 

459 "search_type": "user", 

460 "user_login": user.get("login", ""), 

461 } 

462 

463 def _get_previews(self, query: str) -> List[Dict[str, Any]]: 

464 """ 

465 Get preview information for GitHub search results. 

466 

467 Args: 

468 query: The search query 

469 

470 Returns: 

471 List of preview dictionaries 

472 """ 

473 logger.info(f"Getting GitHub previews for query: {query}") 

474 

475 # For contribution-focused queries, automatically adjust search type and add filters 

476 if any( 

477 term in query.lower() 

478 for term in [ 

479 "contribute", 

480 "contributing", 

481 "contribution", 

482 "beginner", 

483 "newcomer", 

484 ] 

485 ): 

486 # Use repositories search with help-wanted or good-first-issue labels 

487 original_search_type = self.search_type 

488 self.search_type = "repositories" 

489 self.search_endpoint = f"{self.api_base}/search/repositories" 

490 

491 # Create a specialized query for finding beginner-friendly projects 

492 specialized_query = "good-first-issues:>5 is:public archived:false" 

493 

494 # Extract language preferences if present 

495 languages = [] 

496 for lang in [ 

497 "python", 

498 "javascript", 

499 "java", 

500 "rust", 

501 "go", 

502 "typescript", 

503 "c#", 

504 "c++", 

505 "ruby", 

506 ]: 

507 if lang in query.lower(): 

508 languages.append(lang) 

509 

510 if languages: 

511 specialized_query += f" language:{' language:'.join(languages)}" 

512 

513 # Extract keywords 

514 keywords = [ 

515 word 

516 for word in query.split() 

517 if len(word) > 3 

518 and word.lower() 

519 not in [ 

520 "recommend", 

521 "recommended", 

522 "github", 

523 "repositories", 

524 "looking", 

525 "developers", 

526 "contribute", 

527 "contributing", 

528 "beginner", 

529 "newcomer", 

530 ] 

531 ] 

532 

533 if keywords: 

534 specialized_query += " " + " ".join( 

535 keywords[:5] 

536 ) # Add up to 5 keywords 

537 

538 logger.info( 

539 f"Using specialized contribution query: {specialized_query}" 

540 ) 

541 

542 # Perform GitHub search with specialized query 

543 results = self._search_github(specialized_query) 

544 

545 # Restore original search type 

546 self.search_type = original_search_type 

547 self.search_endpoint = f"{self.api_base}/search/{self.search_type}" 

548 else: 

549 # Perform standard GitHub search 

550 results = self._search_github(query) 

551 

552 if not results: 

553 logger.warning(f"No GitHub results found for query: {query}") 

554 return [] 

555 

556 # Format results as previews 

557 previews = [] 

558 for result in results: 

559 # Format based on search type 

560 if self.search_type == "repositories": 

561 preview = self._format_repository_preview(result) 

562 elif self.search_type == "code": 

563 preview = self._format_code_preview(result) 

564 elif self.search_type == "issues": 

565 preview = self._format_issue_preview(result) 

566 elif self.search_type == "users": 

567 preview = self._format_user_preview(result) 

568 else: 

569 logger.warning(f"Unknown search type: {self.search_type}") 

570 continue 

571 

572 previews.append(preview) 

573 

574 logger.info(f"Formatted {len(previews)} GitHub preview results") 

575 return previews 

576 

577 def _get_full_content( 

578 self, relevant_items: List[Dict[str, Any]] 

579 ) -> List[Dict[str, Any]]: 

580 """ 

581 Get full content for the relevant GitHub search results. 

582 

583 Args: 

584 relevant_items: List of relevant preview dictionaries 

585 

586 Returns: 

587 List of result dictionaries with full content 

588 """ 

589 # Check if we should add full content 

590 if ( 

591 hasattr(search_config, "SEARCH_SNIPPETS_ONLY") 

592 and search_config.SEARCH_SNIPPETS_ONLY 

593 ): 

594 logger.info("Snippet-only mode, skipping full content retrieval") 

595 return relevant_items 

596 

597 logger.info( 

598 f"Getting full content for {len(relevant_items)} GitHub results" 

599 ) 

600 

601 results = [] 

602 for item in relevant_items: 

603 result = item.copy() 

604 search_type = item.get("search_type", "") 

605 

606 # Add content based on search type 

607 if search_type == "repository" and self.include_readme: 

608 repo_full_name = item.get("repo_full_name", "") 

609 if repo_full_name: 

610 # Get README content 

611 readme_content = self._get_readme_content(repo_full_name) 

612 result["full_content"] = readme_content 

613 result["content_type"] = "readme" 

614 

615 # Get recent issues if requested 

616 if self.include_issues: 

617 issues = self._get_recent_issues(repo_full_name) 

618 result["recent_issues"] = issues 

619 

620 elif search_type == "code": 

621 file_url = item.get("file_url", "") 

622 if file_url: 

623 # Get file content 

624 file_content = self._get_file_content(file_url) 

625 result["full_content"] = file_content 

626 result["content_type"] = "file" 

627 

628 elif search_type == "issue": 

629 # For issues, the snippet usually contains a summary already 

630 # We'll just keep it as is 

631 result["full_content"] = item.get("snippet", "") 

632 result["content_type"] = "issue" 

633 

634 elif search_type == "user": 

635 # For users, construct a profile summary 

636 profile_summary = f"GitHub user: {item.get('title', '')}\n" 

637 

638 if item.get("name"): 

639 profile_summary += f"Name: {item.get('name')}\n" 

640 

641 if item.get("location"): 

642 profile_summary += f"Location: {item.get('location')}\n" 

643 

644 profile_summary += f"Followers: {item.get('followers', 0)}\n" 

645 profile_summary += ( 

646 f"Public repositories: {item.get('public_repos', 0)}\n" 

647 ) 

648 

649 if ( 

650 item.get("snippet") 

651 and item.get("snippet") != "No bio provided" 

652 ): 

653 profile_summary += f"\nBio: {item.get('snippet')}\n" 

654 

655 result["full_content"] = profile_summary 

656 result["content_type"] = "user_profile" 

657 

658 results.append(result) 

659 

660 return results 

661 

662 def search_repository( 

663 self, repo_owner: str, repo_name: str 

664 ) -> Dict[str, Any]: 

665 """ 

666 Get detailed information about a specific repository. 

667 

668 Args: 

669 repo_owner: Owner of the repository 

670 repo_name: Name of the repository 

671 

672 Returns: 

673 Dictionary with repository information 

674 """ 

675 repo_full_name = f"{repo_owner}/{repo_name}" 

676 logger.info(f"Getting details for repository: {repo_full_name}") 

677 

678 try: 

679 # Get repository details 

680 # Apply rate limiting before request 

681 self._last_wait_time = self.rate_tracker.apply_rate_limit( 

682 self.engine_type 

683 ) 

684 

685 response = safe_get( 

686 f"{self.api_base}/repos/{repo_full_name}", headers=self.headers 

687 ) 

688 

689 # Check for rate limiting 

690 self._handle_rate_limits(response) 

691 

692 if response.status_code == 200: 

693 repo = response.json() 

694 

695 # Format as repository preview 

696 result = self._format_repository_preview(repo) 

697 

698 # Add README content if requested 

699 if self.include_readme: 

700 readme_content = self._get_readme_content(repo_full_name) 

701 result["full_content"] = readme_content 

702 result["content_type"] = "readme" 

703 

704 # Add recent issues if requested 

705 if self.include_issues: 

706 issues = self._get_recent_issues(repo_full_name) 

707 result["recent_issues"] = issues 

708 

709 return result 

710 else: 

711 logger.error( 

712 f"Error getting repository details: {response.status_code} - {response.text}" 

713 ) 

714 return {} 

715 

716 except Exception: 

717 logger.exception("Error getting repository details") 

718 return {} 

719 

720 def search_code( 

721 self, 

722 query: str, 

723 language: Optional[str] = None, 

724 user: Optional[str] = None, 

725 ) -> List[Dict[str, Any]]: 

726 """ 

727 Search for code with more specific parameters. 

728 

729 Args: 

730 query: Code search query 

731 language: Filter by programming language 

732 user: Filter by GitHub username/organization 

733 

734 Returns: 

735 List of code search results 

736 """ 

737 # Build advanced query 

738 advanced_query = query 

739 

740 if language: 

741 advanced_query += f" language:{language}" 

742 

743 if user: 

744 advanced_query += f" user:{user}" 

745 

746 # Save current search type 

747 original_search_type = self.search_type 

748 

749 try: 

750 # Set search type to code 

751 self.search_type = "code" 

752 self.search_endpoint = f"{self.api_base}/search/code" 

753 

754 # Perform search 

755 results = self._search_github(advanced_query) 

756 

757 # Format results 

758 previews = [self._format_code_preview(result) for result in results] 

759 

760 # Get full content if requested 

761 if ( 

762 hasattr(search_config, "SEARCH_SNIPPETS_ONLY") 

763 and not search_config.SEARCH_SNIPPETS_ONLY 

764 ): 

765 return self._get_full_content(previews) 

766 

767 return previews 

768 

769 finally: 

770 # Restore original search type 

771 self.search_type = original_search_type 

772 self.search_endpoint = f"{self.api_base}/search/{self.search_type}" 

773 

774 def search_issues( 

775 self, query: str, state: str = "open", sort: str = "updated" 

776 ) -> List[Dict[str, Any]]: 

777 """ 

778 Search for issues with more specific parameters. 

779 

780 Args: 

781 query: Issue search query 

782 state: Filter by issue state ("open", "closed", "all") 

783 sort: Sort order ("updated", "created", "comments") 

784 

785 Returns: 

786 List of issue search results 

787 """ 

788 # Build advanced query 

789 advanced_query = query + f" state:{state}" 

790 

791 # Save current search type 

792 original_search_type = self.search_type 

793 

794 try: 

795 # Set search type to issues 

796 self.search_type = "issues" 

797 self.search_endpoint = f"{self.api_base}/search/issues" 

798 

799 # Set sort parameter 

800 params = { 

801 "q": advanced_query, 

802 "per_page": min(self.max_results, 100), 

803 "page": 1, 

804 "sort": sort, 

805 "order": "desc", 

806 } 

807 

808 # Perform search 

809 response = safe_get( 

810 self.search_endpoint, headers=self.headers, params=params 

811 ) 

812 

813 # Check for rate limiting 

814 self._handle_rate_limits(response) 

815 

816 if response.status_code == 200: 

817 data = response.json() 

818 results = data.get("items", []) 

819 

820 # Format results 

821 previews = [ 

822 self._format_issue_preview(result) for result in results 

823 ] 

824 

825 # For issues, we don't need to get full content 

826 return previews 

827 else: 

828 logger.error( 

829 f"GitHub API error: {response.status_code} - {response.text}" 

830 ) 

831 return [] 

832 

833 finally: 

834 # Restore original search type 

835 self.search_type = original_search_type 

836 self.search_endpoint = f"{self.api_base}/search/{self.search_type}" 

837 

838 def set_search_type(self, search_type: str): 

839 """ 

840 Set the search type for subsequent searches. 

841 

842 Args: 

843 search_type: Type of GitHub search ("repositories", "code", "issues", "users") 

844 """ 

845 if search_type in ["repositories", "code", "issues", "users"]: 

846 self.search_type = search_type 

847 self.search_endpoint = f"{self.api_base}/search/{search_type}" 

848 logger.info(f"Set GitHub search type to: {search_type}") 

849 else: 

850 logger.error(f"Invalid GitHub search type: {search_type}") 

851 

852 def _filter_for_relevance( 

853 self, previews: List[Dict[str, Any]], query: str 

854 ) -> List[Dict[str, Any]]: 

855 """ 

856 Filter GitHub search results for relevance using LLM. 

857 

858 Args: 

859 previews: List of preview dictionaries 

860 query: Original search query 

861 

862 Returns: 

863 List of relevant preview dictionaries 

864 """ 

865 if not self.llm or not previews: 

866 return previews 

867 

868 # Create a specialized prompt for GitHub results 

869 prompt = f"""Analyze these GitHub search results and rank them by relevance to the query. 

870Consider: 

8711. Repository stars and activity (higher is better) 

8722. Match between query intent and repository description 

8733. Repository language and topics 

8744. Last update time (more recent is better) 

8755. Whether it's a fork (original repositories are preferred) 

876 

877Query: "{query}" 

878 

879Results: 

880{json.dumps(previews, indent=2)} 

881 

882Return ONLY a JSON array of indices in order of relevance (most relevant first). 

883Example: [0, 2, 1, 3] 

884Do not include any other text or explanation.""" 

885 

886 try: 

887 response = self.llm.invoke(prompt) 

888 response_text = response.content.strip() 

889 

890 # Extract JSON array from response 

891 start_idx = response_text.find("[") 

892 end_idx = response_text.rfind("]") 

893 

894 if start_idx >= 0 and end_idx > start_idx: 

895 array_text = response_text[start_idx : end_idx + 1] 

896 ranked_indices = json.loads(array_text) 

897 

898 # Return the results in ranked order 

899 ranked_results = [] 

900 for idx in ranked_indices: 

901 if idx < len(previews): 

902 ranked_results.append(previews[idx]) 

903 

904 # Limit to max_filtered_results if specified 

905 if ( 

906 self.max_filtered_results 

907 and len(ranked_results) > self.max_filtered_results 

908 ): 

909 logger.info( 

910 f"Limiting filtered results to top {self.max_filtered_results}" 

911 ) 

912 return ranked_results[: self.max_filtered_results] 

913 

914 return ranked_results 

915 else: 

916 logger.info( 

917 "Could not find JSON array in response, returning no previews" 

918 ) 

919 return [] 

920 

921 except Exception: 

922 logger.exception("Error filtering GitHub results") 

923 return []