Coverage for src / local_deep_research / web_search_engines / engines / search_engine_github.py: 10%
320 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1import base64
2import json
3import time
4from typing import Any, Dict, List, Optional
6from langchain_core.language_models import BaseLLM
7from loguru import logger
9from ...config import llm_config, search_config
10from ...security.safe_requests import safe_get
11from ..search_engine_base import BaseSearchEngine
class GitHubSearchEngine(BaseSearchEngine):
    """
    GitHub search engine implementation.
    Provides search across GitHub repositories, code, issues, and users.
    """

    def __init__(
        self,
        max_results: int = 15,
        api_key: Optional[str] = None,
        search_type: str = "repositories",
        include_readme: bool = True,
        include_issues: bool = False,
        llm: Optional[BaseLLM] = None,
        max_filtered_results: Optional[int] = None,
    ):
        """
        Initialize the GitHub search engine.

        Args:
            max_results: Maximum number of search results
            api_key: GitHub API token for authenticated requests (optional)
            search_type: Type of GitHub search ("repositories", "code", "issues", "users")
            include_readme: Whether to include README content for repositories
            include_issues: Whether to include recent issues for repositories
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
        """
        # The base class owns the LLM handle and the result-count limits.
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )

        # Engine-specific configuration.
        self.api_key = api_key
        self.search_type = search_type
        self.include_readme = include_readme
        self.include_issues = include_issues

        # REST endpoints derived from the configured search type.
        self.api_base = "https://api.github.com"
        self.search_endpoint = f"{self.api_base}/search/{search_type}"

        # Headers sent with every API call; attach a token when we have one.
        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Local-Deep-Research-Agent",
        }
        if self.api_key:
            self.headers["Authorization"] = f"token {self.api_key}"
            logger.info("Using authenticated GitHub API requests")
        else:
            logger.warning(
                "No GitHub API key provided. Rate limits will be restricted."
            )
72 def _handle_rate_limits(self, response):
73 """Handle GitHub API rate limits by logging warnings and sleeping if necessary"""
74 remaining = int(response.headers.get("X-RateLimit-Remaining", 60))
75 reset_time = int(response.headers.get("X-RateLimit-Reset", 0))
77 if remaining < 5:
78 current_time = time.time()
79 wait_time = max(reset_time - current_time, 0)
80 logger.warning(
81 f"GitHub API rate limit almost reached. {remaining} requests remaining."
82 )
84 if wait_time > 0 and remaining == 0:
85 logger.warning(
86 f"GitHub API rate limit exceeded. Waiting {wait_time:.0f} seconds."
87 )
88 time.sleep(min(wait_time, 60)) # Wait at most 60 seconds
    def _optimize_github_query(self, query: str) -> str:
        """
        Optimize the GitHub search query using LLM to improve search results.

        Args:
            query: Original search query

        Returns:
            Optimized GitHub search query, or the original query when no LLM
            is available or optimization fails.
        """
        # Get LLM from config if not already set; any failure falls back to
        # returning the query unchanged rather than raising.
        if not self.llm:
            try:
                self.llm = llm_config.get_llm()
                if not self.llm:
                    logger.warning("No LLM available for query optimization")
                    return query
            except Exception:
                logger.exception("Error getting LLM from config")
                return query

        # NOTE(review): the model's reply is used verbatim as the GitHub
        # search string; any extra prose from the model would be sent to the
        # API as-is.
        prompt = f"""Transform this GitHub search query into an optimized version for the GitHub search API. Follow these steps:
1. Strip question words (e.g., 'what', 'are', 'is'), stop words (e.g., 'and', 'as', 'of', 'on'), and redundant terms (e.g., 'repositories', 'repos', 'github') since they're implied by the search context.
2. Keep only domain-specific keywords and avoid using "-related" terms.
3. Add GitHub-specific filters with dynamic thresholds based on query context:
- For stars: Use higher threshold (e.g., 'stars:>1000') for mainstream topics, lower (e.g., 'stars:>50') for specialized topics
- For language: Detect programming language from query or omit if unclear
- For search scope: Use 'in:name,description,readme' for general queries, 'in:file' for code-specific queries
4. For date ranges, adapt based on query context:
- For emerging: Use 'created:>2024-01-01'
- For mature: Use 'pushed:>2023-01-01'
- For historical research: Use 'created:2020-01-01..2024-01-01'
5. For excluding results, adapt based on query:
- Exclude irrelevant languages based on context
- Use 'NOT' to exclude competing terms
6. Ensure the output is a concise, space-separated string with no punctuation or extra text beyond keywords and filters.


Original query: "{query}"

Return ONLY the optimized query, ready for GitHub's search API. Do not include explanations or additional text."""

        try:
            response = self.llm.invoke(prompt)

            # Handle different response formats (string or object with content attribute)
            if hasattr(response, "content"):
                optimized_query = response.content.strip()
            else:
                # Handle string responses
                optimized_query = str(response).strip()

            # Validate the optimized query; an empty reply means the model
            # gave us nothing usable, so keep the original.
            if optimized_query and len(optimized_query) > 0:
                logger.info(
                    f"LLM optimized query from '{query}' to '{optimized_query}'"
                )
                return optimized_query
            else:
                logger.warning("LLM returned empty query, using original")
                return query

        except Exception:
            logger.exception("Error optimizing query with LLM")
            return query
156 def _search_github(self, query: str) -> List[Dict[str, Any]]:
157 """
158 Perform a GitHub search based on the configured search type.
160 Args:
161 query: The search query
163 Returns:
164 List of GitHub search result items
165 """
166 results = []
168 try:
169 # Optimize GitHub query using LLM
170 github_query = self._optimize_github_query(query)
172 logger.info(f"Final GitHub query: {github_query}")
174 # Construct search parameters
175 params = {
176 "q": github_query,
177 "per_page": min(
178 self.max_results, 100
179 ), # GitHub API max is 100 per page
180 "page": 1,
181 }
183 # Add sort parameters based on search type
184 if self.search_type == "repositories":
185 params["sort"] = "stars"
186 params["order"] = "desc"
187 elif self.search_type == "code":
188 params["sort"] = "indexed"
189 params["order"] = "desc"
190 elif self.search_type == "issues":
191 params["sort"] = "updated"
192 params["order"] = "desc"
193 elif self.search_type == "users":
194 params["sort"] = "followers"
195 params["order"] = "desc"
197 # Apply rate limiting before request
198 self._last_wait_time = self.rate_tracker.apply_rate_limit(
199 self.engine_type
200 )
202 # Execute the API request
203 response = safe_get(
204 self.search_endpoint, headers=self.headers, params=params
205 )
207 # Check for rate limiting
208 self._handle_rate_limits(response)
210 # Handle response with detailed logging
211 if response.status_code == 200:
212 data = response.json()
213 total_count = data.get("total_count", 0)
214 results = data.get("items", [])
215 logger.info(
216 f"GitHub search returned {len(results)} results (total available: {total_count})"
217 )
219 # Log the rate limit information
220 rate_limit_remaining = response.headers.get(
221 "X-RateLimit-Remaining", "unknown"
222 )
223 logger.info(
224 f"GitHub API rate limit: {rate_limit_remaining} requests remaining"
225 )
227 # If no results, try to provide more guidance
228 if not results:
229 logger.warning(
230 "No results found. Consider these search tips:"
231 )
232 logger.warning("1. Use shorter, more specific queries")
233 logger.warning(
234 "2. For repositories, try adding 'stars:>100' or 'language:python'"
235 )
236 logger.warning(
237 "3. For contribution opportunities, search for 'good-first-issue' or 'help-wanted'"
238 )
239 else:
240 logger.error(
241 f"GitHub API error: {response.status_code} - {response.text}"
242 )
244 except Exception:
245 logger.exception("Error searching GitHub")
247 return results
249 def _get_readme_content(self, repo_full_name: str) -> str:
250 """
251 Get README content for a repository.
253 Args:
254 repo_full_name: Full name of the repository (owner/repo)
256 Returns:
257 Decoded README content or empty string if not found
258 """
259 try:
260 # Get README
261 # Apply rate limiting before request
262 self._last_wait_time = self.rate_tracker.apply_rate_limit(
263 self.engine_type
264 )
266 response = safe_get(
267 f"{self.api_base}/repos/{repo_full_name}/readme",
268 headers=self.headers,
269 )
271 # Check for rate limiting
272 self._handle_rate_limits(response)
274 if response.status_code == 200:
275 data = response.json()
276 content = data.get("content", "")
277 encoding = data.get("encoding", "")
279 if encoding == "base64" and content:
280 return base64.b64decode(content).decode(
281 "utf-8", errors="replace"
282 )
283 return content
284 else:
285 logger.warning(
286 f"Could not get README for {repo_full_name}: {response.status_code}"
287 )
288 return ""
290 except Exception:
291 logger.exception(f"Error getting README for {repo_full_name}")
292 return ""
294 def _get_recent_issues(
295 self, repo_full_name: str, limit: int = 5
296 ) -> List[Dict[str, Any]]:
297 """
298 Get recent issues for a repository.
300 Args:
301 repo_full_name: Full name of the repository (owner/repo)
302 limit: Maximum number of issues to return
304 Returns:
305 List of recent issues
306 """
307 issues = []
309 try:
310 # Get recent issues
311 # Apply rate limiting before request
312 self._last_wait_time = self.rate_tracker.apply_rate_limit(
313 self.engine_type
314 )
316 response = safe_get(
317 f"{self.api_base}/repos/{repo_full_name}/issues",
318 headers=self.headers,
319 params={
320 "state": "all",
321 "per_page": limit,
322 "sort": "updated",
323 "direction": "desc",
324 },
325 )
327 # Check for rate limiting
328 self._handle_rate_limits(response)
330 if response.status_code == 200:
331 issues = response.json()
332 logger.info(
333 f"Got {len(issues)} recent issues for {repo_full_name}"
334 )
335 else:
336 logger.warning(
337 f"Could not get issues for {repo_full_name}: {response.status_code}"
338 )
340 except Exception:
341 logger.exception(f"Error getting issues for {repo_full_name}")
343 return issues
345 def _get_file_content(self, file_url: str) -> str:
346 """
347 Get content of a file from GitHub.
349 Args:
350 file_url: API URL for the file
352 Returns:
353 Decoded file content or empty string if not found
354 """
355 try:
356 # Apply rate limiting before request
357 self._last_wait_time = self.rate_tracker.apply_rate_limit(
358 self.engine_type
359 )
361 # Get file content
362 response = safe_get(file_url, headers=self.headers)
364 # Check for rate limiting
365 self._handle_rate_limits(response)
367 if response.status_code == 200:
368 data = response.json()
369 content = data.get("content", "")
370 encoding = data.get("encoding", "")
372 if encoding == "base64" and content:
373 return base64.b64decode(content).decode(
374 "utf-8", errors="replace"
375 )
376 return content
377 else:
378 logger.warning(
379 f"Could not get file content: {response.status_code}"
380 )
381 return ""
383 except Exception:
384 logger.exception("Error getting file content")
385 return ""
387 def _format_repository_preview(
388 self, repo: Dict[str, Any]
389 ) -> Dict[str, Any]:
390 """Format repository search result as preview"""
391 return {
392 "id": str(repo.get("id", "")),
393 "title": repo.get("full_name", ""),
394 "link": repo.get("html_url", ""),
395 "snippet": repo.get("description", "No description provided"),
396 "stars": repo.get("stargazers_count", 0),
397 "forks": repo.get("forks_count", 0),
398 "language": repo.get("language", ""),
399 "updated_at": repo.get("updated_at", ""),
400 "created_at": repo.get("created_at", ""),
401 "topics": repo.get("topics", []),
402 "owner": repo.get("owner", {}).get("login", ""),
403 "is_fork": repo.get("fork", False),
404 "search_type": "repository",
405 "repo_full_name": repo.get("full_name", ""),
406 }
408 def _format_code_preview(self, code: Dict[str, Any]) -> Dict[str, Any]:
409 """Format code search result as preview"""
410 repo = code.get("repository", {})
411 return {
412 "id": f"code_{code.get('sha', '')}",
413 "title": f"{code.get('name', '')} in {repo.get('full_name', '')}",
414 "link": code.get("html_url", ""),
415 "snippet": f"Match in {code.get('path', '')}",
416 "path": code.get("path", ""),
417 "repo_name": repo.get("full_name", ""),
418 "repo_url": repo.get("html_url", ""),
419 "search_type": "code",
420 "file_url": code.get("url", ""),
421 }
423 def _format_issue_preview(self, issue: Dict[str, Any]) -> Dict[str, Any]:
424 """Format issue search result as preview"""
425 repo = (
426 issue.get("repository", {})
427 if "repository" in issue
428 else {"full_name": ""}
429 )
430 return {
431 "id": f"issue_{issue.get('number', '')}",
432 "title": issue.get("title", ""),
433 "link": issue.get("html_url", ""),
434 "snippet": (
435 issue.get("body", "")[:200] + "..."
436 if len(issue.get("body", "")) > 200
437 else issue.get("body", "")
438 ),
439 "state": issue.get("state", ""),
440 "created_at": issue.get("created_at", ""),
441 "updated_at": issue.get("updated_at", ""),
442 "user": issue.get("user", {}).get("login", ""),
443 "comments": issue.get("comments", 0),
444 "search_type": "issue",
445 "repo_name": repo.get("full_name", ""),
446 }
448 def _format_user_preview(self, user: Dict[str, Any]) -> Dict[str, Any]:
449 """Format user search result as preview"""
450 return {
451 "id": f"user_{user.get('id', '')}",
452 "title": user.get("login", ""),
453 "link": user.get("html_url", ""),
454 "snippet": user.get("bio", "No bio provided"),
455 "name": user.get("name", ""),
456 "followers": user.get("followers", 0),
457 "public_repos": user.get("public_repos", 0),
458 "location": user.get("location", ""),
459 "search_type": "user",
460 "user_login": user.get("login", ""),
461 }
463 def _get_previews(self, query: str) -> List[Dict[str, Any]]:
464 """
465 Get preview information for GitHub search results.
467 Args:
468 query: The search query
470 Returns:
471 List of preview dictionaries
472 """
473 logger.info(f"Getting GitHub previews for query: {query}")
475 # For contribution-focused queries, automatically adjust search type and add filters
476 if any(
477 term in query.lower()
478 for term in [
479 "contribute",
480 "contributing",
481 "contribution",
482 "beginner",
483 "newcomer",
484 ]
485 ):
486 # Use repositories search with help-wanted or good-first-issue labels
487 original_search_type = self.search_type
488 self.search_type = "repositories"
489 self.search_endpoint = f"{self.api_base}/search/repositories"
491 # Create a specialized query for finding beginner-friendly projects
492 specialized_query = "good-first-issues:>5 is:public archived:false"
494 # Extract language preferences if present
495 languages = []
496 for lang in [
497 "python",
498 "javascript",
499 "java",
500 "rust",
501 "go",
502 "typescript",
503 "c#",
504 "c++",
505 "ruby",
506 ]:
507 if lang in query.lower():
508 languages.append(lang)
510 if languages:
511 specialized_query += f" language:{' language:'.join(languages)}"
513 # Extract keywords
514 keywords = [
515 word
516 for word in query.split()
517 if len(word) > 3
518 and word.lower()
519 not in [
520 "recommend",
521 "recommended",
522 "github",
523 "repositories",
524 "looking",
525 "developers",
526 "contribute",
527 "contributing",
528 "beginner",
529 "newcomer",
530 ]
531 ]
533 if keywords:
534 specialized_query += " " + " ".join(
535 keywords[:5]
536 ) # Add up to 5 keywords
538 logger.info(
539 f"Using specialized contribution query: {specialized_query}"
540 )
542 # Perform GitHub search with specialized query
543 results = self._search_github(specialized_query)
545 # Restore original search type
546 self.search_type = original_search_type
547 self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
548 else:
549 # Perform standard GitHub search
550 results = self._search_github(query)
552 if not results:
553 logger.warning(f"No GitHub results found for query: {query}")
554 return []
556 # Format results as previews
557 previews = []
558 for result in results:
559 # Format based on search type
560 if self.search_type == "repositories":
561 preview = self._format_repository_preview(result)
562 elif self.search_type == "code":
563 preview = self._format_code_preview(result)
564 elif self.search_type == "issues":
565 preview = self._format_issue_preview(result)
566 elif self.search_type == "users":
567 preview = self._format_user_preview(result)
568 else:
569 logger.warning(f"Unknown search type: {self.search_type}")
570 continue
572 previews.append(preview)
574 logger.info(f"Formatted {len(previews)} GitHub preview results")
575 return previews
577 def _get_full_content(
578 self, relevant_items: List[Dict[str, Any]]
579 ) -> List[Dict[str, Any]]:
580 """
581 Get full content for the relevant GitHub search results.
583 Args:
584 relevant_items: List of relevant preview dictionaries
586 Returns:
587 List of result dictionaries with full content
588 """
589 # Check if we should add full content
590 if (
591 hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
592 and search_config.SEARCH_SNIPPETS_ONLY
593 ):
594 logger.info("Snippet-only mode, skipping full content retrieval")
595 return relevant_items
597 logger.info(
598 f"Getting full content for {len(relevant_items)} GitHub results"
599 )
601 results = []
602 for item in relevant_items:
603 result = item.copy()
604 search_type = item.get("search_type", "")
606 # Add content based on search type
607 if search_type == "repository" and self.include_readme:
608 repo_full_name = item.get("repo_full_name", "")
609 if repo_full_name:
610 # Get README content
611 readme_content = self._get_readme_content(repo_full_name)
612 result["full_content"] = readme_content
613 result["content_type"] = "readme"
615 # Get recent issues if requested
616 if self.include_issues:
617 issues = self._get_recent_issues(repo_full_name)
618 result["recent_issues"] = issues
620 elif search_type == "code":
621 file_url = item.get("file_url", "")
622 if file_url:
623 # Get file content
624 file_content = self._get_file_content(file_url)
625 result["full_content"] = file_content
626 result["content_type"] = "file"
628 elif search_type == "issue":
629 # For issues, the snippet usually contains a summary already
630 # We'll just keep it as is
631 result["full_content"] = item.get("snippet", "")
632 result["content_type"] = "issue"
634 elif search_type == "user":
635 # For users, construct a profile summary
636 profile_summary = f"GitHub user: {item.get('title', '')}\n"
638 if item.get("name"):
639 profile_summary += f"Name: {item.get('name')}\n"
641 if item.get("location"):
642 profile_summary += f"Location: {item.get('location')}\n"
644 profile_summary += f"Followers: {item.get('followers', 0)}\n"
645 profile_summary += (
646 f"Public repositories: {item.get('public_repos', 0)}\n"
647 )
649 if (
650 item.get("snippet")
651 and item.get("snippet") != "No bio provided"
652 ):
653 profile_summary += f"\nBio: {item.get('snippet')}\n"
655 result["full_content"] = profile_summary
656 result["content_type"] = "user_profile"
658 results.append(result)
660 return results
662 def search_repository(
663 self, repo_owner: str, repo_name: str
664 ) -> Dict[str, Any]:
665 """
666 Get detailed information about a specific repository.
668 Args:
669 repo_owner: Owner of the repository
670 repo_name: Name of the repository
672 Returns:
673 Dictionary with repository information
674 """
675 repo_full_name = f"{repo_owner}/{repo_name}"
676 logger.info(f"Getting details for repository: {repo_full_name}")
678 try:
679 # Get repository details
680 # Apply rate limiting before request
681 self._last_wait_time = self.rate_tracker.apply_rate_limit(
682 self.engine_type
683 )
685 response = safe_get(
686 f"{self.api_base}/repos/{repo_full_name}", headers=self.headers
687 )
689 # Check for rate limiting
690 self._handle_rate_limits(response)
692 if response.status_code == 200:
693 repo = response.json()
695 # Format as repository preview
696 result = self._format_repository_preview(repo)
698 # Add README content if requested
699 if self.include_readme:
700 readme_content = self._get_readme_content(repo_full_name)
701 result["full_content"] = readme_content
702 result["content_type"] = "readme"
704 # Add recent issues if requested
705 if self.include_issues:
706 issues = self._get_recent_issues(repo_full_name)
707 result["recent_issues"] = issues
709 return result
710 else:
711 logger.error(
712 f"Error getting repository details: {response.status_code} - {response.text}"
713 )
714 return {}
716 except Exception:
717 logger.exception("Error getting repository details")
718 return {}
720 def search_code(
721 self,
722 query: str,
723 language: Optional[str] = None,
724 user: Optional[str] = None,
725 ) -> List[Dict[str, Any]]:
726 """
727 Search for code with more specific parameters.
729 Args:
730 query: Code search query
731 language: Filter by programming language
732 user: Filter by GitHub username/organization
734 Returns:
735 List of code search results
736 """
737 # Build advanced query
738 advanced_query = query
740 if language:
741 advanced_query += f" language:{language}"
743 if user:
744 advanced_query += f" user:{user}"
746 # Save current search type
747 original_search_type = self.search_type
749 try:
750 # Set search type to code
751 self.search_type = "code"
752 self.search_endpoint = f"{self.api_base}/search/code"
754 # Perform search
755 results = self._search_github(advanced_query)
757 # Format results
758 previews = [self._format_code_preview(result) for result in results]
760 # Get full content if requested
761 if (
762 hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
763 and not search_config.SEARCH_SNIPPETS_ONLY
764 ):
765 return self._get_full_content(previews)
767 return previews
769 finally:
770 # Restore original search type
771 self.search_type = original_search_type
772 self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
774 def search_issues(
775 self, query: str, state: str = "open", sort: str = "updated"
776 ) -> List[Dict[str, Any]]:
777 """
778 Search for issues with more specific parameters.
780 Args:
781 query: Issue search query
782 state: Filter by issue state ("open", "closed", "all")
783 sort: Sort order ("updated", "created", "comments")
785 Returns:
786 List of issue search results
787 """
788 # Build advanced query
789 advanced_query = query + f" state:{state}"
791 # Save current search type
792 original_search_type = self.search_type
794 try:
795 # Set search type to issues
796 self.search_type = "issues"
797 self.search_endpoint = f"{self.api_base}/search/issues"
799 # Set sort parameter
800 params = {
801 "q": advanced_query,
802 "per_page": min(self.max_results, 100),
803 "page": 1,
804 "sort": sort,
805 "order": "desc",
806 }
808 # Perform search
809 response = safe_get(
810 self.search_endpoint, headers=self.headers, params=params
811 )
813 # Check for rate limiting
814 self._handle_rate_limits(response)
816 if response.status_code == 200:
817 data = response.json()
818 results = data.get("items", [])
820 # Format results
821 previews = [
822 self._format_issue_preview(result) for result in results
823 ]
825 # For issues, we don't need to get full content
826 return previews
827 else:
828 logger.error(
829 f"GitHub API error: {response.status_code} - {response.text}"
830 )
831 return []
833 finally:
834 # Restore original search type
835 self.search_type = original_search_type
836 self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
838 def set_search_type(self, search_type: str):
839 """
840 Set the search type for subsequent searches.
842 Args:
843 search_type: Type of GitHub search ("repositories", "code", "issues", "users")
844 """
845 if search_type in ["repositories", "code", "issues", "users"]:
846 self.search_type = search_type
847 self.search_endpoint = f"{self.api_base}/search/{search_type}"
848 logger.info(f"Set GitHub search type to: {search_type}")
849 else:
850 logger.error(f"Invalid GitHub search type: {search_type}")
852 def _filter_for_relevance(
853 self, previews: List[Dict[str, Any]], query: str
854 ) -> List[Dict[str, Any]]:
855 """
856 Filter GitHub search results for relevance using LLM.
858 Args:
859 previews: List of preview dictionaries
860 query: Original search query
862 Returns:
863 List of relevant preview dictionaries
864 """
865 if not self.llm or not previews:
866 return previews
868 # Create a specialized prompt for GitHub results
869 prompt = f"""Analyze these GitHub search results and rank them by relevance to the query.
870Consider:
8711. Repository stars and activity (higher is better)
8722. Match between query intent and repository description
8733. Repository language and topics
8744. Last update time (more recent is better)
8755. Whether it's a fork (original repositories are preferred)
877Query: "{query}"
879Results:
880{json.dumps(previews, indent=2)}
882Return ONLY a JSON array of indices in order of relevance (most relevant first).
883Example: [0, 2, 1, 3]
884Do not include any other text or explanation."""
886 try:
887 response = self.llm.invoke(prompt)
888 response_text = response.content.strip()
890 # Extract JSON array from response
891 start_idx = response_text.find("[")
892 end_idx = response_text.rfind("]")
894 if start_idx >= 0 and end_idx > start_idx:
895 array_text = response_text[start_idx : end_idx + 1]
896 ranked_indices = json.loads(array_text)
898 # Return the results in ranked order
899 ranked_results = []
900 for idx in ranked_indices:
901 if idx < len(previews):
902 ranked_results.append(previews[idx])
904 # Limit to max_filtered_results if specified
905 if (
906 self.max_filtered_results
907 and len(ranked_results) > self.max_filtered_results
908 ):
909 logger.info(
910 f"Limiting filtered results to top {self.max_filtered_results}"
911 )
912 return ranked_results[: self.max_filtered_results]
914 return ranked_results
915 else:
916 logger.info(
917 "Could not find JSON array in response, returning no previews"
918 )
919 return []
921 except Exception:
922 logger.exception("Error filtering GitHub results")
923 return []