Coverage for src / local_deep_research / news / utils / topic_generator.py: 100%
57 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Topic generation utilities for news items.
3Uses LLM to extract relevant topics/tags from news content.
4"""
6from loguru import logger
7from typing import List
9from ...utilities.json_utils import extract_json, get_llm_response_text
def generate_topics(
    query: str, findings: str = "", category: str = "", max_topics: int = 5
) -> List[str]:
    """
    Produce topic tags for a news item.

    The topics are requested from the LLM via ``_generate_with_llm``. There
    is deliberately no heuristic fallback: when the LLM yields nothing, a
    single placeholder entry is used so the failure stays visible. Whatever
    list results is then passed through ``_validate_topics``.

    Args:
        query: The search query or research question
        findings: The research findings/content
        category: The news category (if available)
        max_topics: Maximum number of topics to generate

    Returns:
        List of topic strings as returned by ``_validate_topics``
    """
    llm_topics = _generate_with_llm(query, findings, category, max_topics)

    # An empty result means generation failed; surface that explicitly
    # rather than inventing topics.
    candidates = llm_topics if llm_topics else ["[Topic generation failed]"]

    return _validate_topics(candidates, max_topics)
def _generate_with_llm(
    query: str, findings: str, category: str, max_topics: int
) -> List[str]:
    """
    Ask the configured LLM for topic tags describing a news item.

    The query (truncated to 500 characters) and the findings (truncated to
    1000 characters) are embedded in a prompt that requests a JSON array of
    topics. If the response cannot be parsed as a JSON list, a
    comma-separated plain-text response is accepted as a fallback.

    Args:
        query: The search query or research question
        findings: The research findings/content (may be empty)
        category: The news category (may be empty)
        max_topics: Number of topics requested and upper bound on the result

    Returns:
        Stripped topic strings of at most 30 characters each, sliced to
        ``max_topics`` entries. An empty list is returned when nothing
        usable was produced or when any exception occurred; the exception
        is logged at debug level and not re-raised.
    """
    try:
        from ...config.llm_config import get_llm

        logger.debug(
            f"Topic generation - findings length: {len(findings) if findings else 0}, category: {category}"
        )

        # Use the configured model for topic generation
        model = get_llm(temperature=0.5)

        try:
            # Keep the prompt small: cap query and findings length
            query_snippet = query if len(query) <= 500 else query[:500]
            findings_snippet = findings
            if findings and len(findings) > 1000:
                findings_snippet = findings[:1000]

            prompt = f"""Extract relevant topics/tags from this news content.

Query: {query_snippet}
{f"Content: {findings_snippet}" if findings_snippet else ""}
{f"Category: {category}" if category else ""}

Generate {max_topics} specific, relevant topics that would help categorize and filter this news item.

Requirements:
- Each topic should be 1-3 words
- Topics should be specific and meaningful
- Include geographic regions if mentioned
- Include key entities (countries, organizations, people)
- Include event types (conflict, economy, disaster, etc.)
- Topics should be diverse and cover different aspects

Return ONLY a JSON array of topic strings, like: ["Topic 1", "Topic 2", "Topic 3"]"""

            reply = llm_reply = model.invoke(prompt)
            content = get_llm_response_text(llm_reply)

            # Preferred path: the reply contains a JSON list
            parsed = extract_json(content, expected_type=list)

            if parsed is None:
                # Fallback path: treat the reply as comma-separated text
                logger.debug(f"Failed to parse LLM topics as JSON: {content}")
                if "," in content:
                    pieces = [
                        part.strip().strip("\"'")
                        for part in content.split(",")
                    ]
                    return [
                        piece for piece in pieces if piece and len(piece) <= 30
                    ][:max_topics]
            else:
                # Keep only non-empty strings within the 30-character limit
                stripped_items = [
                    item.strip() for item in parsed if isinstance(item, str)
                ]
                cleaned_topics = [
                    text for text in stripped_items if text and len(text) <= 30
                ]

                logger.debug(f"Generated topics: {cleaned_topics}")
                return cleaned_topics[:max_topics]
        finally:
            # Always release the model, whichever path was taken above
            from ...utilities.resource_utils import safe_close

            safe_close(model, "topic LLM")

    except Exception as exc:
        logger.debug(f"LLM topic generation failed: {exc}")

    return []
113def _validate_topics(topics: List[str], max_topics: int) -> List[str]:
114 """Validate and clean topics."""
115 valid_topics = []
116 seen = set()
118 for topic in topics:
119 if not topic:
120 continue
122 # Clean the topic
123 cleaned = topic.strip()
125 # Skip if too short or too long
126 if len(cleaned) < 2 or len(cleaned) > 30:
127 continue
129 # Skip duplicates (case-insensitive)
130 normalized = cleaned.lower()
131 if normalized in seen:
132 continue
133 seen.add(normalized)
135 # Convert to lowercase as djpetti suggested
136 valid_topics.append(normalized)
138 if len(valid_topics) >= max_topics:
139 break
141 # Don't add default topics - show what actually happened
142 if not valid_topics:
143 valid_topics = ["[No valid topics]"]
145 return valid_topics