Coverage for src/local_deep_research/domain_classifier/models.py: 94%

18 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1"""Database models for domain classification.""" 

2 

3from sqlalchemy import Column, String, Text, Float, Integer, Index 

4from sqlalchemy_utc import UtcDateTime, utcnow 

5from ..database.models import Base 

6 

7 

class DomainClassification(Base):
    """ORM model for a domain's LLM-generated classification.

    Each row is keyed by a unique, non-null ``domain`` and carries the
    assigned category, an optional subcategory, and supporting details
    (confidence, reasoning, sample titles) for that classification.
    """

    __tablename__ = "domain_classifications"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # The classified domain; required and unique across the table.
    domain = Column(String(255), unique=True, nullable=False, index=True)
    # Required top-level category, with an optional refinement.
    category = Column(String(100), nullable=False)
    subcategory = Column(String(100))
    confidence = Column(Float, default=0.0)
    # Free-text explanation the LLM gave for its classification.
    reasoning = Column(Text)
    # Sample titles from this domain, kept as a JSON array serialized to text.
    sample_titles = Column(Text)
    # How many resources were used for the classification.
    sample_count = Column(Integer, default=0)
    # NOTE(review): utcnow() is sqlalchemy_utc's construct, not a Python
    # datetime; presumably it is rendered as a SQL expression per statement
    # -- confirm against the sqlalchemy_utc documentation.
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(UtcDateTime, default=utcnow(), onupdate=utcnow())

    # Composite index over (domain, category) for faster lookups.
    __table_args__ = (Index("idx_domain_category", "domain", "category"),)

    def to_dict(self):
        """Return this row as a plain dict for JSON serialization.

        Non-timestamp columns are passed through exactly as stored
        (``sample_titles`` is not parsed). ``created_at`` and
        ``updated_at`` are rendered with ``isoformat()``, or ``None``
        when the attribute is falsy.
        """
        passthrough_columns = (
            "id",
            "domain",
            "category",
            "subcategory",
            "confidence",
            "reasoning",
            "sample_titles",
            "sample_count",
        )
        serialized = {
            column: getattr(self, column) for column in passthrough_columns
        }

        # Timestamps go last so the key order matches the column order.
        for stamp_column in ("created_at", "updated_at"):
            stamp = getattr(self, stamp_column)
            serialized[stamp_column] = stamp.isoformat() if stamp else None

        return serialized