class ArticleTopic(Base):
    """Links an article to one of its topics (a 1:N relationship).

    Each row pairs one article id with one topic id; the pair is the
    primary key of the table.
    """

    __tablename__ = "atopics"

    # Composite primary key over the (article_id, topic_id) pair
    __table_args__ = (
        PrimaryKeyConstraint("article_id", "topic_id", name="atopics_pkey"),
    )

    # Foreign key to the article; updates and deletes cascade to this row
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Foreign key to the topic; updates and deletes cascade to this row
    topic_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("topics.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Parent Article of this ArticleTopic (backref name: 'atopics')
    article = relationship("Article", backref=backref("atopics"))

    # Parent Topic of this ArticleTopic (backref name: 'atopics')
    topic = relationship("Topic", backref=backref("atopics"))

    def __repr__(self):
        """Return a constant, argument-less representation."""
        return "ArticleTopic()"
class Feedback(Base):
    """A single submission of the feedback form."""

    __tablename__ = "feedback"

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # When the feedback was submitted
    timestamp = Column(DateTime, index=True, nullable=False)

    # What the feedback is about (e.g. Embla/Netskrafl/etc.)
    topic = Column(String, index=True, nullable=True)

    # Name given by the submitter
    name = Column(String, index=True, nullable=True)

    # Email given by the submitter
    email = Column(String, index=True, nullable=True)

    # Free-text comment (not indexed)
    comment = Column(String, index=False, nullable=True)

    def __repr__(self):
        """Return a representation showing name, email, topic and comment."""
        template = "Feedback(name='{0}', email='{1}', topic='{2}', comment='{3}')"
        return template.format(self.name, self.email, self.topic, self.comment)
class Topic(Base):
    """A topic that articles can be associated with."""

    __tablename__ = "topics"

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        server_default=text("uuid_generate_v1()"),
        primary_key=True,
    )

    # Human-readable topic name
    name = Column(String(128), nullable=False, index=True)

    # Short identifier for the topic, such as 'sport', 'business'...
    # It must be usable as a CSS class name.
    identifier = Column(String(32), nullable=False)

    # Topic keywords, written as word1/cat word2/cat...
    keywords = Column(String, nullable=False)

    # Associated vector, stored as JSON text
    vector = Column(String)  # Is initially NULL

    # Cosine distance threshold to apply for this topic
    threshold = Column(Float)

    def __repr__(self):
        """Return a representation showing the topic name."""
        template = "Topic(name='{0}')"
        return template.format(self.name)
class Word(Base):
    """A word occurring in an article, with its occurrence count."""

    __tablename__ = "words"

    # Composite primary key: one row per (article, stem, category)
    __table_args__ = (
        PrimaryKeyConstraint("article_id", "stem", "cat", name="words_pkey"),
    )

    # Maximum length of the stem column
    MAX_WORD_LEN = 64

    # Foreign key to the article; updates and deletes cascade to this row
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
    )

    # Stem of the word
    stem = Column(String(MAX_WORD_LEN), index=True, nullable=False)

    # Category of the word
    cat = Column(String(16), index=True, nullable=False)

    # Number of occurrences
    cnt = Column(Integer, nullable=False)

    # Parent Article of this Word (backref name: 'words')
    article = relationship("Article", backref=backref("words"))

    def __repr__(self):
        """Return a representation showing stem, category and count."""
        template = "Word(stem='{0}', cat='{1}', cnt='{2}')"
        return template.format(self.stem, self.cat, self.cnt)
class Word(Base):
    """A word occurring in an article, with its occurrence count."""

    __tablename__ = "words"

    # Composite primary key: one row per (article, stem, category)
    __table_args__ = (
        PrimaryKeyConstraint("article_id", "stem", "cat", name="words_pkey"),
    )

    # Foreign key to the article; updates and deletes cascade to this row
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
    )

    # Stem of the word
    stem = Column(String(64), index=True, nullable=False)

    # Category of the word
    cat = Column(String(16), index=True, nullable=False)

    # Number of occurrences
    cnt = Column(Integer, nullable=False)

    # Parent Article of this Word (backref name: 'words')
    article = relationship("Article", backref=backref("words"))

    def __repr__(self):
        """Return a representation showing stem, category and count."""
        template = "Word(stem='{0}', cat='{1}', cnt='{2}')"
        return template.format(self.stem, self.cat, self.cnt)

    @classmethod
    def table(cls):
        """Return the table object mapped by this class (cls.__table__)."""
        return cls.__table__
class Location(Base):
    """A location, optionally tied to the article it was found in."""

    __tablename__ = "locations"

    # A given (name, kind) pair may appear only once per article URL
    __table_args__ = (UniqueConstraint("name", "kind", "article_url"),)

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # Foreign key to an article, by URL
    article_url = Column(
        String,
        # The location is kept when the article is deleted;
        # the reference is set to NULL instead
        ForeignKey("articles.url", onupdate="CASCADE", ondelete="SET NULL"),
        index=True,
        nullable=True,
    )

    # Location name
    name = Column(String, index=True)

    # Kind of location (e.g. 'address', 'street', 'country', 'region', 'placename')
    kind = Column(String(16), index=True)

    # Country code (ISO 3166-1 alpha-2, e.g. 'IS')
    country = Column(String(2))

    # Continent ISO code (e.g. 'EU')
    continent = Column(String(2))

    # Coordinates (WGS84)
    latitude = Column(Float)
    longitude = Column(Float)

    # Additional data, stored as JSONB
    data = Column(JSONB)

    # Timestamp of this entry
    timestamp = Column(DateTime)

    # Parent Article of this Location; the 'locations' backref is ordered
    # by name, so this must come after the name column is defined
    article = relationship("Article", backref=backref("locations", order_by=name))

    def __repr__(self):
        """Return a representation showing id, name, kind and country."""
        template = "Location(id='{0}', name='{1}', kind='{2}', country='{3}')"
        return template.format(self.id, self.name, self.kind, self.country)

    @classmethod
    def table(cls):
        """Return the table object mapped by this class (cls.__table__)."""
        return cls.__table__
class QueryLog(Base):
    """Represents a fully anonymized, logged query and its answer.

    Only the fields copied by from_Query() are stored: timestamp,
    interpretations, question, bquestion, answer, voice, qtype, key
    and error.
    """

    __tablename__ = "querylog"

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # See the Query class for documentation of these fields
    timestamp = Column(DateTime, index=True, nullable=False)
    interpretations = Column(JSONB, nullable=True)
    question = Column(String, index=True, nullable=False)
    bquestion = Column(String, index=False, nullable=True)
    answer = Column(String, index=False, nullable=True)
    voice = Column(String, index=False, nullable=True)
    qtype = Column(String(80), index=True, nullable=True)
    key = Column(String(256), index=True, nullable=True)
    error = Column(String(256), nullable=True)

    @staticmethod
    def from_Query(q: "Query") -> "QueryLog":
        """Create a QueryLog object from a Query object.

        The annotations are string forward references so that this class
        can be defined regardless of where Query appears in the module.

        Args:
            q: The Query instance whose fields are copied.

        Returns:
            A new QueryLog carrying the timestamp, interpretations,
            question, bquestion, answer, voice, qtype, key and error
            of q. The object is only constructed here; it is not added
            to any session.
        """
        return QueryLog(
            timestamp=q.timestamp,
            interpretations=q.interpretations,
            question=q.question,
            bquestion=q.bquestion,
            answer=q.answer,
            voice=q.voice,
            qtype=q.qtype,
            key=q.key,
            error=q.error,
        )

    def __repr__(self):
        """Return a representation showing the question and the answer."""
        return "QueryLog(question='{0}', answer='{1}')".format(
            self.question, self.answer
        )
class Correction(Base):
    """A piece of correction feedback on an annotated sentence."""

    __tablename__ = "corrections"

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        server_default=text("uuid_generate_v1()"),
        primary_key=True,
    )

    # The columns below are wrapped in cast() so that type checkers
    # see the attributes as plain Python values

    # Timestamp of this entry
    timestamp = cast(datetime, Column(DateTime, nullable=False, index=True))

    # Original sentence being annotated
    sentence = cast(str, Column(String, nullable=False))

    # Annotation code
    code = cast(str, Column(String(32), nullable=False, index=True))

    # Annotation text
    annotation = cast(str, Column(String, nullable=False))

    # Span of the annotation
    start = cast(int, Column(Integer, nullable=False))
    end = cast(int, Column(Integer, nullable=False))

    # The correction itself
    correction = cast(str, Column(String, nullable=False))

    # Feedback given by the user
    feedback = cast(str, Column(String(32), nullable=False, index=True))

    # Reason text
    reason = cast(str, Column(String(32), index=True))

    def __repr__(self) -> str:
        """Return a representation with id, sentence, code, annotation, feedback."""
        template = "Correction(id='{0}', sent='{1}', code='{2}', annotation='{3}', feedback='{4}')"
        return template.format(
            self.id, self.sentence, self.code, self.annotation, self.feedback
        )
class Query(Base):
    """Represents a logged incoming query with its answer.

    Besides the stored columns, the model exposes the hybrid properties
    question_lc, answer_lc and voice_lc. On an instance they return the
    lower-cased column value; at class level they are backed by a
    CaseInsensitiveComparator over the corresponding column.
    """

    __tablename__ = "queries"

    # UUID primary key, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # Timestamp of the incoming query
    timestamp = Column(DateTime, index=True, nullable=False)

    # Interpretations
    # JSON array containing list of possible interpretations
    # provided by a speech-to-text engine.
    interpretations = Column(JSONB, nullable=True)

    # Question
    question = Column(String, index=True, nullable=False)

    @hybrid_property
    def question_lc(self):
        """Return the question in lower case."""
        return self.question.lower()

    # pylint: disable=no-self-argument
    @question_lc.comparator  # type: ignore
    def question_lc(cls):
        """Return the class-level comparator for the question column."""
        return CaseInsensitiveComparator(cls.question)

    # Beautified question
    bquestion = Column(String, index=False, nullable=True)

    # Answer
    answer = Column(String, index=False, nullable=True)

    @hybrid_property
    def answer_lc(self):
        """Return the answer in lower case, or None if there is no answer."""
        # The column is nullable, so guard against a missing value
        # instead of raising AttributeError on None
        return None if self.answer is None else self.answer.lower()

    # pylint: disable=no-self-argument
    @answer_lc.comparator  # type: ignore
    def answer_lc(cls):
        """Return the class-level comparator for the answer column."""
        return CaseInsensitiveComparator(cls.answer)

    # Voice answer
    voice = Column(String, index=False, nullable=True)

    @hybrid_property
    def voice_lc(self):
        """Return the voice answer in lower case, or None if it is missing."""
        # The column is nullable, so guard against a missing value
        # instead of raising AttributeError on None
        return None if self.voice is None else self.voice.lower()

    # pylint: disable=no-self-argument
    @voice_lc.comparator  # type: ignore
    def voice_lc(cls):
        """Return the class-level comparator for the voice column."""
        return CaseInsensitiveComparator(cls.voice)

    # Error code
    error = Column(String(256), nullable=True)

    # When does this answer expire, for caching purposes?
    # NULL=immediately
    expires = Column(DateTime, index=True, nullable=True)

    # The query type, NULL if not able to process
    qtype = Column(String(80), index=True, nullable=True)

    # The query key, NULL if not able to process or not applicable
    key = Column(String(256), index=True, nullable=True)

    # Client type
    # Either "www" (web interface), "ios" (iOS) or "android" (Android)
    client_type = Column(String(80), index=True, nullable=True)

    # Client version
    client_version = Column(String(10), nullable=True)

    # Client identifier, if applicable
    # If web client, this is the HTTP client user agent
    # On iOS and Android, this is a unique device UUID string
    client_id = Column(String(256), index=True, nullable=True)

    # Client location coordinates (WGS84)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Client IP address
    remote_addr = Column(INET, nullable=True)

    # Additional context used to answer the query
    context = Column(JSONB, nullable=True)

    # Add an index on the question in lower case
    question_lc_index = Index("ix_queries_question_lc", func.lower(question))

    # !!! The following indices don't work since answers can become
    # !!! very long (thousands of characters) and PostgreSQL has a
    # !!! limit on index entry size vs. its page size.

    # Add an index on the answer in lower case
    # answer_lc_index = Index('ix_queries_answer_lc', func.lower(answer))

    # Add an index on the voice answer in lower case
    # voice_lc_index = Index('ix_queries_voice_lc', func.lower(voice))

    def __repr__(self):
        """Return a representation showing the question and the answer."""
        return "Query(question='{0}', answer='{1}')".format(self.question, self.answer)
class Article(Base):
    """An article from one of the roots.

    The article is either waiting to be scraped or has already been
    scraped.
    """

    __tablename__ = "articles"

    # The URL of the article serves as the primary key
    url = Column(String, primary_key=True)

    # Unique UUID, generated by the database server
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        server_default=text("uuid_generate_v1()"),
    )

    # Foreign key to a root
    root_id = Column(
        Integer,
        # Articles are kept when their root is deleted;
        # the reference is set to NULL instead
        ForeignKey("roots.id", onupdate="CASCADE", ondelete="SET NULL"),
    )

    # Heading of the article, if known
    heading = Column(String)

    # Author of the article, if known
    author = Column(String)

    # Time stamp of the article, if known
    timestamp = Column(DateTime, index=True)

    # Authority of this article: 1.0 = most authoritative,
    # 0.0 = least authoritative
    authority = Column(Float)

    # When the article was last scraped
    scraped = Column(DateTime, index=True)

    # When the article was last parsed
    parsed = Column(DateTime, index=True)

    # When the article was last processed
    processed = Column(DateTime, index=True)

    # When the article was last indexed
    indexed = Column(DateTime, index=True)

    # Module used for scraping
    scr_module = Column(String(80))

    # Class within that module used for scraping
    scr_class = Column(String(80))

    # Version of the scraper class
    scr_version = Column(String(16))

    # Version of parser/grammar/config
    parser_version = Column(String(64))

    # Parse statistics
    num_sentences = Column(Integer)
    num_parsed = Column(Integer)
    ambiguity = Column(Float)

    # HTML obtained in the last scrape
    html = Column(String)

    # Parse tree obtained in the last parse
    tree = Column(String)

    # Tokens of the article, as a JSON string
    tokens = Column(String)

    # Topic vector of the article: an array of floats, as a JSON string
    topic_vector = Column(String)

    # Parent Root of this Article; the 'articles' backref is ordered by URL
    root = relationship(
        "Root",
        foreign_keys="Article.root_id",
        backref=backref("articles", order_by=url),
    )

    def __repr__(self):
        """Return a representation showing url, heading and scrape time."""
        template = "Article(url='{0}', heading='{1}', scraped={2})"
        return template.format(self.url, self.heading, self.scraped)