"""SQLAlchemy declarative models for documents, tokens and n-grams.

``Documents`` rows are linked to ``Tokens`` and ``NGrams`` rows through the
``Document_Tokens`` and ``Document_NGrams`` association tables.
"""

import uuid

from sqlalchemy import Column, String, DateTime, ForeignKey, Index, Integer
from sqlalchemy.dialects.postgresql import UUID
# ``declarative_base`` lives in ``sqlalchemy.orm`` in SQLAlchemy 2.0 (which
# this module already requires for ``mapped_column``); the old
# ``sqlalchemy.ext.declarative`` location is deprecated.
from sqlalchemy.orm import declarative_base, mapped_column, relationship

Base = declarative_base()


class Documents(Base):
    """A stored document: its URL, text/HTML content and crawl/index dates."""

    __tablename__ = 'documents'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    url = Column(String)
    text_content = Column(String)
    html_content = Column(String)
    first_crawl_date = Column(DateTime)
    last_crawl_date = Column(DateTime)
    last_index_date = Column(DateTime)

    # One-to-many links to the association rows that reference this document.
    document_tokens = relationship(
        "Document_Tokens", back_populates="document")
    document_ngrams = relationship(
        "Document_NGrams", back_populates="document")


class Document_Tokens(Base):
    """Association row linking one ``Documents`` row to one ``Tokens`` row."""

    __tablename__ = 'document_tokens'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Column types are taken from the referenced primary keys.
    document_id = mapped_column(ForeignKey("documents.id"))
    token_id = mapped_column(ForeignKey("tokens.id"))

    document = relationship(
        "Documents", back_populates="document_tokens", uselist=False)
    token = relationship("Tokens", back_populates="document_tokens")

    __table_args__ = (
        # Leave these on the default B-tree access method. Do not add
        # ``postgresql_using='hash'``: PostgreSQL hash indexes support
        # neither UNIQUE nor more than one key column, so CREATE INDEX
        # would be rejected.
        Index(
            'idx_document_tokens_document_id_token_id',
            'document_id',
            'token_id',
            unique=True,
        ),
        # NOTE(review): covers the same columns as the unique index above, so
        # it looks redundant; kept so the index name continues to exist.
        Index(
            'idx_document_tokens_clustered',
            'document_id',
            'token_id',
        ),
    )


class Tokens(Base):
    """A token string, indexed for lookup by value."""

    __tablename__ = 'tokens'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    token = Column(String, index=True)

    document_tokens = relationship("Document_Tokens", back_populates="token")


class NGrams(Base):
    """An n-gram string together with its size; both columns are indexed."""

    __tablename__ = 'ngrams'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    size = Column(Integer, index=True)
    gram = Column(String, index=True)

    document_ngrams = relationship("Document_NGrams", back_populates="ngram")


class Document_NGrams(Base):
    """Association row linking one ``Documents`` row to one ``NGrams`` row."""

    __tablename__ = 'document_ngrams'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Column types are taken from the referenced primary keys.
    document_id = mapped_column(ForeignKey("documents.id"))
    ngram_id = mapped_column(ForeignKey("ngrams.id"))

    document = relationship(
        "Documents", back_populates="document_ngrams", uselist=False)
    ngram = relationship("NGrams", back_populates="document_ngrams")

    __table_args__ = (
        # Leave these on the default B-tree access method. Do not add
        # ``postgresql_using='hash'``: PostgreSQL hash indexes support
        # neither UNIQUE nor more than one key column, so CREATE INDEX
        # would be rejected.
        Index(
            'idx_document_ngrams_document_id_ngram_id',
            'document_id',
            'ngram_id',
            unique=True,
        ),
        # NOTE(review): covers the same columns as the unique index above, so
        # it looks redundant; kept so the index name continues to exist.
        Index(
            'idx_document_ngrams_clustered',
            'document_id',
            'ngram_id',
        ),
    )