# search-engine/src/models.py
#
# 72 lines, 2.8 KiB, Python

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, DateTime, ForeignKey, Index, Integer
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship, mapped_column
import uuid
# Declarative base class that the model classes below inherit from.
Base = declarative_base()
class Documents(Base):
    """ORM model mapped to the ``documents`` table.

    Columns: a UUID primary key, ``url``, ``text_content``,
    ``html_content`` and three timestamp columns. Related
    ``Document_Tokens`` and ``Document_NGrams`` rows are reachable through
    ``document_tokens`` and ``document_ngrams``.
    """

    __tablename__ = "documents"

    # UUID primary key; uuid.uuid4 supplies a value when none is given.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # String columns for the document's URL and its two content forms.
    url = Column(String)
    text_content = Column(String)
    html_content = Column(String)

    # Timestamp columns.
    first_crawl_date = Column(DateTime)
    last_crawl_date = Column(DateTime)
    last_index_date = Column(DateTime)

    # Counterparts of the ``document`` attribute on the association models.
    document_tokens = relationship("Document_Tokens", back_populates="document")
    document_ngrams = relationship("Document_NGrams", back_populates="document")
class Document_Tokens(Base):
    """Association model linking a ``Documents`` row to a ``Tokens`` row.

    Each row has its own UUID primary key plus foreign keys to
    ``documents.id`` and ``tokens.id``. A unique index on
    ``(document_id, token_id)`` prevents the same pair from being stored
    twice.
    """

    __tablename__ = 'document_tokens'

    # UUID primary key; uuid.uuid4 supplies a value when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # No explicit type: SQLAlchemy takes it from the referenced column.
    document_id = mapped_column(ForeignKey("documents.id"))
    token_id = mapped_column(ForeignKey("tokens.id"))

    document = relationship(
        "Documents", back_populates="document_tokens", uselist=False)
    token = relationship("Tokens", back_populates="document_tokens")

    # Both indexes deliberately use the default B-tree access method:
    # PostgreSQL hash indexes can be neither UNIQUE nor multi-column, so
    # requesting postgresql_using='hash' here makes CREATE INDEX fail.
    __table_args__ = (
        Index('idx_document_tokens_document_id_token_id',
              'document_id', 'token_id', unique=True),
        # NOTE(review): covers the same columns as the unique index above,
        # so it is likely redundant; kept to preserve the index name.
        Index('idx_document_tokens_clustered', 'document_id', 'token_id'),
    )
class Tokens(Base):
    """ORM model mapped to the ``tokens`` table.

    Stores a token string under a UUID primary key; ``document_tokens``
    lists the ``Document_Tokens`` rows that reference it.
    """

    __tablename__ = "tokens"

    # UUID primary key; uuid.uuid4 supplies a value when none is given.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Indexed string column.
    token = Column(String, index=True)

    # Counterpart of ``Document_Tokens.token``.
    document_tokens = relationship(
        "Document_Tokens",
        back_populates="token",
    )
class NGrams(Base):
    """ORM model mapped to the ``ngrams`` table.

    Stores an integer ``size`` and a string ``gram`` under a UUID primary
    key; ``document_ngrams`` lists the ``Document_NGrams`` rows that
    reference it.
    """

    __tablename__ = "ngrams"

    # UUID primary key; uuid.uuid4 supplies a value when none is given.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Both columns carry their own single-column index.
    size = Column(Integer, index=True)
    gram = Column(String, index=True)

    # Counterpart of ``Document_NGrams.ngram``.
    document_ngrams = relationship(
        "Document_NGrams",
        back_populates="ngram",
    )
class Document_NGrams(Base):
    """Association model linking a ``Documents`` row to an ``NGrams`` row.

    Each row has its own UUID primary key plus foreign keys to
    ``documents.id`` and ``ngrams.id``. A unique index on
    ``(document_id, ngram_id)`` prevents the same pair from being stored
    twice.
    """

    __tablename__ = 'document_ngrams'

    # UUID primary key; uuid.uuid4 supplies a value when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # No explicit type: SQLAlchemy takes it from the referenced column.
    document_id = mapped_column(ForeignKey("documents.id"))
    ngram_id = mapped_column(ForeignKey("ngrams.id"))

    document = relationship(
        "Documents", back_populates="document_ngrams", uselist=False)
    ngram = relationship("NGrams", back_populates="document_ngrams")

    # Both indexes deliberately use the default B-tree access method:
    # PostgreSQL hash indexes can be neither UNIQUE nor multi-column, so
    # requesting postgresql_using='hash' here makes CREATE INDEX fail.
    __table_args__ = (
        Index('idx_document_ngrams_document_id_ngram_id',
              'document_id', 'ngram_id', unique=True),
        # NOTE(review): covers the same columns as the unique index above,
        # so it is likely redundant; kept to preserve the index name.
        Index('idx_document_ngrams_clustered', 'document_id', 'ngram_id'),
    )