72 lines
2.8 KiB
Python
72 lines
2.8 KiB
Python
from sqlalchemy.ext.declarative import declarative_base
|
|
from sqlalchemy import Column, String, DateTime, ForeignKey, Index, Integer
|
|
from sqlalchemy.dialects.postgresql import UUID
|
|
from sqlalchemy.orm import relationship, mapped_column
|
|
import uuid
|
|
|
|
# Shared declarative base class; every ORM model in this module subclasses
# it so that all tables are registered on the same metadata.
Base = declarative_base()
|
|
|
|
|
|
class Documents(Base):
    """ORM model for the ``documents`` table.

    Each row carries a UUID primary key, a URL, text and HTML content
    columns, and three timestamp columns. Rows are linked to
    ``Document_Tokens`` and ``Document_NGrams`` association rows through
    the two relationships declared at the bottom of the class.
    """

    __tablename__ = 'documents'

    # Primary key: generated client-side with uuid4 when not supplied.
    id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )

    # Location and stored content of the document.
    url = Column(String)
    text_content = Column(String)
    html_content = Column(String)

    # Crawl / index timestamps (presumably set by the crawler and indexer;
    # nothing in this module populates them).
    first_crawl_date = Column(DateTime)
    last_crawl_date = Column(DateTime)
    last_index_date = Column(DateTime)

    # One-to-many links to the association tables; each is the counterpart
    # of the ``document`` relationship on the other side.
    document_tokens = relationship(
        "Document_Tokens",
        back_populates="document",
    )
    document_ngrams = relationship(
        "Document_NGrams",
        back_populates="document",
    )
|
|
|
|
|
|
class Document_Tokens(Base):
    """Association row linking one ``Documents`` row to one ``Tokens`` row.

    The ``(document_id, token_id)`` pair is constrained to be unique by
    the first index in ``__table_args__``.
    """

    __tablename__ = 'document_tokens'

    # Surrogate primary key: generated client-side with uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Foreign keys to the two tables being associated; the column type is
    # taken from the referenced primary key.
    document_id = mapped_column(ForeignKey("documents.id"))
    token_id = mapped_column(ForeignKey("tokens.id"))

    # Many-to-one navigation to each side of the association.
    document = relationship(
        "Documents", back_populates="document_tokens", uselist=False)
    token = relationship("Tokens", back_populates="document_tokens")

    # These indexes previously requested ``postgresql_using='hash'``.
    # PostgreSQL hash indexes support neither UNIQUE nor more than one key
    # column, so creating the table failed. They now use the default
    # B-tree access method, which supports both.
    # NOTE(review): the second index covers the same columns as the unique
    # one and looks redundant; it is kept so the existing index name stays
    # in the schema. Confirm whether it can be dropped.
    __table_args__ = (
        Index('idx_document_tokens_document_id_token_id', 'document_id',
              'token_id', unique=True),
        Index('idx_document_tokens_clustered', 'document_id', 'token_id'),
    )
|
|
|
|
|
|
class Tokens(Base):
    """ORM model for the ``tokens`` table.

    Holds a UUID primary key and an indexed string column ``token``, and
    is linked to documents through ``Document_Tokens`` rows.
    """

    __tablename__ = 'tokens'

    # Primary key: generated client-side with uuid4 when not supplied.
    id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )

    # The token string; indexed for lookup by value.
    token = Column(String, index=True)

    # Counterpart of ``Document_Tokens.token``.
    document_tokens = relationship(
        "Document_Tokens",
        back_populates="token",
    )
|
|
|
|
|
|
class NGrams(Base):
    """ORM model for the ``ngrams`` table.

    Holds a UUID primary key, an indexed integer ``size`` and an indexed
    string ``gram``, and is linked to documents through
    ``Document_NGrams`` rows.
    """

    __tablename__ = 'ngrams'

    # Primary key: generated client-side with uuid4 when not supplied.
    id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )

    # Both columns carry their own single-column index.
    # presumably ``size`` is the n of the n-gram stored in ``gram`` -- confirm.
    size = Column(Integer, index=True)
    gram = Column(String, index=True)

    # Counterpart of ``Document_NGrams.ngram``.
    document_ngrams = relationship(
        "Document_NGrams",
        back_populates="ngram",
    )
|
|
|
|
|
|
class Document_NGrams(Base):
    """Association row linking one ``Documents`` row to one ``NGrams`` row.

    The ``(document_id, ngram_id)`` pair is constrained to be unique by
    the first index in ``__table_args__``.
    """

    __tablename__ = 'document_ngrams'

    # Surrogate primary key: generated client-side with uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Foreign keys to the two tables being associated; the column type is
    # taken from the referenced primary key.
    document_id = mapped_column(ForeignKey("documents.id"))
    ngram_id = mapped_column(ForeignKey("ngrams.id"))

    # Many-to-one navigation to each side of the association.
    document = relationship(
        "Documents", back_populates="document_ngrams", uselist=False)
    ngram = relationship("NGrams", back_populates="document_ngrams")

    # These indexes previously requested ``postgresql_using='hash'``.
    # PostgreSQL hash indexes support neither UNIQUE nor more than one key
    # column, so creating the table failed. They now use the default
    # B-tree access method, which supports both.
    # NOTE(review): the second index covers the same columns as the unique
    # one and looks redundant; it is kept so the existing index name stays
    # in the schema. Confirm whether it can be dropped.
    __table_args__ = (
        Index('idx_document_ngrams_document_id_ngram_id', 'document_id',
              'ngram_id', unique=True),
        Index('idx_document_ngrams_clustered', 'document_id', 'ngram_id'),
    )
|