Rework ngram generation. Greatly improve performance of the indexer. Commit horrendous SQL sins.
This commit is contained in:
parent
9f0e7e6b29
commit
bdb4064acc
5 changed files with 155 additions and 57 deletions
|
|
@@ -32,6 +32,12 @@ class Document_Tokens(Base):
|
|||
document = relationship(
|
||||
"Documents", back_populates="document_tokens", uselist=False)
|
||||
token = relationship("Tokens", back_populates="document_tokens")
|
||||
__table_args__ = (
|
||||
Index('idx_document_tokens_document_id_token_id', 'document_id',
|
||||
'token_id', unique=True, postgresql_using='hash'),
|
||||
Index('idx_document_tokens_clustered', 'document_id',
|
||||
'token_id', postgresql_using='hash'),
|
||||
)
|
||||
|
||||
|
||||
class Tokens(Base):
|
||||
|
|
@@ -53,9 +59,14 @@ class Document_NGrams(Base):
|
|||
__tablename__ = 'document_ngrams'
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
document_id = mapped_column(ForeignKey("documents.id"))
|
||||
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
ngram_id = mapped_column(ForeignKey("ngrams.id"))
|
||||
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
document = relationship(
|
||||
"Documents", back_populates="document_ngrams", uselist=False)
|
||||
ngram = relationship("NGrams", back_populates="document_ngrams")
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_document_ngrams_document_id_ngram_id', 'document_id',
|
||||
'ngram_id', unique=True, postgresql_using='hash'),
|
||||
Index('idx_document_ngrams_clustered', 'document_id',
|
||||
'ngram_id', postgresql_using='hash'),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue