Add beginnings of ngram search capability

This commit is contained in:
rmgr 2024-04-05 21:36:15 +10:30
parent 343410e62f
commit 9d57f66cd7
4 changed files with 110 additions and 17 deletions

View file

@ -1,5 +1,5 @@
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, DateTime, ForeignKey, Index
from sqlalchemy import Column, String, DateTime, ForeignKey, Index, Integer
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship, mapped_column
import uuid
@ -16,7 +16,10 @@ class Documents(Base):
first_crawl_date = Column(DateTime)
last_crawl_date = Column(DateTime)
last_index_date = Column(DateTime)
document_tokens = relationship("Document_Tokens", back_populates="document")
document_tokens = relationship(
"Document_Tokens", back_populates="document")
document_ngrams = relationship(
"Document_NGrams", back_populates="document")
class Document_Tokens(Base):
@ -25,8 +28,9 @@ class Document_Tokens(Base):
document_id = mapped_column(ForeignKey("documents.id"))
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
token_id = mapped_column(ForeignKey("tokens.id"))
#Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
document = relationship("Documents", back_populates="document_tokens", uselist=False)
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
document = relationship(
"Documents", back_populates="document_tokens", uselist=False)
token = relationship("Tokens", back_populates="document_tokens")
@ -35,3 +39,23 @@ class Tokens(Base):
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
token = Column(String, index=True)
document_tokens = relationship("Document_Tokens", back_populates="token")
class NGrams(Base):
    """ORM model mapped to the ``ngrams`` table.

    Each row stores one gram string together with an integer size, and is
    reachable from ``Document_NGrams`` rows through ``document_ngrams``.
    """

    __tablename__ = 'ngrams'

    # Primary key; the default value is produced by calling uuid.uuid4.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Indexed integer column.
    # NOTE(review): presumably the number of tokens in the gram -- confirm
    # against the code that populates this table.
    size = Column(Integer, index=True)
    # Indexed string column holding the gram text.
    gram = Column(String, index=True)

    # Paired with the ``ngram`` relationship declared on Document_NGrams.
    document_ngrams = relationship(
        "Document_NGrams",
        back_populates="ngram",
    )
class Document_NGrams(Base):
    """ORM model mapped to the ``document_ngrams`` table.

    Each row references one ``documents`` row and one ``ngrams`` row by
    foreign key, and exposes both through ORM relationships.
    """

    __tablename__ = 'document_ngrams'

    # Primary key; the default value is produced by calling uuid.uuid4.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Foreign key to the ``id`` column of the ``documents`` table.
    document_id = mapped_column(
        ForeignKey("documents.id"),
    )
    # Foreign key to the ``id`` column of the ``ngrams`` table.
    ngram_id = mapped_column(
        ForeignKey("ngrams.id"),
    )

    # Scalar (uselist=False) link to the referenced Documents object; paired
    # with Documents.document_ngrams.
    document = relationship(
        "Documents",
        uselist=False,
        back_populates="document_ngrams",
    )
    # Link to the referenced NGrams object; paired with
    # NGrams.document_ngrams.
    ngram = relationship(
        "NGrams",
        back_populates="document_ngrams",
    )