Add beginnings of ngram search capability
This commit is contained in:
parent
343410e62f
commit
9d57f66cd7
4 changed files with 110 additions and 17 deletions
|
|
@ -1,5 +1,5 @@
|
|||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy import Column, String, DateTime, ForeignKey, Index
|
||||
from sqlalchemy import Column, String, DateTime, ForeignKey, Index, Integer
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship, mapped_column
|
||||
import uuid
|
||||
|
|
@ -16,7 +16,10 @@ class Documents(Base):
|
|||
first_crawl_date = Column(DateTime)
|
||||
last_crawl_date = Column(DateTime)
|
||||
last_index_date = Column(DateTime)
|
||||
document_tokens = relationship("Document_Tokens", back_populates="document")
|
||||
document_tokens = relationship(
|
||||
"Document_Tokens", back_populates="document")
|
||||
document_ngrams = relationship(
|
||||
"Document_NGrams", back_populates="document")
|
||||
|
||||
|
||||
class Document_Tokens(Base):
|
||||
|
|
@ -25,8 +28,9 @@ class Document_Tokens(Base):
|
|||
document_id = mapped_column(ForeignKey("documents.id"))
|
||||
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
token_id = mapped_column(ForeignKey("tokens.id"))
|
||||
#Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
document = relationship("Documents", back_populates="document_tokens", uselist=False)
|
||||
# Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
document = relationship(
|
||||
"Documents", back_populates="document_tokens", uselist=False)
|
||||
token = relationship("Tokens", back_populates="document_tokens")
|
||||
|
||||
|
||||
|
|
@ -35,3 +39,23 @@ class Tokens(Base):
|
|||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
token = Column(String, index=True)
|
||||
document_tokens = relationship("Document_Tokens", back_populates="token")
|
||||
|
||||
|
||||
class NGrams(Base):
    """ORM model for the ``ngrams`` table.

    Each row stores one gram string together with an integer size; both
    columns are indexed. Rows are tied to documents through the
    ``Document_NGrams`` association model.
    """

    __tablename__ = 'ngrams'

    # Primary key: a UUID produced by uuid.uuid4 when no value is supplied.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Indexed size of the gram.
    # NOTE(review): presumably the number of tokens in the gram -- confirm
    # against the code that populates this table.
    size = Column(Integer, index=True)
    # Indexed gram text.
    gram = Column(String, index=True)

    # Association rows for this gram; paired with Document_NGrams.ngram.
    document_ngrams = relationship(
        "Document_NGrams",
        back_populates="ngram",
    )
|
||||
|
||||
|
||||
class Document_NGrams(Base):
    """ORM model for the ``document_ngrams`` association table.

    Each row references one ``documents`` row and one ``ngrams`` row via
    foreign keys, and exposes both ends as relationships.
    """

    __tablename__ = 'document_ngrams'

    # Primary key: a UUID produced by uuid.uuid4 when no value is supplied.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Foreign key to documents.id.
    document_id = mapped_column(ForeignKey("documents.id"))
    # Foreign key to ngrams.id.
    ngram_id = mapped_column(ForeignKey("ngrams.id"))

    # Owning document; paired with Documents.document_ngrams.
    document = relationship(
        "Documents",
        back_populates="document_ngrams",
        uselist=False,
    )
    # Referenced gram; paired with NGrams.document_ngrams.
    ngram = relationship(
        "NGrams",
        back_populates="document_ngrams",
    )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue