Fix temp table being randomly dropped due to name collision. Fix multi-word non-phrase search

rmgr 2024-05-05 19:06:56 +09:30
parent bdb4064acc
commit 98efe9d1a2
2 changed files with 10 additions and 3 deletions


@@ -70,7 +70,8 @@ def build_index():
 def zip_ngrams(size: int, corpus, document_id):
     size = int(size)
     connection = engine.connect()
-    temptbl_name = 'temp_del_{}'.format(random.randint(100000, 9999999))
+    temptbl_name = 'temp_del_{}'.format(
+        time.time_ns() + random.randint(100000, 9999999))
     temptbl = Table(temptbl_name, Base.metadata, Column('id', UUID(as_uuid=True), index=True), Column(
         'gram', String, index=True), Column('size', Integer, index=True), extend_existing=True)
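For reference, a minimal standalone sketch of the naming change (the helper name below is made up for illustration; it is not part of the repository). The old suffix came only from random.randint(100000, 9999999), so two concurrent zip_ngrams calls could draw the same number, build the same temp table name, and one call's DROP would remove the table the other was still using. Adding time.time_ns() means two calls would also have to land on the same nanosecond to collide.

import random
import time

def make_temp_table_name(prefix='temp_del'):
    # Old scheme: suffix was random.randint(100000, 9999999) only, i.e. fewer
    # than ten million possible names shared by all concurrent indexing runs.
    # New scheme: nanosecond timestamp plus the random offset.
    return '{}_{}'.format(prefix, time.time_ns() + random.randint(100000, 9999999))

print(make_temp_table_name())  # e.g. temp_del_1714902416123461234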


@@ -1,5 +1,5 @@
 #!/usr/bin/python3
-from sqlalchemy import create_engine, func
+from sqlalchemy import create_engine, func, and_, or_
 from config import DATABASE_URI
 from models import Base, Tokens, Documents, Document_Tokens, NGrams, Document_NGrams
 from sqlalchemy.orm import sessionmaker
@@ -73,8 +73,14 @@ def search(query):
         .join(Document_NGrams, Documents.id == Document_NGrams.document_id) \
         .join(NGrams, Document_NGrams.ngram_id == NGrams.id) \
         .group_by(Documents.url)
+    conditions = []
     for ngram in query_words['ngrams']:
-        q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
+        conditions.append(
+            (NGrams.size == len(ngram.split(' ')), NGrams.gram == ngram))
+        # q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
+    and_conditions = [and_(*condition_pair)
+                      for condition_pair in conditions]
+    q = q.filter(or_(*and_conditions))
     print('query built: ' + str((time.time_ns() - start_time) // 1_000_000) + "ms")
     print(q)
     x = q.all()
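For context, a minimal standalone sketch of the query shape the new code builds (the model class below is an illustrative stand-in, not the repository's models.NGrams). The old loop chained filter_by calls, which ANDs every n-gram's size and gram conditions onto the same NGrams row; with more than one n-gram no row can satisfy all of them, so multi-word non-phrase searches returned nothing. The new code collects one and_(size, gram) pair per n-gram and ORs the pairs together.

from sqlalchemy import Column, Integer, String, and_, or_, select
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class NGrams(Base):  # illustrative stand-in for the project's NGrams model
    __tablename__ = 'ngrams'
    id = Column(Integer, primary_key=True)
    gram = Column(String, index=True)
    size = Column(Integer, index=True)

query_ngrams = ['hello', 'hello world']  # example multi-word, non-phrase query
pairs = [(NGrams.size == len(g.split(' ')), NGrams.gram == g) for g in query_ngrams]

# Old behaviour: all pairs ANDed together on one row -- impossible to satisfy.
# New behaviour: a row matches if it equals any one of the queried n-grams.
stmt = select(NGrams).where(or_(*[and_(*p) for p in pairs]))
print(stmt)  # prints the generated WHERE (... AND ...) OR (... AND ...) clause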