Fix temp table being randomly dropped due to a name collision. Fix multi-word non-phrase search.
This commit is contained in:
parent
bdb4064acc
commit
98efe9d1a2
2 changed files with 10 additions and 3 deletions
|
|
@@ -70,7 +70,8 @@ def build_index():
|
|||
def zip_ngrams(size: int, corpus, document_id):
|
||||
size = int(size)
|
||||
connection = engine.connect()
|
||||
temptbl_name = 'temp_del_{}'.format(random.randint(100000, 9999999))
|
||||
temptbl_name = 'temp_del_{}'.format(
|
||||
time.time_ns() + random.randint(100000, 9999999))
|
||||
temptbl = Table(temptbl_name, Base.metadata, Column('id', UUID(as_uuid=True), index=True), Column(
|
||||
'gram', String, index=True), Column('size', Integer, index=True), extend_existing=True)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,5 @@
|
|||
#!/usr/bin/python3
|
||||
from sqlalchemy import create_engine, func
|
||||
from sqlalchemy import create_engine, func, and_, or_
|
||||
from config import DATABASE_URI
|
||||
from models import Base, Tokens, Documents, Document_Tokens, NGrams, Document_NGrams
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
|
@@ -73,8 +73,14 @@ def search(query):
|
|||
.join(Document_NGrams, Documents.id == Document_NGrams.document_id) \
|
||||
.join(NGrams, Document_NGrams.ngram_id == NGrams.id) \
|
||||
.group_by(Documents.url)
|
||||
conditions = []
|
||||
for ngram in query_words['ngrams']:
|
||||
q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
|
||||
conditions.append(
|
||||
(NGrams.size == len(ngram.split(' ')), NGrams.gram == ngram))
|
||||
# q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
|
||||
and_conditions = [and_(*condition_pair)
|
||||
for condition_pair in conditions]
|
||||
q = q.filter(or_(*and_conditions))
|
||||
print('query built: ' + str((time.time_ns() - start_time) // 1_000_000) + "ms")
|
||||
print(q)
|
||||
x = q.all()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue