Fix temp table being randomly dropped due to a name collision. Fix multi-word non-phrase search.
This commit is contained in:
parent
bdb4064acc
commit
98efe9d1a2
2 changed files with 10 additions and 3 deletions
|
|
@ -70,7 +70,8 @@ def build_index():
|
||||||
def zip_ngrams(size: int, corpus, document_id):
|
def zip_ngrams(size: int, corpus, document_id):
|
||||||
size = int(size)
|
size = int(size)
|
||||||
connection = engine.connect()
|
connection = engine.connect()
|
||||||
temptbl_name = 'temp_del_{}'.format(random.randint(100000, 9999999))
|
temptbl_name = 'temp_del_{}'.format(
|
||||||
|
time.time_ns() + random.randint(100000, 9999999))
|
||||||
temptbl = Table(temptbl_name, Base.metadata, Column('id', UUID(as_uuid=True), index=True), Column(
|
temptbl = Table(temptbl_name, Base.metadata, Column('id', UUID(as_uuid=True), index=True), Column(
|
||||||
'gram', String, index=True), Column('size', Integer, index=True), extend_existing=True)
|
'gram', String, index=True), Column('size', Integer, index=True), extend_existing=True)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
from sqlalchemy import create_engine, func
|
from sqlalchemy import create_engine, func, and_, or_
|
||||||
from config import DATABASE_URI
|
from config import DATABASE_URI
|
||||||
from models import Base, Tokens, Documents, Document_Tokens, NGrams, Document_NGrams
|
from models import Base, Tokens, Documents, Document_Tokens, NGrams, Document_NGrams
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
|
@ -73,8 +73,14 @@ def search(query):
|
||||||
.join(Document_NGrams, Documents.id == Document_NGrams.document_id) \
|
.join(Document_NGrams, Documents.id == Document_NGrams.document_id) \
|
||||||
.join(NGrams, Document_NGrams.ngram_id == NGrams.id) \
|
.join(NGrams, Document_NGrams.ngram_id == NGrams.id) \
|
||||||
.group_by(Documents.url)
|
.group_by(Documents.url)
|
||||||
|
conditions = []
|
||||||
for ngram in query_words['ngrams']:
|
for ngram in query_words['ngrams']:
|
||||||
q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
|
conditions.append(
|
||||||
|
(NGrams.size == len(ngram.split(' ')), NGrams.gram == ngram))
|
||||||
|
# q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram)
|
||||||
|
and_conditions = [and_(*condition_pair)
|
||||||
|
for condition_pair in conditions]
|
||||||
|
q = q.filter(or_(*and_conditions))
|
||||||
print('query built: ' + str((time.time_ns() - start_time) // 1_000_000) + "ms")
|
print('query built: ' + str((time.time_ns() - start_time) // 1_000_000) + "ms")
|
||||||
print(q)
|
print(q)
|
||||||
x = q.all()
|
x = q.all()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue