diff --git a/src/index.py b/src/index.py index 542424c..679d312 100644 --- a/src/index.py +++ b/src/index.py @@ -70,7 +70,8 @@ def build_index(): def zip_ngrams(size: int, corpus, document_id): size = int(size) connection = engine.connect() - temptbl_name = 'temp_del_{}'.format(random.randint(100000, 9999999)) + temptbl_name = 'temp_del_{}'.format( + time.time_ns() + random.randint(100000, 9999999)) temptbl = Table(temptbl_name, Base.metadata, Column('id', UUID(as_uuid=True), index=True), Column( 'gram', String, index=True), Column('size', Integer, index=True), extend_existing=True) diff --git a/src/search.py b/src/search.py index 6033e60..d643eb2 100755 --- a/src/search.py +++ b/src/search.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -from sqlalchemy import create_engine, func +from sqlalchemy import create_engine, func, and_, or_ from config import DATABASE_URI from models import Base, Tokens, Documents, Document_Tokens, NGrams, Document_NGrams from sqlalchemy.orm import sessionmaker @@ -73,8 +73,14 @@ def search(query): .join(Document_NGrams, Documents.id == Document_NGrams.document_id) \ .join(NGrams, Document_NGrams.ngram_id == NGrams.id) \ .group_by(Documents.url) + conditions = [] for ngram in query_words['ngrams']: - q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram) + conditions.append( + (NGrams.size == len(ngram.split(' ')), NGrams.gram == ngram)) +# q = q.filter_by(size=len(ngram.split(' '))).filter_by(gram=ngram) + and_conditions = [and_(*condition_pair) + for condition_pair in conditions] + q = q.filter(or_(*and_conditions)) print('query built: ' + str((time.time_ns() - start_time) // 1_000_000) + "ms") print(q) x = q.all()