From d4bb3fb8dc00d2f8c8bdee00def5731fa5a053c3 Mon Sep 17 00:00:00 2001 From: rmgr Date: Thu, 7 Mar 2024 21:12:19 +1030 Subject: [PATCH] Tidy up index.py --- src/index.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/index.py b/src/index.py index c80b5e7..227815e 100644 --- a/src/index.py +++ b/src/index.py @@ -1,18 +1,10 @@ #!/usr/bin/python3 import argparse -import requests -import hashlib -from urllib.parse import urlparse, urljoin -import urllib.robotparser -import os -from time import sleep -from bs4 import BeautifulSoup from sqlalchemy import create_engine from config import DATABASE_URI from models import Base, Documents, Document_Tokens, Tokens from sqlalchemy.orm import sessionmaker -from sqlalchemy import create_engine -import datetime +import uuid engine = create_engine(DATABASE_URI) Base.metadata.create_all(engine) @@ -30,16 +22,11 @@ def build_index(): word = word.lower() token = session.query(Tokens).filter_by(token=word).first() if token is None: - token = Tokens(token=word) + token = Tokens(token=word, id=uuid.uuid4()) session.add(token) document_token = Document_Tokens(document_id=document.id, token_id=token.id) session.add(document_token) session.commit() - - # Foreach document, break into words - # Check if word exists in database - # Create if not exist - # Link to document if __name__ == "__main__":