Begin adding PostgreSQL support instead of filesystem flat files

This commit is contained in:
rmgr 2024-03-01 21:12:40 +10:30
parent b43343e0ee
commit 24ee04c0ff
6 changed files with 80 additions and 13 deletions

14
src/index.py Executable file → Normal file
View file

@ -1,5 +1,6 @@
#!/usr/bin/python3
from sqlalchemy import create_engine
from config import DATABASE_URI
from models import Base, Website
from pathlib import Path
import argparse
import os
@ -7,6 +8,13 @@ import json
# TODO: investigate n-grams for "multi word" matching
# Words that are skipped when building the index; each word is compared
# against this list after it has been lowercased and stripped of punctuation.
ignored_words = ['a', 'the','is']
# Translation table that deletes every character remove_punctuation strips.
# The literal is a raw string so the backslash is an ordinary character rather
# than the start of the invalid "\," escape sequence (a SyntaxWarning on
# Python 3.12+). Built once at import time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', r'''!()-[]{};:'"\,<>./?@#$%^&*_~''')


def remove_punctuation(input_string):
    """Return *input_string* with punctuation characters removed.

    The characters removed are: ! ( ) - [ ] { } ; : ' " \\ , < > . / ? @ # $
    % ^ & * _ ~

    Characters outside that set (letters, digits, whitespace, and symbols
    such as ``+`` or ``=``) are left untouched.

    Args:
        input_string: The text to clean.

    Returns:
        A new string with every listed punctuation character deleted.
    """
    # A single translate() pass replaces one str.replace() call per character.
    return input_string.translate(_PUNCTUATION_TABLE)
def build_index():
with open(f"data/index.json", "w") as index:
# get a list of all content files
@ -20,6 +28,7 @@ def build_index():
content_words = content.split()
for word in content_words:
word = word.lower()
word = remove_punctuation(word)
if not word in ignored_words:
if not word in dictionary:
dictionary[word] = []
@ -42,3 +51,4 @@ if __name__ == "__main__":
args = parser.parse_args()
if args.rebuild:
build_index()