Initial commit
This commit is contained in:
commit
f36ab2fbfb
5 changed files with 144 additions and 0 deletions
36
src/index.py
Executable file
36
src/index.py
Executable file
|
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
|
||||
# Words left out of the index; each content word is lowercased before being
# checked against this list.
ignored_words = ['a', 'the','is']
|
||||
|
||||
def build_index(content_dir="data/content", index_path="data/index.json",
                ignored=None):
    """Build the word -> URL index from the content files and save it as JSON.

    Every ``*.txt`` file found recursively under *content_dir* is read: its
    first line is taken as the page URL (surrounding whitespace stripped) and
    the rest as the page text. The text is split on whitespace, each word is
    lowercased, and every word not in *ignored* is mapped to the list of
    URLs whose text contains it. Each URL appears at most once per word.

    Args:
        content_dir: Directory searched recursively for ``*.txt`` files.
        index_path: File the JSON index is written to; any existing file is
            overwritten once the index has been built successfully.
        ignored: Iterable of lowercase words to leave out of the index.
            When ``None`` the module-level ``ignored_words`` is used.

    Raises:
        OSError: If a content file cannot be read or the index file cannot
            be written.
    """
    skip = frozenset(ignored_words if ignored is None else ignored)

    # word -> {url: None}; the inner dict acts as an insertion-ordered set,
    # so de-duplicating a URL is O(1) instead of a scan over a list.
    postings = {}

    # Sort the paths so the generated index does not depend on the order in
    # which the filesystem happens to enumerate the files.
    for path in sorted(Path(content_dir).rglob("*.txt")):
        with open(path) as content_file:
            url = content_file.readline().strip()
            words = content_file.read().split()
        for word in words:
            word = word.lower()
            if word not in skip:
                postings.setdefault(word, {})[url] = None

    index = {word: list(urls) for word, urls in postings.items()}

    # Only open (and thereby truncate) the index file after all content has
    # been read, so a failure above leaves the previous index intact.
    with open(index_path, "w") as index_file:
        index_file.write(json.dumps(index))
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: the index is rebuilt only when the
    # -r/--rebuild flag is given; without it the script does nothing.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-r',
        "--rebuild",
        action="store_true",
        help="Blow away the index and rebuild",
    )
    options = cli.parse_args()

    if options.rebuild:
        build_index()
|
||||
Loading…
Add table
Add a link
Reference in a new issue