# Shared state for the run: the record store plus the finder and convertor,
# all built from the same configuration.
db = Storage(cfg)
db.load()
finder = Finder(cfg)
convertor = Convertor(cfg)


def process_file(f):
    """Record and convert a single input file unless it is already known.

    The file is identified by the value of ``Storage.file_checksum(f)``.
    If that checksum is already a key of ``db.data`` a message is printed
    and nothing else happens; otherwise a ``{"pdf": f}`` record is stored
    under the checksum and the file is handed to ``convertor.convert``.

    Args:
        f: Path of the file to process, as yielded by ``finder.find_all()``.
    """
    checksum = Storage.file_checksum(f)
    if checksum in db.data:
        print("file {} already processed ({})".format(f, checksum))
        return

    # Store the record before converting, as the original script did.
    db.store(checksum, {"pdf": f})
    convertor.convert(f)


for f in finder.find_all():
    process_file(f)

# NOTE(review): presumably reloads so the records written during the pass
# above are visible below — confirm against Storage.load().
db.load()

for f in glob.glob(os.path.join(cfg["html_out"], "*.html")):
    # The HTML output is keyed by checksum: "<html_out>/<checksum>.html".
    # Take the key from the base name instead of stripping a hard-coded
    # "html/" prefix, so any output directory and path separator works.
    checksum = os.path.splitext(os.path.basename(f))[0]

    # Raises KeyError when an HTML file has no matching record in the store.
    data = db.data[checksum]

    # NOTE(review): `dirty` is not read in the visible part of the script;
    # presumably later code uses it to decide whether to persist `data`.
    dirty = False
    if "title" not in data:
        print("title not cached")
        dirty = True
        title = Parser().parse(f)
        data["title"] = title