def mainToDatabase(c):
    """Load the 'main' index and insert one computed record per entry.

    The index is read via ``interact_files.loadIndexFromFile('main')``.
    The value stored under the ``'& LAST DOC &'`` key is passed to
    ``calculateWeight`` as the document count. Only entries whose value
    is a ``dict`` are processed; every other entry is skipped.

    Args:
        c: Handle forwarded unchanged to ``insertIntoDb`` (presumably a
            database cursor or connection -- confirm against the caller).
    """
    main_index = interact_files.loadIndexFromFile('main')
    doc_total = main_index['& LAST DOC &']

    for token, postings in main_index.items():
        # Non-dict values (e.g. the '& LAST DOC &' entry, presumably a
        # plain counter) carry nothing to weigh.
        if not isinstance(postings, dict):
            continue
        insertIntoDb(c, calculateWeight(token, postings, doc_total))
# NOTE(review): the statements down to the ANALYTICS report loop are the tail of
# a definition that begins outside this view (presumably main(index, imgIndex)
# -- confirm). The original indentation was lost; one body level is assumed.
    # Elapsed time since tokenTime (presumably captured with time.time()
    # earlier in this definition -- confirm).
    ANALYTICS['tokenTime'] = time.time() - tokenTime

    # Time the save phase: store the document counter under the sentinel key,
    # then write both indexes out through interact_files.
    saveTime = time.time()
    index['& LAST DOC &'] = docCountCont
    interact_files.saveIndexToFile(index, 'main')
    interact_files.saveIndexToFile(imgIndex, 'img')
    ANALYTICS['saveTime'] = time.time() - saveTime

    # Print every collected metric as "name : value".
    for i, k in ANALYTICS.items():
        print("{} : {}".format(i, k))


# Script entry point. Command-line handling:
#   no arguments      -> load the stored 'main' and 'img' indexes
#   '-r' or 'reload'  -> start from empty indexes and call
#                        interact_files.resetIndexFiles()
#   anything else     -> raise Warning
# Only sys.argv[1] is inspected; any further arguments are ignored.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # Time how long loading the two stored indexes takes.
        loadTime = time.time()
        index = interact_files.loadIndexFromFile('main')
        imgIndex = interact_files.loadIndexFromFile('img')
        ANALYTICS['load'] = time.time() - loadTime
    elif sys.argv[1] in {'-r', 'reload'}:
        # The bare string below is an expression statement acting as a note,
        # not a docstring; it has no runtime effect.
        """ Index isn't necessarily needed until we need to merge...Maybe better to keep it out of memory, but either way is okay"""
        index = defaultdict(Counter)  # {token : {docID: priority} }
        # NOTE(review): the shape comment on the next line describes a list of
        # (srcurl, priority) tuples, but the container is a Counter -- confirm
        # which is intended.
        imgIndex = defaultdict( Counter)  # {(title, imgAlt): [(srcurl, priority)]}
        interact_files.resetIndexFiles()
    else:
        raise Warning("Invalid command line input")
    # Both accepted branches bind index and imgIndex before this call.
    main(index, imgIndex)