def load_word_freq(project):
    """Load the cached word-frequency tables for *project* and for "all".

    Returns a tuple ``(wfrq_cache, allfrq_cache)`` of dicts mapping
    word -> count.  A missing or unreadable cache file yields an empty
    dict for that slot instead of raising.
    """
    wfrq_cache = {}
    allfrq_cache = {}
    proj_name = get_project_name(project)
    try:
        with open(freq_get_file_name(proj_name), "r") as f:
            wfrq_cache = json.load(f)
    # OSError: file missing/unreadable; ValueError: json.JSONDecodeError
    # (a ValueError subclass) for empty or malformed JSON.
    except (OSError, ValueError) as e:
        # Original message formatted the exception where the project
        # name belonged; report both explicitly.
        print("cannot load word-freq cache for project %s: %s" % (proj_name, e))
    try:
        with open(freq_get_file_name("all"), "r") as f:
            allfrq_cache = json.load(f)
    except (OSError, ValueError) as e:
        print("cannot load word-freq cache for all: %s" % e)
    return wfrq_cache, allfrq_cache
def project_word_freq():
    """Sum the word frequencies of every project returned by
    ``get_project()`` and write the combined table to the "all" cache
    file as JSON, sorted by descending count then alphabetically.

    NOTE(review): a later definition in this module re-declares
    ``project_word_freq`` (reading metadata/projects.txt), so that one
    wins at import time.
    """
    totals = defaultdict(int)
    for project_path in get_project():
        print(project_path)
        freq = get_word_frequency(project_path)
        for word, count in freq.items():
            totals[word] += count
    serialized = simplejson.dumps(
        totals, indent=4, item_sort_key=lambda item: (-item[1], item[0])
    )
    with open(freq_get_file_name("all"), "w") as out:
        out.write(serialized)
def get_word_frequency(project=""):
    """Count lower-cased word occurrences across all files under
    *project*.

    The table is serialized to the project's cache file as JSON
    (sorted by descending count, then word) and also returned as a
    dict mapping word -> count.
    """
    counts = defaultdict(int)
    for source_path in walk_dir(project):
        # Raw text -> cleaned text -> tokens, counted case-insensitively.
        cleaned = clean_text(get_text(source_path, get_all=True))
        for token in parse_words(cleaned):
            counts[token.lower()] += 1
    cache_path = freq_get_file_name(get_project_name(project))
    payload = simplejson.dumps(
        counts, indent=4, item_sort_key=lambda item: (-item[1], item[0])
    )
    with open(cache_path, "w") as out:
        out.write(payload)
    return counts
def project_word_freq():
    """Sum the word frequencies of every project listed (one path per
    line) in metadata/projects.txt and write the combined table to the
    "all" cache file as JSON, sorted by descending count then word.

    NOTE(review): this re-declares ``project_word_freq`` defined earlier
    in this module (that variant iterated ``get_project()`` instead of
    reading metadata/projects.txt); only this definition is live —
    consider deleting one of the two.
    """
    allfrq = defaultdict(int)
    with open("metadata/projects.txt", "r") as f:
        # Iterate the file lazily instead of slurping it with readlines().
        for line in f:
            path = line.strip()
            if not path:
                continue  # skip blank lines
            print(path)
            freq = get_word_frequency(path)
            for word, count in freq.items():
                allfrq[word] += count
    serialized = simplejson.dumps(
        allfrq, indent=4, item_sort_key=lambda i: (-i[1], i[0])
    )
    with open(freq_get_file_name("all"), "w") as out:
        out.write(serialized)