def project_typo(project="", level=2):
    """Collect suspicious words per file under ``project`` and save them.

    ``project`` is treated as a single file when ``os.path.isfile`` says so;
    otherwise it is handed to ``walk_dir``. Each candidate file must pass
    ``pre_filter`` and yield words from ``get_all_words``. The words are then
    narrowed by ``spell_get_unknown_words`` and ``get_unknown_words`` (with
    ``user_dic``), lower-cased, and dropped when they occur at least 8 times
    in the project frequency table or at least 24 times in the global one.
    Files with remaining words are passed to ``save_typo_by_file``.

    Args:
        project: Path of a single file or of a project to walk.
        level: Forwarded unchanged to ``pre_filter`` and ``get_all_words``.

    Returns:
        None. The result is handed to ``save_typo_by_file``.
    """
    print(project)
    pro_freq, all_freq = load_word_freq(project)

    # A lone file is wrapped in a list so the loop below handles both cases.
    candidates = [project] if os.path.isfile(project) else walk_dir(project)

    # Maps file name -> list of words still flagged after frequency filtering.
    file_typo = {}
    for file in candidates:
        if not pre_filter(file, level=level):
            continue
        words = get_all_words(file, level=level)
        if not words:
            continue

        spell_unknown = spell_get_unknown_words(words)
        unknown = get_unknown_words(spell_unknown, user_dic)

        flagged = []
        for raw in unknown:
            word = raw.lower()
            # Frequently used words are skipped (presumably deliberate
            # project vocabulary rather than typos).
            if pro_freq.get(word, 0) >= 8 or all_freq.get(word, 0) >= 24:
                continue
            flagged.append(word)

        if flagged:
            file_typo[file] = flagged

    # The alternative save_all_typo_words(file_typo, project=project) call is
    # disabled in this version; results are saved per file instead.
    save_typo_by_file(file_typo, project=project)
def project_typo(project="", level=2):
    """Step through the files of ``project`` interactively.

    Every path produced by ``walk_dir`` that passes ``pre_filter`` is handed
    to ``parse_text``; its name is then printed and the function blocks on
    ``input()`` until the user presses Enter.

    NOTE(review): another ``project_typo`` definition is visible in this
    source; if both live in the same module, the later one wins -- confirm.

    Args:
        project: Path handed to ``walk_dir``.
        level: Forwarded unchanged to ``pre_filter`` and ``parse_text``.
    """
    print(project)
    for file in walk_dir(project):
        if pre_filter(file, level=level):
            parse_text(file, level=level)
            print(file)
            # Pause until the user confirms; the typed text is discarded.
            input()
def project_docs(project):
    """Run ``find_single_symbol`` on every non-vendored Go file of ``project``.

    A path is processed when it does not contain ``"vendor/"`` and
    ``get_file_extension`` reports ``"go"`` for it.

    Args:
        project: Path handed to ``walk_dir``.
    """
    # Further exclusions ("generated/", "node_modules/", "_test.go" and
    # ".pb.go" files) existed as commented-out checks and are not active.
    for path in walk_dir(project):
        if "vendor/" not in path and get_file_extension(path) == "go":
            find_single_symbol(path)
def get_word_frequency(project=""):
    """Count word occurrences across ``project`` and dump them as JSON.

    Text is read from every file yielded by ``walk_dir`` with
    ``get_text_simple`` and split with ``parse_raw_words``. The counts are
    serialised with ``simplejson`` (4-space indent, highest count first, ties
    ordered by word) and written to the path returned by
    ``freq_simple_get_file_name`` for the project's name.

    Args:
        project: Path handed to ``walk_dir`` and ``get_project_name``.

    Returns:
        The ``defaultdict(int)`` mapping each word to its count.
    """

    def by_count_then_word(item):
        # item is a (word, count) pair: descending count, then ascending word.
        return (-item[1], item[0])

    counts = defaultdict(int)
    for path in walk_dir(project):
        for token in parse_raw_words(get_text_simple(path)):
            counts[token] += 1

    name = get_project_name(project)
    payload = simplejson.dumps(counts, indent=4, item_sort_key=by_count_then_word)
    with open(freq_simple_get_file_name(name), "w") as out:
        out.write(payload)
    return counts
def run_file_ext_statistics(project, cache):
    """Tally file extensions in ``project`` and print the tally.

    Each path from ``walk_dir`` accepted by ``is_qualified_file`` bumps the
    counter of its extension both in a per-call table and in ``cache``.
    Afterwards the project name and the per-call table are printed. Nothing
    happens when ``project`` is falsy.

    Args:
        project: Path handed to ``walk_dir``.
        cache: Mapping updated in place with ``cache[ext] += 1``; presumably
            a ``defaultdict(int)`` or similar shared across calls -- verify
            against the caller.
    """
    if project:
        ext_counts = defaultdict(int)
        for path in walk_dir(project):
            if is_qualified_file(path):
                ext = get_file_extension(path)
                # Files without an extension were inspected once; nothing of
                # value was found, so they are counted like any other.
                ext_counts[ext] += 1
                cache[ext] += 1
        print(project)
        print_freq_dict(data=ext_counts)