示例#1
0
def project_typo(project="", level=2):
    """Collect unknown words for each file of ``project`` and save them.

    Args:
        project: Path of a single file, or of a directory that is walked
            with ``walk_dir``.
        level: Passed through unchanged to ``pre_filter`` and
            ``get_all_words``.

    The per-file results are handed to ``save_typo_by_file``; nothing is
    returned.
    """
    print(project)
    pro_freq, all_freq = load_word_freq(project)

    # A single file is processed on its own; anything else is walked.
    paths = [project] if os.path.isfile(project) else walk_dir(project)

    # Maps file name -> list of lower-cased unknown words kept for that file.
    typos_by_file = {}
    for path in paths:
        if not pre_filter(path, level=level):
            continue
        words = get_all_words(path, level=level)
        if not words:
            continue
        unknown = get_unknown_words(spell_get_unknown_words(words), user_dic)
        # Drop words that reach a count of 8 in the project frequency table
        # or 24 in the overall one.
        kept = [
            word
            for word in (raw.lower() for raw in unknown)
            if pro_freq.get(word, 0) < 8 and all_freq.get(word, 0) < 24
        ]
        if kept:
            typos_by_file[path] = kept

    # NOTE: save_all_typo_words(typos_by_file, project=project) is an
    # alternative sink that is currently disabled.
    save_typo_by_file(typos_by_file, project=project)
示例#2
0
def project_typo(project="", level=2):
    """Interactively step through the files of ``project``.

    Each file accepted by ``pre_filter`` is passed to ``parse_text``, its
    name is printed, and the loop then blocks on ``input()`` before moving
    on to the next file.

    Args:
        project: Root handed to ``walk_dir``.
        level: Passed through unchanged to ``pre_filter`` and ``parse_text``.
    """
    print(project)
    for path in walk_dir(project):
        if pre_filter(path, level=level):
            parse_text(path, level=level)
            print(path)
            # Wait for a line on stdin before continuing; the text is unused.
            input()
示例#3
0
def project_docs(project):
    """Run ``find_single_symbol`` on every non-vendored Go file of ``project``.

    Files whose path contains ``"vendor/"`` are skipped; of the rest, only
    those for which ``get_file_extension`` returns ``"go"`` are processed.
    """
    # NOTE: further exclusions (paths containing "generated/" or
    # "node_modules/", names ending in "_test.go" or ".pb.go") were tried
    # here and are currently disabled.
    for path in walk_dir(project):
        vendored = "vendor/" in path
        if not vendored and get_file_extension(path) == "go":
            find_single_symbol(path)
示例#4
0
def get_word_frequency(project=""):
    """Count word occurrences across ``project`` and write them out as JSON.

    Every file yielded by ``walk_dir`` is read with ``get_text_simple`` and
    split with ``parse_raw_words``. The counts are serialised with entries
    ordered by descending count, then by word, and written to the path
    given by ``freq_simple_get_file_name`` for the project name.

    Returns:
        The ``defaultdict(int)`` mapping each word to its count.
    """
    counts = defaultdict(int)
    for path in walk_dir(project):
        for word in parse_raw_words(get_text_simple(path)):
            counts[word] += 1

    def most_frequent_first(item):
        # item is a (word, count) pair; ties on count fall back to the word.
        word, count = item
        return (-count, word)

    proj_name = get_project_name(project)
    # Serialise before opening the target so a failed dump leaves any
    # existing file untouched.
    serialized = simplejson.dumps(
        counts,
        indent=4,
        item_sort_key=most_frequent_first,
    )
    with open(freq_simple_get_file_name(proj_name), "w") as out:
        out.write(serialized)
    return counts
示例#5
0
def run_file_ext_statistics(project, cache):
    """Count file extensions in ``project`` and print the tally.

    Args:
        project: Root handed to ``walk_dir``; a falsy value makes the
            function return immediately without printing anything.
        cache: Mapping that is incremented in place for every extension
            seen, in addition to the per-project tally. It must accept
            ``cache[ext] += 1`` for extensions it has not seen before.
    """
    if not project:
        return

    per_project = defaultdict(int)
    for path in walk_dir(project):
        if not is_qualified_file(path):
            continue
        extension = get_file_extension(path)
        # Extension-less files were inspected once (by printing their
        # paths); nothing of value turned up, so they are counted as-is.
        per_project[extension] += 1
        cache[extension] += 1

    print(project)
    print_freq_dict(data=per_project)