Exemplo n.º 1
0
def ExtractFeatureWords(filename):
    """
    Run the keyword-extraction pipeline on a single input file.

    The work is delegated, in order, to ``Reader.readfile``,
    ``Segmentation.SplitCluster`` and the ``Candidate`` helpers: candidates
    are built, given length / support / position scores, combined via each
    candidate's ``CalScore``, sorted, cut by score and finally de-duplicated.

    :param filename: value passed straight to ``Reader.readfile``
    :return: whatever ``Candidate.ExtractedWordDeleteRepetition`` returns for
             the score-cut candidates
    """
    raw = Reader.readfile(filename)
    clusters = Segmentation.SplitCluster(raw)

    # Build the candidate structure from the de-duplicated clusters.
    candidate = Candidate.BuildClass(Candidate.DeleteRepetition(clusters))

    # Partial scores. NOTE(review): 2 and 15 look like length bounds for
    # CalLenScore -- confirm against the Candidate module.
    Candidate.CalLenScore(candidate, 2, 15)
    Candidate.CalSupScore(candidate, clusters)
    Candidate.CalPosScore(candidate, clusters)

    # Let every candidate compute its overall score.
    for group in candidate:
        for entry in group:
            entry.CalScore()

    ranked = Candidate.CandidateListSort(Candidate.GenCandidateList(candidate))

    # Cut by score with a threshold of 2.9. Alternatives that were tried here
    # before: Candidate.CutByRank(ranked, 0.5) and
    # Candidate.CutByRankAndScore(ranked, 0.05, 2.9).
    kept = Candidate.CutByScore(ranked, 2.9)

    return Candidate.ExtractedWordDeleteRepetition(kept)
Exemplo n.º 2
0
def ExtractedWordDeleteRepetition(extracted_word):
    """
    Collect the keywords of all clusters into one list without duplicates.

    Each cluster is a sequence of entries, and the keyword is the first
    element (``entry[0]``) of every entry. Keywords are returned in order of
    first appearance.

    Assumes keywords are hashable (e.g. strings), because a set is used to
    track which ones have already been emitted.

    :param extracted_word: per-cluster sequences of entries whose first
                           element is the keyword (may contain duplicates)
    :return: list of unique keywords, in first-seen order
    """
    extracted_result = []
    # Set membership is O(1); checking against the growing result list made
    # the original quadratic in the number of keywords.
    seen = set()
    for cluster in extracted_word:
        for entry in cluster:
            word = entry[0]
            if word not in seen:
                seen.add(word)
                extracted_result.append(word)
    return extracted_result

if __name__ == '__main__':
    # Script entry point: run the scoring pipeline on a fixed sample file and
    # print the sorted candidate list, one blank line before each group.
    result = Reader.readfile('result38.bin')
    result_split = Segmentation.SplitCluster(result)

    # Build candidates from the de-duplicated clusters.
    delete_result = DeleteRepetition(result_split)
    candidate = BuildClass(delete_result)

    # Partial scores first, then each candidate's combined score.
    CalLenScore(candidate, 2, 15)
    CalSupScore(candidate, result_split)
    CalPosScore(candidate, result_split)
    for group in candidate:
        for entry in group:
            entry.CalScore()

    candidate_list = GenCandidateList(candidate)
    sorted_candidate_list = CandidateListSort(candidate_list)

    # Dump every group of the sorted list, separated by an empty line.
    for group in sorted_candidate_list:
        print("")
        for entry in group:
            print(entry)