def cal_words_positions(files):
    """Index every space-separated word found in ``files``.

    Each file is read in full and split on single spaces. The first time
    a given word is encountered, ``KMP.positions(content, word)`` is
    called on the content of the file being processed and the result is
    stored. A word already seen in an earlier file is not searched again,
    so its stored positions refer to the first file that contained it.

    Args:
        files: Iterable of file paths, each opened in text mode.

    Returns:
        A list of ``{"word": <word>, "pos": <KMP.positions result>}``
        dicts, in order of first appearance of each word.

    Raises:
        OSError: If one of the paths cannot be opened or read.
    """
    positions_by_word = {}
    for path in files:
        # The context manager releases the handle even when reading
        # fails; the search below runs after the file is closed.
        with open(path, "r") as handle:
            content = handle.read()
        # NOTE(review): split(" ") keeps empty strings (consecutive
        # spaces, empty file) and leaves newlines attached to words.
        # Kept as-is to preserve the existing output; confirm intent.
        for word in content.split(" "):
            if word not in positions_by_word:
                positions_by_word[word] = KMP.positions(content, word)
    return [
        {"word": word, "pos": positions}
        for word, positions in positions_by_word.items()
    ]
def word_result(freqs, word):
    """Rank files by how often ``word`` occurs in them.

    Args:
        freqs: Iterable of pairs whose first item is a file identifier
            and whose second item is an iterable of
            ``{"word": ..., "pos": ...}`` dicts for that file.
        word: The query. If it contains no space it is looked up in the
            per-file dicts; otherwise the file content is fetched via
            ``File.File(...).get_content()`` and searched with
            ``KMP.positions``.

    Returns:
        A list of ``{"file": ..., "num": ..., "pos": ...}`` dicts, one
        per entry of ``freqs``, ordered by ``num`` descending. Entries
        with equal ``num`` keep their input order.
    """
    is_phrase = " " in word
    ranking = []
    for entry in freqs:
        path = entry[0]
        if is_phrase:
            hits = KMP.positions(File.File(path).get_content(), word)
            count = len(hits)
        else:
            # Defaults apply when the word is absent from this file.
            count, hits = 0, []
            # No early exit: if the word is listed more than once, the
            # last matching record determines the result.
            for record in entry[1]:
                if record["word"] == word:
                    count = len(record["pos"])
                    hits = record["pos"]
        ranking.append({"file": path, "num": count, "pos": hits})
    return sorted(ranking, key=lambda item: item["num"], reverse=True)