def TFIDF(df, word_list):
    """Build a TF-IDF matrix using the project's TF/IDF helpers.

    NOTE(review): the ``df`` argument is immediately overwritten by the
    contents of "classification_simple_test.csv", so whatever the caller
    passes in is ignored. Confirm whether that is intentional.

    :param df: unused in practice (rebound before it is read).
    :param word_list: vocabulary; one matrix column is produced per entry,
        in the same order.
    :return: float numpy array of shape (number of records, len(word_list))
        where cell (i, j) is TF(word j in record i) * IDF(word j).
    """
    df = read_file("classification_simple_test.csv", [0])

    # Each row's first field is a whitespace-separated string of tokens.
    documents = [row[0].split() for row in np.array(df).tolist()]

    # Presumably one IDF weight per entry of word_list, indexable by
    # position -- verify against T.IDF.
    idf_weights = T.IDF(documents, word_list)

    n_docs, n_words = len(documents), len(word_list)
    matrix = np.empty(shape=(n_docs, n_words))

    for doc_idx, tokens in enumerate(documents):
        term_freq = T.TF(tokens)
        for word_idx, term in enumerate(word_list):
            # Words absent from this record contribute a TF of 0.
            weight = term_freq.get(term, 0) * idf_weights[word_idx]
            matrix[doc_idx, word_idx] = weight

    return matrix
def __init__(self, span, d, iterations, index):
    """Store the configuration and precompute TF-IDF keywords for one document.

    :param span: length of the co-occurrence window (stored as ``kspan``).
    :param d: stored as-is on ``self.d``; its meaning is not visible here
        (presumably a damping factor -- TODO confirm).
    :param iterations: stored on ``self.iteration``.
    :param index: position of the document in the corpus whose TF-IDF
        vector and keywords are computed.
    """
    self.kspan = span  # length of the co-occurrence window
    self.d = d
    self.iteration = iterations

    # ---- TF-IDF initialisation ---- #
    corpus = dataset.read_sogou()
    # Replace every raw document, in place, with its word-segmented form.
    for position, raw_doc in enumerate(corpus):
        corpus[position] = TFIDF.cut_by_words(raw_doc)

    data, vocabulary = TFIDF.create_VSM(corpus)
    self.VSM = vocabulary
    print("Create VSM Over !")

    # Build the IDF table, then the TF-IDF vector of the selected document.
    self.idf = TFIDF.IDF(data, self.VSM)
    selected_doc = data[index]
    self.tfidf = TFIDF.TF_IDF(selected_doc, self.VSM, self.idf)
    self.keywords_doc = TFIDF.extract_keywords_tfidf(self.tfidf, self.VSM)
    print("Got the keywors from the docs !")