def analyze(self, text):
    """Sentence-split *text* and POS-tag every sentence.

    :param text: raw document text
    :return: tuple ``(sentences, words_per_sen, tags_per_sen)`` where the
        last two are parallel lists, one entry per sentence
    """
    sentences = tools.seperate_sentences(text)
    tagged = [tools.seperate_pog(sentence) for sentence in sentences]
    words_per_sen = [pair[0] for pair in tagged]
    tags_per_sen = [pair[1] for pair in tagged]
    return sentences, words_per_sen, tags_per_sen
def get_sens_words(self, text):
    """Split *text* into sentences and return each sentence's word list.

    Side effect: records the first POS tag seen for every word in
    ``self.words_tags_dict`` — existing entries are never overwritten.

    :param text: raw document text
    :return: list of word lists, one per sentence
    """
    sens = tools.seperate_sentences(text)
    sens_words = []
    for line in sens:
        words, tags = tools.seperate_pog(line)
        for word, tag in zip(words, tags):
            # setdefault keeps the first tag encountered, matching the
            # original "if word not in dict" guard without a double lookup
            self.words_tags_dict.setdefault(word, tag)
        sens_words.append(words)
    return sens_words
def analyze(self, text):
    """Sentence-split *text*, drop boilerplate, and POS-tag the rest.

    Sentences containing the marker "原标题" (re-post "original title"
    boilerplate) are discarded before tagging.

    :param text: raw document text
    :return: tuple ``(kept_sentences, words_per_sen, tags_per_sen)``
    """
    kept = [s for s in tools.seperate_sentences(text) if "原标题" not in s]
    pairs = [tools.seperate_pog(s) for s in kept]
    words_per_sen = [p[0] for p in pairs]
    tags_per_sen = [p[1] for p in pairs]
    return kept, words_per_sen, tags_per_sen
def preprocess(self, text):
    """Sentence-split *text*, skipping the first sentence and boilerplate.

    The sentence at index 0 is always dropped (presumably the headline —
    confirm against callers), as is any sentence containing the re-post
    marker "原标题". The survivors are POS-tagged.

    :param text: raw document text
    :return: tuple ``(kept_sentences, words_per_sen, tags_per_sen)``
    """
    sentences = tools.seperate_sentences(text)
    kept, words_per_sen, tags_per_sen = [], [], []
    for sentence in sentences[1:]:
        if "原标题" in sentence:
            continue
        kept.append(sentence)
        words, tags = tools.seperate_pog(sentence)
        words_per_sen.append(words)
        tags_per_sen.append(tags)
    return kept, words_per_sen, tags_per_sen
def build_graph(self, sentences):
    """Build the word/sentence co-occurrence graphs for *sentences*.

    Only words whose POS tag passes the ``self.targets`` filter are kept:
    a tag matches when ``str(tag)`` is listed in ``self.targets``, when
    ``"all"`` is in ``self.targets`` (keep everything), or when the tag
    string contains ``"n"`` and ``"all_n"`` is in ``self.targets``
    (keep noun-like tags).

    :param sentences: list of sentence strings
    :return: tuple ``(entry_graph, sent_graph)`` where ``entry_graph``
        maps word -> set of sentence indices containing it and
        ``sent_graph`` maps sentence index -> set of kept words
    """
    entry_graph, sent_graph = {}, {}
    for i, sen in enumerate(sentences):
        sent_graph[i] = set()
        words, tags = tools.seperate_pog(sen)
        for word, tag in zip(words, tags):
            tag_str = str(tag)
            if (tag_str in self.targets
                    or "all" in self.targets
                    or ("n" in tag_str and "all_n" in self.targets)):
                sent_graph[i].add(word)
                # setdefault replaces the manual "init set on first sight" dance
                entry_graph.setdefault(word, set()).add(i)
    return entry_graph, sent_graph
# NOTE(review): `abstract_path`, `lines`, `ftools`, `tools` and `Graph_Vec`
# are not defined in this chunk — presumably bound earlier in the file; confirm.
absts = ftools.read_lines(abstract_path)
# For each abstract sentence, find the index of the most similar essay line.
res = []
for i in range(len(absts)):
    max_v, max_index = 0, 0  # falls back to line 0 when every similarity is <= 0
    for j in range(len(lines)):
        v = tools.sim(absts[i], lines[j])
        if v > max_v:
            max_v = v
            max_index = j
    res.append(max_index)
print(res)
# POS-tag every essay line: `sens` collects word lists, `tags` the tag lists.
sens, tags = [], []
for line in lines:
    tmp0, tmp1 = tools.seperate_pog(line)
    sens.append(tmp0)
    tags.append(tmp1)
# Vectorize the sentences and the whole essay, then print the vectors of the
# three abstract-matched sentences and their similarity to the essay vector.
gv = Graph_Vec()
sensv, essayv = gv.vectorize(sens, tags)
dist = tools.Dist()
# assumes `res` has at least 3 entries (i.e. >= 3 abstract lines) — TODO confirm
print(sensv[res[0]])
print(sensv[res[1]])
print(sensv[res[2]])
print(essayv)
print(dist.sim(sensv[res[0]], essayv))
print(dist.sim(sensv[res[1]], essayv))
print(dist.sim(sensv[res[2]], essayv))
print("-----")
# NOTE(review): the statements below are the tail of a `vectorize` method whose
# start lies outside this view; the nesting reconstructed here (word loop `j`
# inside sentence loop `i`) is inferred — confirm against the full file.
                if sens_words[i][j] in other_index_words.keys():
                    other_index = other_index_words[sens_words[i][j]]
                    # bump this word's cluster-label bucket; the `ks[1] + ks[0]`
                    # offset into the sentence vector is not visible here — verify
                    tmp[labels_other_graph[other_index] + ks[1] + ks[0]] += 1
                    essay_vector[labels_other_graph[other_index]] += 1
            sens_vectors.append(tmp)
        return sens_vectors, essay_vector


if __name__ == "__main__":
    # sen2v = Sen2Vec()
    # sen2v.train()
    # doc2v= Doc2Vec()
    # doc2v.train()
    from src.tools import FileTools as ftools
    from src.tools import Tools as tools
    import Dir
    # Smoke test: POS-tag one training document, vectorize it, print results.
    sens = ftools.read_lines(Dir.res + "/cleandata_604/news/training_4.txt")
    myvec = MyVector()
    sens_words, sens_pog = [], []
    for line in sens:
        w, p = tools.seperate_pog(line)
        sens_words.append(w)
        sens_pog.append(p)
    # `sens` is rebound from raw lines to per-sentence vectors here.
    sens, essay = myvec.vectorize(sens_words, sens_pog)
    print(sens[0])
    for ss in sens:
        print(ss)
    print(essay)