# --- Chunk: tf-idf table construction + tf-idf-weighted keyword cosine ---
# NOTE(review): indentation was lost in this chunk; the nesting below is
# reconstructed from colons and statement order — confirm against the
# original file. `keywords_vector`, `sentences`, `stop_words`, `deal_data`,
# `word_tokenize`, `TextCollection` and `string` come from earlier in the
# file / its imports.
aspect_keywords.append(keywords_vector)

# Replace every punctuation character in each sentence text with a space.
sentence = []
for s in sentences:
    for i in s['text']:
        if i in string.punctuation:
            # if the character is punctuation, replace it with a space
            s['text'] = s['text'].replace(i, " ")
    sentence.append(s['text'])

# Tokenize the cleaned sentences and wrap them in an NLTK TextCollection
# so per-term tf-idf values can be queried against the whole corpus.
sents = [word_tokenize(sent) for sent in sentence]
corpus = TextCollection(sents)

# tf_idf[k] = list of tf-idf scores for the non-stopword tokens of sentence k.
tf_idf = []
for sen in sents:
    td = []
    for data in sen:
        elem = []  # NOTE(review): unused local — candidate for removal
        data = data.lower()
        if data not in stop_words:
            # print(data)
            td.append(corpus.tf_idf(data, corpus))
    tf_idf.append(td)

# Print cosine(seed keyword vector, other keyword vector) scaled by a tf-idf
# weight. NOTE(review): the term 'food' is hard-coded here — presumably a
# debugging leftover for a single aspect; confirm this is intentional.
for aspect in aspect_keywords:
    for vector in aspect[1:]:
        print(
            deal_data.cosine(aspect[0], vector) * corpus.tf_idf('food', corpus))
    print('\n')
# --- Chunk: per-sentence aspect scoring, cosine × tf-idf variant ---
# NOTE(review): indentation was lost in this chunk, and it reads a loop
# index `i` whose enclosing `for i in ...` header is not visible here; the
# statements from `sentences_vector = []` through `count_s.append(count)`
# are presumably the body of that per-sentence loop — confirm.

# Collect the embedding of every non-stopword, in-vocabulary token of
# sentence i.
sentences_vector = []
for w in sents[i]:
    w = w.lower()
    if w not in stop_words:
        try:
            # Reverse lookup: the key in words_index whose value equals w.
            word_index = list(words_index.keys())[list(
                words_index.values()).index(w)]
            sentences_vector.append(wordVectors[word_index])
        except ValueError:
            # w is out of vocabulary — skip it.
            continue

# For each aspect vector, count the sentence words whose
# cosine(aspect, word) * tf-idf(word) exceeds the 0.0025 threshold.
# NOTE(review): tf_idf[i] was built from ALL non-stopword tokens, while
# sentences_vector additionally drops out-of-vocabulary words, so index j
# may be misaligned between the two — verify upstream.
count = []
for aspect in aspect_vector:
    word_aspect_cosine = []
    ci = 0
    for j in range(len(sentences_vector)):
        data_td = deal_data.cosine(aspect, sentences_vector[j]) * tf_idf[i][j]
        word_aspect_cosine.append(data_td)
        if data_td > 0.0025:
            ci = ci + 1
    count.append(ci)
    aspect_cosine.append(word_aspect_cosine)
count_s.append(count)
# # print(aspect_cosine)
# cosine.append(aspect_cosine)

# Debug output: corpus / result sizes.
print(len(sents))
print(len(sentences))
print(len(count_s))
# count_a = 0
# --- Chunk: per-sentence aspect scoring, raw-cosine threshold variant ---
# NOTE(review): indentation was lost in this chunk; `s`, `s_w` and
# `sentences_vector` are presumably set up by an enclosing per-sentence
# loop that starts before this chunk — confirm.
aspect_cosine = []

# Append the embedding of every non-stopword, in-vocabulary token.
for w in s_w:
    w = w.lower()
    if w not in stop_words:
        try:
            word_index = list(words_index.keys())[list(
                words_index.values()).index(w)]
            sentences_vector.append(wordVectors[word_index])
        except ValueError:
            continue

# count = [sentence id, hits for aspect 0, hits for aspect 1, ...] where a
# "hit" is a word with cosine(aspect, word) > 0.70.
count = [s['id']]
for aspect in aspect_vector:
    word_aspect_cosine = []
    i = 0
    for word_vector in sentences_vector:
        # NOTE(review): the cosine is computed twice per word here — could
        # be hoisted into a local (kept as-is in this doc-only pass).
        word_aspect_cosine.append(deal_data.cosine(aspect, word_vector))
        if deal_data.cosine(aspect, word_vector) > 0.70:
            i = i + 1
    count.append(i)
    aspect_cosine.append(word_aspect_cosine)
count_s.append(count)
# print(aspect_cosine)
# cosine.append(aspect_cosine)
# for data in count_s: print(data)

count_a = 0
for i in range(len(sentences)):
    # NOTE(review): chunk is truncated here — the list comprehension that
    # starts below continues past the end of this chunk.
    index = [
# --- Chunk: per-sentence average cosine over each aspect's keyword set ---
# NOTE(review): indentation was lost in this chunk; `s`, `s_w` and
# `sentences_vector` are presumably set up by an enclosing per-sentence
# loop that starts before this chunk — confirm.

# Row layout: [sentence id, avg cosine for aspect 0, aspect 1, ...].
average_cosine = [s['id']]

# Append the embedding of every non-stopword, in-vocabulary token.
for w in s_w:
    w = w.lower()
    if w not in stop_words:
        try:
            word_index = list(words_index.keys())[list(
                words_index.values()).index(w)]
            sentences_vector.append(wordVectors[word_index])
        except ValueError:
            continue

# For each aspect (a list of keyword vectors), score every sentence word by
# its mean cosine to the aspect's keywords, then average over the words.
for data in aspect_keywords:
    word_cosine = []
    for word_vector in sentences_vector:
        sum_cosine = 0
        for i in range(len(data)):
            sum_cosine = sum_cosine + deal_data.cosine(
                data[i], word_vector)
        word_cosine.append(sum_cosine / len(data))
    # Guard against empty sentences (no in-vocabulary words).
    if len(sentences_vector) != 0:
        average_cosine.append(sum(word_cosine) / len(word_cosine))
cosine.append(average_cosine)
# print(cosine)
# compute accuracy: how should sentences with two or more aspects be counted?
# print(len(sentences))
# print(len(cosine))
# for i in range(len(sentences)):
#     print(cosine[i])
#     print(sentences[i])
# --- Chunk: aspect assignment by counting keyword hits per word ---
# NOTE(review): this chunk starts mid-loop — the `w = w.lower()` /
# `continue` below belong to an enclosing per-word loop whose header is
# not visible here; indentation is reconstructed — confirm.
w = w.lower()
if w not in stop_words:
    try:
        word_index = list(words_index.keys())[list(
            words_index.values()).index(w)]
        sentences_vector.append(wordVectors[word_index])
    except ValueError:
        continue

# For each aspect, count the sentence words matched by AT LEAST ONE of the
# aspect's keyword vectors with cosine > 0.7.
count = []
for data in aspect_keywords:
    word_cosine = []  # NOTE(review): never filled — dead local
    c = 0
    for word_vector in sentences_vector:
        # `label` acts as a boolean flag: did any keyword of this aspect
        # match this word above the threshold?
        label = set()
        for i in range(len(data)):
            if deal_data.cosine(data[i], word_vector) > 0.7:
                label.add(1)
        if 1 in label:
            c = c + 1
    count.append(c)
count_s.append(count)

for count in count_s:
    print(count)

count_a = 0
for i in range(len(sentences)):
    # Indexes of the aspect(s) that reached the maximal hit count for
    # sentence i (ties produce several indexes).
    index = [j for j, data in enumerate(count_s[i]) if data == max(count_s[i])]
    label_max = set()
    # NOTE(review): chunk is truncated here — the body of this loop
    # continues past the end of this chunk.
    for data in index:
# --- Chunk: per-sentence average cosine against each aspect vector ---
# NOTE(review): indentation was lost in this chunk; `s` is presumably the
# loop variable of an enclosing loop over `sentences` that starts before
# this chunk — confirm.
s_w = s['text'].split()
sentences_vector = []
# Row layout: [sentence id, avg cosine for aspect 0, aspect 1, ...].
average_cosine = [s['id']]

# Append the embedding of every non-stopword, in-vocabulary token.
for w in s_w:
    w = w.lower()
    if w not in stop_words:
        try:
            word_index = list(words_index.keys())[list(
                words_index.values()).index(w)]
            sentences_vector.append(wordVectors[word_index])
        except ValueError:
            continue

# Average cosine similarity between each aspect vector and all words of
# the sentence; empty sentences contribute no score.
for aspect in aspect_vector:
    sum_cosine = 0
    for word_vector in sentences_vector:
        sum_cosine = sum_cosine + deal_data.cosine(aspect, word_vector)
    if len(sentences_vector) != 0:
        average_cosine.append(sum_cosine / len(sentences_vector))
cosine.append(average_cosine)
# compute accuracy: how should sentences with two or more aspects be counted?
# print(len(sentences))
# print(len(cosine))
# for i in range(len(sentences)):
#     print(cosine[i])
#     print(sentences[i])

count = 0
count_empty = 0
# NOTE(review): chunk is truncated here — the body of this loop continues
# past the end of this chunk.
for i in range(len(sentences)):