def sentence_score(sentence):
    """Compute an overall sentiment score for *sentence*.

    The sentence is cut into sub-sentences; each one is segmented,
    stop words are removed, and hits against ``posdict``/``negdict``
    are counted.  Modifier words between two sentiment words are folded
    into the running count via ``match``.  Per-clause (pos, neg) pairs
    from ``transform_to_positive_num`` are summed and the final score
    is ``total_pos - total_neg``.

    Fix vs. original: ``"!".decode("utf-8")`` is Python-2-only — in
    Python 3 ``str`` has no ``decode`` method, so every exclamation
    mark raised AttributeError.  Plain text literals are used instead.
    """
    final_score = []
    cuted_review = tp.cut_sentence(sentence)  # cut sentence into sub-sentences
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # word segmentation
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stop words
        i = 0         # index of the word currently scanned
        s = 0         # index just past the most recent sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # fold in modifiers between the previous sentiment word
                # and this one
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # an exclamation mark marks the clause end: boost the nearest
            # preceding sentiment word (scanning backwards) by 2
            elif word == "!" or word == "!":  # was "!".decode("utf-8") — Py2-only
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        final_score.append(transform_to_positive_num(poscount, negcount))
    # Accumulate the per-clause (pos, neg) pairs.
    pos_result, neg_result = 0, 0
    for res1, res2 in final_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final score
    return result
def single_review_sentiment_score(comment_sent):
    """Return the sentiment score of one comment, rounded to 1 decimal.

    Each clause produced by ``tp.cut_sentence`` is segmented and
    stop-word-filtered, then scanned once: dictionary hits add 1 to the
    matching polarity (with ``match`` applied to the modifier words that
    precede the sentiment word), and an exclamation mark adds 2 to the
    polarity of the nearest preceding sentiment word.  The per-clause
    (pos, neg) pairs are summed and the result is ``pos - neg``.
    """
    clause_scores = []
    for clause in tp.cut_sentence(comment_sent):
        tokens = tp.del_stopwords(tp.segmentation(clause))[:]
        pos = 0           # positive score for this clause
        neg = 0           # negative score for this clause
        last_senti = 0    # index just past the most recent sentiment word
        for idx, token in enumerate(tokens):
            if token in posdict:
                pos += 1
                # apply the modifiers seen since the last sentiment word
                for modifier in tokens[last_senti:idx]:
                    pos = match(modifier, pos)
                last_senti = idx + 1
            elif token in negdict:
                neg += 1
                for modifier in tokens[last_senti:idx]:
                    neg = match(modifier, neg)
                last_senti = idx + 1
            elif token == "!" or token == "!":
                # exclamation mark: scan backwards and boost the nearest
                # sentiment word by 2, then stop
                for prev in tokens[::-1]:
                    if prev in posdict:
                        pos += 2
                        break
                    if prev in negdict:
                        neg += 2
                        break
        clause_scores.append(transform_to_positive_num(pos, neg))
    # Sum positive and negative totals over all clauses.
    total_pos, total_neg = 0, 0
    for p, n in clause_scores:
        total_pos += p
        total_neg += n
    return round(total_pos - total_neg, 1)
def single_review_sentiment_score(weibo_sent):
    """Return the sentiment score of one weibo post, rounded to 1 decimal.

    Mirrors the comment scorer: per clause, count ``posdict``/``negdict``
    hits, fold modifier words in via ``match``, and let an exclamation
    mark add 2 to the nearest preceding sentiment word.  Per-clause
    (pos, neg) pairs are summed; the score is ``pos - neg``.

    Fix vs. original: ``"!".decode("utf-8")`` is Python-2-only — in
    Python 3 ``str`` has no ``decode`` method, so every exclamation
    mark raised AttributeError.  Plain text literals are used instead.
    """
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(weibo_sent)  # split into clauses
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # word segmentation
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stop words
        i = 0         # index of the word currently scanned
        s = 0         # index just past the most recent sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # fold in modifiers between the previous sentiment word
                # and this one
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # exclamation mark ends the clause: boost the nearest
            # preceding sentiment word by 2
            elif word == "!" or word == "!":  # was "!".decode("utf-8") — Py2-only
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # Accumulate the per-clause (pos, neg) pairs.
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final score of this weibo post
    result = round(result, 1)
    return result
def calculate_score(self, content):
    """Score the sentiment of *content* as a weighted sum over clauses.

    Each clause is segmented and stop-word-filtered; degree/negation
    words (inverse/ish/more/very/most dictionaries) multiply a clause
    ``prefix``, other words are looked up via ``self.word_search`` and
    add their weight to the clause ``score``.  The clause contributes
    ``prefix * score`` to the total.  If any key word was seen and the
    total is clearly negative, ``self.weight_key_word`` is added.

    Fix vs. original: the final condition used bitwise ``&``
    (``isKeyWord & (total_score < -0.01)``) — precedence-fragile and
    unidiomatic for boolean logic; replaced with ``and``.
    Dead commented-out debug code removed.
    """
    total_score = 0.0
    has_key_word = False  # becomes True once any key word is seen
    # Score each clause independently.
    for cut_context in tp.cut_sentence(content):
        words = tp.segmentation(cut_context)
        words = self.del_stop_words(words)  # drop stop words
        score = 0.0   # sum of sentiment-word weights in this clause
        prefix = 1.0  # multiplier accumulated from degree/negation words
        for word in words:
            if word in self.inverse_dict:
                prefix *= self.weight_inverse
            elif word in self.ish_dict:
                prefix *= self.weight_ish
            elif word in self.more_dict:
                prefix *= self.weight_more
            elif word in self.very_dict:
                prefix *= self.weight_very
            elif word in self.most_dict:
                prefix *= self.weight_most
            else:
                result, flag = self.word_search(word)
                if flag:
                    score += result
                if word in self.key_words:
                    has_key_word = True
        total_score += prefix * score
    # Key-word adjustment only kicks in for clearly negative totals.
    if has_key_word and total_score < -0.01:
        total_score += self.weight_key_word
    return total_score
def single_review_sentiment_score(weibo_sent):
    """Compute a sentiment score in [-10, 10] for one weibo post.

    Per clause: dictionary hits add 1 to the matching polarity (with
    ``match`` applied to preceding modifier words); the particle "吗"
    flips the sign of the nearest preceding sentiment word; a lone
    question mark with no sentiment yet defaults to mildly negative,
    while repeated question marks push the dominant polarity; an
    exclamation mark boosts the nearest preceding sentiment word by
    1.5 per exclamation mark encountered while scanning back.  Clause
    (pos, neg) pairs are summed; the final ``pos - neg`` is rounded to
    2 decimals and clamped to [-10, 10].

    Fixes vs. the original:
    * ``"x".encode('utf-8').decode("utf-8")`` round-trips are no-ops in
      Python 3 — replaced with the plain string literals.
    * In the exclamation branch, the backward scan counted marks with
      ``word == "!"`` instead of ``w2 == "!"``; since ``word`` *is* an
      exclamation mark inside that branch, ``m`` was incremented for
      every token scanned, not only for exclamation marks.  The test
      now uses ``w2`` as intended.
    """
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(weibo_sent)  # split into clauses
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)  # word segmentation (no stop-word removal here)
        i = 0         # index of the word currently scanned
        s = 0         # index just past the most recent sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # fold in modifiers between the previous sentiment word
                # and this one
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            elif word == "吗":
                # question particle: flip the sign of the nearest
                # preceding sentiment word
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount = poscount * -1
                        break
                    elif w2 in negdict:
                        negcount = negcount * -1
                        break
            elif word == "?" or word == "?":
                # additional question marks after this one reinforce the
                # currently dominant polarity
                if i + 1 < len(seg_sent):
                    for w2 in seg_sent[i + 1:]:
                        if w2 == "?" or w2 == "?":
                            if negcount > poscount:
                                negcount = negcount + 1
                            else:
                                poscount = poscount + 1
                # a question with no sentiment yet defaults to negative
                if negcount == 0 and poscount == 0:
                    negcount = negcount + 1
            elif word == "!" or word == "!":
                # weight the nearest preceding sentiment word by 1.5 per
                # exclamation mark seen while scanning backwards
                m = 0
                for w2 in seg_sent[::-1]:
                    if w2 == "!" or w2 == "!":  # bug fix: was `word == ...`
                        m = m + 1
                    if w2 in posdict:
                        poscount = poscount + 1.5 * m
                        break
                    elif w2 in negdict:
                        negcount = negcount + 1.5 * m
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # Accumulate the per-clause (pos, neg) pairs.
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result
    result = round(result, 2)
    # Clamp the final score to [-10, 10].
    if result > 10:
        result = 10
    elif result < -10:
        result = -10
    return result