def calculate(news_sent):
    """Score a piece of news text and apply keyword-based corrections.

    The text is tokenized with ``jieba.cut``, passed through
    ``tp.del_stopwords`` and printed.  Every remaining token is folded into
    two accumulators by ``match`` and ``sent_match``, and
    ``mutiplication_list`` combines them into the raw score.  (Those helpers
    are defined elsewhere; what exactly they compute is not visible here.)

    A non-zero raw score is then corrected:

    * if any conditional / concessive marker is among the tokens the
      result is 0;
    * otherwise the first matching keyword rule, tried in a fixed order,
      either flips the sign of the score or resets it to 0.

    Args:
        news_sent: Text to score.

    Returns:
        The corrected score.  A raw score equal to 0 is returned unchanged.
    """
    tokens = tp.del_stopwords(list(jieba.cut(news_sent)))
    print(tokens)

    count_list = []
    sent_list = []
    for token in tokens:
        count_list = match(token, count_list)
        sent_list = sent_match(token, sent_list)
    score = mutiplication_list(count_list, sent_list)

    # Nothing to correct when the raw score is already neutral.
    if score == 0:
        return score

    def mentions(*words):
        """Tell whether at least one of *words* occurs among the tokens."""
        return any(word in tokens for word in words)

    # Conditional / concessive wording ("as long as", "although", "if",
    # "even if", "unless", "otherwise", ...) neutralizes the score.
    if mentions('只要', '虽然', '虽说', '如果', '即使', '即便', '不要说只有',
                '即便真的有', '除非', '否则', '若'):
        return 0

    flip, reset = True, False
    # (first keyword group, second keyword group, action).  A rule fires
    # when both groups are represented among the tokens; only the first
    # rule that fires is applied, so the order below is significant.
    rules = (
        # costs, expenses or risk falling / being reduced / increasing
        (('下降', '降低', '增加'),
         ('费用', '成本', '财务费用', '财务成本', '风险', '进口成本'), flip),
        # house prices falling
        (('房价',), ('下降', '下跌'), flip),
        # spot price or oil price rising
        (('现货价格', '油价'), ('上涨', '大涨'), flip),
        # CPI, inflation, prices, crude oil, interest rates or costs moving
        (('下降', '回落', '上行', '下行'),
         ('CPI', '通胀', '商品价格', '物价', '原油', '利率', '成本'), flip),
        # a shock that was withstood
        (('抵御住了',), ('冲击',), flip),
        # a decline mentioned together with a rebound
        (('下跌',), ('反弹',), flip),
        # losses reduced or minimized
        (('损失',), ('减少', '最小化'), flip),
        # risk kept under control
        (('下跌风险', '风险'), ('控制',), flip),
        # a proportion going down carries no sentiment
        (('下降',), ('比例',), reset),
        # pressure eased
        (('压力',), ('缓解',), flip),
        # recovery expected "no earlier than ..."
        (('至少要到',), ('回升',), flip),
        # worries growing stronger
        (('增强',), ('忧虑',), flip),
        # reserve ratio, benchmark rate or CPI lowered / falling
        (('下调', '下降'),
         ('存准率', '存款准备金率', '基准利率', 'CPI'), flip),
        # hope extinguished
        (('扑灭',), ('希望',), flip),
    )
    for first_group, second_group, flips_sign in rules:
        if mentions(*first_group) and mentions(*second_group):
            return score * (-1) if flips_sign else 0

    return score
def calculate(news_sent):
    """Compute the dictionary-based score of a piece of news text.

    The text is tokenized with ``jieba.cut`` and passed through
    ``tp.del_stopwords``.  Each remaining token is folded into an
    accumulator list by ``match`` and the list is reduced to a single score
    by ``mutiplication_list``.  Both helpers are defined elsewhere; their
    exact semantics are not visible from this function.

    Args:
        news_sent: Text to score.

    Returns:
        Whatever ``mutiplication_list`` returns for the accumulated list.
    """
    # jieba.cut yields individual tokens; materialize them for the filter.
    tokens = tp.del_stopwords(list(jieba.cut(news_sent)))

    count_list = []
    for token in tokens:
        count_list = match(token, count_list)

    return mutiplication_list(count_list)
def sentence_score(sentence):
    """Return the net sentiment score (positive minus negative) of a text.

    The text is split into clauses with ``tp.cut_sentence``.  Each clause is
    segmented (``tp.segmentation``), stripped of stop words
    (``tp.del_stopwords``) and scanned word by word:

    * a word found in ``posdict`` / ``negdict`` adds 1 to the positive /
      negative count, after which every word between the previous sentiment
      word and this one is passed to ``match`` together with the running
      count (``match`` is defined elsewhere; presumably it applies degree
      and negation modifiers -- confirm against its definition);
    * an exclamation mark (ASCII or full-width) adds 2 to the count of the
      sentiment word closest to the end of the clause, if there is one.

    The two counts of every clause go through ``transform_to_positive_num``,
    which must return a ``(positive, negative)`` pair; the pairs are summed
    over all clauses.

    Args:
        sentence: Text to score.

    Returns:
        Total positive score minus total negative score.
    """
    # Unicode literals work on both Python 2 and Python 3, whereas
    # ``"!".decode("utf-8")`` raises AttributeError on Python 3.  The second
    # entry is the full-width mark used in Chinese text.
    exclamation_marks = (u"!", u"\uff01")

    clause_scores = []
    for clause in tp.cut_sentence(sentence):
        words = tp.del_stopwords(tp.segmentation(clause))
        window_start = 0  # index just past the most recent sentiment word
        poscount = 0      # positive score of this clause
        negcount = 0      # negative score of this clause

        for index, word in enumerate(words):
            if word in posdict:
                poscount += 1
                for modifier in words[window_start:index]:
                    poscount = match(modifier, poscount)
                window_start = index + 1
            elif word in negdict:
                negcount += 1
                for modifier in words[window_start:index]:
                    negcount = match(modifier, negcount)
                window_start = index + 1
            elif word in exclamation_marks:
                # Emphasis: reward the sentiment word nearest to the end of
                # the clause, then stop looking.
                for candidate in reversed(words):
                    if candidate in posdict:
                        poscount += 2
                        break
                    if candidate in negdict:
                        negcount += 2
                        break

        clause_scores.append(transform_to_positive_num(poscount, negcount))

    # Accumulate the per-clause pairs into overall totals.
    pos_result = sum(positive for positive, _ in clause_scores)
    neg_result = sum(negative for _, negative in clause_scores)
    return pos_result - neg_result
def single_review_sentiment_score(comment_sent):
    """Return the net sentiment score of one comment, rounded to 1 decimal.

    The comment is split into clauses with ``tp.cut_sentence`` and every
    clause is analysed on its own.  A clause is segmented
    (``tp.segmentation``), stripped of stop words (``tp.del_stopwords``) and
    scanned word by word:

    * a word found in ``posdict`` / ``negdict`` adds 1 to the positive /
      negative count, after which every word between the previous sentiment
      word and this one is passed to ``match`` together with the running
      count (``match`` is defined elsewhere; presumably it applies degree
      and negation modifiers -- confirm against its definition);
    * an exclamation mark (ASCII or full-width) adds 2 to the count of the
      sentiment word closest to the end of the clause, if there is one.

    The two counts of every clause go through ``transform_to_positive_num``,
    which must return a ``(positive, negative)`` pair; the pairs are summed
    over all clauses.

    Args:
        comment_sent: Comment text to score.

    Returns:
        Total positive score minus total negative score, rounded to one
        decimal place.
    """
    # The original compared against the ASCII mark twice; the second entry
    # restores the full-width mark that Chinese text normally uses.
    exclamation_marks = (u"!", u"\uff01")

    clause_scores = []
    for clause in tp.cut_sentence(comment_sent):
        words = tp.del_stopwords(tp.segmentation(clause))
        window_start = 0  # index just past the most recent sentiment word
        poscount = 0      # positive score of this clause
        negcount = 0      # negative score of this clause

        for index, word in enumerate(words):
            if word in posdict:
                poscount += 1
                for modifier in words[window_start:index]:
                    poscount = match(modifier, poscount)
                window_start = index + 1
            elif word in negdict:
                negcount += 1
                for modifier in words[window_start:index]:
                    negcount = match(modifier, negcount)
                window_start = index + 1
            elif word in exclamation_marks:
                # Emphasis: reward the sentiment word nearest to the end of
                # the clause, then stop looking.
                for candidate in reversed(words):
                    if candidate in posdict:
                        poscount += 2
                        break
                    if candidate in negdict:
                        negcount += 2
                        break

        clause_scores.append(transform_to_positive_num(poscount, negcount))

    # Accumulate the per-clause pairs into overall totals.
    pos_result = sum(positive for positive, _ in clause_scores)
    neg_result = sum(negative for _, negative in clause_scores)
    return round(pos_result - neg_result, 1)
def single_review_sentiment_score(weibo_sent):
    """Return the net sentiment score of one weibo post, rounded to 1 decimal.

    The post is split into clauses with ``tp.cut_sentence`` and every clause
    is analysed on its own.  A clause is segmented (``tp.segmentation``),
    stripped of stop words (``tp.del_stopwords``) and scanned word by word:

    * a word found in ``posdict`` / ``negdict`` adds 1 to the positive /
      negative count, after which every word between the previous sentiment
      word and this one is passed to ``match`` together with the running
      count (``match`` is defined elsewhere; presumably it applies degree
      and negation modifiers -- confirm against its definition);
    * an exclamation mark (ASCII or full-width) adds 2 to the count of the
      sentiment word closest to the end of the clause, if there is one.

    The two counts of every clause go through ``transform_to_positive_num``,
    which must return a ``(positive, negative)`` pair; the pairs are summed
    over all clauses.

    Args:
        weibo_sent: Post text to score.

    Returns:
        Total positive score minus total negative score, rounded to one
        decimal place.
    """
    # Unicode literals work on both Python 2 and Python 3, whereas
    # ``"!".decode("utf-8")`` raises AttributeError on Python 3.  The second
    # entry is the full-width mark used in Chinese text.
    exclamation_marks = (u"!", u"\uff01")

    clause_scores = []
    for clause in tp.cut_sentence(weibo_sent):
        words = tp.del_stopwords(tp.segmentation(clause))
        window_start = 0  # index just past the most recent sentiment word
        poscount = 0      # positive score of this clause
        negcount = 0      # negative score of this clause

        for index, word in enumerate(words):
            if word in posdict:
                poscount += 1
                for modifier in words[window_start:index]:
                    poscount = match(modifier, poscount)
                window_start = index + 1
            elif word in negdict:
                negcount += 1
                for modifier in words[window_start:index]:
                    negcount = match(modifier, negcount)
                window_start = index + 1
            elif word in exclamation_marks:
                # Emphasis: reward the sentiment word nearest to the end of
                # the clause, then stop looking.
                for candidate in reversed(words):
                    if candidate in posdict:
                        poscount += 2
                        break
                    if candidate in negdict:
                        negcount += 2
                        break

        clause_scores.append(transform_to_positive_num(poscount, negcount))

    # Accumulate the per-clause pairs into overall totals.
    pos_result = sum(positive for positive, _ in clause_scores)
    neg_result = sum(negative for _, negative in clause_scores)
    return round(pos_result - neg_result, 1)
if v == 'inverse': v = -1 score = score * v else: score = score * v return score else: for i, v in enumerate(list): if v == 'inverse': v = -1 score = score * v else: score = score * v return score text='骑乘收益率曲线策略是短期货币市场证券管理中流行的一种策略。' single_review_senti_score = [] cuted_review = list(jieba.cut(text)) # 句子切分,单独对每个句子进行分析 cuted_review = tp.del_stopwords(cuted_review) print(cuted_review) count_list = [] for sent in cuted_review: count_list=match(sent,count_list) score=mutiplication_list(count_list) print(score)
import jieba

import text_process as tp

# Register the sentiment and degree word lists with jieba so that their
# entries are kept as single tokens during segmentation.
# Raw strings keep every Windows backslash literal; ordinary literals here
# relied on invalid escape sequences such as "\p" and "\S", which newer
# Python versions warn about.
_SENTIMENT_DICT_DIR = (
    r'E:\postgraduate\no_space_environment\category\pycharm'
    r'\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict'
)
for _dict_file in (
    r'\emotion_dict\pos_all_dict.txt',
    r'\emotion_dict\neg_all_dict.txt',
    r'\degree_dict\insufficiently_inverse.txt',
):
    jieba.load_userdict(_SENTIMENT_DICT_DIR + _dict_file)

news_sent = '我不是很看好这支股票会上涨。'

# Tokenize the sample directly with jieba (an earlier version of this
# script used tp.cut_sentence here instead).
cuted_review = list(jieba.cut(news_sent))
print(cuted_review)

# Run every token through the project's segmentation and stop-word filter
# and show what is left of it.
for sent in cuted_review:
    seg_sent = tp.segmentation(sent)
    seg_sent = tp.del_stopwords(seg_sent)[:]
    print(seg_sent)