) jieba.load_userdict( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\inverse.txt" ) jieba.load_userdict( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\\stop_words.txt" ) jieba.load_userdict( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\my_dict\\my_words.txt" ) # 1.读取情感词典和待处理文件 # 情感词典 stopdict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\stop_words.txt' ) posdict = tp.read_lines( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt" ) negdict = tp.read_lines( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\\neg_all_dict.txt" ) # 程度副词词典 mostdict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\most.txt' ) # 权值为2.5 verydict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\\very.txt' ) # 权值为2 moredict = tp.read_lines(
#!/usr/bin/env python3 # -*- coding: utf-8 -*- import text_process as tp import numpy as np import pandas as pd # 1.读取情感词典和待处理文件 # 情感词典 posdict = tp.read_lines("./emotion_dict/pos_all_dict.txt") negdict = tp.read_lines("./emotion_dict/neg_all_dict.txt") # 程度副词词典 mostdict = tp.read_lines('./degree_dict/most.txt') # 权值为2 verydict = tp.read_lines('./degree_dict/very.txt') # 权值为1.5 moredict = tp.read_lines('./degree_dict/more.txt') # 权值为1.25 ishdict = tp.read_lines('./degree_dict/ish.txt') # 权值为0.5 insufficientdict = tp.read_lines('./degree_dict/insufficiently.txt') # 权值为0.25 inversedict = tp.read_lines('./degree_dict/inverse.txt') # 权值为-1 # 情感级别 emotion_level1 = "悲伤。在这个级别的人过的是八辈子都懊丧和消沉的生活。这种生活充满了对过去的懊悔、自责和悲恸。在悲伤中的人,看这个世界都是灰黑色的。" emotion_level2 = "愤怒。如果有人能跳出冷漠和内疚的怪圈,并摆脱恐惧的控制,他就开始有欲望了,而欲望则带来挫折感,接着引发愤怒。愤怒常常表现为怨恨和复仇心里,它是易变且危险的。愤怒来自未能满足的欲望,来自比之更低的能量级。挫败感来自于放大了欲望的重要性。愤怒很容易就导致憎恨,这会逐渐侵蚀一个人的心灵。" emotion_level3 = "淡定。到达这个能级的能量都变得很活跃了。淡定的能级则是灵活和无分别性的看待现实中的问题。到来这个能级,意味着对结果的超然,一个人不会再经验挫败和恐惧。这是一个有安全感的能级。到来这个能级的人们,都是很容易与之相处的,而且让人感到温馨可靠,这样的人总是镇定从容。他们不会去强迫别人做什么。" emotion_level4 = "平和。他感觉到所有的一切都生机勃勃并光芒四射,虽然在其他人眼里这个世界还是老样子,但是在这人眼里世界却是一个。所以头脑保持长久的沉默,不再分析判断。观察者和被观察者成为同一个人,观照者消融在观照中,成为观照本身。" emotion_level5 = "喜悦。当爱变得越来越无限的时候,它开始发展成为内在的喜悦。这是在每一个当下,从内在而非外在升起的喜悦。这个能级的人的特点是,他们具有巨大的耐性,以及对一再显现的困境具有持久的乐观态度,以及慈悲。同时发生着。在他们开来是稀松平常的作为,却会被平常人当成是奇迹来看待。" # 情感波动级别 emotion_level6 = "情感波动很小,个人情感是不易改变的、经得起考验的。能够理性的看待周围的人和事。" emotion_level7 = "情感波动较大,周围的喜悦或者悲伤都能轻易的感染他,他对周围的事物有敏感的认知。" def match(word, sentiment_value):
# -*- coding: utf-8 -*-
__author__ = 'Bai Chenjia'

import text_process as tp
import numpy as np

# 1. Load the sentiment dictionaries and the files to be processed.
# Sentiment dictionaries (positive / negative word lists).
# print() with a single string argument emits the same text on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("reading...")
posdict = tp.read_lines("f://emotion/mysite/Sentiment_dict/emotion_dict/pos_all_dict.txt")
negdict = tp.read_lines("f://emotion/mysite/Sentiment_dict/emotion_dict/neg_all_dict.txt")

# Degree-adverb dictionaries.
# NOTE(review): the weights quoted below are the original author's comments;
# the code that applies them is not visible in this fragment -- confirm the
# values against it.
mostdict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/most.txt')  # weight 2
verydict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/very.txt')  # weight 1.5
moredict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/more.txt')  # weight 1.25
ishdict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/ish.txt')  # weight 0.5
insufficientdict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/insufficiently.txt')  # weight 0.25
inversedict = tp.read_lines('f://emotion/mysite/Sentiment_dict/degree_dict/inverse.txt')  # weight -1

# Emotion levels: descriptive texts for five emotional states
# (the literals are runtime data and are kept verbatim).
emotion_level1 = "悲伤。在这个级别的人过的是八辈子都懊丧和消沉的生活。这种生活充满了对过去的懊悔、自责和悲恸。在悲伤中的人,看这个世界都是灰黑色的。"
emotion_level2 = "愤怒。如果有人能跳出冷漠和内疚的怪圈,并摆脱恐惧的控制,他就开始有欲望了,而欲望则带来挫折感,接着引发愤怒。愤怒常常表现为怨恨和复仇心里,它是易变且危险的。愤怒来自未能满足的欲望,来自比之更低的能量级。挫败感来自于放大了欲望的重要性。愤怒很容易就导致憎恨,这会逐渐侵蚀一个人的心灵。"
emotion_level3 = "淡定。到达这个能级的能量都变得很活跃了。淡定的能级则是灵活和无分别性的看待现实中的问题。到来这个能级,意味着对结果的超然,一个人不会再经验挫败和恐惧。这是一个有安全感的能级。到来这个能级的人们,都是很容易与之相处的,而且让人感到温馨可靠,这样的人总是镇定从容。他们不会去强迫别人做什么。"
emotion_level4 = "平和。他感觉到所有的一切都生机勃勃并光芒四射,虽然在其他人眼里这个世界还是老样子,但是在这人眼里世界却是一个。所以头脑保持长久的沉默,不再分析判断。观察者和被观察者成为同一个人,观照者消融在观照中,成为观照本身。"
emotion_level5 = "喜悦。当爱变得越来越无限的时候,它开始发展成为内在的喜悦。这是在每一个当下,从内在而非外在升起的喜悦。这个能级的人的特点是,他们具有巨大的耐性,以及对一再显现的困境具有持久的乐观态度,以及慈悲。同时发生着。在他们开来是稀松平常的作为,却会被平常人当成是奇迹来看待。"

# Emotion-fluctuation levels: descriptive texts for small vs. large swings.
emotion_level6 = "情感波动很小,个人情感是不易改变的、经得起考验的。能够理性的看待周围的人和事。"
emotion_level7 = "情感波动较大,周围的喜悦或者悲伤都能轻易的感染他,他对周围的事物有敏感的认知。"

# 2. Degree-adverb handling: multiply by a different weight depending on the
#    category of the degree adverb.
# -*- coding: utf-8 -*- import text_process as tp import codecs #1 read sentimental dict and adverb dict # sentimental dict posdict = tp.read_lines("D://PythonProject/Sentiment_dict/emotion_dict/pos_all_dict.txt") negdict = tp.read_lines("D://PythonProject/Sentiment_dict/emotion_dict/neg_all_dict.txt") # adverb of dict mostdict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/most.txt') # weight is 2 verydict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/very.txt') # weight is 1.75 moredict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/more.txt') # weight is 1.5 ishdict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/ish.txt') # weight is 1.2 insufficientdict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/insufficiently.txt') # weight is 0.5 inversedict = tp.read_lines('D://PythonProject/Sentiment_dict/degree_dict/inverse.txt') # weight is -1 # 2 handle adverb and multiply word according to different weight def match(word, sentiment_value): if word in mostdict: sentiment_value *= 2.0 elif word in verydict: sentiment_value *= 1.75 elif word in moredict: sentiment_value *= 1.5 elif word in ishdict: sentiment_value *= 1.2 elif word in insufficientdict:
import jieba.analyse
import jieba.posseg as pseg
from model import Dict
import text_process as tp

# Degree-adverb dictionaries. The weight noted beside each one is the
# multiplier that match() applies to words found in that dictionary.
# NOTE(review): tp.read_lines is defined elsewhere; presumably it returns the
# file's words as a collection that supports `in` -- confirm.
mostdict = tp.read_lines('degree_dict/most.txt')  # weight 2.0
verydict = tp.read_lines('degree_dict/very.txt')  # weight 1.75
moredict = tp.read_lines('degree_dict/more.txt')  # weight 1.5
ishdict = tp.read_lines('degree_dict/ish.txt')  # weight 1.2
insufficientdict = tp.read_lines('degree_dict/insufficiently.txt')  # weight 0.5
inversedict = tp.read_lines('degree_dict/inverse.txt')  # weight -1


# 2. Degree-adverb handling: multiply by a different weight depending on the
#    category of the degree adverb.
def match(word, sentiment_value):
    """Scale a sentiment score by the weight of the category *word* belongs to.

    The dictionaries are checked in a fixed order (most, very, more, ish,
    insufficient, inverse) and only the first one containing *word* applies.

    Args:
        word: The token to look up in the degree-adverb dictionaries.
        sentiment_value: The score to be scaled.

    Returns:
        ``sentiment_value`` multiplied by the matching weight, or unchanged
        when *word* is in none of the dictionaries.
    """
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.75
    elif word in moredict:
        sentiment_value *= 1.5
    elif word in ishdict:
        sentiment_value *= 1.2
    elif word in insufficientdict:
        sentiment_value *= 0.5
    elif word in inversedict:
        # Words in the inverse dictionary flip the sign of the score.
        sentiment_value *= -1
    return sentiment_value
jieba.load_userdict( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt' ) jieba.load_userdict( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\\neg_all_dict.txt' ) jieba.load_userdict( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\insufficiently_inverse.txt" ) # 1.读取情感词典和待处理文件 # 情感词典 print("reading...") posdict = tp.read_lines( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt" ) negdict = tp.read_lines( "E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\\neg_all_dict.txt" ) # 程度副词词典 mostdict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\most.txt' ) # 权值为2 verydict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\\very.txt' ) # 权值为1.5 moredict = tp.read_lines( 'E:\postgraduate\\no_space_environment\category\pycharm\pycharm_file_location\\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\more.txt' ) # 权值为1.25 ishdict = tp.read_lines(