import json
import random

import nltk
import pandas as pd
from tqdm import tqdm

import sentiment


def main():
    random.seed(0)
    # Fetch the NLTK resources used downstream (tokenizer, word list,
    # POS tagger, WordNet).
    nltk.download('punkt')
    nltk.download('words')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('wordnet')

    train_filepath = './train_sak.json'
    # fc = FeatureClass()
    with open(train_filepath, 'r', encoding='utf-8') as ft:
        train_data = json.load(ft)

    sntmnt = sentiment.SentimentAnalysis()

    # Index the geocoded police reports by BCI number so each training record
    # can be joined to its geocoding columns.
    df_labels = pd.read_csv('./Police_reports_geocodings.csv')
    df_indexed = df_labels.set_index('USER_BCI_N')
    categories = list(df_indexed.columns.values)

    for t in tqdm(train_data):
        t.update({'sentiment': str(sntmnt.score(t['document']))})
        for category in categories:
            try:
                t.update({
                    category: str(df_indexed.loc[t['BCI_Number_Compl']][category])
                })
            except KeyError:
                # No geocoding row for this BCI number; fall back to a sentinel.
                t.update({category: '999'})

    with open('./train_sak_geo.json', 'w', encoding='utf-8') as fw:
        json.dump(train_data, fw)
    return 0
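# A minimal sketch of driving main(). From the field accesses above, each record
# in train_sak.json is assumed to carry at least a 'document' text and a
# 'BCI_Number_Compl' key matching USER_BCI_N values in the geocodings CSV; the
# sample record below is hypothetical.
if __name__ == '__main__':
    # e.g. train_sak.json: [{"document": "Officer responded to ...",
    #                        "BCI_Number_Compl": "2014-000123"}]
    main()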
import os

from scrapy.crawler import CrawlerProcess

# Project-local modules; FkscrapeSpider is this project's Scrapy spider and is
# imported from wherever the project defines it.
import sentiment
import sort


def __init__(self, path):
    # Remember the output path module-wide so the spider can write to it.
    global to_file
    to_file = path

    # Create the working directories, ignoring ones that already exist.
    try:
        os.mkdir(os.path.join(os.getcwd(), "Files"))
        os.mkdir(os.path.join(os.getcwd(), "CSV_Files"))
    except FileExistsError:
        pass

    # Run the scraper to completion, then hand its output directories to the
    # sentiment and sorting stages.
    process = CrawlerProcess()
    process.crawl(FkscrapeSpider)
    process.start()

    obj = sentiment.SentimentAnalysis(
        os.path.join(os.getcwd(), "Files"),
        os.path.join(os.getcwd(), "CSV_Files"))
    obj1 = sort.Sort()
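# Usage note (an addition, not from the original): CrawlerProcess.start() blocks
# until the crawl finishes, and Scrapy's underlying Twisted reactor cannot be
# restarted within one process, so this constructor can only run once per
# interpreter. A hypothetical call, assuming the enclosing class is named
# Pipeline:
#
#     Pipeline(path=os.path.join(os.getcwd(), 'output.txt'))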
import csv
import math

import sentiment


class sentiAnalyzer(object):
    id = 0
    review = dict()

    # Read the review data and assign an id to every sentence.
    with open('nokia6610 (1).csv') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        for row in readCSV:
            review[id] = row[0].strip()
            id = id + 1

    # If a sentence contains 'but', split it there: the clauses on either side
    # usually carry different sentiment.
    review = {key: list(map(str, value.split('but')))
              for key, value in review.items()}

    # Read the aspect list and group it by aspect-group name.
    aspect_dic = dict()
    with open('Aspect.csv') as aspect_file:
        readAspect = csv.reader(aspect_file, delimiter=',')
        for aspect in readAspect:
            if aspect[0] != "Group Name":  # skip the header row
                aspect_dic[aspect[0]] = aspect[1]

    # Split on commas so a group can hold multiple aspect terms.
    aspect_dic = {key: list(map(str, value.split(',')))
                  for key, value in aspect_dic.items()}

    # Collect the sentences (or clauses) that mention each aspect.
    aspect_sentence_dic = dict()
    for key, value in aspect_dic.items():
        aspect_sentence_list_dic = {}
        for word in value:
            for sentence_key, sentence_value in review.items():
                for l_value in sentence_value:
                    if word.lower() in l_value:
                        aspect_sentence_list_dic[sentence_key] = l_value
        aspect_sentence_dic[key] = aspect_sentence_list_dic

    # Sentiment scorer from sentiment.py, backed by SentiWordNet.
    s = sentiment.SentimentAnalysis(filename='SentiWordNet.txt',
                                    weighting='harmonic')

    # Score every matched sentence, aspect by aspect; x_axes holds evenly
    # spaced x positions for plotting the scores.
    score_aspectwise_dic = {}
    x_axes = {}
    for key, value in aspect_sentence_dic.items():
        score_list = []
        x = -5
        x_list = []
        for k, v in value.items():
            score_list.append(s.score(v))
            x_list.append(x)
            x += 0.25
        score_aspectwise_dic[key] = score_list
        x_axes[key] = x_list

    # for k, x in x_axes.items():
    #     for key, y in score_aspectwise_dic.items():
    #         if k == key:
    #             plot.plot_points(x, y, 'r', 'ro')

    # Compute each aspect's overall polarity plus the percentage of positive
    # and negative sentences.
    opinion_dic = {}
    for key, value in score_aspectwise_dic.items():
        if not value:
            continue  # no sentences matched this aspect
        valence = []
        score_sum = 0.0
        pos = 0
        neg = 0
        neu = 0
        for v in value:
            if v > 0.01:
                score_sum += v
                pos += 1
            elif v < -0.01:
                score_sum += v
                neg += 1
            else:
                neu += 1
        # Guard against aspects whose sentences were all neutral.
        final_score = score_sum / float(pos + neg) if (pos + neg) else 0.0
        # Normalize the score to be between -1 and 1.
        norm_score = final_score / math.sqrt((final_score * final_score) + 0.25)
        valence.append(norm_score)
        valence.append((pos / float(len(value))) * 100.0)  # positive %
        valence.append((neg / float(len(value))) * 100.0)  # negative %
        opinion_dic[key] = valence

    # Print aspect-wise polarity and positive/negative sentence percentages.
    for k, v in opinion_dic.items():
        print(k + ": ", round(v[0], 4),
              "pos:", round(v[1], 2), "%",
              "neg:", round(v[2], 2), "%")
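# A quick standalone check (an addition) of the normalization used above:
# x / sqrt(x^2 + 0.25) squashes any raw average into the open interval (-1, 1),
# leaving 0 at 0 and pushing large |x| toward +/-1 (the same functional form as
# VADER's score normalizer, here with alpha = 0.25).
if __name__ == '__main__':
    for x in (-5.0, -1.0, 0.0, 0.5, 1.0, 5.0):
        print(x, round(x / math.sqrt(x * x + 0.25), 4))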
from tkinter import filedialog

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import sentiment

# Domain-specific stopwords: remove the words that rank among the most frequent
# every year.
spc_stopwords = [
    'rms', 'received', 'told', 'car', 'assigned', 'arrested', 'rape',
    'mater', 'id', 'suspect', 'state', 'states', 'stated', 'male', 'sex',
    'case', 'time', 'unit', 'crime', 'crimes', 'original', 'narrative',
    'investigation', 'investigative', 'victim', 'victims', 'police'
]
stopwords = stopwords.words('english')
stopwords.extend(spc_stopwords)

# Initialize the sentiment analyser.
stmnt = sentiment.SentimentAnalysis()


# Word frequency count; returns a list of [word, count] pairs.
def wordcount(words_list):
    count_dic = {}
    words_count = []
    for word in words_list:
        if word not in count_dic:
            count_dic[word] = 1
        else:
            count_dic[word] += 1
    for key in count_dic:
        words_count.append([key, count_dic[key]])
    return words_count
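# A minimal usage sketch (an addition; the sample text is made up, and the
# punkt model must be downloaded for word_tokenize to run). Tokens are
# lowercased and filtered against the merged stopword list before counting,
# which is what the setup above prepares for.
text = "The report stated the suspect fled; the report was filed."
tokens = [w.lower() for w in word_tokenize(text)
          if w.isalpha() and w.lower() not in stopwords]
for word, count in sorted(wordcount(tokens), key=lambda wc: -wc[1]):
    print(word, count)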