def school_weibo_dict_analysis(schoolname): weibo_content_results = pickle.load(open('dict weibo data\\%s_seg_weibo_new.pkl' % (schoolname), 'r')) cuted_weibo = [] meaning_count = 0 positive_count = 0 negative_count = 0 for cell in weibo_content_results: if cell[1] == 1: cuted_weibo.append(tp.cut_sentences_2(cell[0])) #pass else: meaning_count += 1 single_weibo_count = [] for index,weibo in enumerate(cuted_weibo): single_weibo_count = [] for sent in weibo: seg_sent = tp.segmentation(sent, 'list') i = 0 # word position counter a = 0 # sentiment word position poscount = 0 # count a positive word negcount = 0 # count a negative word for word in seg_sent: if word in posdict: poscount += 1 for w in seg_sent[a:i]: poscount = match(w, poscount) a = i + 1 elif word in negdict: negcount += 1 for w in seg_sent[a:i]: negcount = match(w, negcount) a = i + 1 elif word == '!'.decode('utf-8') or word == '!'.decode('utf-8'): for w2 in seg_sent[::-1]: if w2 in posdict: poscount += 2 break elif w2 in negdict: negcount += 2 break i += 1 single_weibo_count.append(transform_to_positive_num(poscount, negcount, 0, 0, 0)) [pos_count, neg_count] = judge_weibo(single_weibo_count) #print index, pos_count, neg_count if pos_count > neg_count: positive_count += 1 elif pos_count < neg_count: negative_count += 1 else: meaning_count += 1 end_results = [positive_count, negative_count, meaning_count] pickle.dump(end_results, open('dict weibo data\\%s_end_results.pkl' % schoolname, 'w')) print end_results return end_results
def all_weibo_sentence_sentiment_score(schoolname, weiboid=None):
    """Score every sentence of the weibos returned for a school.

    Fetches rows with ``tp.get_one_weibo_data(schoolname, weiboid)``.  Each
    row is read as ``(content, upvotes, forwards, reviews, weibo id)``; the
    three counts are converted with ``int``.  Every weibo is split into
    sentences, each sentence is scored against ``posdict`` / ``negdict`` and
    the counts, together with the weibo's engagement figures, are passed to
    ``transform_to_positive_num``.

    :param schoolname: forwarded to ``tp.get_one_weibo_data``.
    :param weiboid: forwarded to ``tp.get_one_weibo_data``; only used for the
        lookup, it is not what is returned.
    :return: ``(all_weibo_count, weiboid)`` where ``all_weibo_count`` holds
        one list per weibo containing one ``transform_to_positive_num``
        result per sentence, and ``weiboid`` is ``cell[4]`` of the last row
        fetched (``None`` when no rows were returned).
    """
    # Both the ASCII and the full-width (U+FF01) exclamation mark count as
    # emphasis; the full-width form is the one normally typed in Chinese text.
    exclamation_marks = (u'!', u'\uff01')

    weibo_content_results = tp.get_one_weibo_data(schoolname, weiboid)

    # Kept separate from the ``weiboid`` argument so the lookup key is not
    # silently overwritten.
    last_weiboid = None
    weibos = []  # (sentences, upvotes, forwards, reviews) per weibo
    for cell in weibo_content_results:
        weibos.append((
            tp.cut_sentences_2(cell[0]),  # weibo content
            int(cell[1]),                 # number of upvotes
            int(cell[2]),                 # number of forwards
            int(cell[3]),                 # number of reviews (comments)
        ))
        last_weiboid = cell[4]

    all_weibo_count = []
    for sentences, upvotes, forwards, reviews in weibos:
        single_weibo_count = []
        for sent in sentences:
            seg_sent = tp.segmentation(sent, 'list')
            window_start = 0  # index just past the previous sentiment word
            poscount = 0      # running positive score of this sentence
            negcount = 0      # running negative score of this sentence
            for position, word in enumerate(seg_sent):
                if word in posdict:
                    poscount += 1
                    # Let every word between the previous sentiment word and
                    # this one adjust the running count via match().
                    for modifier in seg_sent[window_start:position]:
                        poscount = match(modifier, poscount)
                    window_start = position + 1
                elif word in negdict:
                    negcount += 1
                    for modifier in seg_sent[window_start:position]:
                        negcount = match(modifier, negcount)
                    window_start = position + 1
                elif word in exclamation_marks:
                    # An exclamation mark adds 2 to the polarity of the
                    # sentiment word closest to the end of the sentence.
                    for candidate in reversed(seg_sent):
                        if candidate in posdict:
                            poscount += 2
                            break
                        elif candidate in negdict:
                            negcount += 2
                            break
            single_weibo_count.append(
                transform_to_positive_num(poscount, negcount,
                                          upvotes, forwards, reviews))
        # One inner list of sentence results per weibo.
        all_weibo_count.append(single_weibo_count)

    return all_weibo_count, last_weiboid
def single_weibo_sentiment_score(weibo): single_weibo_senti_score = [] cuted_weibo = tp.cut_sentences_2(weibo) # 切分成一个个小句子的list for sent in cuted_weibo: print '', sent seg_sent = tp.segmentation(sent, 'list') # 分词,返回单词列表 i = 0 # word position counter s = 0 # sentiment word position poscount = 0 # count a positive word negcount = 0 # count a negative word for word in seg_sent: print '', word if word in posdict: poscount += 1 for w in seg_sent[s:i]: poscount = match(w, poscount) a = i + 1 elif word in negdict: negcount += 1 for w in seg_sent[s:i]: negcount = match(w, negcount) a = i + 1 # Match "!" in the weibo, every "!" has a weight of +2 elif word == "!".decode('utf-8') or word == "!".decode('utf-8'): for w2 in seg_sent[::-1]: if w2 in posdict: poscount += 2 break elif w2 in negdict: negcount += 2 break i += 1 print 'positive and negative',poscount, negcount single_weibo_senti_score.append(transform_to_positive_num(poscount, negcount)) weibo_sentiment_score = sumup_sentence_sentiment_score(single_weibo_senti_score) for i in single_weibo_senti_score: print i return weibo_sentiment_score