Exemplo n.º 1
0
def train_model(neg_path, pos_path):
    """Train the SnowNLP sentiment classifier and save the resulting model.

    Args:
        neg_path: Path of the negative corpus. A falsy value (``None`` or
            ``''``) falls back to ``neg.txt`` in the current working directory.
        pos_path: Path of the positive corpus. A falsy value falls back to
            ``pos.txt`` in the current working directory.

    Returns:
        The absolute path handed to ``sentiment.save``:
        ``sentiment.marshal`` in the current working directory.
    """
    work_dir = os.getcwd()
    # The arguments used to be overwritten unconditionally, which made the
    # parameters dead; honour them and keep the old locations as defaults.
    neg_path = os.path.abspath(neg_path or os.path.join(work_dir, 'neg.txt'))
    pos_path = os.path.abspath(pos_path or os.path.join(work_dir, 'pos.txt'))
    mod_path = os.path.abspath(os.path.join(work_dir, 'sentiment.marshal'))
    sentiment.train(neg_path, pos_path)
    sentiment.save(mod_path)
    return mod_path
Exemplo n.º 2
0
 def post(self, request):
     """Handle the labelling form: store one labelled weibo or retrain.

     * ``submit`` in the POST data: the last ``group<id>`` field is parsed
       into a candidate id and a sentiment value; if the candidate exists
       and the sentiment is non-negative, the row is copied into the weibo
       table, removed from the candidates and the transaction committed.
     * ``refresh`` in the POST data: the stock corpora are merged with the
       labelled weibos into ``neg_updated.txt`` / ``pos_updated.txt`` and
       the sentiment model is retrained and saved.

     Returns:
         A redirect to ``index`` in every case. Previously a POST that
         carried neither key fell off the end and returned ``None``.
     """
     print(request.POST)
     if 'submit' in request.POST:
         cand_id = -1
         senti = -1
         for k in request.POST:
             if k.startswith("group"):
                 try:
                     cand_id = int(k[5:])
                     senti = int(request.POST.get(k))
                 except ValueError:
                     # A malformed field invalidates the selection.
                     cand_id = -1
                     senti = -1
         if cand_id > 0:
             # NOTE(review): the statements below are built with str.format.
             # The templates live outside this method, so they are left
             # untouched, but text copied from the candidate row can break
             # or inject into the INSERT - switch to parameterised queries.
             self.cursor.execute(
                 self.select_candidate_by_id.format(cand_id))
             cand_weibo = self.cursor.fetchone()
             if cand_weibo and senti >= 0:
                 self.cursor.execute(
                     self.insert_into_weibo.format(cand_weibo[1],
                                                   cand_weibo[2],
                                                   cand_weibo[3],
                                                   senti))
                 self.cursor.execute(self.delete_candidate.format(cand_id))
                 self.db.commit()
     elif 'refresh' in request.POST:
         print('refresh')
         self._rebuild_corpus('neg.txt', 'neg_updated.txt', 0)
         self._rebuild_corpus('pos.txt', 'pos_updated.txt', 1)
         print("开始训练新模型...")
         sentiment.train('neg_updated.txt', 'pos_updated.txt')
         sentiment.save('sentiment.marshal')
         print("训练完成!")
     # Every branch, including an unrecognised form, must yield a response.
     return redirect('index')

 def _rebuild_corpus(self, base_path, merged_path, label):
     """Write the stripped lines of base_path plus all weibos with label."""
     with open(merged_path, 'w', encoding='utf-8') as writer:
         with open(base_path, 'r', encoding='utf-8') as reader:
             for line in reader:
                 writer.write(line.strip() + '\n')
         self.cursor.execute(self.select_all_weibos.format(label))
         for row in self.cursor.fetchall():
             # Index 2 is used as the text to append, as in the original loop.
             writer.write(row[2].strip() + '\n')
def train():
    """Retrain the sentiment model from the corpora bundled next to this file.

    Reads ``snownlp/sentiment/neg.txt`` and ``pos.txt`` below this module's
    directory and saves the model as ``sentiment.marshal`` in that folder.
    """
    # os.path.join replaces the hard-coded '\\' concatenation: it works on
    # every platform and, when dirname(__file__) is empty, yields a relative
    # path instead of one anchored at the drive root.
    corpus_dir = os.path.join(os.path.dirname(__file__), 'snownlp', 'sentiment')
    neg = os.path.join(corpus_dir, 'neg.txt')
    pos = os.path.join(corpus_dir, 'pos.txt')
    parm = os.path.join(corpus_dir, 'sentiment.marshal')
    sentiment.train(neg, pos)
    sentiment.save(parm)
Exemplo n.º 4
0
def save_train(request):
    """Dump the scored comments into corpora, retrain and save the model.

    Scored comments (``is_scored=1``) with ``arti_score > 0.5`` go to the
    positive corpus, those with ``arti_score <= 0.5`` to the negative one.
    Each comment is written preceded by a newline, as before.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        ``JsonResponse({"msg": "success"})`` once the model has been saved.
    """
    corpus_dir = ('/Library/Frameworks/Python.framework/Versions/3.7/lib/'
                  'python3.7/site-packages/snownlp/sentiment/')
    pos_path = corpus_dir + 'SGMW_pos.txt'
    neg_path = corpus_dir + 'SGMW_neg.txt'
    model_path = corpus_dir + 'SGMW_sentiment.marshal'

    # Context managers close the files even if a write fails. The encoding
    # is pinned to UTF-8 instead of the platform default.
    # NOTE(review): UTF-8 is presumably what snownlp expects when it reads
    # the corpora back - confirm against the installed version.
    with open(pos_path, 'w', encoding='utf-8') as pos_file:
        for comment in Comments.objects.filter(arti_score__gt=0.5,
                                               is_scored=1):
            pos_file.write('\n')
            pos_file.write(comment.content)

    with open(neg_path, 'w', encoding='utf-8') as neg_file:
        for comment in Comments.objects.filter(arti_score__lte=0.5,
                                               is_scored=1):
            neg_file.write('\n')
            neg_file.write(comment.content)

    sentiment.train(neg_path, pos_path)
    sentiment.save(model_path)

    return JsonResponse({"msg": "success"})
Exemplo n.º 5
0
def train():
    """Train on target/neg.txt and target/pos.txt and save a timestamped model.

    Sets the module-level ``start`` to the wall-clock time at which training
    begins and passes ``printPer`` through to ``sentiment.train``.
    """
    global start
    # Timestamp that becomes part of the saved model's file name.
    stamp = datetime.datetime.now().strftime('%Y%m%d %H%M%S')
    print("prepare...")
    start = time.time()
    sentiment.train('target/neg.txt', 'target/pos.txt', printPer)
    model_name = 'target/sen %s.marshal' % stamp
    sentiment.save(model_name)
Exemplo n.º 6
0
def train_and_test():
    """Retrain on the eastmoney corpora, save the model and score one phrase."""
    from snownlp import sentiment

    print("start")
    sentiment.train('eastmoney_neg.txt', 'eastmoney_pos.txt')
    print("finish")
    sentiment.save('sentiment.marshal')

    # Quick smoke test of the freshly trained classifier.
    sample = SnowNLP("明天涨停")
    print(sample.sentiments)
Exemplo n.º 7
0
def selfTrainSentiment(infile):
    """Retrain the sentiment model, score ``infile`` and plot the scores.

    Args:
        infile: Input handed to ``sentiment_analysis``, which is expected to
            return a sequence of scores.
    """
    # sentiment.train takes (neg_file, pos_file). The call used to pass
    # "pos" first, which trains the classifier with inverted labels.
    sentiment.train("neg", "pos")
    sentiment.save('sentfwhyj.marshal')
    sentiments_list = sentiment_analysis(infile)
    x = range(len(sentiments_list))
    pl.plot(x, sentiments_list, 'b.')
    pl.xlabel('sample')
    pl.ylabel('score')
    pl.show()
Exemplo n.º 8
0
 def trainSentimentCorpus(self, negPath, posPath, target_encoding):
     """Train the sentiment model from the given corpora and save it.

     Args:
         negPath: Path of the negative corpus file.
         posPath: Path of the positive corpus file.
         target_encoding: Encoding passed to ``self.convertEncoding`` for
             both files before training.
     """
     self.convertEncoding(negPath, target_encoding)
     self.convertEncoding(posPath, target_encoding)
     sentiment.train(neg_file=negPath, pos_file=posPath)
     path_name = 'sentiment_Jxl_line'
     # print(...) with one argument behaves the same on Python 2 and is
     # valid on Python 3, unlike the bare print statement used before.
     print(u'数据训练完毕,即将保存{}.marshal文件'.format(path_name))
     sentiment.save('{}.marshal'.format(path_name))
     print(u'保存完毕!')
Exemplo n.º 9
0
def train_snowNLP(table):
    """Build the corpora for ``table``, train on them and save the model.

    Args:
        table: Table name; it selects the data folder and names the
            generated corpus and model files.
    """
    filter_comment.filter_opppsive_comments(table)

    train_dir = FILE_PATH + 'train_files/'
    neg_file = train_dir + table + '_neg.txt'
    pos_file = train_dir + table + '_pos.txt'

    big_files_dir = DATA_PATH + table + '/big_files/'
    get_sentiment_file(big_files_dir + 'positive.txt', pos_file)
    get_sentiment_file(big_files_dir + 'negative.txt', neg_file)

    # The model is written into the installed snownlp package.
    file_path = 'F:/computer_science/python3/lib/site-packages/snownlp/sentiment/'
    model_file = file_path + table + '.marshal'
    sentiment.train(neg_file, pos_file)
    sentiment.save(model_file)
Exemplo n.º 10
0
def train():
    """Build the corpora from raw_data.csv, then train and save the model.

    Raises:
        Exception: If ``raw_data.csv`` is not listed in ``dirlist``.
    """
    if 'raw_data.csv' not in dirlist:
        raise Exception('请先创建raw_data.csv文件')
    df = pd.read_csv('raw_data.csv')
    # The former ``df.fillna('nan')`` discarded its result and had no
    # effect, so it is dropped; missing comments are removed by dropna().
    commands = df.评论内容.dropna().tolist()
    models(commands)
    sentiment.train('neg.txt', 'pos.txt')
    sentiment.save('mysentiment.marshal')
    # The old backslash continuation leaked the source indentation into the
    # message and misspelled __init__.py.
    print('得到模型后需拷贝到snownlp的sentiment文件夹下,'
          '并修改__init__.py的路径来加载新权重')
Exemplo n.º 11
0
def train_model():
    """Shuffle weibo_senti_100k, split it, write the corpora and train.

    The first 110000 shuffled rows form the training part, which is split by
    ``label`` into neg.csv / pos.csv; the remainder is written to test.csv.
    The trained model is saved into the installed snownlp package.
    """
    shuffled = pd.read_csv(
        r"./Train/weibo_senti_100k/weibo_senti_100k.csv", header=0
    ).sample(frac=1)
    train_part = shuffled.iloc[:110000, [0, 1]]
    test_part = shuffled.iloc[110000:, [0, 1]]

    train_text = train_part.iloc[:, 1]
    negatives = train_text[train_part.label == 0]
    positives = train_text[train_part.label == 1]

    negatives.to_csv(r"./Train/weibo_senti_100k/neg.csv",
                     index=False, header=False)
    positives.to_csv(r"./Train/weibo_senti_100k/pos.csv",
                     index=False, header=False)
    test_part.to_csv(r"./Train/weibo_senti_100k/test.csv",
                     index=False, columns=['label', 'review'])

    sentiment.train(r'./Train/weibo_senti_100k/neg.csv',
                    r'./Train/weibo_senti_100k/pos.csv')
    sentiment.save(
        r'C:/Users/RA1LGUN/Anaconda3/Lib/site-packages/snownlp/sentiment/newsentiment.marshal'
    )
Exemplo n.º 12
0
def train_model():
    """Split DataSet.csv, write the neg/pos corpora and train the model.

    The first 40000 rows form the training part, which is split by ``label``
    into neg.csv / pos.csv; the remaining rows are written to TestModel.csv.
    """
    dataset = pd.read_csv(r"./DataSet.csv", header=0)
    train_part = dataset.iloc[:40000, [1, 2]]
    test_part = dataset.iloc[40000:, [1, 2]]

    train_text = train_part.iloc[:, 1]
    negatives = train_text[train_part.label == 0]
    positives = train_text[train_part.label == 1]

    negatives.to_csv(r"./neg.csv", index=False, header=False)
    positives.to_csv(r"./pos.csv", index=False, header=False)
    test_part.to_csv(r"./TestModel.csv", index=False,
                     columns=['label', 'review'])

    sentiment.train(r'./neg.csv', r'./pos.csv')
    # The model is written into the installed snownlp package.
    model_path = r'C:/ProgramData/Miniconda3/Lib/site-packages/snownlp/sentiment/sentiment.marshal'
    sentiment.save(model_path)
Exemplo n.º 13
0
def train():
    """Split the reviews into train/test files and train on the train parts."""
    positive_reviews = get_pos_reviews()
    negative_reviews = get_neg_reviews()

    # Positive reviews are split and written first, then the negative ones.
    targets = (
        (positive_reviews, "./train/pos_train", "./train/pos_test"),
        (negative_reviews, "./train/neg_train", "./train/neg_test"),
    )
    for reviews, train_path, test_path in targets:
        train_part, test_part = split(reviews)
        write(train_part, train_path)
        write(test_part, test_path)

    sentiment.train("./train/neg_train", "./train/pos_train")
    sentiment.save('./train/sentiment.marshal')
Exemplo n.º 14
0
def train_sentiment(use_all_data=True):
    """Train the classifier from the negative and positive corpora.

    Args:
        use_all_data: When true, train on the ``clean_*`` corpora and save
            ``impurity_classifier``; otherwise train on the ``train_*``
            corpora and save ``train_impurity_classifier``.
    """
    # print(...) with one argument works on Python 2 and 3 alike; the bare
    # print statement used before is a SyntaxError on Python 3.
    print('train model')

    if use_all_data:
        neg_file = '../data/clean_negative.txt'
        pos_file = '../data/clean_positive.txt'
        model_file = '../data/impurity_classifier'
    else:
        neg_file = '../data/train_negative.txt'
        pos_file = '../data/train_positive.txt'
        model_file = '../data/train_impurity_classifier'

    sentiment.train(neg_file, pos_file)
    sentiment.save(model_file)
Exemplo n.º 15
0
def nlp(filepath, neg, pos):
    """Retrain on ``neg``/``pos`` and label the first column of a workbook.

    Args:
        filepath: Excel file whose first column holds the texts to score.
        neg: Negative corpus passed to ``sentiment.train``.
        pos: Positive corpus passed to ``sentiment.train``.

    Returns:
        The loaded frame with an added ``predict`` column (1 or -1); the same
        frame is also written to ``G:\\content_data.xlsx``.
    """
    text = pd.read_excel(filepath)
    documents = text.iloc[:, 0].values.tolist()
    sentiment.train(neg, pos)
    scores = [SnowNLP(document).sentiments for document in documents]
    # A score of at least 0.5 is labelled 1, anything else -1.
    text['predict'] = [1 if score >= 0.5 else -1 for score in scores]
    text.to_excel('G:\\content_data.xlsx')
    return text
Exemplo n.º 16
0
def trainEmotion():
    """Append the labelled rows of the backup CSV to corpora, then retrain.

    Rows whose second field is ``' 0'`` or ``' -1'`` are appended to the
    ``neg.`` file and rows labelled ``' 1'`` to the ``pos.`` file, each with
    embedded newlines removed.

    NOTE(review): training reads ``neg.txt`` / ``pos.txt`` inside the
    installed snownlp package, not the two files appended to here - confirm
    that this is intended.
    """
    # One ``with`` closes all three files even when a row is malformed; the
    # CSV handle used to be leaked and the writers were only closed on the
    # success path.
    with open('thuhole_ana/analysisExisted/neg.', 'a+',
              encoding='utf-8') as fn, \
            open('thuhole_ana/analysisExisted/pos.', 'a+',
                 encoding='utf-8') as fp, \
            open('thuhole_ana/analysisExisted/备份.csv', 'r',
                 encoding='utf-8') as source:
        for row in csv.reader(source):
            label = row[1]
            line = row[0].replace('\n', '') + '\n'
            if label in (' 0', ' -1'):
                fn.write(line)
            elif label == ' 1':
                fp.write(line)
    sentiment.train('venv/Lib/site-packages/snownlp/sentiment/neg.txt',
                    'venv/Lib/site-packages/snownlp/sentiment/pos.txt')
    sentiment.save(
        'venv/Lib/site-packages/snownlp/sentiment/sentiment.marshal2')
0
def f():
    """Train the sentiment model on neg.txt / pos.txt and report the duration.

    The model is saved as ``seg.marshal`` next to this source file.
    """
    # Target model path.
    import os
    here = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(here, 'seg.marshal')
    print("data_path:" + data_path)

    # Training.
    from snownlp import sentiment  # sentiment-analysis module
    from datetime import datetime
    started_at = datetime.now()
    print(datetime.now().strftime("%X") + " 开始训练")
    # Train on the corpora; they can be extended further.
    sentiment.train('neg.txt', 'pos.txt')
    # Saving persists the training result so that later runs need not retrain
    # while the corpus is unchanged. The location is free to choose, but the
    # original author notes that `data_path` in `snownlp/seg/__init__.py` must
    # then point at it, otherwise the default model is still used.
    # NOTE(review): for a sentiment model this is presumably
    # `snownlp/sentiment/__init__.py` - confirm.
    sentiment.save(data_path)
    elapsed = datetime.now() - started_at
    finished_at = datetime.now().strftime("%X")
    print(finished_at + " 训练完毕,耗时:" + str(elapsed.seconds) + "秒")
Exemplo n.º 18
0
def SnowNLP_TRAIN(TrainPath):
    """Split the rated reviews into corpora, retrain and replace the model.

    Args:
        TrainPath: Prefix (normally a directory ending in a separator) that
            is prepended to ``评分和影评.xls``.
    """
    # 0. Locations of the corpora and of the model that gets replaced.
    pospath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\pos.txt"
    negpath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\neg.txt"
    sentimentpath = "F:/ProgramData/Anaconda3/envs/untitled2/Lib/site-packages/snownlp/sentiment/sentiment.marshal"
    # 1. Split the scraped reviews by rating: >= 4 is positive, <= 2 is
    #    negative, anything in between is skipped. The workbook is read
    #    before the corpora are truncated, so a read failure no longer wipes
    #    existing corpora.
    df = pd.read_excel(TrainPath + '评分和影评.xls')
    # The files must be closed (and thereby flushed) before training reads
    # them; they used to stay open for the rest of the function.
    with open(pospath, 'w', encoding='utf-8') as posfile, \
            open(negpath, 'w', encoding='utf-8') as negfile:
        # zip pairs rating and review positionally instead of using a manual
        # counter as an index label.
        for rating, review in zip(df['评分'], df['评论']):
            if rating >= 4:
                posfile.write(str(review) + '\n')
            elif rating <= 2:
                negfile.write(str(review) + '\n')
    # 2. Train a new model with snownlp.
    sentiment.train(negpath, pospath)
    # 3. Save the newly trained model.
    sentiment.save(sentimentpath)
    print('训练完毕!模型已替换!')
Exemplo n.º 19
0
def train_model(text_set, train_frequency):
    """Self-train the sentiment model from its own confident predictions.

    In each round every text is reduced to its runs of CJK characters (joined
    by commas) and scored. Texts scoring above 0.8 are appended to
    ``pos.txt``, texts scoring below 0.3 to ``neg.txt``; the model is then
    retrained and saved.

    Args:
        text_set: Collection of texts; it is iterated once per round.
        train_frequency: Number of training rounds.
    """
    han_runs = re.compile("([\u4E00-\u9FA5]+)")
    for i in range(1, train_frequency + 1):
        print('开始第{}次训练'.format(i))
        pos_lines = []
        neg_lines = []
        for text in text_set:
            sub_text = ','.join(han_runs.findall(text))
            # Score once; the original read ``.sentiments`` twice per text.
            score = SnowNLP(sub_text).sentiments
            if score > 0.8:
                pos_lines.append(sub_text + "\n")
            elif score < 0.3:
                neg_lines.append(sub_text + "\n")
        # Append each round's lines in one write per file instead of
        # reopening a file for every text. A file is only touched when there
        # is something to add, as before.
        if pos_lines:
            with open('pos.txt', mode='a', encoding='utf-8') as g:
                g.writelines(pos_lines)
        if neg_lines:
            with open('neg.txt', mode='a', encoding='utf-8') as f:
                f.writelines(neg_lines)
        sentiment.train('neg.txt', 'pos.txt')
        sentiment.save('sentiment.marshal')
Exemplo n.º 20
0
def train_model():
    """Train on the corpora under /home/hadoopnew and save the model."""
    # Look up how to train a snownlp model yourself; lab members should take
    # the data from the lab computer - it is not on GitHub.
    from snownlp import sentiment

    neg_corpus = '/home/hadoopnew/neg.txt'
    pos_corpus = '/home/hadoopnew/pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('sentiment.marshal_knee')
Exemplo n.º 21
0
def train(path):
    """Train on the negative and positive review sets and save the model.

    Args:
        path: Directory holding ``差评.csv`` (negative reviews) and
            ``好评.csv`` (positive reviews).
    """
    negative_reviews = '{}/差评.csv'.format(path)
    positive_reviews = '{}/好评.csv'.format(path)
    sentiment.train(negative_reviews, positive_reviews)
    sentiment.save('./sentiment.marshal')
Exemplo n.º 22
0
def train_material():
    """Train on neg.txt / pos.txt in the working directory and save the model."""
    neg_corpus, pos_corpus = 'neg.txt', 'pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('sentiment.marshal')
Exemplo n.º 23
0
# coding: utf-8
from snownlp import SnowNLP,sentiment
import os.path
# Resolve the corpora and the model destination relative to this script,
# then train and save.
base = os.path.dirname(__file__)
pos, neg, tagdest = (
    os.path.join(base, relative_path)
    for relative_path in (
        'model/sentiment/pos.txt',
        'model/sentiment/neg.txt',
        'model/sentiment/sentiment.marshal',
    )
)
sentiment.train(neg, pos)
sentiment.save(tagdest)

Exemplo n.º 24
0
#-*-coding:utf-8-*-
from snownlp import sentiment
# Negative corpus first, positive corpus second. Both are plain text files
# with one sample per line; remember to save them as UTF-8 (the editor's
# "Save as" dialog offers the option).
sentiment.train(neg_file='neg.txt', pos_file='pos.txt')
# Write the trained model file.
sentiment.save('my_sentiment.marshal')

# After training, copy the generated file into this folder:
#   D:\Python2.7\Lib\site-packages\snownlp\sentiment
# then change data_path in
#   D:\Python2.7\Lib\site-packages\snownlp\sentiment\__init__.py

# On macOS the folder is:
#   /Library/Python/2.7/site-packages/snownlp/sentiment
Exemplo n.º 25
0
from snownlp import SnowNLP
from snownlp import sentiment
import csv

# f1 collects confidently positive posts, f2 confidently negative ones and
# mytxt logs every post with its score. All files are closed (and therefore
# flushed) when the ``with`` block ends, which is required before training
# reads neg.txt / pos.txt. Previously f1 and f2 were never closed and mytxt
# was reopened for every row without being closed.
with open('./pos.txt', 'a+', encoding='utf-8') as f1, \
        open('./neg.txt', 'a+', encoding='utf-8') as f2, \
        open('commentqinggan.txt', mode='a', encoding='utf-8') as mytxt, \
        open('微博信息20191208-20200122.csv', 'r', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        body = row['正文']
        # Score once per post instead of re-running the classifier for each
        # comparison and for the log line.
        score = SnowNLP(body).sentiments
        if score < 0.25:
            f2.write(body)
            f2.write('\n')
        if score > 0.8:
            f1.write(body)
            f1.write('\n')
        print(body, score, file=mytxt)

# Retrain on the extended corpora ...
sentiment.train('neg.txt', 'pos.txt')
# ... and write the new model file.
sentiment.save('sentiment.marshal')
Exemplo n.º 26
0
def train_my_data():
    """Retrain the model from the corpora in Data/Output and save it there."""
    neg_corpus = r'Data/Output/neg.txt'
    pos_corpus = r'Data/Output/pos.txt'
    model_file = r'Data/Output/sentiment.marshal'
    # Retrain, then persist the newly trained model.
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save(model_file)
Exemplo n.º 27
0
import pandas as pd
from snownlp import SnowNLP, sentiment

# Train on the low/high rated corpora, then measure how often the predicted
# sentiment agrees with the rating column of mix.csv.
sentiment.train('E:\\data\\low.txt', 'E:\\data\\high.txt')
mix = pd.read_csv('E:\\data\\mix.csv', encoding='gbk').dropna()

right = 0
wrong = 0
for i in mix.index:
    txt = mix.loc[i, 'txt']
    fen = mix.loc[i, 'fen']
    f = SnowNLP(txt).sentiments
    print((f, fen))
    # Agreement: both above their midpoint or both below it.
    agrees = (f > 0.5 and fen > 2.5) or (f < 0.5 and fen < 2.5)
    if agrees:
        right += 1
    else:
        wrong += 1

print(right / (right + wrong))
Exemplo n.º 28
0
def TrainAndSave(negfile, posfile):
    """Train on the given corpora and save the model as sentiment.marshal.

    Args:
        negfile: Path of the negative corpus.
        posfile: Path of the positive corpus.
    """
    sentiment.train(neg_file=negfile, pos_file=posfile)
    model_file = 'sentiment.marshal'
    sentiment.save(model_file)
Exemplo n.º 29
0
def train():
    """Retrain from the corpora in the installed snownlp package.

    The model is saved next to them as ``sentiment2.marshal``.
    """
    package_dir = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/'
    sentiment.train(package_dir + 'neg.txt', package_dir + 'pos.txt')
    sentiment.save(package_dir + 'sentiment2.marshal')
Exemplo n.º 30
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  4 09:59:46 2018

@author: Ming JIN
"""
from snownlp import sentiment

# Train on the negative / positive dictionaries and save the model.
sentiment.train(neg_file='negative_dict.txt', pos_file='positive_dict.txt')
sentiment.save('sentiment.marshal')
Exemplo n.º 31
0
        # 去除英文字符和数字
        clean_txt=re.sub(r'[A-Za-z\d]*','',clean_txt)
        # 对文本进行分词
        clean_list=clean_txt.split('\n')
        return clean_list




if __name__ == '__main__':
    print("正在加载训练集...")
    # sentiment.train takes the negative corpus first and the positive one
    # second. The call used to pass positive.txt as the first argument, which
    # trains the classifier with inverted labels.
    sentiment.train(
        neg_file='./../../Resources/sentiment_folders/hotel/neg.txt',
        pos_file='./../../Resources/sentiment_folders/hotel/positive.txt',
    )  # adjust the paths as needed
    sentiment.save('sentiment.marshal')

    # Alternative test input (disabled): the labelled JSON file.
    # filename='./../../Resources/jsonfiles/ChnSentiCorp.json'
    # type_list,content_list=file_op.readfile(filename)

    # Use the Zhihu comments as the test set.
    comment_file = './../../Resources/CutWordPath/sentiment_comment.txt'
    content_list = Read_comment_file(comment_file)

    # Run the snownlp sentiment analysis.
    sentences, sentences_score = sentiment_snownlp(content_list)
Exemplo n.º 32
0
import sys
import pandas as pd  #加载pandas
from snownlp import sentiment  #加载情感分析模块
from snownlp import SnowNLP

# Load the labelled posts. The code below treats column 0 as the text and
# column 1 as the actual label.
text = pd.read_excel(u'D:/自然语言处理/川大相关微博内容.xlsx', header=0)
text0 = text.iloc[:, 0]
# Only byte strings need decoding. Calling .decode on text that is already
# decoded raises AttributeError on Python 3.
text1 = [i.decode('utf-8') if isinstance(i, bytes) else i for i in text0]

# Train on the corpora; change the paths to match the local installation.
sentiment.train('D:/Anaconda3/Lib/site-packages/snownlp/sentiment/neg.txt',
                'D:/Anaconda3/Lib/site-packages/snownlp/sentiment/pos.txt')
# Save the training result so that it need not be repeated while the corpus
# is unchanged.
sentiment.save('D:/pyscript/sentiment.marshal')

# Score every post, then map scores >= 0.6 to 1 and everything else to -1.
senti = [SnowNLP(i).sentiments for i in text1]
newsenti = [1 if i >= 0.6 else -1 for i in senti]
text['predict'] = newsenti

# Compare the predicted label with the actual one. The prediction is read by
# column name, so the comparison stays correct when the sheet has more than
# two columns (the positional index 2 used before assumed exactly two).
counts = 0
for predicted, actual in zip(text['predict'], text.iloc[:, 1]):
    if predicted == actual:
        counts += 1
# Apply the format with %; passing the value as a second print argument
# printed the raw "%f" placeholder followed by the number.
print("准确率为:%f" % (float(counts) / float(len(text))))