def genvar(content, vardict):
    """Flag the keys of ``vardict`` that occur among the words cut from ``content``.

    ``content`` is wrapped in ``News`` and split with its ``cut()`` method.
    Every key of ``vardict`` that is also one of the resulting words has its
    value replaced by ``[1]``; all other entries are left untouched.

    ``vardict`` is modified in place and the very same object is returned.
    """
    tokens = set(News(content).cut())
    # Keys that are also present in the tokenised content.
    hits = set(vardict.keys()) & tokens
    for word in hits:
        vardict[word] = [1]
    return vardict
def genvar(content, dictionary):
    """Return a copy of ``dictionary`` with the words found in ``content`` flagged.

    ``content`` is wrapped in ``News`` and split with its ``cut()`` method.
    Every key of ``dictionary`` that is also one of the resulting words is
    set to ``[1]`` in the returned mapping; all other entries keep the value
    they have in ``dictionary``.

    Args:
        content: text handed to ``News``.
        dictionary: template mapping whose keys are the candidate words.
            It is NOT modified.

    Returns:
        A shallow copy of ``dictionary`` with the matched keys set to ``[1]``.
        When nothing matches, the copy is equal to ``dictionary``.
    """
    words = set(News(content).cut())
    # Work on a copy.  The previous version bound two extra names to the same
    # object, so every call silently wrote its flags into the caller's
    # template and they accumulated from one call to the next.  A shallow
    # copy is sufficient because matched values are replaced, never mutated.
    flagged = dictionary.copy()
    for key in set(dictionary.keys()) & words:
        flagged[key] = [1]
    return flagged
def factor_test(dataset, factor, groupnum):
    """Factor group test: mean five-day return per factor bucket.

    Args:
        dataset: DataFrame containing the ``factor`` column and a
            ``fivedaysreturn`` column.  A ``group`` column is added to it
            (or overwritten) in place.
        factor: name of the column to bucket on.
        groupnum: forwarded unchanged to ``get_frac``.

    Returns:
        The result of ``dataset.groupby('group').fivedaysreturn.mean()``.
    """
    # get_frac is defined elsewhere; presumably it returns ascending cut
    # points for the factor values -- TODO confirm.
    fracvaluelist = get_frac(dataset[factor], groupnum)
    values = dataset[factor]

    # Size the column from the frame actually passed in (the old code used
    # the length of the module-level ``golddata`` instead).
    dataset['group'] = 0

    # NOTE(review): the first lower bound is 0, so negative factor values are
    # never matched and stay in group 0 -- confirm this is intended.
    lower = 0
    for idx, upper in enumerate(fracvaluelist):
        in_bucket = (values >= lower) & (values <= upper)
        # Single .loc call on the frame: chained ``dataset['group'].loc[...]``
        # assignment may write to a temporary and leave ``dataset`` unchanged.
        dataset.loc[in_bucket, 'group'] = idx + 1
        # Both bounds are inclusive, so a value equal to a cut point ends up
        # in the later group (later iterations overwrite earlier ones).
        lower = upper

    return dataset.groupby('group').fivedaysreturn.mean()


# --- News data processing ---
goldnews = pd.read_csv('hangqingdata.csv', encoding='gbk')
# Convert the dates to integers.
goldnews['date'] = [int(d) for d in goldnews['date']]
# Turn every gold news title into a News object.
goldnews['title'] = [News(title) for title in goldnews['title']]
# New column recording the sentiment score of each news title.
goldnews['score'] = [news.get_score() for news in goldnews['title']]
# New column recording the content of each news title.
goldnews['content'] = [news.content for news in goldnews['title']]

# --- Return data processing ---
# Read the gold ETF market data.
golddata = pd.read_csv('goldetf.csv', encoding='gbk')
# Rename the columns to make them easier to work with.
colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume']
golddata.columns = colnames
# Convert 'YYYY/MM/DD' date strings to YYYYMMDD integers.
golddata['date'] = [
    int(datetime.datetime.strptime(d, '%Y/%m/%d').strftime('%Y%m%d'))
    for d in golddata['date']
]

# Five-day return: close of row i+4 divided by open of row i, minus 1.
# The last four rows have no row i+4 and therefore stay NaN.
fivedaysreturnlist = (
    golddata['close'].shift(-4) / golddata['open'] - 1
).tolist()
golddata['fivedaysreturn'] = fivedaysreturnlist
count += 1 return count def get_neusentnum(self): #groupby时寻找中性情绪 count = 0 for i in self.values: if i == 0: count += 1 return count goldnews = pd.read_csv('alldata.csv', encoding='gbk') #新闻数据处理 goldnews['date'] = list(map(lambda x: int(x), goldnews['date'])) goldnews['title'] = list(map(lambda x: News(x), goldnews['title'])) #将黄金新闻标题转化为新闻类 goldnews['score'] = list(map(lambda x: x.get_score(), goldnews['title'])) #新建一列记录新闻标题情绪打分 goldnews['content'] = list(map(lambda x: x.content, goldnews['title'])) #新建一列记录新闻标题情绪内容 goldnews['date'] = list(map(lambda x: int(x), goldnews['date'])) #将日期转换为整数 #收益率数据处理 golddata = pd.read_csv('goldetf.csv', encoding='gbk') #读取黄金ETF行情数据 colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume'] #更改列名称,方便处理 golddata.columns = colnames golddata['date'] = list( map( lambda x: int(
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from NewsSent import News  # sentiment-classification class imported from an external module

# Read the gold news from the three source files and stack them.
goldnews1 = pd.read_csv('analysisdata.csv', encoding='gbk')
goldnews2 = pd.read_csv('newsdata.csv', encoding='gbk')
goldnews3 = pd.read_csv('hangqingdata.csv', encoding='gbk')
goldnews = pd.concat([goldnews1, goldnews2, goldnews3])

# A title is kept only when it contains the "gold" character and none of the
# excluded terms.  The English glosses are reader aids for the escaped text.
_GOLD_CHAR = u'\u91d1'  # "gold"
_EXCLUDED_TERMS = (
    u'\u4e0a\u6d77\u91d1\u4ea4\u6240',  # Shanghai Gold Exchange
    u'\u91d1\u878d',  # finance
    u'\u7eb8\u9ec4\u91d1',  # paper gold
    u'\u57fa\u91d1',  # fund
    u'\u8d44\u91d1',  # capital / funds
    u'\u94af\u91d1',  # palladium
    u'\u91d1\u9053',  # "Jindao" -- presumably a brand name; verify
)

# Positional row numbers of the titles that pass the filter.  Positions (not
# labels) are used because the concatenated frame keeps each file's own index.
locindex = [
    idx
    for idx, title in enumerate(goldnews['title'])
    if _GOLD_CHAR in title
    and not any(term in title for term in _EXCLUDED_TERMS)
]
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise SettingWithCopyWarning.
goldnews = goldnews.iloc[locindex].copy()

# Turn every gold news title into a News object.
goldnews['title'] = [News(title) for title in goldnews['title']]
# New column recording the content of each news title.
goldnews['content'] = [news.content for news in goldnews['title']]
# Convert the dates to integers.
goldnews['date'] = [int(d) for d in goldnews['date']]

# Read the gold ETF market data.
golddata = pd.read_csv('goldetf.csv', encoding='gbk')
# Rename the columns to make them easier to work with.
colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume']
golddata.columns = colnames
# Convert 'YYYY/MM/DD' date strings to YYYYMMDD integers.
golddata['date'] = [
    int(datetime.datetime.strptime(d, '%Y/%m/%d').strftime('%Y%m%d'))
    for d in golddata['date']
]