示例#1
0
def genvar(content, vardict):
    """Flag every key of ``vardict`` that appears among the words of ``content``.

    ``content`` is wrapped in ``News`` and tokenised with its ``cut()`` method.
    Each key of ``vardict`` that is also one of the resulting words has its
    value replaced by ``[1]``; all other entries are left untouched.

    Args:
        content: Value passed to ``News(...)`` (presumably the news text).
        vardict: Dict keyed by candidate words. It is modified in place.

    Returns:
        The same ``vardict`` object that was passed in.
    """
    words = set(News(content).cut())
    # The intersection is a fresh set, so the dict is never written to while
    # it is being iterated.
    matched = words.intersection(vardict.keys())
    for word in matched:
        vardict[word] = [1]
    return vardict
def genvar(content, dictionary):
    """Set to ``[1]`` each key of ``dictionary`` found in the words of ``content``.

    The words come from ``News(content).cut()``. Keys that do not occur among
    them keep whatever value they already had.

    Args:
        content: Value passed to ``News(...)`` (presumably the news text).
        dictionary: Dict keyed by candidate words. It is updated in place.

    Returns:
        The very same ``dictionary`` object, whether or not anything matched.
    """
    tokens = frozenset(News(content).cut())
    # NOTE(review): no copy of ``dictionary`` is made, so calling this again
    # with the same dict keeps the flags set by earlier calls - confirm that
    # callers expect this accumulation.
    for key in frozenset(dictionary.keys()) & tokens:
        dictionary[key] = [1]
    return dictionary
示例#3
0
def factor_test(dataset, factor, groupnum):  # factor group test
    """Bucket rows by cut-offs of a factor and average the five-day return.

    A ``group`` column is written into ``dataset`` (in place). For the
    cut-off at position ``k`` returned by ``get_frac``, rows whose factor
    value lies between the previous cut-off (0 for the first one) and this
    cut-off, both ends inclusive, are labelled ``k + 1``. A row matching two
    consecutive buckets keeps the label written last. Rows that match no
    bucket (for example negative or NaN factor values) keep the label 0.

    Args:
        dataset: DataFrame holding the ``factor`` column and a
            ``fivedaysreturn`` column.
        factor: Name of the column to bucket on.
        groupnum: Forwarded to ``get_frac``; presumably the number of
            groups - verify against ``get_frac``.

    Returns:
        The mean of ``fivedaysreturn`` for each value of ``group``.
    """
    values = dataset[factor]
    # get_frac is defined elsewhere; presumably it returns ascending cut-off
    # values for the factor - TODO confirm.
    cutoffs = get_frac(values, groupnum)
    # Broadcast a scalar so the column is always sized by ``dataset`` itself
    # rather than by an unrelated module-level frame.
    dataset['group'] = 0
    lower = 0
    for idx, upper in enumerate(cutoffs):
        in_bucket = (values <= upper) & (values >= lower)
        # One .loc write on the frame: a chained ``dataset['group'].loc[...]``
        # assignment may modify a temporary copy and leave ``dataset`` as is.
        dataset.loc[in_bucket, 'group'] = idx + 1
        lower = upper
    return dataset.groupby('group').fivedaysreturn.mean()

# News data processing
goldnews = pd.read_csv('hangqingdata.csv', encoding='gbk')
# Store the dates as integers.
goldnews['date'] = [int(day) for day in goldnews['date']]
# Wrap every gold-news title in a News object.
goldnews['title'] = [News(title) for title in goldnews['title']]
# New column: sentiment score of each title, from News.get_score().
goldnews['score'] = [news.get_score() for news in goldnews['title']]
# New column: the ``content`` attribute of each News title.
goldnews['content'] = [news.content for news in goldnews['title']]

# Return data processing
# Column names applied to the quote table, for easier handling.
colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume']
golddata = pd.read_csv('goldetf.csv', encoding='gbk')  # read gold ETF quotes
golddata.columns = colnames
# Dates are parsed with '%Y/%m/%d' and stored as YYYYMMDD integers.
golddata['date'] = [
    int(datetime.datetime.strptime(day, '%Y/%m/%d').strftime('%Y%m%d'))
    for day in golddata['date']
]
# Five-day return: the close four rows ahead divided by this row's open,
# minus one. shift(-4) leaves the last four rows as NaN.
golddata['fivedaysreturn'] = (
    golddata['close'].shift(-4) / golddata['open'] - 1
)
示例#4
0
            count += 1
    return count


def get_neusentnum(self):  # counts neutral sentiment when used with groupby
    """Return how many entries of ``self.values`` are equal to 0.

    ``self`` is any object exposing a ``values`` iterable (for example the
    Series of one group); a 0 entry is treated as a neutral sentiment.
    """
    return sum(1 for score in self.values if score == 0)


# News data processing
goldnews = pd.read_csv('alldata.csv', encoding='gbk')
# Store the dates as integers.
goldnews['date'] = [int(day) for day in goldnews['date']]
# Wrap every gold-news title in a News object.
goldnews['title'] = [News(title) for title in goldnews['title']]
# New column: sentiment score of each title, from News.get_score().
goldnews['score'] = [news.get_score() for news in goldnews['title']]
# New column: the ``content`` attribute of each News title.
goldnews['content'] = [news.content for news in goldnews['title']]

# Return data processing
# Column names applied to the quote table, for easier handling.
colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume']
golddata = pd.read_csv('goldetf.csv', encoding='gbk')  # read gold ETF quotes
golddata.columns = colnames
golddata['date'] = list(
    map(
        lambda x: int(
示例#5
0
import datetime
import numpy as np
import pandas as pd
from NewsSent import News  #从外部导入写好的情绪判别类
import matplotlib.pyplot as plt

# Read the three gold-news tables and stack them into one frame.
goldnews1, goldnews2, goldnews3 = (
    pd.read_csv(name, encoding='gbk')
    for name in ('analysisdata.csv', 'newsdata.csv', 'hangqingdata.csv')
)
goldnews = pd.concat([goldnews1, goldnews2, goldnews3])

# A title is kept when it contains the character for "gold" (\u91d1) and
# none of the phrases below, which also contain that character.
excluded = (
    u'\u4e0a\u6d77\u91d1\u4ea4\u6240',  # Shanghai Gold Exchange
    u'\u91d1\u878d',  # finance
    u'\u7eb8\u9ec4\u91d1',  # paper gold
    u'\u57fa\u91d1',  # fund
    u'\u8d44\u91d1',  # capital
    u'\u94af\u91d1',  # palladium
    u'\u91d1\u9053',
)
# Positions (not index labels) of the rows to keep; the concatenated frame
# is then filtered positionally with iloc.
locindex = [
    pos for pos, title in enumerate(goldnews['title'])
    if u'\u91d1' in title and not any(word in title for word in excluded)
]
goldnews = goldnews.iloc[locindex]

# Wrap every gold-news title in a News object.
goldnews['title'] = [News(title) for title in goldnews['title']]
# New column: the ``content`` attribute of each News title.
goldnews['content'] = [news.content for news in goldnews['title']]
# Store the dates as integers.
goldnews['date'] = [int(day) for day in goldnews['date']]

# Column names applied to the quote table, for easier handling.
colnames = ['date', 'open', 'high', 'low', 'close', 'turnover', 'volume']
golddata = pd.read_csv('goldetf.csv', encoding='gbk')  # read gold ETF quotes
golddata.columns = colnames
# Dates are parsed with '%Y/%m/%d' and stored as YYYYMMDD integers.
golddata['date'] = [
    int(datetime.datetime.strptime(day, '%Y/%m/%d').strftime('%Y%m%d'))
    for day in golddata['date']
]