Пример #1
0
                                  df=False)
# Fetch the performance summaries of this user's alpha factors.
mine_summary = query_data.get_alphafactors_info(user='******')
# Sign adjustment: multiply every factor that has a summary by the sign of
# its 1-day IC, so all adjusted factors share the same orientation.
factor_value_adj = {}
for summa in mine_summary:
    fac_name = summa['factor_name']
    # Direct membership test; the original rebuilt list(keys()) per summary.
    if fac_name not in factor_value:
        continue
    perf_1d = summa['perf']['1_d']
    # The 1-day IC is read from 'IC' when present, otherwise from 'ic-mean'.
    ic_1d = perf_1d['IC'] if 'IC' in perf_1d else perf_1d['ic-mean']
    factor_value_adj[fac_name] = factor_value[fac_name] * uc.sign(ic_1d)

# Build the up/down label of each stock over the next n days.
oc_data = fetch_data.fetch(begin, end, ['stock_adjopen', 'stock_adjclose'])
# Per the original author's note, the position is entered at the next day's
# open; the exit uses the adjusted close shifted by pred_window.
exit_close = uc.ts_delay(oc_data['stock_adjclose'], -pred_window)
entry_open = uc.ts_delay(oc_data['stock_adjopen'], -1)
ud_tag = exit_close / entry_open - 1
# Binarize: positive returns become 1, negative returns become 0.
# Exact zeros and missing values are left untouched by both masks.
ud_tag = ud_tag.mask(ud_tag > 0, 1)
ud_tag = ud_tag.mask(ud_tag < 0, 0)

# Reshape the factor values: stack each adjusted factor into a single long
# column named after the factor, then join all columns side by side.
stacked_columns = [
    frame.stack().to_frame(name=fac_name)
    for fac_name, frame in factor_value_adj.items()
]
new_f = pd.concat(stacked_columns, axis=1)

# Rolling generation of the up-probability predictions.
prediction = {}
# Cross-sectionally standardize every factor (open question from the
# original author: should this standardization be done first?).
fac_data = {k: uc.cs_standardize(v) for k, v in fac_data.items()}
# """
# Descriptive statistics of the factors.
# NOTE(review): the `# """` markers around this section look like toggle
# markers for disabling it via a string literal - confirm before removing.
factor_describe = {}
for fac, values in fac_data.items():
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # describe() on the transpose summarizes each row of the factor frame.
    # It is computed once and reused; the original evaluated it twice.
    row_stats = values.T.describe()
    # Store the statistics with one row per original row label.
    factor_describe[fac] = row_stats.T
    # Print each statistic averaged over all rows of the factor frame.
    print(fac, row_stats.mean(axis=1))
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
# """
# Prediction target (per the original author): the return from the next
# day's first-trading-hour tvwap to the following day's first-hour tvwap.
begin = '2017-01-01'
end = '2021-03-02'
# NOTE(review): the author left an open question on whether the adjusted
# tvwap field should be used here instead - confirm.
tvwap = fetch_data.fetch(begin, end, ['stock_twap_0930_1030'])  # adjtvwap?
exit_px = uc.ts_delay(tvwap['stock_twap_0930_1030'], -2)
entry_px = uc.ts_delay(tvwap['stock_twap_0930_1030'], -1)
next_re = exit_px / entry_px - 1
# Drop rows in which every entry is missing.
fac_data['next_re'] = next_re.dropna(how='all')

# Merge the per-day data: every entry of fac_data is stacked into one long
# column named after its key, and the columns are concatenated side by side.
long_columns = []
for col_name, wide_frame in fac_data.items():
    long_columns.append(wide_frame.stack().to_frame(name=col_name))
new_f = pd.concat(long_columns, axis=1)

# Persist the reshaped factor table. The context manager guarantees the file
# handle is flushed and closed even if pickling fails; the original left the
# handle open.
with open(data_pat + '/fac_reshape.pkl', 'wb') as f:  # remember to change
    pickle.dump(new_f, f, -1)  # -1 selects the highest pickle protocol
Пример #3
0
import time
import json

data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group'  # remember to change

begin = '2015-01-01'  # remember to change
end = '2021-03-31'

fac_data = pd.read_pickle(data_pat + '/num_restrict/fac_comb.pkl')
# Standard-normal noise with the same shape, index and columns as the
# combined factor. The generator is not seeded, so every run differs.
comb_factor = fac_data['fac_choose_comb']
noise_values = np.random.standard_normal(comb_factor.shape)
noise = pd.DataFrame(noise_values,
                     index=comb_factor.index,
                     columns=list(comb_factor))

# Top-2000 stock universe by total market cap.
cap_data = fetch_data.fetch(begin, end, ['stock_tcap'])
cap_rank = cap_data['stock_tcap'].rank(axis=1, ascending=False)
# Each day's top-2000 stocks keep a truthy flag; every other cell is NaN.
# NOTE(review): the author observed only 1999 names on 2015-08-06; rank ties
# under the default ranking method are one possible cause - unverified.
in_top2000 = cap_rank <= 2000
top2000 = in_top2000.where(in_top2000)

# Blank out the noise values of stocks outside the top-2000 universe.
noise = noise * top2000

# Cross-sectional rank of the noise, computed once and shared by all three
# variants below (the original recomputed it for each variant).
noise_rank = noise.rank(axis=1)

fac_rand = {}
# Pure noise ranks.
fac_rand['rand'] = noise_rank
# Raw combined factor values plus noise ranks.
fac_rand['info+rand'] = fac_data['fac_choose_comb'] + noise_rank
# Ranked combined factor plus noise ranks.
fac_rand['info+rand_eq'] = fac_data['fac_choose_comb'].rank(
    axis=1) + noise_rank
for k, v in fac_rand.items():
    # Count of non-missing entries per row. The value is overwritten on every
    # iteration, so only the last variant's counts remain after the loop.
    a = v.notna().sum(axis=1)
import pickle
from utils_func import query_data
from ft_platform.factor_process import fetch
from copy import deepcopy
import numpy as np
import time
import json
from collections import Counter

data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group'  # remember to change

# Forward returns measured on the first-trading-hour tvwap (per the original
# author's note); the 1-, 3-, 5- and 10-day horizons are built here.
begin = '2015-01-01'  # remember to change
end = '2020-02-28'
end1 = '2019-12-31'
data = fetch_data.fetch(begin, end, ['stock_adjtwap_0930_1030'])
index_data = fetch_data.fetch(begin, end, ['index_close'], '000905')
stock_re = {}
for horizon in (1, 3, 5, 10):
    # The exit price is shifted by horizon + 1 steps and the entry price by
    # one step, matching the offsets -2/-4/-6/-11 against -1.
    exit_px = uc.ts_delay(data['stock_adjtwap_0930_1030'], -(horizon + 1))
    entry_px = uc.ts_delay(data['stock_adjtwap_0930_1030'], -1)
    stock_re['{}_d'.format(horizon)] = exit_px / entry_px - 1
stock_re['20_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'],