Example #1
def analyze_event():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # --------------------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    mask_limit_reached = dv.get_ts('mask_limit_reached')
    mask_index_member = dv.get_ts('mask_index_member')
    mask_sus = dv.get_ts('mask_sus')

    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    dv.add_formula('in_',
                   '(Delay(index_weight, 1) == 0) && (index_weight > 0)',
                   is_quarterly=False)

    signal = dv.get_ts('in_').shift(1, axis=0)  # avoid look-ahead bias
    # Step.4 analyze!
    obj = SignalDigger(output_folder='../../output', output_format='pdf')

    obj.create_binary_event_report(signal,
                                   price,
                                   mask_all,
                                   price_bench,
                                   periods=[20, 60, 121, 242],
                                   group_by=None)
Example #2
def test_optimizer():
    from jaqs_fxdayu.research import Optimizer

    dv = DataView()
    dv.load_dataview(dataview_folder)

    mask = mask_index_member(dv)
    can_enter, can_exit = limit_up_down(dv)

    price = dv.get_ts('close_adj')
    high = dv.get_ts('high_adj')
    low = dv.get_ts('low_adj')
    price_bench = dv.data_benchmark
    optimizer = Optimizer(dataview=dv,
                          formula='- Correlation(vwap_adj, volume, LEN)',
                          params={"LEN": range(2, 4, 1)},
                          name='divert',
                          price=price,
                          high=high,
                          low=low,
                          benchmark_price=price_bench,  # =None for absolute returns; =price_bench for returns relative to the benchmark
                          period=30,
                          n_quantiles=5,
                          mask=mask,
                          can_enter=can_enter,
                          can_exit=can_exit,
                          commission=0.0008,  # commission rate, default 0.0008
                          is_event=False,  # whether the signal is an event (0/1 factor)
                          is_quarterly=False)  # whether the factor is quarterly, default False

    ret_best = optimizer.enumerate_optimizer(target_type="top_quantile_ret",  # type of optimization target
                                             target="Ann. IR",  # target metric to optimize
                                             in_sample_range=[20140101, 20160101],  # in-sample range; None (default) optimizes over the full sample
                                             ascending=False)  # whether to sort results by the target in ascending order
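# Examples #2, #13 and #34 call mask_index_member(dv) and limit_up_down(dv) without
# showing them. A sketch consistent with the inline mask / limit logic in Examples
# #12, #15 and #36 (thresholds and field names taken from those examples; the
# original helpers may differ) could be:
import numpy as np

def mask_index_member(dv):
    df_index_member = dv.get_ts('index_member')
    return ~(df_index_member > 0)  # True = not an index constituent, to be filtered out

def limit_up_down(dv):
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'  # suspended
    # +/-9.5% moves versus the previous close flag limit-up / limit-down days
    up_limit = dv.add_formula('up_limit',
                              '(close - Delay(close, 1)) / Delay(close, 1) > 0.095',
                              is_quarterly=False)
    down_limit = dv.add_formula('down_limit',
                                '(close - Delay(close, 1)) / Delay(close, 1) < -0.095',
                                is_quarterly=False)
    can_enter = np.logical_and(up_limit < 1, ~mask_sus)  # not limit-up and not suspended
    can_exit = np.logical_and(down_limit < 1, ~mask_sus)  # not limit-down and not suspended
    return can_enter, can_exit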
Example #3
def analyze_event():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # --------------------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    mask_limit_reached = dv.get_ts('mask_limit_reached')
    mask_index_member = dv.get_ts('mask_index_member')
    mask_sus = dv.get_ts('mask_sus')
    
    mask_all = np.logical_or(mask_sus, np.logical_or(mask_index_member, mask_limit_reached))
    
    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    dv.add_formula('in_', '(Delay(index_weight, 1) == 0) && (index_weight > 0)', is_quarterly=False)
    
    signal = dv.get_ts('in_').shift(1, axis=0)  # avoid look-ahead bias
    # Step.4 analyze!
    obj = SignalDigger(output_folder='../../output', output_format='pdf')

    obj.create_binary_event_report(signal, price, mask_all, price_bench, periods=[20, 60, 121, 242], group_by=None)
Example #4
def load_data(symbol):
    dv = DataView()
    dv.load_dataview(folder_path=dataview_store_folder)

    df = pd.DataFrame()

    df['close'] = dv.get_ts('close',
                            symbol=symbol,
                            start_date=20080101,
                            end_date=20171231)[symbol]
    df['open'] = dv.get_ts('open',
                           symbol=symbol,
                           start_date=20080101,
                           end_date=20171231)[symbol]
    df['high'] = dv.get_ts('high',
                           symbol=symbol,
                           start_date=20080101,
                           end_date=20171231)[symbol]
    df['low'] = dv.get_ts('low',
                          symbol=symbol,
                          start_date=20080101,
                          end_date=20171231)[symbol]

    df = df.dropna()

    return df
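# Hypothetical usage of the helper above; '600030.SH' is only a placeholder symbol
# and dataview_store_folder must already point at a prepared dataview.
df_demo = load_data('600030.SH')
print(df_demo.tail())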
Example #5
def analyze_event():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)
    
    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    target_symbol = '600519.SH'
    price = dv.get_ts('close_adj', symbol=target_symbol)
    dv.add_formula('in_', 'open_adj / Delay(close_adj, 1)', is_quarterly=False)
    signal = dv.get_ts('in_', symbol=target_symbol).shift(1, axis=0)  # avoid look-ahead bias
    
    # Step.4 analyze!
    obj = SignalDigger(output_folder='../../output', output_format='pdf')

    obj.create_single_signal_report(signal, price, [1, 5, 9, 21], 6, mask=None,
                                    buy_condition={'cond1': {'column': 'quantile',
                                                             'filter': lambda x: x > 3,
                                                             'hold': 5},
                                                   'cond2': {'column': 'quantile',
                                                             'filter': lambda x: x > 5,
                                                             'hold': 5},
                                                   'cond3': {'column': 'quantile',
                                                             'filter': lambda x: x > 5,
                                                             'hold': 9},
                                                   })
Example #6
def analyze_signal():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # --------------------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'.encode('utf-8')

    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)

    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0

    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    # dv.add_formula('illi_daily', '(high - low) * 1000000000 / turnover', is_quarterly=False)
    # dv.add_formula('illi', 'Ewma(illi_daily, 11)', is_quarterly=False)

    # dv.add_formula('size', 'Log(float_mv)', is_quarterly=False)
    # dv.add_formula('value', '-1.0/pb', is_quarterly=False)
    # dv.add_formula('liquidity', 'Ts_Mean(volume, 22) / float_mv', is_quarterly=False)
    dv.add_formula('divert',
                   '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False)

    signal = dv.get_ts('divert').shift(1, axis=0)  # avoid look-ahead bias
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step.4 analyze!
    my_period = 5
    obj = SignalDigger(output_folder='../../output/test_signal',
                       output_format='pdf')
    obj.process_signal_before_analysis(
        signal,
        price=price,
        mask=mask_all,
        n_quantiles=5,
        period=my_period,
        benchmark_price=price_bench,
    )
    res = obj.create_full_report()
Example #7
def test_q_add_formula():
    dv = DataView()
    folder_path = '../output/prepared/20160609_20170601_freq=1D'
    dv.load_dataview(folder_path=quarterly_path)
    nrows, ncols = dv.data_d.shape
    n_securities = len(dv.data_d.columns.levels[0])

    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
Example #8
def test_q_add_formula():
    dv = DataView()
    folder_path = '../output/prepared/20160609_20170601_freq=1D'
    dv.load_dataview(folder_path=quarterly_path)
    nrows, ncols = dv.data_d.shape
    n_securities = len(dv.data_d.columns.levels[0])
    
    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty
    
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
Example #9
def save_dataview():
    ds = RemoteDataService()
    ds.init_from_config(data_config)
    dv = DataView()

    props = {
        'start_date': 20150101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'volume,turnover,float_mv,pb,total_mv',
        'freq': 1
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()

    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == '停牌'
    dv.append_df(mask_sus, 'suspended', is_quarterly=False)

    dv.add_formula('not_index_member', '!index_member', is_quarterly=False)

    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)

    dv.save_dataview(dataview_folder)
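# The save_dataview() snippets assume module-level data_config and dataview_folder
# objects. Example #19 shows the shape of that config, so a sketch (credentials stay
# redacted as in the source; the folder is a placeholder) would be:
from jaqs.data import RemoteDataService, DataView

data_config = {
    "remote.data.address": "tcp://data.tushare.org:8910",
    "remote.data.username": "******",  # quantos account (phone number)
    "remote.data.password": "******",  # quantos API token
}
dataview_folder = '../../output/prepared/hs300'  # placeholder: where the dataview is saved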
Example #10
def save_dataview():
    ds = RemoteDataService()
    ds.init_from_config(data_config)
    dv = DataView()
    
    props = {'start_date': 20160101, 'end_date': 20171001, 'universe': '000300.SH',
             'fields': 'volume,turnover',
             'freq': 1}
    
    dv.init_from_config(props, ds)
    dv.prepare_data()
    
    # for convenience, precompute limit-reached flags
    dv.add_formula('limit_reached', 'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095', is_quarterly=False)
    dv.add_formula('mask_limit_reached', 'limit_reached > 0', is_quarterly=False)
    
    dv.add_formula('mask_index_member', '!(index_member > 0)', is_quarterly=False)
    
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'
    dv.append_df(mask_sus, 'mask_sus', is_quarterly=False)
    
    # dv.add_formula('size', '', is_quarterly=False)
    
    dv.save_dataview(dataview_folder)
Example #11
def save_dataview():
    ds = RemoteDataService()
    ds.init_from_config(data_config)
    dv = DataView()

    props = {
        'start_date': 20160101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'volume,turnover',
        'freq': 1
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()

    # for convenience, precompute limit-reached flags
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    dv.add_formula('mask_limit_reached',
                   'limit_reached > 0',
                   is_quarterly=False)

    dv.add_formula('mask_index_member',
                   '!(index_member > 0)',
                   is_quarterly=False)

    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'
    dv.append_df(mask_sus, 'mask_sus', is_quarterly=False)

    # dv.add_formula('size', '', is_quarterly=False)

    dv.save_dataview(dataview_folder)
Example #12
def analyze_event():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # --------------------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'.encode('utf-8')

    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)

    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0

    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    dv.add_formula('new_high',
                   'close_adj >= Ts_Max(close_adj, 300)',
                   is_quarterly=False)
    dv.add_formula('new_high_delay',
                   'Delay(Ts_Max(new_high, 300), 1)',
                   is_quarterly=False)
    dv.add_formula('sig', 'new_high && (! new_high_delay)', is_quarterly=False)

    signal = dv.get_ts('sig').shift(1, axis=0)  # avoid look-ahead bias
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step.4 analyze!
    obj = SignalDigger(output_folder=jutil.join_relative_path('../output'),
                       output_format='pdf')

    obj.create_binary_event_report(signal,
                                   price,
                                   mask_all,
                                   5,
                                   price_bench,
                                   periods=[5, 20, 40])
Example #13
def test_analyze_signal():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    mask = mask_index_member(dv)
    can_enter, can_exit = limit_up_down(dv)

    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    dv.add_formula('divert',
                   '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False,
                   add_data=True)

    signal = dv.get_ts('divert')
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step.4 analyze!
    my_period = 5
    obj = SignalDigger(output_folder='../output/test_signal',
                       output_format='pdf')
    obj.process_signal_before_analysis(
        signal=signal,
        price=price,
        high=dv.get_ts("high_adj"),  # optional (can be None)
        low=dv.get_ts("low_adj"),  # optional (can be None)
        group=dv.get_ts("sw1"),
        n_quantiles=5,  # number of quantiles
        mask=mask,  # filter mask
        can_enter=can_enter,  # whether entry is allowed
        can_exit=can_exit,  # whether exit is allowed
        period=my_period,  # holding period
        benchmark_price=price_bench,  # benchmark price; if omitted, holding-period returns are absolute
        commission=0.0008,
    )
    signal_data = obj.signal_data
    result = analysis(signal_data, is_event=False, period=my_period)
    ic = pfm.calc_signal_ic(signal_data, by_group=True)
    mean_ic_by_group = pfm.mean_information_coefficient(ic, by_group=True)
    plotting.plot_ic_by_group(mean_ic_by_group)
    res = obj.create_full_report()
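# This snippet also relies on analysis, pfm and plotting being imported at module
# level. The paths below are assumptions based on the jaqs_fxdayu package layout
# (verify against the installed version):
from jaqs_fxdayu.research.signaldigger.analysis import analysis
from jaqs_fxdayu.research.signaldigger import performance as pfm
from jaqs_fxdayu.research.signaldigger import plotting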
Example #14
def test_DIY_signal():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)
    # Method 1: add_formula defines a factor from an expression over fields already in the dataview
    dv.add_formula("momentum",
                   "Return(close_adj, 20)",
                   is_quarterly=False,
                   add_data=True)
    # Method 2: append_df builds a factor table (pandas.DataFrame) and adds it to the dataview directly
    import pandas as pd
    import talib as ta

    close = dv.get_ts("close_adj").dropna(how='all', axis=1)
    slope_df = pd.DataFrame(
        {
            sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10)
            for sec_symbol, value in close.iteritems()
        },
        index=close.index)
    dv.append_df(slope_df, 'slope')
    dv.get_ts("slope")

    # Define an event
    from jaqs_fxdayu.research.signaldigger import process

    Open = dv.get_ts("open_adj")
    High = dv.get_ts("high_adj")
    Low = dv.get_ts("low_adj")
    Close = dv.get_ts("close_adj")
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status != 1
    # Mask out suspended periods before computing the indicators
    open_masked = process._mask_df(Open, mask=mask_sus)
    high_masked = process._mask_df(High, mask=mask_sus)
    low_masked = process._mask_df(Low, mask=mask_sus)
    close_masked = process._mask_df(Close, mask=mask_sus)
    from jaqs_fxdayu.data import signal_function_mod as sfm
    MA5 = sfm.ta(ta_method='MA',
                 ta_column=0,
                 Open=open_masked,
                 High=high_masked,
                 Low=low_masked,
                 Close=close_masked,
                 Volume=None,
                 timeperiod=10)
    MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10)
    dv.append_df(MA5, 'MA5')
    dv.append_df(MA10, 'MA10')
    dv.add_formula("Cross",
                   "(MA5>=MA10)&&(Delay(MA5<MA10, 1))",
                   is_quarterly=False,
                   add_data=True)
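    # Hypothetical follow-up (not part of the original example): the 'Cross' event
    # defined above could be fed into the binary-event report the same way Examples
    # #1 and #3 do; the output folder and holding periods are placeholders.
    signal = dv.get_ts('Cross').shift(1, axis=0)  # lag one bar to avoid look-ahead bias
    price = dv.get_ts('close_adj')
    obj = SignalDigger(output_folder='../../output', output_format='pdf')
    obj.create_binary_event_report(signal, price, mask_sus, dv.data_benchmark,
                                   periods=[5, 20, 60], group_by=None)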
Example #15
def test_analyze_signal():
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # --------------------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'

    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)

    dv.add_formula('limit_reached', 'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095', is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0

    mask_all = np.logical_or(mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # --------------------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)', is_quarterly=False)
    
    signal = dv.get_ts('divert').shift(1, axis=0)  # avoid look-ahead bias
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step.4 analyze!
    my_period = 5
    obj = SignalDigger(output_folder='../output/test_signal', output_format='pdf')
    obj.process_signal_before_analysis(signal, price=price,
                                       mask=mask_all,
                                       n_quantiles=5, period=my_period,
                                       benchmark_price=price_bench,
                                       )
    res = obj.create_full_report()
Example #16
def test_load():
    dv = DataView()
    dv.load_dataview(folder_path=daily_path)
    
    assert dv.start_date == 20160601 and set(dv.symbol) == set('000001.SZ,600030.SH,000063.SZ'.split(','))

    # test get_snapshot
    snap1 = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ', fields='close,pb')
    assert snap1.shape == (2, 2)
    assert set(snap1.columns.values) == {'close', 'pb'}
    assert set(snap1.index.values) == {'600030.SH', '000063.SZ'}
    
    # test get_ts
    ts1 = dv.get_ts('close', symbol='600030.SH,000063.SZ', start_date=20170101, end_date=20170302)
    assert ts1.shape == (38, 2)
    assert set(ts1.columns.values) == {'600030.SH', '000063.SZ'}
    assert ts1.index.values[-1] == 20170302
Example #17
def save_dataview():
    ds = RemoteDataService()
    ds.init_from_config(data_config)
    dv = DataView()
    
    props = {'start_date': 20150101, 'end_date': 20171001, 'universe': '000300.SH',
             'fields': 'volume,turnover,float_mv,pb,total_mv',
             'freq': 1}
    
    dv.init_from_config(props, ds)
    dv.prepare_data()

    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == '停牌'
    dv.append_df(mask_sus, 'suspended', is_quarterly=False)

    dv.add_formula('not_index_member', '!index_member', is_quarterly=False)

    dv.add_formula('limit_reached', 'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095', is_quarterly=False)
    
    dv.save_dataview(dataview_folder)
Example #18
def test_load():
    dv = DataView()
    dv.load_dataview(folder_path=daily_path)

    assert dv.start_date == 20160601 and set(dv.symbol) == set(
        '000001.SZ,600030.SH,000063.SZ'.split(','))

    # test get_snapshot
    snap1 = dv.get_snapshot(20170504,
                            symbol='600030.SH,000063.SZ',
                            fields='close,pb')
    assert snap1.shape == (2, 2)
    assert set(snap1.columns.values) == {'close', 'pb'}
    assert set(snap1.index.values) == {'600030.SH', '000063.SZ'}

    # test get_ts
    ts1 = dv.get_ts('close',
                    symbol='600030.SH,000063.SZ',
                    start_date=20170101,
                    end_date=20170302)
    assert ts1.shape == (38, 2)
    assert set(ts1.columns.values) == {'600030.SH', '000063.SZ'}
    assert ts1.index.values[-1] == 20170302
Example #19
check_factor = ','.join(factor_list)
dataview_folder = '/Users/adam/Desktop/intern/test5/fxdayu_adam/data'
dataview_folder2 = 'muti_factor/'
dv = DataView()
#ds = LocalDataService(fp=dataview_folder)
data_config = {
    "remote.data.address":
    "tcp://data.tushare.org:8910",
    "remote.data.username":
    "******",
    "remote.data.password":
    "******"
}
ds = RemoteDataService()
ds.init_from_config(data_config)
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
Example #20
warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


#1. Compute and plot
#step1 Momentum: compute ROCR100 and plot it
def change_index(df):
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


data = change_index(dv.get_ts('close_adj').loc[20170105:])

#Note this pattern: a dict comprehension pulls the per-symbol series, and the last line turns the dict into a DataFrame
symbol = ['000001.SZ', '600036.SH', '600050.SH', '000008.SZ', '000009.SZ']
price_dict = {name: data[name]
              for name in symbol}  # each name becomes a dict key, data[name] its value
data_mom = pd.DataFrame(
    {item: ta.ROCR100(value.values, 20)
     for item, value in price_dict.items()},
    index=data.index).dropna(axis=0)
#dropna() drops empty rows; ta.ROCR100(value.values, 20) computes 20-day momentum, e.g. A = ta.ROCR100(price_dict['000001.SZ'], 20) (ROCR100 is a bit special in talib)

fig = plt.figure(figsize=(15, 7))  # figure size
plt.plot(data_mom)
plt.hlines(100,
           data_mom.index[0],
Example #21
from jaqs.data import RemoteDataService
import os
import numpy as np
import warnings

warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300_2'
dv.load_dataview(dataview_folder)

#For Example
import talib as ta
from datetime import datetime
import talib.abstract as abstract

data = dv.get_ts('close_adj') #data2 = dv.get_ts('close')
print (data.tail())
print (data['600036.SH'].values)  #X1=data['600036.SH']
print (type(data['600036.SH'].values)) #X2=data['600036.SH'].values

# Pass a 'numpy.ndarray' directly
A = ta.MA(data['600036.SH'].values, 2)
# Pass the DataFrame directly; by default the abstract API reads the column named 'close'.
B = ta.abstract.MA(data, 2, price='600036.SH').tail()

'''
data['SMA'] = ta.abstract.MA(data, 20, price='600036.SH') # simple moving average, same as ta.abstract.MA
#data['SMA2'] = ta.abstract.SMA(data, 20, price='600036.SH') # exactly the same as the line above (simple MA)
data['WMA'] = ta.abstract.WMA(data, 20, price='600036.SH') # weighted MA (emphasizes recent values)
data['TRIMA'] = ta.abstract.TRIMA(data, 20, price='600036.SH') # triangular MA (emphasizes the middle, used for cycle analysis)
data['EMA']  = ta.abstract.EMA(data, 20, price='600036.SH')  # exponential MA
'''
Example #22
ds = RemoteDataService()
ds.init_from_config(data_config)
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': ','.join(factor_list),
    'freq': 1,
    "prepare_fields": True,
    "benchmark": '000300.SH'
}

dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
Example #23
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True
}

dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()

#Get the industry classification
dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
Example #24
import talib.abstract as abstract

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


def change_index(df):  # convert the integer date index to datetime
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


data = change_index(dv.get_ts(
    'close_adj').loc[20170105:])  #A=dv.get_ts('close_adj').loc[20170105:]

#example1: moving-average percentage channel
middleband = ta.abstract.MA(data, timeperiod=20, price='600036.SH')  # compute the moving average
upperband = middleband * 1.03
lowerband = middleband * 0.97
data_B = pd.concat([middleband, upperband, lowerband],
                   axis=1)  # combine the three bands into one DataFrame
data_B.columns = ['middleband', 'upperband', 'lowerband']

plt.figure(figsize=(15, 7))
plt.plot(data['600036.SH'])
plt.plot(data_B['middleband'], 'r', alpha=0.3)
plt.plot(data_B['upperband'], 'g', alpha=0.3)
plt.plot(data_B['lowerband'], 'g', alpha=0.3)
plt.show()
Example #25
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
import talib.abstract as abstract

warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300_2'
dv.load_dataview(dataview_folder)

def change_index(df):
    df.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , df.index))
    return df

# Read close prices
data = change_index(dv.get_ts('close').loc[20170105:])

data['SMA'] = ta.abstract.MA(data, 20, price='600036.SH')
#data['SMA2'] = ta.abstract.SMA(data, 20, price='600036.SH') # exactly the same as the line above
data['WMA'] = ta.abstract.WMA(data, 20, price='600036.SH')
data['TRIMA'] = ta.abstract.TRIMA(data, 20, price='600036.SH')
data['EMA']  = ta.abstract.EMA(data, 20, price='600036.SH')
data['DEMA'] = ta.abstract.DEMA(data, 20, price='600036.SH')
data['KAMA'] = ta.abstract.KAMA(data, 20, price='600036.SH') 


fig = plt.figure(figsize=(15, 7))
plt.plot(data['600036.SH'])
plt.plot(data['SMA'], alpha=0.5)  # simple moving average
plt.plot(data['WMA'], alpha=0.5)  # weighted MA (emphasizes recent values)
plt.plot(data['TRIMA'], alpha=0.5)  # triangular MA (emphasizes the middle, used for cycle analysis)
Example #26
from datetime import datetime
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)

def change_index(df):
    df.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , df.index))
    return df

# Compute and plot
stock = change_index(dv.get_ts('close_adj').loc[20170105:])
hs300 = change_index(dv.data_benchmark.loc[20170105:])  # dv.data_benchmark is the configured benchmark (the CSI 300 index here)
Rs = stock['600036.SH'] / hs300.close  # compute RS (relative strength)
Rs = Rs.dropna()  # drop empty rows
print(Rs.tail())

# Compute Momentum_RS
import talib as ta
MOM_Rs = ta.ROCR100(Rs.values, 20)  # ta.ROCR100(..., 20) computes 20-day momentum (rate of change)
MOM_Mom = ta.ROCR100(MOM_Rs, 20)  # rate of change of the rate of change, i.e. acceleration
data_s = stock['600036.SH']
data1 = pd.Series(MOM_Rs, index=Rs.index)  # attach the date index
data2 = pd.Series(MOM_Mom, index=Rs.index)
data = pd.concat([data_s, Rs, data1, data2], axis=1)
data.columns = ['close', 'RS', 'MOM_RS', 'MOM_MOM']  # final result: close price, relative strength, momentum, acceleration
print (data.tail())
Example #27
def save_data_to_csv():
    dv = DataView()
    dv.load_dataview(folder_path=dataview_store_folder)

    # df = pd.DataFrame()

    # df['close'] = dv.get_ts('close', symbol=symbol, start_date=20080101, end_date=20171231)[symbol]
    # df['open']  = dv.get_ts('open', symbol=symbol, start_date=20080101, end_date=20171231)[symbol]
    # df['high']  = dv.get_ts('high', symbol=symbol, start_date=20080101, end_date=20171231)[symbol]
    # df['low']   = dv.get_ts('low', symbol=symbol, start_date=20080101, end_date=20171231)[symbol]

    # df = df.dropna()
    # snap1 = dv.get_snapshot(20080424, symbol='600030.SH', fields='open,close,high,low,volume')
    # ts1 = dv.get_ts('open,close,high,low,close_adj,future_return_2,future_return_3,future_return_4,future_return_5', symbol='600030.SH', start_date=20080101, end_date=20080302)
    sh_000905 = get_index_info()
    for symbol in sh_000905['symbol']:
        # for symbol in ['600030.SH', '600104.SH']:
        print(symbol)
        ts_symbol = dv.get_ts(
            'open,close,high,low,volume,future_return_2,future_return_3,future_return_4,future_return_5',
            symbol=symbol,
            start_date=start_date,
            end_date=end_date)[symbol]

        ts_symbol.fillna(0, inplace=True)
        ts_symbol = ts_symbol[(ts_symbol[['volume']] != 0).all(axis=1)]

        ts_symbol['date'] = ts_symbol.index
        ts_symbol['date'] = pd.to_datetime(ts_symbol['date'], format='%Y%m%d')
        ts_symbol = ts_symbol.reset_index(drop=True)
        _kdj = trendline.kdj(ts_symbol)
        _macd = trendline.macd(ts_symbol)
        _rsi = trendline.rsi(ts_symbol)
        _vrsi = trendline.vrsi(ts_symbol)
        _boll = trendline.boll(ts_symbol)
        _bbiboll = trendline.bbiboll(ts_symbol)
        _wr = trendline.wr(ts_symbol)
        _bias = trendline.bias(ts_symbol)
        _asi = trendline.asi(ts_symbol)
        _vr_rate = trendline.vr_rate(ts_symbol)
        _vr = trendline.vr(ts_symbol)
        _arbr = trendline.arbr(ts_symbol)
        _dpo = trendline.dpo(ts_symbol)
        _trix = trendline.trix(ts_symbol)
        _bbi = trendline.bbi(ts_symbol)
        _mtm = trendline.mtm(ts_symbol)
        _obv = trendline.obv(ts_symbol)
        _cci = trendline.cci(ts_symbol)
        _priceosc = trendline.priceosc(ts_symbol)
        _dbcd = trendline.dbcd(ts_symbol)
        _roc = trendline.roc(ts_symbol)
        _vroc = trendline.vroc(ts_symbol)
        _cr = trendline.cr(ts_symbol)
        _psy = trendline.psy(ts_symbol)
        _wad = trendline.wad(ts_symbol)
        _mfi = trendline.mfi(ts_symbol)
        _vosc = trendline.vosc(ts_symbol)
        # _jdqs = trendline.jdqs(ts_symbol)
        # _jdrs = trendline.jdrs(ts_symbol)

        ts_symbol = trendline.join_frame(ts_symbol, _kdj)
        ts_symbol = trendline.join_frame(ts_symbol, _macd)
        ts_symbol = trendline.join_frame(ts_symbol, _rsi)
        ts_symbol = trendline.join_frame(ts_symbol, _vrsi)
        ts_symbol = trendline.join_frame(ts_symbol, _boll)
        ts_symbol = trendline.join_frame(ts_symbol, _bbiboll)
        ts_symbol = trendline.join_frame(ts_symbol, _wr)
        ts_symbol = trendline.join_frame(ts_symbol, _bias)
        ts_symbol = trendline.join_frame(ts_symbol, _asi)
        ts_symbol = trendline.join_frame(ts_symbol, _vr_rate)
        ts_symbol = trendline.join_frame(ts_symbol, _vr)
        ts_symbol = trendline.join_frame(ts_symbol, _arbr)
        ts_symbol = trendline.join_frame(ts_symbol, _dpo)
        ts_symbol = trendline.join_frame(ts_symbol, _trix)
        ts_symbol = trendline.join_frame(ts_symbol, _bbi)
        ts_symbol = trendline.join_frame(ts_symbol, _mtm)
        ts_symbol = trendline.join_frame(ts_symbol, _obv)
        ts_symbol = trendline.join_frame(ts_symbol, _cci)
        ts_symbol = trendline.join_frame(ts_symbol, _priceosc)
        ts_symbol = trendline.join_frame(ts_symbol, _dbcd)
        ts_symbol = trendline.join_frame(ts_symbol, _roc)
        ts_symbol = trendline.join_frame(ts_symbol, _vroc)
        ts_symbol = trendline.join_frame(ts_symbol, _cr)
        ts_symbol = trendline.join_frame(ts_symbol, _psy)
        ts_symbol = trendline.join_frame(ts_symbol, _wad)
        ts_symbol = trendline.join_frame(ts_symbol, _mfi)
        ts_symbol = trendline.join_frame(ts_symbol, _vosc)
        # ts_symbol = trendline.join_frame(ts_symbol, _jdqs)
        # ts_symbol = trendline.join_frame(ts_symbol, _jdrs)

        save_csv(symbol, ts_symbol)
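# The indicator construction and join_frame calls above are fully repetitive; an
# equivalent loop (a sketch, assuming every trendline function takes the single
# ts_symbol DataFrame, as the calls above suggest) would be:
indicator_funcs = [
    trendline.kdj, trendline.macd, trendline.rsi, trendline.vrsi,
    trendline.boll, trendline.bbiboll, trendline.wr, trendline.bias,
    trendline.asi, trendline.vr_rate, trendline.vr, trendline.arbr,
    trendline.dpo, trendline.trix, trendline.bbi, trendline.mtm,
    trendline.obv, trendline.cci, trendline.priceosc, trendline.dbcd,
    trendline.roc, trendline.vroc, trendline.cr, trendline.psy,
    trendline.wad, trendline.mfi, trendline.vosc,
]
for func in indicator_funcs:
    ts_symbol = trendline.join_frame(ts_symbol, func(ts_symbol))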
Example #28
print(dv.fields)  # inspect the fields available in the dataview

#1. Data preprocessing
def change_columns_index(signal):
    new_names = {}
    for c in signal.columns:  # each column of signal
        if c.endswith('SZ'):
            new_names[c] = c.replace('SZ', 'XSHE')  # convert 'SZ' to 'XSHE' to match the required format
        elif c.endswith('SH'):
            new_names[c] = c.replace('SH', 'XSHG')  # convert 'SH' to 'XSHG'
    signal = signal.rename_axis(new_names, axis=1)
    signal.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , signal.index))
    signal.index = pd.Index(map(lambda x: x+timedelta(hours=15), signal.index))
    return signal

mask = dv.get_ts('mask_fundamental')  # whether to filter out the stock: False means keep it, True means filter it out
group = change_columns_index(dv.get_ts('group'))
ROE_Data = dv.get_ts('roe')
ROE_Data = change_columns_index(dv.get_ts('roe').shift(1, axis=0)[mask==0])  # shift(1, axis=0) lags the data by one day to avoid look-ahead bias
prices = change_columns_index(dv.get_ts('close_adj'))

def get_largest(df, n=20):  # mark the 20 stocks with the highest ROE as 1, re-selected every day
    largest_list = []
    for time_index, value in df.iterrows():
        largest_list.append(dict.fromkeys(value.nlargest(n).index,1))
    largest_df = pd.DataFrame(largest_list, index = df.index)
    return largest_df

stock_df = get_largest(ROE_Data).dropna(how='all', axis=1)

stock_df.to_excel('roe_backtest.xlsx')  # save to the current working directory
Example #29
def change_index(df):
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


def formula(positive, negative, total):  # market-breadth calculation
    return (positive - negative) / total


def sumrows(frame):
    return frame.sum(axis=1)  # sum across each row


#2. Calculations (note the direct Panel construction below)
mask = dv.get_ts('index_member')  # which stocks are index members
A = dv.get_ts('close_adj').loc[20150105:][mask == 1]  # inspect the result
B = dv.get_ts('close_adj').loc[20150105:][mask == 1].dropna(
    how='all', axis=1)  # inspect the result; note that a value of 0 indicates suspension
PN = pd.Panel({
    'high':
    change_index(
        dv.get_ts('high_adj').loc[20150105:][mask == 1].dropna(how='all',
                                                               axis=1)),
    'low':
    change_index(
        dv.get_ts('low_adj').loc[20150105:][mask == 1].dropna(how='all',
                                                              axis=1)),
    'close':
    change_index(
        dv.get_ts('close_adj').loc[20150105:][mask == 1].dropna(how='all',
Example #30
warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


def change_index(df):
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


#2. Compute and plot
#step1: ADV (average daily volume) calculation and plot
close = change_index(dv.get_ts('close_adj').loc[20170105:])  # read adjusted close prices
volume = change_index(dv.get_ts('volume').loc[20170105:])  # read volume

adv10 = ta.abstract.MA(volume, 10, price='600036.SH')  # note this calling pattern
adv20 = ta.abstract.MA(volume, 20, price='600036.SH')

fig, (ax, ax1) = plt.subplots(2, 1, sharex=True, figsize=(15, 7))
ax.plot(close['600036.SH'], label='600036')
ax.legend(loc='upper left')  # loc sets the legend position; loc='upper right' also works
ax1.bar(volume.index, volume['600036.SH'], color='g')
ax1.plot(adv10, label='Volume_MA10')
ax1.plot(adv20, label='Volume_MA20')
plt.legend(loc='upper left')
plt.show()

#step2: OBV & A/D calculation and plot
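# The example stops at the step-2 heading. Purely as an illustration (not the
# original continuation), OBV for a single symbol can be computed from the raw
# talib API, which expects float64 close and volume arrays:
obv = pd.Series(ta.OBV(close['600036.SH'].values.astype(float),
                       volume['600036.SH'].values.astype(float)),
                index=volume.index)
print(obv.tail())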
Example #31
import warnings
warnings.filterwarnings("ignore")

dv = DataView()
dataview_folder = 'G:/data/hs300'  # folder path
dv.load_dataview(dataview_folder)  # load the dataview from dataview_folder

#2_Read data indexed by symbol: get_snapshot
print(dv.get_snapshot(20170504, symbol='600036.SH,600050.SH', fields=''))
print(
    dv.get_snapshot(20170504, symbol='600036.SH,600050.SH',
                    fields='close_adj'))
#20170504 is the date; symbol='600036.SH,600050.SH' lists the stocks (more can be added); fields selects the factors, and an empty string returns all of them

#3_Read time-series data: get_ts
data1 = dv.get_ts('pb')  # returns a DataFrame covering all CSI 300 constituents; pb is the price-to-book ratio
print(dv.get_ts('pb').head())

#4_Add a custom formula field: add_formula
roe_pb = dv.add_formula('roe_pb', 'roe/pb', is_quarterly=False, add_data=True)
#'roe_pb' is the name of the new field, 'roe/pb' is the formula, and is_quarterly=False indicates it is not quarterly data
print(dv.get_ts('roe_pb').head())  # the new field can now be fetched with get_ts by name

#5_Add new data from the data service to the local dataview
#Set up the config first
data_config = {
    "remote.data.address":
    "tcp://data.tushare.org:8910",  # fixed address, no need to change
    "remote.data.username":
    "******",  # quantos account (phone number)
    # API token of the quantos account
Example #32
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True
}

dv.init_from_config(dv_props, data_api=ds)

# In[10]:

dv.prepare_data()

# In[11]:

dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
Example #33
dataview_folder = 'G:/data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)

#2_Rename the columns and convert the index
from datetime import timedelta


def change_columns_index(signal):
    # rename the columns
    new_names = {}
    for c in signal.columns:
        if c.endswith('SZ'):  # Shenzhen-listed stock
            new_names[c] = c.replace('SZ', 'XSHE')  # replace SZ with XSHE
        elif c.endswith('SH'):  # Shanghai-listed stock
            new_names[c] = c.replace('SH', 'XSHG')  # replace SH with XSHG
    signal = signal.rename_axis(new_names, axis=1)
    # Convert the integer dates to datetime via datetime.strptime:
    signal.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), signal.index))
    # then add 15 hours: (x + timedelta(hours=15)):
    signal.index = pd.Index(
        map(lambda x: x + timedelta(hours=15), signal.index))
    return signal


factor = change_columns_index(dv.get_ts('roe_pb_Q5'))  # apply the conversion to roe_pb_Q5

print(factor.tail())
Example #34
def test_multi_factor():
    from jaqs_fxdayu.research.signaldigger import multi_factor, process
    dv = DataView()
    dv.load_dataview(dataview_folder)
    dv.add_formula("momentum",
                   "Return(close_adj, 20)",
                   is_quarterly=False,
                   add_data=True)

    mask = mask_index_member(dv)
    can_enter, can_exit = limit_up_down(dv)

    ic = dict()
    factors_dict = {
        signal: dv.get_ts(signal)
        for signal in ["pb", "pe", "ps", "momentum"]
    }
    for period in [5, 15]:
        ic[period] = multi_factor.get_factors_ic_df(
            factors_dict,
            price=dv.get_ts("close_adj"),
            high=dv.get_ts("high_adj"),  # optional (can be None)
            low=dv.get_ts("low_adj"),  # optional (can be None)
            n_quantiles=5,  # number of quantiles
            mask=mask,  # filter mask
            can_enter=can_enter,  # whether entry is allowed
            can_exit=can_exit,  # whether exit is allowed
            period=period,  # holding period
            benchmark_price=dv.data_benchmark,  # benchmark price; if omitted, holding-period returns are absolute
            commission=0.0008,
        )
    factor_dict = dict()
    index_member = dv.get_ts("index_member")
    for name in ["pb", "pe", "ps", "momentum"]:
        signal = -1 * dv.get_ts(name)  # flip the sign
        process.winsorize(factor_df=signal,
                          alpha=0.05,
                          index_member=index_member)  # winsorize (clip extreme values)
        signal = process.rank_standardize(
            signal, index_member)  # cross-sectional rank, scaled to 0-1 (keeps rank information only)
        signal = process.standardize(signal,
                                     index_member)  # z-score standardization (keeps rank and distribution information)
        # neutralize by industry and market cap
        signal = process.neutralize(
            signal,
            group=dv.get_ts("sw1"),
            float_mv=dv.get_ts("float_mv"),
            index_member=index_member,  # whether to consider index constituents only
        )
        factor_dict[name] = signal

    # When the factors are strongly correlated, orthogonalize them with the Gram-Schmidt
    # method and use the orthogonalized residuals as the new factors
    new_factors = multi_factor.orthogonalize(
        factors_dict=factor_dict,
        standardize_type="rank",  # standardization applied to the input factors: "rank" or "z_score"
        winsorization=False,  # whether to winsorize the input factors
        index_member=index_member)  # whether to restrict processing to index constituents

    # Multi-factor combination: dynamic-weighting configuration
    props = {
        'price': dv.get_ts("close_adj"),
        'high': dv.get_ts("high_adj"),  # optional (can be None)
        'low': dv.get_ts("low_adj"),  # optional (can be None)
        'ret_type': 'return',  # other options: upside_ret / downside_ret, which optimize for potential upside / downside instead
        'benchmark_price': dv.data_benchmark,  # None gives absolute returns, otherwise returns relative to the benchmark
        'period': 30,  # 30-day holding period
        'mask': mask,
        'can_enter': can_enter,
        'can_exit': can_exit,
        'forward': True,
        'commission': 0.0008,
        "covariance_type": "shrink",  # covariance estimation method; "simple" is also available
        "rollback_period": 120  # rolling window length in days
    }

    comb_factors = dict()
    for method in [
            "equal_weight", "ic_weight", "ir_weight", "max_IR", "max_IC",
            "factors_ret_weight"
    ]:
        comb_factors[method] = multi_factor.combine_factors(
            factor_dict,
            standardize_type="rank",
            winsorization=False,
            weighted_method=method,
            props=props)
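    # Hypothetical quick check (not in the original): each combined factor is
    # expected to be a date-by-symbol DataFrame of weighted factor values.
    for method, factor in comb_factors.items():
        print(method, factor.shape)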
Example #35
        # 'universe': UNIVERSE,  # Investment universe and performance benchmark
        # 'benchmark': '000300.SH',
        'fields': 'open,high,low,close,volume',  # Data fields that we need
        'freq':
        1  # freq = 1 means we use daily data. Please do not change this.
    }

    # RemoteDataService communicates with a remote server to fetch data
    ds = RemoteDataService()

    # Use username and password in data_config to login
    ds.init_from_config(data_config)

    # DataView utilizes RemoteDataService to get various data and store them
    dv = DataView()
    dv.init_from_config(dataview_props, ds)
    dv.prepare_data()
    print(dv.prepare_data())
    dv.save_dataview(folder_path=dataview_store_folder)


# save_data()

dv = DataView()
dv.load_dataview('/dataview')
df = dv.get_ts('open,close,high,low',
               symbol='000300.SH',
               start_date=20170101,
               end_date=20170302)
df.columns = ['open', 'close', 'high', 'low']
print(df)
Example #36
    # limit-down
    down_limit = dv.add_formula(
        'down_limit',
        '(close - Delay(close, 1)) / Delay(close, 1) < -0.095',
        is_quarterly=False)
    can_enter = np.logical_and(up_limit < 1, ~mask_sus)  # not limit-up and not suspended
    can_exit = np.logical_and(down_limit < 1, ~mask_sus)  # not limit-down and not suspended
    return can_enter, can_exit


id_member = pd.concat([id_zz500[columns_500], id_hs300], axis=1)
mask = ~id_member
can_enter, can_exit = limit_up_down()

alpha_signal = factor_lis
price = dv.get_ts('close_adj')
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',