Exemplo n.º 1
0
def main_fun(begin_date, cut_date, end_date, time_para_dict, sector_name,
             index_name, hold_time, return_file, new_factor_list,
             add_factor_list, if_hedge=False, if_only_long=False):
    """Assemble the inputs for one factor test run and hand them to ``test_index_3``.

    The function prepares the parameter table and output locations through
    ``save_load_control``, loads the sector universe, the suspension / limit
    masks, the ``aadj_r`` return table and the index data, aligns the return
    table to the sector's rows and columns, and finally calls
    ``test_index_3`` with everything.

    :param begin_date: first date passed to ``load_sector_data``.
    :param cut_date: forwarded unchanged to ``test_index_3``.
    :param end_date: last date passed to ``load_sector_data``.
    :param time_para_dict: forwarded unchanged to ``test_index_3``.
    :param sector_name: sector identifier used for loading and bookkeeping.
    :param index_name: index identifier passed to ``load_index_data``.
    :param hold_time: forwarded to ``save_load_control`` and ``test_index_3``.
    :param return_file: accepted for interface compatibility only; it is
        replaced by the fixed label ``'aadj_r'`` before being used.
    :param new_factor_list: forwarded to ``save_load_control``.
    :param add_factor_list: forwarded to ``save_load_control``.
    :param if_hedge: forwarded to ``save_load_control`` and ``test_index_3``.
    :param if_only_long: forwarded to ``save_load_control`` and ``test_index_3``.
    :return: ``None``; the result of ``test_index_3`` is not returned.
    """
    # Fixed run switches for this driver.
    save_results = True
    fresh_program = True
    factor_set_pkl = '/mnt/mfs/dat_whs/data/use_factor_set/market_top_2000_201808201941.pkl'
    # NOTE(review): this deliberately shadows the caller-supplied argument.
    return_file = 'aadj_r'

    control = save_load_control(factor_set_pkl, sector_name, new_factor_list, add_factor_list,
                                save_results, fresh_program, if_hedge, hold_time, return_file,
                                if_only_long)
    para_ready_df, log_save_file, result_save_file = control
    total_para_num = len(para_ready_df)

    # Sector universe: its columns and index define the grid every other
    # table is aligned to.
    sector_df = load_sector_data(begin_date, end_date, sector_name)
    stock_codes = sector_df.columns
    trade_days = sector_df.index

    # Suspension mask and the combined limit-up / limit-down mask.
    suspendday_df, limit_buy_sell_df = load_locked_data(stock_codes, trade_days)

    # Return table, pipe-separated with the date in the first column.
    returns_df = pd.read_table('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                               sep='|', index_col=0)
    returns_df = returns_df.astype(float)
    returns_df.index = pd.to_datetime(returns_df.index)
    # Cells missing from the file are filled with a zero return.
    returns_df = returns_df.reindex(index=trade_days, columns=stock_codes, fill_value=0)

    # Index data aligned to the same dates.
    index_df = load_index_data(trade_days, index_name)

    test_index_3(time_para_dict, sector_name, sector_df, suspendday_df,
                 limit_buy_sell_df, returns_df, index_df, para_ready_df,
                 cut_date, log_save_file, result_save_file, save_results, if_hedge,
                 hold_time, if_only_long, stock_codes, trade_days, total_para_num)
Exemplo n.º 2
0
    xinx = sector_df.index

    # suspend or limit up_dn
    suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)

    # return
    return_choose = pd.read_table(
        '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
        sep='|',
        index_col=0).astype(float)
    return_choose.index = pd.to_datetime(return_choose.index)
    return_choose = return_choose.reindex(columns=xnms,
                                          index=xinx,
                                          fill_value=0)

    # index data
    index_df = load_index_data(xinx, index_name)

    all_use_factor = pd.read_table(log_result, sep='|', header=None)
    all_use_factor.columns = [
        'key', 'fun_name', 'name1', 'name2', 'name3', 'filter_fun_name',
        'sector_name', 'con_in', 'con_out', 'ic', 'sp_u', 'sp_m', 'sp_d',
        'pot_in', 'fit_ratio', 'leve_ratio', 'sp_out'
    ]

    # 生成pnl文件
    # create_pnl_file(all_use_factor, file_name, hold_time)
    # 将生成的pnl文件组合
    pnl_df, sum_pos_df = pos_sum(all_use_factor, hold_time)
    plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'sum_pos_plot')
Exemplo n.º 3
0
    stk_CR = return_df.replace(np.nan, 0).cumsum()
    ma5 = bt.AZ_Rolling_mean(stk_CR, 3)
    ma50 = bt.AZ_Rolling_mean(stk_CR, 12)

    vol30 = bt.AZ_Rolling(return_df, 30).std() * (250 ** 0.5)
    vol30[vol30 < 0.08] = 0.08

    cond_d = (ma5 < ma50).astype(int)
    cond_u = (ma5 > ma50).astype(int)

    hold_time_list = [1, 2, 5]
    xnms = return_df.columns
    xinx = return_df.index

    index_df = load_index_data(begin_date, end_date, xinx, '000300').shift(1)
    locked_df = load_locked_data(begin_date, end_date, xnms, xinx)

    use_company_list = get_use_company(data2)
    aa = random.sample(use_company_list, 200)
    all_tmp_df = pd.DataFrame()
    for company in use_company_list:
        print(company)
        part_data2 = data2[data2['COMPANYNAME'] == company][['RID', 'TRSDIR']]

        part_data1 = data1.loc[part_data2['RID'].values][['SECURITYCODE', 'TRADEDATE']]
        part_data1['TRSDIR'] = part_data2['TRSDIR'].values

        tmp_df = part_data1.groupby(['TRADEDATE', 'SECURITYCODE'])['TRSDIR'].apply(lambda x: x.iloc[-1]).unstack()
        tmp_df = (tmp_df == '0').astype(int)
        tmp_df = tmp_df.reindex(columns=xnms, index=xinx, fill_value=0)
Exemplo n.º 4
0
def create_pnl_file_and_delete_factor(factor_root_path, sector_name_list):
    """Create a daily PnL file per stored factor and delete near-empty factors.

    For every sector in ``sector_name_list`` each ``*.pkl`` file found in
    ``<factor_root_path>/<sector_name>`` is loaded, converted to daily
    positions with ``deal_mix_factor`` (``hold_time=5``, ``lag=2``, long and
    short) and multiplied by the ``aadj_r`` return table to obtain a daily
    PnL series.

    * If fewer than 30% of the dates carry a non-zero PnL, the factor pickle
      is removed from disk.
    * Otherwise the PnL is written to
      ``<root_path>/data/single_factor_pnl/<sector_name>/<factor>.csv``
      (``root_path`` is a module-level name).
    * A factor whose PnL series has no rows at all is skipped and left on
      disk rather than being deleted.

    :param factor_root_path: directory holding one sub-directory per sector.
    :param sector_name_list: iterable of sector names to process.
    :return: ``None``.
    """
    begin_date = pd.to_datetime('20100101')
    end_date = pd.to_datetime('20180401')
    # Minimum share of dates with a non-zero PnL for a factor to be kept.
    min_active_ratio = 0.3

    # The return table does not depend on the sector, so it is parsed once
    # (lazily, on the first sector) and only re-aligned inside the loop.
    raw_return = None

    for sector_name in sector_name_list:
        # Sector universe: defines the column / index grid for this pass.
        sector_df = load_sector_data(begin_date, end_date, sector_name)
        xnms = sector_df.columns
        xinx = sector_df.index

        # Suspension mask and combined limit-up / limit-down mask.
        suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)

        if raw_return is None:
            raw_return = pd.read_table(
                '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                sep='|',
                index_col=0).astype(float)
            raw_return.index = pd.to_datetime(raw_return.index)
        # reindex returns a new frame, so the cached table is never mutated.
        return_choose = raw_return.reindex(columns=xnms,
                                           index=xinx,
                                           fill_value=0)

        factor_path = os.path.join(factor_root_path, sector_name)
        # Match on the real suffix: a substring test would also pick up
        # names such as 'x.pkl.bak' and break the suffix stripping below.
        factor_name_list = [x for x in os.listdir(factor_path)
                            if x.endswith('.pkl')]
        save_pnl_path = os.path.join(root_path,
                                     'data/single_factor_pnl/' + sector_name)
        # One call per sector is enough; the target directory is the same
        # for every factor of the sector.
        bt.AZ_Path_create(save_pnl_path)

        for factor_name in factor_name_list:
            factor_load_path = os.path.join(factor_path, factor_name)
            print(factor_load_path)
            factor_key = os.path.splitext(factor_name)[0]

            # NOTE(review): read_pickle executes arbitrary code from the
            # file; only point this at trusted factor directories.
            factor_df = pd.read_pickle(factor_load_path)
            factor_df = factor_df.reindex(columns=xnms,
                                          index=xinx,
                                          fill_value=0)
            daily_pos = deal_mix_factor(factor_df,
                                        sector_df,
                                        suspendday_df,
                                        limit_buy_sell_df,
                                        hold_time=5,
                                        lag=2,
                                        if_only_long=False)

            pnl_sr = (daily_pos * return_choose).sum(axis=1)

            total_days = len(pnl_sr)
            if total_days == 0:
                # Nothing to judge the factor on: do not divide by zero and
                # do not delete the file on the strength of an empty range.
                print(factor_name + ' has no pnl rows, skipped')
                continue

            # Dates whose PnL is neither zero nor missing.
            active_days = pnl_sr.replace(0, np.nan).count()
            if active_days / total_days < min_active_ratio:
                print(factor_name + ' is delete')
                os.remove(factor_load_path)
            else:
                pnl_df = pnl_sr.to_frame(factor_key)
                pnl_df.to_csv(os.path.join(save_pnl_path, factor_key + '.csv'))
                print('pnl create!')
Exemplo n.º 5
0
    # for sector_name in ['market_top_100', 'market_top_500', 'market_top_1000']:
    sector_name = 'market_top_300'
    index_name = '000300'

    # sector
    sector_df = load_sector_data(begin_date, end_date, sector_name)
    sector_mean = sector_df.sum(axis=1).mean()
    sector_set = sector_df.columns
    # suspend or limit up_dn
    locked_df = load_locked_data(begin_date, end_date, sector_set)
    # return

    # index data
    # index_df = load_index_data(begin_date, end_date, index_name)

    index_df = load_index_data(begin_date, end_date, index_name)
    for file_name in sorted(
            os.listdir('/mnt/mfs/dat_whs/data/intra_factor_data')):
        vwap_file_path = os.path.join(
            '/mnt/mfs/dat_whs/data/base_data/intra_vwap_tab_{}.pkl'.format(
                file_name.split('_')[3]))
        vwap_df = pd.read_pickle(vwap_file_path)
        vwap_df.columns = [x[2:] + '.' + x[:2] for x in vwap_df.columns]
        factor_df = pd.read_pickle('/mnt/mfs/dat_whs/data/intra_factor_data/' +
                                   file_name)

        # return_choose = load_pct(begin_date, end_date, sector_set)

        xnms = sorted(
            list(
                set(vwap_df.columns) & set(factor_df.columns)