Exemplo n.º 1
0
def sqlize_db_industry(subdb):
    """
    将 industry sql化

    @subdb (str): 子数据库名 
    """

    db_path = DB_PATH_LIB['industry']
    subdb_path = os.path.join(db_path, subdb)
    trading_days = listdir_advanced(subdb_path, 'json', strip_suffix=True)
    with SqliteProxy(log=False) as proxy:
        for year, dates in classify_dates_by_year(trading_days).items():
            path = os.path.join(db_path, '{}.db'.format(year))
            proxy.connect(path)
            if subdb not in proxy.list_tables:
                create_db_03(proxy, subdb)

            for date in dates:
                js = json2dict(os.path.join(subdb_path,
                                            '{}.json'.format(date)))
                df = pd.DataFrame(list(js.items()),
                                  columns=['sec_id', 'industry'])
                df['date'] = date
                try:
                    proxy.write_from_dataframe(df, "A_SWL1")
                except Exception:
                    Logger.error(
                        "Error occurred when sqlizing {} on {}.".format(
                            subdb, date))
                    traceback.print_exc()
Exemplo n.º 2
0
def update_schema(db_name, sub_name):
    """
    更新schema相关的begin date,end date, last update 适用于非factor_return相关的数据库

    @db_name (str): db的名称 eg. FACTOR 排除factor_return
    @sub_name (str): db中各子数据库的名称 eg. VALUE GROWTH
    """

    schema = json2dict(os.path.join(DB_PATH_LIB[db_name], 'schema'))

    assert sub_name

    date_list = get_date_lists_in_table(DB_PATH_LIB[db_name], sub_name)

    schema[sub_name]['begin date'] = date_list[0]
    schema[sub_name]['end date'] = date_list[-1]
    schema[sub_name]['last update'] = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(sub_name))
    dict2json(schema, os.path.join(DB_PATH_LIB[db_name], 'schema'), log=False)
    a = pd.DataFrame(schema).T
    col_names = [
        'aspect', 'type', 'begin date', 'end date', 'last update', 'col_names',
        'field', 'kwargs', 'explanation'
    ]
    b = a.reindex(columns=col_names).reset_index().rename(columns={
        'index': 'indicator'
    }).sort_values(['type', 'aspect', 'field'])
    b.to_csv(os.path.join(DB_PATH_LIB[db_name], 'schema.csv'), index=False)
Exemplo n.º 3
0
def get_schema(db_name):
    """
    获得某个数据库schema的相关信息

    @db_name (int): 数据库名称
    @return dict of schema
    """
    path = os.path.join(SCHEMA_PATH, "schema_{}.json".format(db_name))
    schema = json2dict(path)
    return schema
Exemplo n.º 4
0
def get_schema(db_name):
    """
    获得某个数据库schema的相关信息

    @db_name (int): 数据库名称
    @return dict of schema
    """

    schema = json2dict(SCHEMA_PATHS[db_name])
    return schema
Exemplo n.º 5
0
def mfs_by_score(date, windows_step1, num1):
    """
    打分法多因子选股: 给定指定日期,计算当天因子打分情况 返回选定股票
    @date <"%Y-%m-%d">: 开始建仓日期
    @windows_step1 <int>: 计算因子权重的滚动窗口 即根据近多少个交易日进行分组有效性检验
    @num1 <int>: 持仓股票数
    @return 选择的股票 
    """

    index_info = dk.json2dict(
        os.path.join(PROJECT_FILES_PATH, "factor_weight",
                     "cycle{}".format(str(windows_step1)),
                     "{}.json".format(date)))
    index_std_all = pd.DataFrame()
    weight = {key: index_info[key]['weight'] for key in index_info}  # 有效因子权重
    monotony = {key: index_info[key]['ascending']
                for key in index_info}  # 因子单调方向
    for index_std in weight:
        if len(index_std_all) == 0:
            index_std_all = get_secs_index_std(
                index_std, trading_days=[date])[['sec_id', index_std]]
        else:
            temp = get_secs_index_std(index_std,
                                      trading_days=[date
                                                    ])[['sec_id', index_std]]
            index_std_all = index_std_all.merge(temp,
                                                how='inner',
                                                on=['sec_id'])
    index_std_all = index_std_all.set_index(['sec_id'])
    # 获取每个因子的单调方向 目的是使最优因子排名靠前
    ascending = [key for key in monotony if monotony[key] == 1.0]  # 反向因子  越小越好
    descending = [key for key in monotony if monotony[key] == 0.0]  # 正向因子 越大越好
    part1 = index_std_all[descending].rank(ascending=False)  # 对正向因子进行排序
    part2 = index_std_all[ascending].rank(ascending=True)  # 对反向因子进行排序
    index_std_all1 = part1.merge(part2,
                                 how='inner',
                                 left_index=True,
                                 right_index=True)
    index_std_all2 = index_std_all1.copy()
    # 获取不同因子的加权排名
    for col in index_std_all1.columns:
        index_std_all2[col] = index_std_all2[col] * weight[col]
    selected_stocks = index_std_all2.sum(
        axis=1).sort_values().index[:num1].tolist()  # 选出排名前20的股票
    return selected_stocks
Exemplo n.º 6
0
def derive_next_month_rr(sec_ids, start_date, end_date):
    """
    获取给定股票列表在给定日期列表的下个月收益率
    @sec_ids <list>: 股票列表
    @start_date <"%Y-%m-%d">: 开始日期
    @end_date <"%Y-%m-%d">: 结束日期
    @return dataframe [sec_id yearmonth month_ret CLOSE]
    """
    # 取月初月末的交易日列表
    tds = dk.json2dict(os.path.join(DB_PATH,
                                    r"datasets\pre\trading_days.json"))
    tds = pd.DataFrame(tds, columns=['date'])
    tds = tds[tds.date.between(start_date, end_date)]
    tds['group'] = tds['date'].apply(lambda x: x[:7])
    tds = tds.sort_values(['date'])
    first = tds.groupby('group').head(1)
    last = tds.groupby('group').tail(1)
    tds = first.append(last)
    tds = tds.sort_values('date')
    # 获取月初月末交易日的收盘价
    close = DataAPI.api.get_secs_indicator_on_multidays(
        indicator="CLOSE", trading_days=tds.date.tolist(), sec_ids=sec_ids)
    close = close.sort_values(['sec_id', 'date'])
    close['yearmonth'] = close['date'].apply(lambda x: x[:7])

    # 获取月收益率
    def cal_ret(x):
        close_id = x.columns.tolist().index('CLOSE')
        return x.iloc[-1, close_id] / x.iloc[0, close_id] - 1

    df_target = close.groupby(['sec_id', 'yearmonth'],
                              as_index=False).apply(lambda x: cal_ret(x))
    df_target = df_target.reset_index().rename(columns={0: 'month_ret'})
    # 只取每个月月初的收盘价
    close = close.groupby(['sec_id', 'yearmonth'],
                          as_index=False).apply(lambda x: x.head(1))
    df_target = df_target.merge(close, how='inner', on=['sec_id', 'yearmonth'])
    del df_target['date']
    return df_target
Exemplo n.º 7
0
def update_factor_return_schema(factor):
    """
    更新factor_return的schema相关的begin date,end date, last update

    @factor (str): factor的名称
    """

    schema = json2dict(os.path.join(DB_PATH_LIB['factor_return'], 'schema'))

    filepath = os.path.join(DB_PATH_LIB['factor_return'],
                            "{}.csv".format(factor))
    df = pd.read_csv(filepath, encoding="utf-8")["date"]
    schema[factor]['begin date'] = df.min()

    schema[factor]['end date'] = df.max()

    schema[factor]['last update'] = \
        datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(factor))
    dict2json(schema,
              os.path.join(DB_PATH_LIB['factor_return'], 'schema'),
              log=False)
Exemplo n.º 8
0
def get_secs_industry_from_json(industry_code, sec_ids=[]):
    path = os.path.join(DB_INDUSTRY, '{}.json'.format(industry_code))
    info = json2dict(path, validate=True)
    return {sec: info[sec] for sec in sec_ids if sec in info}
Exemplo n.º 9
0
def plot_LLJZ(yearmonth_st,
              yearmonth_end,
              model_name="XGBoost",
              label_kind="label_A",
              benchmark="hs300"):
    file_root = os.path.join(DB_PATH, "backtest", model_name, label_kind,
                             benchmark)
    file_LJJZ = os.path.join(file_root, "LLJZ",
                             "{}--{}".format(yearmonth_st, yearmonth_end))
    if not os.path.exists(file_LJJZ):
        os.makedirs(file_LJJZ)
    year_st_num = int(yearmonth_st[:4])
    year_end_num = int(yearmonth_end[:4])
    diff = year_end_num - year_st_num
    yearmonth_lst = [
        str(year_st_num + i) + '-{:0>2}'.format(str(1 + j))
        for i in range(0, diff + 1) for j in range(0, 12)
    ]
    yearmonth_lst = [
        yearmonth for yearmonth in yearmonth_lst
        if (yearmonth >= yearmonth_st) & (yearmonth <= yearmonth_end)
    ]
    records = {}
    for yearmonth in yearmonth_lst:
        records.update(
            dk.json2dict(
                os.path.join(file_root, "records_{}.json".format(yearmonth))))
    date_st = dk.date2char(
        dk.char2datetime(yearmonth_st + '-31') - dk.timedelta({'months': 1}))
    records[date_st] = {'benchmark': 1, 'model': 1}
    df_records = pd.DataFrame(records).T.reset_index().rename(
        columns={'index': 'yearmonth'})

    df_records.to_csv(os.path.join(file_LJJZ, "df_records.csv"), index=0)

    df_records_daily = df_records[['yearmonth', 'benchmark', 'model']]
    # 获取模型和基准累计净值
    for ix in df_records_daily.index:
        if ix == 0:
            df_records_daily.loc[ix, 'model_rr_cum'] = 1
            df_records_daily.loc[ix, 'benchmark_rr_cum'] = 1
        else:
            df_records_daily.loc[ix, 'model_rr_cum'] = df_records_daily.loc[
                ix - 1,
                'model_rr_cum'] * (1 + df_records_daily.loc[ix, 'model'])
            df_records_daily.loc[ix,
                                 'benchmark_rr_cum'] = df_records_daily.loc[
                                     ix - 1, 'benchmark_rr_cum'] * (
                                         1 +
                                         df_records_daily.loc[ix, 'benchmark'])
    df_records_daily = df_records_daily[[
        'yearmonth', 'benchmark_rr_cum', 'model_rr_cum'
    ]].set_index('yearmonth')
    # df_records_daily .to_csv(os.path.join(back_test_path, "df_records_daily_{}.csv".format(dataID)))
    fig, ax = plt.subplots()
    fig.set_size_inches(20, 10)
    ax.plot(df_records_daily)
    ax.legend(df_records_daily.columns, fontsize=22)
    plt.title("基于测试集{}选股累计收益与{}累计净值比较图 {}--{}".format(model_name, benchmark,
                                                      yearmonth_st,
                                                      yearmonth_end),
              fontsize=22)
    plt.xlabel("日期", fontsize=22)
    plt.ylabel("累计净值", fontsize=22)
    plt.savefig(os.path.join(file_LJJZ, "LJJZ.png"))