Exemplo n.º 1
0
def update_industry_to_json(industry, trading_days):
    """
    Reload one industry classification, rewrite its JSON file and refresh
    the matching entry of the 'industry' schema.

    The data is loaded for the last element of `trading_days` through the
    (index_code, loader) pair registered under `industry` in
    INDEX_LOADER_MAP, then written to '<industry>.json' under DB_INDUSTRY.

    @industry (str): key into INDEX_LOADER_MAP and into the 'industry' schema
    @trading_days (sequence): trading days; only the last one is used

    Raises ValueError if loading, writing or the schema update fails; the
    failure is logged through Logger before the exception is raised.
    """
    # Bind `date` before the try block: the handler below formats it into the
    # log message, and it would otherwise be unbound (masking the real error)
    # when `trading_days` is empty.
    date = None
    try:
        date = trading_days[-1]
        index_code, loader = INDEX_LOADER_MAP[industry]
        info = loader(index_code, date, level=1)
    except Exception:
        message = "Error occurred when loading {} on {}".format(industry, date)
        Logger.error(message)
        raise ValueError(message)

    try:
        path = os.path.join(DB_INDUSTRY, '{}.json'.format(industry))
        copy_to = os.path.join(DB_INDUSTRY, '{}_backup.json'.format(industry))
        # Keep a backup copy in case the data gets corrupted. There is
        # nothing to back up on the very first write, so skip it then
        # instead of failing the whole update.
        if os.path.exists(path):
            shutil.copy(path, copy_to)
        dict2json(info, path, log=False)

        Logger.info("{} on {} is updated successfully".format(industry, date))
    except Exception:
        message = "Error occurred when writing {} on {}".format(industry, date)
        Logger.error(message)
        raise ValueError(message)

    # json files are different from sql, cannot use update_schema()
    # therefore update schema information explicitly
    try:
        now = datetime.now()
        schema = get_schema('industry')
        schema[industry]["begin date"] = ""
        schema[industry]["end date"] = now.strftime('%Y-%m-%d')
        schema[industry]['last update'] = now.strftime('%Y-%m-%d %H:%M:%S')
        save_schema(schema, 'industry')

        Logger.info("schema updated: {}".format(industry))
    except Exception:
        message = "Error occurred when updating schema of {}".format(industry)
        Logger.error(message)
        traceback.print_exc()
        raise ValueError(message)
Exemplo n.º 2
0
def update_schema(db_name, sub_name):
    """
    Refresh the 'begin date', 'end date' and 'last update' fields of one
    schema entry, save the schema and re-export it as 'schema.csv'.
    Meant for databases other than factor_return.

    @db_name (str): name of the db, e.g. FACTOR (factor_return excluded)
    @sub_name (str): name of the sub-database inside the db, e.g. VALUE, GROWTH
    """
    db_dir = DB_PATH_LIB[db_name]
    schema_file = os.path.join(db_dir, 'schema')
    schema = json2dict(schema_file)

    assert sub_name

    dates = get_date_lists_in_table(db_dir, sub_name)

    # First and last elements of the date list become the entry's range.
    first_date = dates[0]
    entry = schema[sub_name]
    entry['begin date'] = first_date
    entry['end date'] = dates[-1]
    entry['last update'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(sub_name))
    dict2json(schema, schema_file, log=False)

    # Tabular export: one row per schema entry, fixed column order.
    ordered_columns = [
        'aspect', 'type', 'begin date', 'end date', 'last update', 'col_names',
        'field', 'kwargs', 'explanation'
    ]
    table = pd.DataFrame(schema).T
    table = table.reindex(columns=ordered_columns)
    table = table.reset_index()
    table = table.rename(columns={'index': 'indicator'})
    table = table.sort_values(['type', 'aspect', 'field'])
    table.to_csv(os.path.join(db_dir, 'schema.csv'), index=False)
Exemplo n.º 3
0
def backtest_for_mfs(yearmonth,
                     selected_sec_ids,
                     benchmark,
                     model_name,
                     label_kind="label_A",
                     override=False):
    """
    Multi-factor stock-selection backtest for a single month.

    Computes the CLOSE-weighted average of 'month_ret' over the selected
    securities, looks up the benchmark's 'month_ret' for the same month, and
    stores both in 'records_<yearmonth>.json' under
    DB_PATH/backtest/<model_name>/<label_kind>/<benchmark>.

    @yearmonth <"%Y-%m">: year and month
    @selected_sec_ids <list>: sec_ids selected for `yearmonth`
    @benchmark <str>: benchmark sec_id; per the original note hs300, zz500
                      and wind_ALL_A are currently supported
    @model_name <str>: model name
    @label_kind <str>: how the labels were obtained
    @override <bool>: when False and the records file already exists, return
                      without recomputing
    """
    # Build paths from components rather than hard-coded backslashes so the
    # layout is the same directory tree on every platform.
    back_test_path = os.path.join(DB_PATH, "backtest", str(model_name),
                                  str(label_kind), str(benchmark))
    file_path = os.path.join(back_test_path,
                             "records_{}.json".format(yearmonth))
    if not os.path.exists(back_test_path):
        os.makedirs(back_test_path)

    if os.path.exists(file_path) and (not override):
        return
    records = {}
    # Load the monthly returns table (per the original note: monthly returns
    # and beginning-of-month prices for all securities).
    df_rr_monthly = pd.read_csv(
        os.path.join(DB_PATH, "datasets", "pre", "rr_monthly.csv"))
    df_rr_monthly_1 = df_rr_monthly.set_index(['sec_id', 'yearmonth'])
    df_rr_monthly_1 = df_rr_monthly_1.sort_index(level='sec_id')
    perform = df_rr_monthly_1.loc[(selected_sec_ids, yearmonth),
                                  ['CLOSE', 'month_ret']]
    # Portfolio return: month_ret weighted by CLOSE.
    model_ret = (perform['CLOSE'] *
                 perform['month_ret']).sum() / perform['CLOSE'].sum()
    # The benchmark is stored in the same table, keyed by its own sec_id.
    benchmark_rr = df_rr_monthly[df_rr_monthly['sec_id'] == benchmark]
    benchmark_rr = benchmark_rr.set_index(['yearmonth'])
    benchmark_ret = benchmark_rr.loc[yearmonth, 'month_ret']
    records[yearmonth] = {
        "model": model_ret,
        "benchmark": benchmark_ret,
        "selected_sec_ids": ",".join(selected_sec_ids)
    }
    dk.dict2json(records, file_path)
Exemplo n.º 4
0
def update_factor_return_schema(factor):
    """
    Refresh the 'begin date', 'end date' and 'last update' fields of one
    entry in the factor_return schema.

    The date range is the min/max of the "date" column of '<factor>.csv'
    in the factor_return directory.

    @factor (str): name of the factor
    """
    root = DB_PATH_LIB['factor_return']
    schema_file = os.path.join(root, 'schema')
    schema = json2dict(schema_file)

    csv_file = os.path.join(root, "{}.csv".format(factor))
    dates = pd.read_csv(csv_file, encoding="utf-8")["date"]

    earliest = dates.min()
    entry = schema[factor]
    entry['begin date'] = earliest
    entry['end date'] = dates.max()
    entry['last update'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(factor))
    dict2json(schema, schema_file, log=False)
Exemplo n.º 5
0
def save_schema(schema, db_name):
    """
    Write `schema` to the path registered for `db_name` in SCHEMA_PATHS.

    @schema (dict): schema content to save
    @db_name (str): key into SCHEMA_PATHS
    """
    target_path = SCHEMA_PATHS[db_name]
    dict2json(schema, target_path, log=False)