示例#1
0
def gen_future_info(d):
    """Extend ``future_info.json`` in directory *d* with missing futures.

    If the file does not exist, ``init_future_info(d)`` is called and the
    function returns. Otherwise the existing records are loaded, every
    futures ``order_book_id`` reported by rqdatac that is not yet recorded
    is examined, and a new record (commission parameters plus tick size) is
    appended either per contract or, when the contract itself has no
    commission data, per underlying symbol (using its latest dominant
    contract). The merged list is written back to the same file.

    :param d: directory containing (or to contain) ``future_info.json``
    :return: None
    """
    future_info_file = os.path.join(d, 'future_info.json')
    if not os.path.exists(future_info_file):
        init_future_info(d)
        return

    with open(future_info_file, 'r') as f:
        all_futures_info = json.load(f)

    param = [
        'close_commission_ratio', 'close_commission_today_ratio',
        'commission_type', 'open_commission_ratio'
    ]

    # Existing records are keyed either by contract id or by underlying symbol.
    known_ids = []
    known_symbols = []
    for entry in all_futures_info:
        if entry.get('order_book_id'):
            known_ids.append(entry.get('order_book_id'))
        else:
            known_symbols.append(entry.get('underlying_symbol'))

    futures_order_book_id = rqdatac.all_instruments(
        type='Future')['order_book_id'].unique()
    for future in futures_order_book_id:
        # Leading letters of the contract id form the underlying symbol.
        underlying_symbol = re.match(r'^[a-zA-Z]*', future).group()
        if future in known_ids:
            continue

        commission = rqdatac.futures.get_commission_margin(future)
        if not commission.empty:
            # The contract has its own commission data: record it by id.
            known_ids.append(future)
            record = {'order_book_id': future}
            commission_row = commission.iloc[0]
            tick_source = future
        elif underlying_symbol in known_symbols \
                or underlying_symbol in {'S', 'TC', 'ER', 'WS', 'WT', 'RO', 'ME'}:
            # Symbol already recorded, or one of the explicitly skipped symbols.
            continue
        else:
            # Fall back to the latest dominant contract of the underlying.
            known_symbols.append(underlying_symbol)
            record = {'underlying_symbol': underlying_symbol}
            dominant = rqdatac.futures.get_dominant(underlying_symbol).iloc[-1]
            commission_row = rqdatac.futures.get_commission_margin(
                dominant).iloc[0]
            tick_source = dominant

        for p in param:
            record[p] = commission_row[p]
        record['tick_size'] = rqdatac.instruments(tick_source).tick_size()
        all_futures_info.append(record)

    with open(future_info_file, 'w') as f:
        json.dump(all_futures_info, f, separators=(',', ':'), indent=2)
示例#2
0
def constraints_gen(clean_order_book_ids, asset_type, constraints=None):
    """Build the constraint set for a portfolio-weight optimisation.

    Each constraint is a dict with a ``"type"`` (``"eq"`` or ``"ineq"``) and
    a ``"fun"`` callable taking the weight vector ``x`` (presumably the
    format consumed by ``scipy.optimize.minimize`` - confirm against caller).

    :param clean_order_book_ids: asset codes; their order defines the
        position of each asset's weight inside ``x``
    :param asset_type: ``'fund'`` or ``'stock'``; selects how each asset's
        category is looked up through rqdatac (any other value leaves all
        categories unset)
    :param constraints: optional mapping ``{category: (lower, upper)}``
        bounding the summed weight of every asset in that category
    :return: a single ``sum(x) == 1`` constraint dict when *constraints* is
        None; otherwise a tuple holding a lower- and an upper-bound
        inequality per category followed by the ``sum(x) == 1`` equality
    :raises OptimizationError: if any category's lower bound exceeds its
        upper bound
    """
    if constraints is None:
        return {'type': 'eq', 'fun': lambda x: sum(x) - 1}

    for key in constraints:
        if constraints[key][0] > constraints[key][1]:
            raise OptimizationError("Constraints setup error!")

    df = pd.DataFrame(index=clean_order_book_ids, columns=['type'])
    # Compare by value: ``is`` on strings only works by interning accident.
    if asset_type == 'fund':
        for i in clean_order_book_ids:
            df.loc[i, 'type'] = rqdatac.fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in clean_order_book_ids:
            df.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    # Factory functions freeze the positions and bound of *this* category;
    # a lambda written directly in the loop would see the last category only.
    def _lower_bound(positions, bound):
        return {"type": "ineq",
                "fun": lambda x: sum(x[t] for t in positions) - bound}

    def _upper_bound(positions, bound):
        return {"type": "ineq",
                "fun": lambda x: bound - sum(x[t] for t in positions)}

    cons = []
    for key in constraints:
        key_pos_list = [
            df.index.get_loc(i) for i in df[df['type'] == key].index
        ]
        cons.append(_lower_bound(key_pos_list, constraints[key][0]))
        cons.append(_upper_bound(key_pos_list, constraints[key][1]))

    # list.append returns None, so the equality must be appended first and
    # the list converted afterwards.
    cons.append({'type': 'eq', 'fun': lambda x: sum(x) - 1})
    return tuple(cons)
示例#3
0
def drop_recently_listed(universe, date, min_days_listed=60):
    """Return the order_book_ids from *universe* listed long enough.

    :param universe: order_book_ids passed to ``rqd.instruments``
    :param date: reference date passed to ``Instrument.days_from_listed``
    :param min_days_listed: an instrument is kept only when its
        ``days_from_listed(date)`` is strictly greater than this value
    :return: list of order_book_id strings
    """
    kept = []
    # rqd.instruments returns a list whose elements are Instrument objects.
    for instrument in rqd.instruments(universe):
        if instrument.days_from_listed(date) > min_days_listed:
            kept.append(instrument.order_book_id)
    return kept
示例#4
0
 def crawl(self, begin_date=None, end_date=None):
     """Upsert basic stock information for every trading day in a range.

     For each trading date between *begin_date* and *end_date*, all 'CS'
     instruments of that date are fetched and each instrument's attribute
     dict is upserted into ``DB_CONN['basic']`` keyed by ``code``. A summary
     line is printed per date. Does nothing when both dates are None.

     :param begin_date: start of the range (passed to rqd.get_trading_dates)
     :param end_date: end of the range (passed to rqd.get_trading_dates)
     :return: None
     """
     if begin_date is None and end_date is None:
         return

     # Trading days inside the requested interval.
     for date in rqd.get_trading_dates(begin_date, end_date):
         # Every stock available on that day.
         stocks = rqd.all_instruments(type='CS',
                                      date=date)['order_book_id'].tolist()

         # Stock information is independent of the date, so one upsert per
         # instrument simply overwrites the stored document.
         update_requests = [
             UpdateOne({'code': inst.__dict__['order_book_id']},
                       {'$set': inst.__dict__},
                       upsert=True)
             for inst in rqd.instruments(stocks)
         ]
         if not update_requests:
             continue

         # Bulk write into the 'basic' collection; ordered=False means the
         # requests are not required to be applied in order.
         update_result = DB_CONN['basic'].bulk_write(update_requests,
                                                     ordered=False)
         print('保存-%s-%s数据 , 插入:%4d , 更新:%4d' %
               (date, 'basic', update_result.upserted_count,
                update_result.modified_count),
               flush=True)
示例#5
0
def get_risk_indicators(previous_weight, current_weight, cov_matrix, asset_type):
    """
    Calculate the risk indicators of a weight vector.

    :param previous_weight: weights before rebalancing; aligned to the index
        of ``current_weight``, missing entries count as 0
    :param current_weight: weights after rebalancing; its ``.index`` is read
        when asset_type is 'fund' or 'stock', so a pandas Series is expected
        in those cases
    :param cov_matrix: data_frame, covariance matrix of the assets
    :param asset_type: str
                    'fund' or 'stock'; any other value leaves every asset
                    without a group, giving an empty group result
    :return: tuple
            HRCs (per-asset risk contributions), HRCGs (risk contributions
            summed per group), Herfindahl (sum of squared HRCs),
            turnover_rate (half the summed absolute weight change)
    """
    current_weight_array = np.array(current_weight)

    # refer to paper formula 2.19 2.20
    production_i = current_weight_array * (cov_matrix.dot(current_weight_array))
    productions = current_weight_array.dot(cov_matrix).dot(current_weight_array)

    # individual risk contributions and their concentration
    HRCs = production_i / productions
    Herfindahl = np.sum(HRCs ** 2)

    # group risk contributions: tag every asset with its category first
    df1 = pd.DataFrame(columns=['HRCs', 'type'])
    df1['HRCs'] = HRCs
    # Compare strings by value; ``is`` only matches when the two strings
    # happen to be the same interned object.
    if asset_type == 'fund':
        for i in current_weight.index:
            df1.loc[i, 'type'] = fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in current_weight.index:
            df1.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    HRCGs = df1.groupby(['type'])['HRCs'].sum()

    # weight turnover rate (http://factors.chinascope.com/docs/factors/#turnover)
    # if previous_weight is missing some asset, set the weight to 0
    df2 = pd.DataFrame(columns=['previous_weight', 'current_weight'])
    df2['current_weight'] = current_weight
    df2['previous_weight'] = previous_weight
    df2 = df2.fillna(0)

    turnover_rate = sum(abs(df2.current_weight - df2.previous_weight)) / 2

    return HRCs, HRCGs, Herfindahl, turnover_rate
示例#6
0
def subnew_stocks_filter(stocks,date,subnewThres=360):
    """
    Keep the stocks that have been listed for more than ``subnewThres`` days.

    NOTE(review): the original docstring said this returns the recently
    listed ("sub-new") stocks, but the comparison keeps stocks whose days
    since listing are strictly GREATER than the threshold, i.e. it filters
    the sub-new stocks out.

    :param stocks: list of stock codes
    :param date: str eg. "2018-01-01"
    :param subnewThres: int, threshold in calendar days since listing
    :return: list of the stocks listed for more than subnewThres days
    """
    seasoned = []
    for code in stocks:
        as_of = pd.Timestamp(date)
        listed_on = pd.Timestamp(rqdatac.instruments(code).listed_date)
        if (as_of - listed_on).days > subnewThres:
            seasoned.append(code)
    return seasoned
示例#7
0
def get_implicit_factor_return(date):
    """Estimate factor returns for the latest trading day on or before *date*.

    The stock universe is taken as of the previous trading day and then
    narrowed step by step: stocks whose listing date is later than the
    trading day 21 sessions back, stocks flagged ST and stocks with zero
    volume on the previous trading day are removed. Exposures of the
    remaining stocks (``get_exposure``) are fed to
    ``factor_return_estimation``.

    :param date: str in "%Y-%m-%d" format
    :return: whatever ``factor_return_estimation`` returns
    """
    # Asking for the trading day before (date + 1 day) yields date itself
    # when it is a trading day, otherwise the closest earlier trading day.
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = str(rqdatac.get_previous_trading_date(day_after))
    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Use the stocks already listed on the previous trading day so that a
    # daily return can be computed for each of them.
    universe = rqdatac.all_instruments(
        type='CS', date=previous_trading_date)['order_book_id'].tolist()

    # Drop stocks listed for fewer than 21 trading days.
    trading_date_21_days_before = str(
        rqdatac.get_previous_trading_date(latest_trading_date,
                                          country='cn',
                                          n=21))
    seasoned = []
    for code in universe:
        if rqdatac.instruments(code).listed_date <= trading_date_21_days_before:
            seasoned.append(code)

    # Drop ST stocks.
    is_st_df = rqdatac.is_st_stock(seasoned,
                                   start_date=previous_trading_date,
                                   end_date=previous_trading_date)
    # String index so the date string can be used as a row label.
    is_st_df.index = is_st_df.index.astype(str)
    st_flags = is_st_df.loc[previous_trading_date]
    non_st = st_flags[st_flags.values == False].index.tolist()

    # Drop suspended stocks (no traded volume on the previous trading day).
    trading_volume = rqdatac.get_price(non_st,
                                       start_date=previous_trading_date,
                                       end_date=previous_trading_date,
                                       frequency='1d',
                                       fields='volume',
                                       country='cn')
    volume_row = trading_volume.loc[previous_trading_date]
    tradable = volume_row[volume_row.values > 0].index.tolist()

    # Exposures of the whole market on the previous trading day.
    factor_exposure = get_exposure(tradable, str(previous_trading_date))

    # Factor returns estimated from those exposures.
    return factor_return_estimation(latest_trading_date, factor_exposure)
def general_constraints_gen(order_book_ids, clean_order_book_ids, asset_type, constraints=None):
    """Validate category constraints and turn them into optimiser constraints.

    The constraints are validated twice: once against the categories of the
    full ``order_book_ids`` list and once against the categories left after
    restricting to ``clean_order_book_ids``.

    :param order_book_ids: all requested asset codes
    :param clean_order_book_ids: the subset of ``order_book_ids`` kept after
        data cleaning; its order defines weight positions in ``x``
    :param asset_type: 'fund' or 'stock'; selects the rqdatac lookup used to
        obtain each asset's category
    :param constraints: optional mapping ``{category: (lower, upper)}``
    :return: a single ``sum(x) == 1`` constraint dict when *constraints* is
        None; otherwise a tuple with a lower- and upper-bound inequality per
        category followed by the ``sum(x) == 1`` equality
    :raises OptimizationError: on inverted or out-of-range bounds, unknown
        categories, lower bounds summing above 1, or (when every category is
        constrained) upper bounds summing below 1
    """
    if constraints is None:
        return {'type': 'eq', 'fun': lambda x: sum(x) - 1}

    df = pd.DataFrame(index=order_book_ids, columns=['type'])

    # Compare strings by value; identity (``is``) is not guaranteed to hold
    # for equal strings.
    if asset_type == 'fund':
        for i in order_book_ids:
            df.loc[i, 'type'] = rqdatac.fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in order_book_ids:
            df.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    # Constraints setup error check against the full asset list.
    all_types = df['type'].unique()
    temp_lb = 0
    temp_ub = 0
    for key in constraints:
        temp_lb += constraints[key][0]
        temp_ub += constraints[key][1]
        if constraints[key][0] > constraints[key][1]:
            raise OptimizationError("错误:合约类别 %s 的 constraints 下限高于上限。" % key)
        elif constraints[key][0] > 1 or constraints[key][1] < 0:
            raise OptimizationError("错误:合约类别 %s 的 constraints 下限大于1,或上限小于0。" % key)
        elif key not in all_types:
            raise OptimizationError("错误:constraints 中包含 order_book_ids 没有资产类型 %s。" % key)
    if temp_lb > 1:
        raise OptimizationError("错误:constraints 下限之和大于1。")
    # Upper bounds can only be infeasible when every category is bounded.
    if temp_ub < 1 and len(constraints) == len(all_types):
        raise OptimizationError("错误:constraints 上限之和小于1。")

    cons = list()
    temp_ub = 0
    df = df.loc[clean_order_book_ids]
    clean_types = df['type'].unique()

    # Factories bind each category's positions and bound at creation time.
    def key_cons_fun_lb(pos_list, lb):
        return {"type": "ineq", "fun": lambda x: sum(x[t] for t in pos_list) - lb}

    def key_cons_fun_ub(pos_list, ub):
        return {"type": "ineq", "fun": lambda x: ub - sum(x[t] for t in pos_list)}

    for key in constraints:
        if key not in clean_types:
            raise OptimizationError("错误:数据剔除后constraints 中包含 order_book_ids 没有资产类型 %s。" % key)
        key_pos_list = [
            df.index.get_loc(i) for i in df[df['type'] == key].index
        ]
        cons.append(key_cons_fun_lb(key_pos_list, constraints[key][0]))
        cons.append(key_cons_fun_ub(key_pos_list, constraints[key][1]))
        temp_ub += constraints[key][1]
    if len(clean_types) == len(constraints) and temp_ub < 1:
        raise OptimizationError("错误:数据剔除后constraints 上限之和小于1。")
    cons.append({'type': 'eq', 'fun': lambda x: sum(x) - 1})
    return tuple(cons)
def input_validation(order_book_ids, start_date, end_date, asset_type, method, rebalancing_frequency, window, bnds, cons,
                     cov_shrinkage, expected_return, expected_return_cov, risk_aversion_coefficient, res_options):
    """Validate optimiser inputs, raising OptimizationError on the first problem.

    Checks run in a fixed order: date range, asset type, method,
    rebalancing frequency, window, cov_shrinkage, expected_return and
    expected_return_cov sizes, risk aversion, res_options, and finally that
    every instrument returned by rqdatac for ``order_book_ids`` has the type
    matching ``asset_type`` ('CS' for stocks, 'PublicFund' for funds).

    :param order_book_ids: asset codes to validate
    :param start_date: str, compared lexicographically with "2005-07-01"
    :param end_date: str, must not be smaller than start_date
    :param asset_type: 'fund' or 'stock'
    :param method: 'risk_parity', 'min_variance', 'risk_parity_with_cons' or 'all'
    :param rebalancing_frequency: int, strictly positive
    :param window: int, at least 66
    :param bnds: not inspected here
    :param cons: not inspected here
    :param cov_shrinkage: bool
    :param expected_return: None or a sized object with one entry per asset
    :param expected_return_cov: None or a sized object with one entry per asset
    :param risk_aversion_coefficient: number, not negative
    :param res_options: 'weights', 'weights_indicators' or 'all'
    :return: None
    :raises OptimizationError: when any check fails
    """
    if start_date < "2005-07-01":
        raise OptimizationError('开始日期(start_date)不能早于2005年7月1日。')

    if end_date < start_date:
        raise OptimizationError('结束日期(end_date)不能早于开始日期(start_date)。')

    if asset_type not in ('fund', 'stock'):
        raise OptimizationError('资产类型(asset_type)必须为股票或基金。')

    if method not in ('risk_parity', 'min_variance', 'risk_parity_with_cons', 'all'):
        raise OptimizationError('请选择合适的优化器(method)。')

    # The type is checked before the value so that a non-numeric argument is
    # reported as OptimizationError rather than a TypeError from ``<=``.
    # An exact type test is kept on purpose: bool is a subclass of int and
    # was rejected by the original check.
    if type(rebalancing_frequency) is not int or rebalancing_frequency <= 0:
        raise OptimizationError('调仓频率(rebalancing_frequency)必须大于0,且必须为整数。')

    if type(window) is not int or window < 66:
        raise OptimizationError('协方差估计样本长度(window) 必须大于66 (不少于66个交易日) ,且必须为整数。')

    if not isinstance(cov_shrinkage, bool):
        raise OptimizationError('cov_shrinkage 为布尔类型变量,请选择 True 或者 False。')

    # ``is not None`` instead of ``!= None``: comparing an array or Series
    # with ``!=`` is element-wise and has no single truth value.
    if expected_return is not None and len(expected_return) != len(order_book_ids):
        raise OptimizationError('预期收益预测(expected_return)数目和资产(order_book_ids)数目不同。')

    if expected_return_cov is not None and len(expected_return_cov) != len(order_book_ids):
        raise OptimizationError('预期收益协方差矩阵(expected_return_cov)大小和资产数目(order_book_ids)不一致。')

    if risk_aversion_coefficient < 0:
        raise OptimizationError('风险厌恶系数(risk_aversion_coefficient)不能小于0。')

    if res_options not in ('weights', 'weights_indicators', 'all'):
        raise OptimizationError('优化结果返回设置(res_options)只能选择 weights, weights_indicators 或 all。')

    if asset_type == 'stock':
        asset_list = rqdatac.instruments(order_book_ids)

        # Keep only instruments typed 'CS'; anything else makes the count
        # fall short of the number of requested codes.
        asset_type_list = [asset.type for asset in asset_list if asset.type == 'CS']

        if len(asset_type_list) != len(order_book_ids):
            raise OptimizationError('传入的合约(order_book_ids)中包含非股票类合约。')

    else:
        # asset_type == 'fund' (the only other value accepted above)
        asset_list = rqdatac.fund.instruments(order_book_ids)

        # Keep only instruments typed 'PublicFund'.
        asset_type_list = [asset.type for asset in asset_list if asset.type == 'PublicFund']

        if len(asset_type_list) != len(order_book_ids):
            raise OptimizationError('传入的合约(order_book_ids)中包含非基金类合约(目前仅支持公募基金)。')
示例#10
0
def get_earnings_growth(date, year, market_cap_on_current_day):
    """Compute the earnings-growth factor (EGRO) for qualifying stocks.

    For every stock in the latest annual-report table whose listing date is
    more than 1825 days before *date*, the EPS of the five most recent
    annual reports is regressed on *year*; the factor value is the slope
    divided by the absolute mean EPS. The raw values are then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    :param date: str in "%Y-%m-%d" format
    :param year: array supporting ``reshape(-1, 1)``, used as the regressor;
        presumably five values ordered like the EPS series (newest first) -
        confirm against caller
    :param market_cap_on_current_day: market caps forwarded to the
        standardisation helper
    :return: the standardised EGRO values
    """
    (recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['EGRO'])

    def _eps(query_primary, query_fallback, quarter):
        # Query the preferred table once; fall back to the income statement
        # only when the preferred table contains missing values.
        primary = rqdatac.get_financials(query_primary, quarter, '1q')
        if primary.isnull().sum() == 0:
            return primary
        return rqdatac.get_financials(query_fallback, quarter, '1q')

    reports_newest_first = (
        annual_report, annual_report_last_year, annual_report_2_year_ago,
        annual_report_3_year_ago, annual_report_4_year_ago
    )

    for stock in growth_qualified_stocks:
        # Some companies revise a report several times after publishing and
        # the revised EPS can end up empty in the financial-indicator table
        # while still present in the income statement (e.g. '601519.XSHG',
        # 2014 annual report). financial_indicator is therefore the
        # preferred table and income_statement the fallback.
        query_f = rqdatac.query(
            rqdatac.financials.financial_indicator.earnings_per_share).filter(
                rqdatac.financials.stockcode.in_([stock]))

        query_i = rqdatac.query(rqdatac.financials.income_statement.
                                basic_earnings_per_share).filter(
                                    rqdatac.financials.stockcode.in_([stock]))

        eps = pd.Series([
            _eps(query_f, query_i, report[stock])
            for report in reports_newest_first
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), eps)
        # Single-step .loc assignment: the chained form
        # factor['EGRO'][stock] = ... may write to a temporary copy.
        factor.loc[stock, 'EGRO'] = float(regression.coef_) / abs(eps.mean())

    earning_growth = winsorization_and_market_cap_weighed_standardization(
        factor['EGRO'], market_cap_on_current_day)

    return earning_growth
def get_optimizer_indicators(weight0, cov_matrix, asset_type, type_tag=1):
    """
    Calculate the highest risk contributor individually and by group.

    :param weight0: weights at current time t; its ``.index`` is read for the
        group calculation, so a pandas Series is expected there
    :param cov_matrix: covariance matrix at current time t; its ``.dot``
        result must carry an index (e.g. a DataFrame) because the asset id is
        read from ``HRCs.index``
    :param asset_type: (str). 'fund' or 'stock'
    :param type_tag: (int). whether group risk contributors are needed.
        0 means no, anything else means yes
    :return: when type_tag == 0:
                (HRC, HRC_id, HRC_id, Herfindahl)
             otherwise:
                (HRC, HRC_id, HRC_id, Herfindahl, HRCG, HRCG group label,
                 Herfindahl_G)
             The asset id appears twice to keep the tuple layout callers of
             the grouped variant already rely on.
    """
    weight = np.array(weight0)
    # refer to paper formula 2.19 2.20
    production_i = weight * (cov_matrix.dot(weight))
    productions = weight.dot(cov_matrix).dot(weight)

    ## calculate for individual:
    # risk contribution of each asset and the largest one; ties resolve to
    # the highest position because tuples compare (value, position)
    HRCs = production_i / productions
    HRC, HRC_index = max([(v, i) for i, v in enumerate(HRCs)])
    HRC_id = HRCs.index[HRC_index]

    # calculate Herfindahl
    Herfindahl = np.sum(HRCs**2)

    if type_tag == 0:
        # The original called ``HRCs.index(HRC_index)``, which raises because
        # an Index is not callable; indexing is what was meant. Group data is
        # not needed here, so the per-asset category lookups are skipped.
        return (HRC, HRC_id, HRC_id, Herfindahl)

    ## calculate for groups:
    df1 = pd.DataFrame(columns=['HRCs', 'type'])
    df1['HRCs'] = HRCs
    # Compare strings by value, not identity.
    if asset_type == 'fund':
        for i in weight0.index:
            df1.loc[i, 'type'] = fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in weight0.index:
            df1.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    HRCGs = df1.groupby(['type'])['HRCs'].sum()
    HRCG, HRCG_index = max([(v, i) for i, v in enumerate(HRCGs)])

    # calculate Herfindahl of the groups
    Herfindahl_G = np.sum(HRCGs**2)

    return (HRC, HRC_id, HRC_id, Herfindahl, HRCG,
            HRCGs.index[HRCG_index], Herfindahl_G)
示例#12
0
    def baseUpdate(self):
        """Refresh the stored base information of every futures underlying.

        For each underlying symbol reported by rq for the current date, a
        document is built from the last matching instrument row plus
        tick size, contract multiplier, margin rate and session end time of
        the contract returned by ``self.getMain``. Underlyings not yet
        stored are collected and inserted in one batch; known ones are
        updated through ``FT.setMulti``.
        """
        df = rq.all_instruments(type='Future', date=public.getDate())
        u = df["underlying_symbol"].unique()

        FT = future_baseInfo()
        # Codes already stored and their recorded end time.
        exists = FT.exists()
        exCodes = [row[0] for row in exists]
        endMap = {row[0]: row[1] for row in exists}

        docs = []
        mu = self.getMain(u)

        # mu is indexed in step with u: mu[pos] belongs to underlying u[pos].
        for pos, d in enumerate(u):
            rows = df[df["underlying_symbol"] == d]
            n = rows.loc[:, FT.keylist[2:]].values[-1].tolist()
            doc = FT.set([d, 0] + n)

            detais = rq.instruments(mu[pos])
            doc["tick_size"] = detais.tick_size()
            doc['contract_multiplier'] = detais.contract_multiplier
            doc['margin_rate'] = detais.margin_rate

            # End time: when the first session does not start at 09 its end
            # is used, otherwise the end of the last session.
            sessions = detais.trading_hours.split(',')
            first_session = sessions[0].split('-')
            if first_session[0][0:2] == '09':
                doc["nightEnd"] = sessions[-1].split('-')[1]
            else:
                doc["nightEnd"] = first_session[1]

            if d in exCodes:
                # Report a changed end time before updating the record.
                stored_end = endMap[d][0:len(doc["nightEnd"])]
                if doc["nightEnd"] != stored_end:
                    print(d, endMap[d][0:len(doc["nightEnd"])],
                          doc["nightEnd"])

                FT.setMulti(doc)
            else:
                doc['product'] = 'Commodity'
                docs.append(doc)

        if docs:
            FT.insertAll(docs)
            logger.info(('future base update finished, ', docs))
示例#13
0
def constraints_gen(clean_order_book_ids, asset_type, constraints=None):
    """Validate category constraints and build the optimiser constraint set.

    :param clean_order_book_ids: asset codes; their order defines the
        position of each asset's weight inside ``x``
    :param asset_type: 'fund' or 'stock'; selects the rqdatac lookup used to
        obtain each asset's category
    :param constraints: optional mapping ``{category: (lower, upper)}``
    :return: a single ``sum(x) == 1`` constraint dict when *constraints* is
        None; otherwise a tuple with a lower- and upper-bound inequality per
        category followed by the ``sum(x) == 1`` equality
    :raises OptimizationError: on inverted or out-of-range bounds, when the
        upper bounds sum below 1 or the lower bounds above 1, or when a
        category does not occur among the assets
    """
    if constraints is None:
        return {'type': 'eq', 'fun': lambda x: sum(x) - 1}

    df = pd.DataFrame(index=clean_order_book_ids, columns=['type'])

    # Constraints setup error check
    temp_lb = 0
    temp_ub = 0
    for key in constraints:
        temp_lb += constraints[key][0]
        temp_ub += constraints[key][1]
        if constraints[key][0] > constraints[key][1]:
            raise OptimizationError("Constraints setup error for %s." %
                                    key)
        if constraints[key][0] > 1 or constraints[key][1] < 0:
            raise OptimizationError("Constraints setup error for %s." %
                                    key)
    if temp_ub < 1 or temp_lb > 1:
        raise OptimizationError("Constraints summation error.")

    # Compare strings by value; ``is`` tests object identity.
    if asset_type == 'fund':
        for i in clean_order_book_ids:
            df.loc[i, 'type'] = rqdatac.fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in clean_order_book_ids:
            df.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    # Factories freeze the positions and bound of each category. Lambdas
    # defined directly inside the loop look up ``key`` and ``key_pos_list``
    # when called, so every constraint would use the last category.
    def _lower_bound(positions, bound):
        return {"type": "ineq",
                "fun": lambda x: sum(x[t] for t in positions) - bound}

    def _upper_bound(positions, bound):
        return {"type": "ineq",
                "fun": lambda x: bound - sum(x[t] for t in positions)}

    present_types = df['type'].unique()
    cons = list()
    for key in constraints:
        if key not in present_types:
            raise OptimizationError(
                "Non-existing category in constraints: %s" % key)
        key_pos_list = [
            df.index.get_loc(i) for i in df[df['type'] == key].index
        ]
        cons.append(_lower_bound(key_pos_list, constraints[key][0]))
        cons.append(_upper_bound(key_pos_list, constraints[key][1]))
    cons.append({'type': 'eq', 'fun': lambda x: sum(x) - 1})
    return tuple(cons)
示例#14
0
def get_optimizer_indicators(weight0, cov_matrix, asset_type, type_tag=1):
    """
    Calculate the highest risk contributor individually and by group.

    :param weight0: weights at current time t; its ``.index`` is read for the
        group calculation, so a pandas Series is expected there
    :param cov_matrix: covariance matrix at current time t; its ``.dot``
        result must carry an index (e.g. a DataFrame) because the asset id is
        read from ``HRCs.index``
    :param asset_type: (str). 'fund' or 'stock'
    :param type_tag: (int). whether group risk contributors are needed.
        0 means no, anything else means yes
    :return: when type_tag == 0:
                (HRC, HRC_id, HRC_id, Herfindahl)
             otherwise:
                (HRC, HRC_id, HRC_id, Herfindahl, HRCG, HRCG group label,
                 Herfindahl_G)
             The asset id appears twice to keep the tuple layout callers of
             the grouped variant already rely on.
    """
    weight = np.array(weight0)
    # refer to paper formula 2.19 2.20
    production_i = weight * (cov_matrix.dot(weight))
    productions = weight.dot(cov_matrix).dot(weight)

    ## calculate for individual:
    # risk contribution of each asset and the largest one; ties resolve to
    # the highest position because tuples compare (value, position)
    HRCs = production_i / productions
    HRC, HRC_index = max([(v, i) for i, v in enumerate(HRCs)])
    HRC_id = HRCs.index[HRC_index]

    # calculate Herfindahl
    Herfindahl = np.sum(HRCs**2)

    if type_tag == 0:
        # ``HRCs.index(HRC_index)`` in the original raised because an Index
        # is not callable; indexing is what was meant. Group data is not
        # needed here, so the per-asset category lookups are skipped.
        return (HRC, HRC_id, HRC_id, Herfindahl)

    ## calculate for groups:
    df1 = pd.DataFrame(columns=['HRCs', 'type'])
    df1['HRCs'] = HRCs
    # Compare strings by value, not identity.
    if asset_type == 'fund':
        for i in weight0.index:
            df1.loc[i, 'type'] = fund.instruments(i).fund_type
    elif asset_type == 'stock':
        for i in weight0.index:
            df1.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name

    HRCGs = df1.groupby(['type'])['HRCs'].sum()
    HRCG, HRCG_index = max([(v, i) for i, v in enumerate(HRCGs)])

    # calculate Herfindahl of the groups
    Herfindahl_G = np.sum(HRCGs**2)

    return (HRC, HRC_id, HRC_id, Herfindahl, HRCG,
            HRCGs.index[HRCG_index], Herfindahl_G)
def get_current_domain_future_info():
    """Export margin rate and contract multiplier of each dominant contract.

    For every entry of the module-level ``need_to_monitor_future_list`` the
    last dominant contract returned for ``time_of_today`` is looked up and
    its margin rate, contract multiplier and name are appended to the
    module-level ``margin_rate_list``, ``contract_multiplier_list`` and
    ``symbol_list``. The three lists are then written as a CSV file under
    ``SAVE_PATH``.
    """
    for underlying in need_to_monitor_future_list:
        dominant_name = rq.get_dominant_future(underlying,
                                               time_of_today)[-1]
        instrument = rq.instruments(dominant_name)
        margin_rate_list.append(instrument.margin_rate)  # margin rate
        contract_multiplier_list.append(
            instrument.contract_multiplier)  # contract multiplier
        symbol_list.append(dominant_name)

    columns = {
        'future_name': symbol_list,
        'margin': margin_rate_list,
        'contract_multiplier': contract_multiplier_list
    }
    all_future_margin_and_contract_multiplier = pd.DataFrame(columns)
    target = SAVE_PATH + "all_future_margin_and_contract_multiplier.csv"
    all_future_margin_and_contract_multiplier.to_csv(target)
示例#16
0
def get_industry_exposure(order_book_ids):
    """Build a 0/1 Shenwan-industry exposure table for the given stocks.

    :param order_book_ids: stock codes; they become the columns of the result
    :return: DataFrame indexed by the 28 Shenwan industry index codes listed
        below, with 1 where a stock's ``shenwan_industry_code`` equals the
        row label and 0 elsewhere. A stock whose lookup fails, or whose code
        is not one of the listed industries, gets an all-zero column.
    """
    SHENWAN_INDUSTRY_MAP = {
        "801010.INDX": "农林牧渔",
        "801020.INDX": "采掘",
        "801030.INDX": "化工",
        "801040.INDX": "钢铁",
        "801050.INDX": "有色金属",
        "801080.INDX": "电子",
        "801110.INDX": "家用电器",
        "801120.INDX": "食品饮料",
        "801130.INDX": "纺织服装",
        "801140.INDX": "轻工制造",
        "801150.INDX": "医药生物",
        "801160.INDX": "公用事业",
        "801170.INDX": "交通运输",
        "801180.INDX": "房地产",
        "801200.INDX": "商业贸易",
        "801210.INDX": "休闲服务",
        "801230.INDX": "综合",
        "801710.INDX": "建筑材料",
        "801720.INDX": "建筑装饰",
        "801730.INDX": "电气设备",
        "801740.INDX": "国防军工",
        "801750.INDX": "计算机",
        "801760.INDX": "传媒",
        "801770.INDX": "通信",
        "801780.INDX": "银行",
        "801790.INDX": "非银金融",
        "801880.INDX": "汽车",
        "801890.INDX": "机械设备"
    }
    df = pd.DataFrame(index=list(SHENWAN_INDUSTRY_MAP),
                      columns=order_book_ids)
    for stk in order_book_ids:
        try:
            industry_code = rqdatac.instruments(stk).shenwan_industry_code
        except Exception:
            # Best-effort, as in the original: a stock whose lookup fails is
            # left unclassified. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        # Single-step .loc write; the chained form df[stk][code] = 1 may
        # assign to a temporary copy and never reach df. The membership test
        # keeps .loc from adding a new row for an unlisted code.
        if industry_code in SHENWAN_INDUSTRY_MAP:
            df.loc[industry_code, stk] = 1
    return df.fillna(0)  # replace NaN with 0
    # 最新具体合约的当天收盘价
    yesterday_price = pd.DataFrame(rq.get_price(newest_domiant_contract, start_date=last_trading_day,
                                                end_date=last_trading_day, frequency='1d'))
    newest_domiant_contract_close_price = yesterday_price['close']
    newest_domiant_contract_open_interest = yesterday_price['open_interest']
    # 获取前一交易日收盘价
    if newest_domiant_contract_close_price.empty:
        newest_domiant_contract_close_price = 0
    else:
        newest_domiant_contract_close_price = newest_domiant_contract_close_price[-1]
    # 获取前一交易日持仓量
    if newest_domiant_contract_open_interest.empty:
        newest_domiant_contract_open_interest = 0
    else:
        newest_domiant_contract_open_interest = newest_domiant_contract_open_interest[-1]
    temp_info = rq.instruments(newest_domiant_contract)
    margin_rate = temp_info.margin_rate
    open_interest = newest_domiant_contract_open_interest
    # 计算持仓金额
    open_interest_price = open_interest * newest_domiant_contract_close_price
    contract_multiplier = temp_info.contract_multiplier
    # 保证金 = 当日收盘价 * 合约乘数 * 保证金率 * 期货公司一般收取两倍
    margin = newest_domiant_contract_close_price * margin_rate * contract_multiplier * 2
    print_str1 = str(newest_domiant_contract) + "`s margin is" + str(int(margin))
    count += 1
    print print_str1
    future_name_list.append(temp_future_name)
    margin_list.append(margin)
    open_interest_list.append(open_interest)
    open_interest_price_list.append(open_interest_price)
示例#18
0
def optimizer(order_book_ids,
              start_date,
              asset_type,
              method,
              current_weight=None,
              bnds=None,
              cons=None,
              expected_return=None,
              expected_return_covar=None,
              risk_aversion_coefficient=1,
              windows=None,
              out_threshold_coefficient=None,
              data_freq=None,
              fun_tol=10**-8,
              max_iteration=10**3,
              disp=False,
              iprint=1,
              cov_enhancement=True,
              benchmark=None):
    """
    Compute portfolio weights with the selected optimization model.

    :param order_book_ids: str list. A list of assets (stocks or funds). Optional when expected_return_covar is
    given (it is then replaced by the columns of expected_return_covar);
    :param start_date: str. Date to initialize a portfolio or re-balance a portfolio. Optional when
    expected_return_covar is given;
    :param asset_type: str. "stock" or "fund". Types of portfolio candidates, portfolio with mixed assets is not
    supported;
    :param method: str. Portfolio optimization model: "risk_parity", "min_variance", "mean_variance",
    "risk_parity_with_con", "min_TE", "all" ("all" only runs "risk_parity", "min_variance" and
    "risk_parity_with_con"). When "min_TE" is chosen, expected_return_covar must be None.
    :param current_weight: floats list, optional. Default: 1/N (N: no. of assets). Initial guess for optimization.
    :param bnds: dict, optional. Lower bounds and upper bounds for each asset in portfolio.
    Supported input format: {"asset_code1": (lb1, up1), "asset_code2": (lb2, up2), ...} or {'full_list': (lb, up)}
    (universal bounds for all assets);
    :param cons: dict, optional. Lower bounds and upper bounds for each category of assets in portfolio;
    Supported funds type: Bond, Stock, Hybrid, Money, ShortBond, StockIndex, BondIndex, Related, QDII, Other; supported
    stocks industry sector: Shenwan_industry_name;
    cons: {"types1": (lb1, up1), "types2": (lb2, up2), ...}. The special value 1 (test hook) builds equal-share
    constraints of +/- 0.03 around 1/(number of categories);
    :param expected_return: pandas DataFrame. Default: means of the returns of order_book_ids within windows
    (empirical means). Must be given if expected_return_covar is given and "mean_variance" is chosen.
    :param expected_return_covar: pandas DataFrame, optional. Covariance matrix of expected return. If given, every
    model that needs a covariance matrix uses it instead of estimating one from sample data;
    :param risk_aversion_coefficient: float, optional. Risk aversion coefficient of Mean-Variance model. Default: 1.
    :param windows: int, optional. Default: 132. Data windows length.
    :param out_threshold_coefficient: float, optional. Passed to data_process to decide which assets have too little
    data. Default: 0.5 (out_threshold = 0.5*windows).
    :param data_freq: str, optional. Default: "D". Supported input: "D": daily data; "W": weekly data; "M": monthly
    data. See data_process for the caveats of weekly and monthly data.
    :param fun_tol: float, optional. Optimization accuracy requirement. The smaller, the more accurate, but the more
    time it costs. Default: 1e-8.
    :param max_iteration: int, optional. Max iteration number allowed during optimization. Default: 1000.
    :param disp: bool, optional. Optimization summary display control. Overrides iprint when False. Default: False.
    :param iprint: int, optional.
    The verbosity of optimization:
        * iprint <= 0 : Silent operation;
        * iprint == 1 : Print summary upon completion (default);
        * iprint >= 2 : Print status of each iterate and summary.
    :param cov_enhancement: bool, optional. Default: True. Use cov_shrinkage (shrinkage estimator) instead of the
    plain sample covariance matrix.
    :param benchmark: str, optional. Target to track in minimum tracking error ("min_TE") method.
    :return:
    When expected_return_covar is None: (weights, covariance matrix, eliminated assets, message).
    When expected_return_covar is given: (weights, covariance matrix, message).
    weights is a pandas DataFrame indexed by asset with one column per model; message is the optimizer message
    (a str for a single model, a one-column pandas DataFrame indexed by model for "all").
    :raises OptimizationError: fewer than two assets survive data cleaning, a required input is missing, or an
    optimizer fails for a reason other than hitting the iteration limit.
    """

    if not disp:
        iprint = 0

    opts = {
        'maxiter': max_iteration,
        'ftol': fun_tol,
        'iprint': iprint,
        'disp': disp
    }

    # L-BFGS-B takes its own verbosity scale. Clamp iprint into the mapped
    # range so that the documented "iprint >= 2" / "iprint <= 0" values do not
    # raise KeyError.
    log_barrier_risk_parity_iprint = {0: -1, 1: 0, 2: 1}
    log_barrier_risk_parity_opts = {
        'disp': log_barrier_risk_parity_iprint[max(0, min(2, iprint))]
    }

    if data_freq is None:
        data_freq = "D"
    if windows is None:
        windows = 132

    # These stay None when the caller supplies the covariance matrix, so the
    # code below can tell the two modes apart instead of hitting unbound names.
    data_after_processing = None
    clean_period_prices = None
    period_daily_return_pct_change = None

    if expected_return_covar is None:
        # Get clean data and calculate covariance matrix if no expected_return_covar is given
        data_after_processing = data_process(order_book_ids, asset_type,
                                             start_date, windows, data_freq,
                                             out_threshold_coefficient)
        clean_period_prices = data_after_processing[0]
        reset_start_date = data_after_processing[2]
        asset_count = clean_period_prices.shape[1]

        # At least two assets are needed
        if asset_count <= 1:
            raise OptimizationError("错误:数据剔除后order_book_ids数量不足。")

        # Generate enhanced estimation for covariance matrix
        period_daily_return_pct_change = clean_period_prices.pct_change()[1:]
        if cov_enhancement:
            c_m = cov_shrinkage(clean_period_prices)[0]
        else:
            c_m = period_daily_return_pct_change.cov()

        if current_weight is None:
            # Equal-weight initial guess; 1.0 keeps this a true division on Python 2 as well
            current_weight = [1.0 / asset_count] * asset_count
        else:
            # Re-order the supplied guess to follow the cleaned price columns
            supplied_weight = current_weight
            current_weight = [
                supplied_weight[order_book_ids.index(i)]
                for i in clean_period_prices.columns.values
            ]

        # Empirical means, overridden by the caller's views where provided
        if method == "mean_variance":
            empirical_mean = period_daily_return_pct_change.mean()
            if expected_return is not None:
                for i in expected_return.index.values:
                    if i in empirical_mean.index.values:
                        empirical_mean.loc[i] = expected_return.loc[i]
            expected_return = empirical_mean
    else:
        # Fail early with a clear message instead of an unbound-name /
        # TypeError deep inside the optimizer.
        if method == "min_TE":
            raise OptimizationError(
                "错误:min_TE 方法要求 expected_return_covar 为 None。")
        if method == "mean_variance" and expected_return is None:
            raise OptimizationError(
                "错误:给定 expected_return_covar 时,mean_variance 方法必须同时给定 expected_return。")

        # Get preparation done when expected_return_covar is given
        c_m = expected_return_covar

        if current_weight is None:
            current_weight = [1.0 / c_m.shape[0]] * c_m.shape[0]

        order_book_ids = list(c_m.columns.values)

    # Read benchmark data for min tracking error model
    if method == "min_TE":
        if benchmark is None:
            raise OptimizationError("错误:没有选择基准。")
        benchmark_price = rqdatac.get_price(
            benchmark,
            start_date=reset_start_date,
            end_date=rqdatac.get_previous_trading_date(start_date),
            fields="close")
        if data_freq != "D":
            benchmark_price_change = benchmark_price.asfreq(
                data_freq, method="pad").pct_change()[1:]
        else:
            benchmark_price_change = benchmark_price.pct_change()[1:]

    # Generate bounds
    clean_order_book_ids = list(c_m.columns.values)
    if method == "all":
        log_rp_bnds, general_bnds = bounds_gen(order_book_ids,
                                               clean_order_book_ids, method,
                                               bnds)
    elif method == "risk_parity":
        log_rp_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method,
                                 bnds)
    else:
        general_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method,
                                  bnds)

    # Generate constraints (the log-barrier risk parity model takes none)
    if method != "risk_parity":

        #########################################################################
        # Test hook: cons == 1 builds equal-share constraints for every
        # category found among the cleaned assets.
        if cons == 1:
            if clean_period_prices is not None:
                clean_order_book_ids = list(clean_period_prices.columns)
            df1 = pd.DataFrame(index=clean_order_book_ids, columns=['type'])

            if asset_type == 'fund':
                for i in clean_order_book_ids:
                    df1.loc[i, 'type'] = fund.instruments(i).fund_type
            elif asset_type == 'stock':
                for i in clean_order_book_ids:
                    df1.loc[i, "type"] = rqdatac.instruments(
                        i).shenwan_industry_name
            all_types = df1['type'].unique()
            cons_num = 1.0 / len(all_types)
            cons = {}
            for i in all_types:
                cons[i] = (cons_num - 0.03, cons_num + 0.03)
        #########################################################################
        general_cons = general_constraints_gen(order_book_ids,
                                               clean_order_book_ids,
                                               asset_type, cons)

    def failure_suffix():
        # Built lazily: only needed on the failure path, and there is no price
        # history to quote when the covariance matrix was supplied.
        if clean_period_prices is None:
            return ''
        return ' @ %s' % clean_period_prices.index[0]

    def run_slsqp(obj_fun, model_label, gradient=None):
        # Shared SLSQP driver: returns (weights, message); a run that merely
        # hit the iteration limit is still returned, any other failure raises.
        optimization_res = sc_opt.minimize(obj_fun,
                                           current_weight,
                                           method='SLSQP',
                                           jac=gradient,
                                           bounds=general_bnds,
                                           constraints=general_cons,
                                           options=opts)
        if optimization_res.success or optimization_res.nit >= max_iteration:
            return optimization_res.x, optimization_res.message
        raise OptimizationError('错误:' + model_label + ' 算法优化失败,' +
                                str(optimization_res.message) +
                                failure_suffix())

    # Log barrier risk parity model
    c = 15

    def log_barrier_risk_parity_obj_fun(x):
        return np.dot(x, np.dot(c_m, x)) - c * sum(np.log(x))

    def log_barrier_risk_parity_gradient(x):
        return np.multiply(2, np.dot(c_m, x)) - np.multiply(
            c, np.reciprocal(x))

    def log_barrier_risk_parity_optimizer():
        optimization_res = sc_opt.minimize(
            log_barrier_risk_parity_obj_fun,
            current_weight,
            method='L-BFGS-B',
            jac=log_barrier_risk_parity_gradient,
            bounds=log_rp_bnds,
            options=log_barrier_risk_parity_opts)

        if optimization_res.success:
            optimization_info = "Optimization terminated successfully."
        elif optimization_res.nit >= max_iteration:
            optimization_info = "Iteration limit exceeded"
        else:
            raise OptimizationError('错误:risk_parity 算法优化失败,' +
                                    str(optimization_res.message) +
                                    failure_suffix())
        # The log-barrier solution is only defined up to scale; normalize to sum 1
        optimal_weights = optimization_res.x / sum(optimization_res.x)
        return optimal_weights, optimization_info

    # Risk parity with constraints model
    def risk_parity_with_con_obj_fun(x):
        risk_contributions = np.multiply(x, np.dot(c_m, x))
        return np.sum(
            scsp.distance.pdist(risk_contributions[:, None], "euclidean"))

    def risk_parity_with_con_optimizer():
        return run_slsqp(risk_parity_with_con_obj_fun,
                         '带限制条件的risk_parity')

    # Min variance model
    def min_variance_obj_fun(x):
        return np.dot(np.dot(x, c_m), x)

    def min_variance_gradient(x):
        return np.multiply(2, np.dot(c_m, x))

    def min_variance_optimizer():
        return run_slsqp(min_variance_obj_fun, 'min_variance',
                         min_variance_gradient)

    # Mean variance model
    def mean_variance_obj_fun(x):
        return (np.multiply(risk_aversion_coefficient / 2,
                            np.dot(np.dot(x, c_m), x)) -
                np.dot(x, expected_return))

    def mean_variance_gradient(x):
        # np.asarray(..., dtype=float) replaces np.asfarray (removed in NumPy 2.0)
        return np.asarray(
            np.multiply(risk_aversion_coefficient, np.dot(x, c_m)).transpose()
            - expected_return,
            dtype=float).flatten()

    def mean_variance_optimizer():
        return run_slsqp(mean_variance_obj_fun, 'mean_variance',
                         mean_variance_gradient)

    # Minimizing tracking error model
    def min_TE_obj_fun(x):
        tracking_residual = np.subtract(
            benchmark_price_change,
            np.dot(period_daily_return_pct_change, x))
        return np.dot(tracking_residual.T, tracking_residual)

    def min_TE_optimizer():
        return run_slsqp(min_TE_obj_fun, 'min_TE')

    opt_dict = {
        'risk_parity':
        log_barrier_risk_parity_optimizer,
        'min_variance':
        min_variance_optimizer,
        'mean_variance':
        mean_variance_optimizer,
        'risk_parity_with_con':
        risk_parity_with_con_optimizer,
        "min_TE":
        min_TE_optimizer,
        'all': [
            log_barrier_risk_parity_optimizer, min_variance_optimizer,
            risk_parity_with_con_optimizer
        ]
    }

    asset_index = list(c_m.columns.values)

    if method != 'all':
        # Run the optimizer once and reuse both parts of its result
        optimal_weights, optimization_info = opt_dict[method]()
        weights_frame = pd.DataFrame(optimal_weights,
                                     index=asset_index,
                                     columns=[method])
        if expected_return_covar is None:
            return (weights_frame, c_m, data_after_processing[1],
                    optimization_info)
        return weights_frame, c_m, optimization_info

    model_names = ['risk_parity', 'min_variance', "risk_parity_with_con"]
    weights_frame = pd.DataFrame(index=asset_index, columns=model_names)
    messages_frame = pd.DataFrame(index=model_names,
                                  columns=["Opt Res Message"])
    for n, run_model in enumerate(opt_dict[method]):
        optimal_weights, optimization_info = run_model()
        weights_frame.iloc[:, n] = optimal_weights
        messages_frame.iloc[n, 0] = optimization_info
    if expected_return_covar is None:
        return weights_frame, c_m, data_after_processing[1], messages_frame
    return weights_frame, c_m, messages_frame
示例#19
0
def init_future_info(d):
    """
    Build ``future_info.json`` under directory ``d`` from scratch.

    For every future order_book_id returned by rqdatac:
      * if commission data exists for the contract, store a record keyed by
        ``order_book_id``;
      * otherwise store one record per underlying symbol (keyed by
        ``underlying_symbol``), taken from the last entry of the symbol's
        dominant-contract series; symbols in the exclusion set are skipped.
    Hard-coded records for TC, ER, WS, RO and ME are appended at the end.

    :param d: str. Directory in which ``future_info.json`` is written.
    """
    commission_fields = [
        'close_commission_ratio', 'close_commission_today_ratio',
        'commission_type', 'open_commission_ratio'
    ]
    skipped_symbols = {'S', 'TC', 'ER', 'WS', 'WT', 'RO', 'ME'}
    records = []
    seen_symbols = []

    order_book_ids = rqdatac.all_instruments(
        type='Future')['order_book_id'].unique()
    for order_book_id in order_book_ids:
        # Leading letters of the contract code are the underlying symbol
        symbol = re.match(r'^[a-zA-Z]*', order_book_id).group()
        commission_table = rqdatac.futures.get_commission_margin(order_book_id)

        if commission_table.empty:
            # No contract-level data: fall back to one record per symbol
            if symbol in seen_symbols or symbol in skipped_symbols:
                continue
            seen_symbols.append(symbol)
            record = {'underlying_symbol': symbol}
            source_contract = rqdatac.futures.get_dominant(symbol).iloc[-1]
            commission_row = rqdatac.futures.get_commission_margin(
                source_contract).iloc[0]
        else:
            record = {'order_book_id': order_book_id}
            source_contract = order_book_id
            commission_row = commission_table.iloc[0]

        for field in commission_fields:
            record[field] = commission_row[field]
        record['tick_size'] = rqdatac.instruments(source_contract).tick_size()
        records.append(record)

    # (symbol, close ratio, close-today ratio, open ratio, tick size)
    hard_coded_rows = (
        ('TC', 4.0, 0.0, 4.0, 0.2),
        ('ER', 2.5, 2.5, 2.5, 1.0),
        ('WS', 2.5, 0.0, 2.5, 1.0),
        ('RO', 2.5, 0.0, 2.5, 2.0),
        ('ME', 1.4, 0.0, 1.4, 1.0),
    )
    for symbol, close_ratio, close_today_ratio, open_ratio, tick in hard_coded_rows:
        records.append({
            'underlying_symbol': symbol,
            'close_commission_ratio': close_ratio,
            'close_commission_today_ratio': close_today_ratio,
            'commission_type': "by_volume",
            'open_commission_ratio': open_ratio,
            'tick_size': tick
        })

    target_path = os.path.join(d, 'future_info.json')
    with open(target_path, 'w') as f:
        json.dump(records, f, separators=(',', ':'), indent=2)
示例#20
0
def _elimination_reason(observations, listed_date, de_listed_date, end_date,
                        out_threshold, reasons, check_suspension=False):
    """
    Decide whether one asset must be dropped before covariance estimation.

    :param observations: pandas Series. Volumes (daily stocks) or prices of the asset within the sample window.
    :param listed_date: pandas Timestamp. Listing date of the asset.
    :param de_listed_date: str. Delisting date; "0000-00-00" means the asset is still listed.
    :param end_date: pandas Timestamp. Last day of the sample window.
    :param out_threshold: number. Minimum count of suspended / missing observations that triggers elimination.
    :param reasons: dict. Message for each of the keys "no_data", "new_listed", "de_listed", "suspended", "missing".
    :param check_suspension: bool. Count zero observations as suspended days (only meaningful for volumes).
    :return: str or None. The elimination reason, or None when the asset is kept.
    """
    if not observations.notnull().any():
        return reasons["no_data"]
    # New-listed test (calendar days between listing and the window end)
    if (end_date - listed_date).days <= 132:
        return reasons["new_listed"]
    # Delisted test; an asset with a delisting date after the window falls
    # through to the remaining tests instead of silently passing.
    if (de_listed_date != "0000-00-00"
            and pd.to_datetime(de_listed_date) < end_date):
        return reasons["de_listed"]
    # Long suspended test
    if check_suspension and (observations == 0).sum() >= out_threshold:
        return reasons["suspended"]
    # Late beginning day test and just-in-case test for missing values
    if observations.isnull().sum() >= out_threshold:
        return reasons["missing"]
    return None


def data_process(order_book_ids,
                 asset_type,
                 start_date,
                 windows,
                 data_freq,
                 out_threshold_coefficient=None):
    """
    Clean data for covariance matrix calculation
    :param order_book_ids: str list. A selected list of assets.
    :param asset_type: str. "fund" or "stock"
    :param start_date: str. The first day for backtest.
    :param windows: int. Interval length for sample.
    :param data_freq: str. Supported input: "D": daily data; "W": weekly data; "M": monthly data.
    Weekly data means the close price at the end of each week is taken; monthly means the close price at the end of each
    month. When weekly and monthly data are used, suspended days issues will not be considered. In addition, weekly and
    monthly data don't consider public holidays which have no trading. Users should use a windows a little bit larger
    to get desired data length.
    Users should be very careful when using weekly or monthly data to avoid the observations have too short length.
    :param out_threshold_coefficient: float, optional. Determine the threshold to filter out assets with too short data
    which may cause problem in covariance matrix calculation. Whose data length is shorter than threshold will
    be eliminated. Default: 0.5(out_threshold = 0.5*windows).
    :return:
    pandas DataFrame. Contain the prices after cleaning, columns in the order of order_book_ids;
    pandas DataFrame. The order_book_ids filtered out and the reasons of elimination;
    pandas Timestamp. A new start date for covariance calculation which may differ from default windows setting.
    :raises ValueError: asset_type is neither "stock" nor "fund".
    """
    if asset_type not in ("stock", "fund"):
        raise ValueError('asset_type must be "stock" or "fund", got %r' %
                         (asset_type, ))

    end_date = pd.to_datetime(rqdatac.get_previous_trading_date(start_date))
    # Choose the start date based on the windows inputted. The trading
    # calendar is read from "2005-01-01", so the window must fit between that
    # date and end_date. The windows for weekly and monthly data don't
    # consider any public holidays which have no trading.
    windows_dict = {
        "D": -(windows + 1),
        "W": -(windows + 1) * 5,
        "M": -(windows + 1) * 22
    }
    start_date = rqdatac.get_trading_dates("2005-01-01",
                                           end_date)[windows_dict[data_freq]]
    reset_start_date = pd.to_datetime(start_date)

    period_volume = None
    if asset_type == 'fund':
        period_prices = rqdatac.fund.get_nav(order_book_ids,
                                             reset_start_date,
                                             end_date,
                                             fields='adjusted_net_value')
    else:
        period_data = rqdatac.get_price(order_book_ids,
                                        reset_start_date,
                                        end_date,
                                        frequency='1d',
                                        fields=['close', 'volume'])
        period_prices = period_data['close']
        period_volume = period_data['volume']

    if data_freq != "D":
        period_prices = period_prices.asfreq(data_freq, method="pad")

    # Set up the threshold of elimination
    if out_threshold_coefficient is None:
        out_threshold_coefficient = 0.5
    out_threshold = ceil(windows * out_threshold_coefficient)

    # Pick the instrument lookup, the series to screen and the messages.
    # Daily stocks are screened on volume so that suspended days (zero volume)
    # can be counted; weekly/monthly stocks and funds are screened on prices.
    if asset_type == "fund":
        lookup_instrument = rqdatac.fund.instruments
        observations = period_prices
        check_suspension = False
        reasons = {
            "no_data": "无相关基金数据",
            "new_listed": "基金发行时间少于132个交易日",
            "de_listed": "基金已清算",
            "missing": "缺失值过多"
        }
    elif data_freq == "D":
        lookup_instrument = rqdatac.instruments
        observations = period_volume
        check_suspension = True
        reasons = {
            "no_data": "无相关股票数据",
            "new_listed": "上市时间少于132个交易日",
            "de_listed": "已退市",
            "suspended": "停牌交易日数量过多",
            "missing": "缺失值过多"
        }
    else:
        lookup_instrument = rqdatac.instruments
        observations = period_prices
        check_suspension = False
        reasons = {
            "no_data": "无相关股票数据",
            "new_listed": "股票上市时间少于132个交易日",
            "de_listed": "股票已退市",
            "missing": "缺失值过多"
        }

    kicked_ids = []
    kicked_reasons = []
    for i in order_book_ids:
        instrument_i = lookup_instrument(i)
        reason = _elimination_reason(
            observations.loc[:, i],
            pd.to_datetime(instrument_i.listed_date),
            instrument_i.de_listed_date, end_date, out_threshold, reasons,
            check_suspension)
        if reason is not None:
            kicked_ids.append(i)
            kicked_reasons.append(reason)

    # Built in one go: DataFrame.append was removed in pandas 2.0
    kickout_assets = pd.DataFrame({"剔除原因": kicked_reasons},
                                  index=kicked_ids,
                                  dtype=object)

    period_prices = period_prices.ffill()

    # Keep the surviving ids in the original input order, without duplicates
    excluded = set(kicked_ids)
    clean_order_book_ids = []
    for i in order_book_ids:
        if i not in excluded:
            excluded.add(i)
            clean_order_book_ids.append(i)

    clean_period_prices = period_prices.loc[reset_start_date:end_date,
                                            clean_order_book_ids]
    return clean_period_prices, kickout_assets, reset_start_date
示例#21
0
def gen_instruments(d):
    """
    Dump instrument attributes to ``instruments.pk`` under directory ``d``.

    Every order_book_id returned by ``rqdatac.all_instruments()`` is looked up
    (in sorted order) and the ``__dict__`` of each instrument object is
    pickled, as one list, with pickle protocol 2.

    :param d: str. Directory in which ``instruments.pk`` is written.
    """
    order_book_ids = list(rqdatac.all_instruments().order_book_id)
    order_book_ids.sort()

    records = []
    for instrument in rqdatac.instruments(order_book_ids):
        records.append(instrument.__dict__)

    target_path = os.path.join(d, 'instruments.pk')
    with open(target_path, 'wb') as out:
        pickle.dump(records, out, protocol=2)
示例#22
0
 def detail(self, code):
     """Return whatever ``rq.instruments`` yields for ``code``."""
     instrument_info = rq.instruments(code)
     return instrument_info
示例#23
0
def optimizer(order_book_ids, start_date, asset_type, method, current_weight=None, bnds=None, cons=None,
              expected_return=None, expected_return_covar=None, risk_aversion_coefficient=1,
              fun_tol=10**-12, max_iteration=10**5):
    """
    Compute optimal portfolio weights for a list of assets with the selected model.

    Prices are cleaned by ``data_process`` over a fixed window of 132, the covariance matrix of their
    percentage changes is computed, and the optimizer(s) selected by ``method`` are run on it.

    :param order_book_ids: list. A list of assets (stocks or funds);
    :param start_date: str. Date to initialize a portfolio or rebalance a portfolio;
    :param asset_type: str. Type of portfolio candidates, "stock" or "fund"; portfolios with mixed assets are
    not supported;
    :param method: str. Portfolio optimization model: "risk_parity", "min_variance", "mean_variance",
    "risk_parity_with_cons", "all" ("all" only runs "risk_parity", "min_variance" and "risk_parity_with_cons",
    not "mean_variance");
    :param current_weight: list, optional. Starting weights, positionally aligned with ``order_book_ids``; the
    entries of assets that survive cleaning are used as the optimizers' initial point. Default: equal weights
    over the surviving assets;
    :param bnds: dict, optional. Lower and upper bounds for each asset, forwarded to ``bounds_gen``.
    Supported input format: {"asset_code1": (lb1, up1), "asset_code2": (lb2, up2), ...} or
    {'full_list': (lb, up)} (universal bounds for all assets);
    :param cons: dict, optional. Lower and upper bounds for each category of assets, forwarded to
    ``constraints_gen``: {"types1": (lb1, up1), "types2": (lb2, up2), ...}.
    Supported fund types: Bond, Stock, Hybrid, Money, ShortBond, StockIndex, BondIndex, Related, QDII, Other;
    supported stock industry sector: Shenwan_industry_name.
    Passing the value ``1`` is a test hook: every category found among the surviving assets gets the
    constraint ``(0, 1 / number_of_categories)``;
    :param expected_return: column vector of floats, optional. Default: mean of the returns of the surviving
    assets within the window;
    :param expected_return_covar: numpy matrix, optional. Covariance matrix of expected return. Default: the
    covariance matrix of the returns within the window;
    :param risk_aversion_coefficient: float, optional. Risk aversion coefficient of the mean-variance model.
    Default: 1;
    :param fun_tol: float. Optimization accuracy requirement (passed as ``ftol``). The smaller, the more
    accurate, but the more time it costs;
    :param max_iteration: int. Maximum number of iterations allowed during optimization (passed as
    ``maxiter``);
    :return: if every asset was filtered out, only the kick-out table returned by ``data_process``.
    Otherwise a tuple ``(weights, covariance_matrix, kickout_table)`` where ``weights`` is a DataFrame indexed
    by the surviving assets with one column per model that was run;
    :raises OptimizationError: when a solver reports failure.
    """

    # Get clean data and calculate covariance matrix
    window = 132
    data_after_processing = data_process(order_book_ids, asset_type, start_date, window)
    clean_period_prices = data_after_processing[0]
    period_daily_return_pct_change = clean_period_prices.pct_change()
    c_m = period_daily_return_pct_change.cov()

    if clean_period_prices.shape[1] == 0:
        # All selected assets have been ruled out: only the kick-out table is returned.
        return data_after_processing[1]

    clean_order_book_ids = list(clean_period_prices.columns)
    asset_count = clean_period_prices.shape[1]

    if current_weight is None:
        current_weight = [1 / asset_count] * asset_count
    else:
        # Keep only the weights of the assets that survived cleaning, in column order.
        supplied_weight = current_weight
        current_weight = [supplied_weight[order_book_ids.index(i)] for i in clean_period_prices.columns.values]

    # Strings must be compared by value: identity ("is") only works by accident for interned literals.
    if method == "all":
        log_rp_bnds, general_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method, bnds)
    elif method == "risk_parity":
        log_rp_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method, bnds)
    else:
        general_bnds = bounds_gen(order_book_ids, clean_order_book_ids, method, bnds)

    #########################################################################
    # add for test purpose to set all constraints by zs on 0705
    if cons == 1:
        # get type and determine cons
        df1 = pd.DataFrame(index=clean_order_book_ids, columns=['type'])

        if asset_type == 'fund':
            # NOTE(review): relies on a module-level name `fund`; confirm it refers to rqdatac.fund.
            for i in clean_order_book_ids:
                df1.loc[i, 'type'] = fund.instruments(i).fund_type
        elif asset_type == 'stock':
            for i in clean_order_book_ids:
                df1.loc[i, "type"] = rqdatac.instruments(i).shenwan_industry_name
        all_types = df1['type'].unique()
        cons_num = 1 / len(all_types)
        cons = {i: (0, cons_num) for i in all_types}
    #########################################################################
    general_cons = constraints_gen(clean_order_book_ids, asset_type, cons)

    def ensure_success(optimization_res, model_name):
        # Shared failure check: the message carries the solver message and the first date of the window.
        if not optimization_res.success:
            temp = ' @ %s' % clean_period_prices.index[0]
            error_message = model_name + ' optimization failed, ' + str(optimization_res.message) + temp
            raise OptimizationError(error_message)

    # Log barrier risk parity model
    c = 15

    def log_barrier_risk_parity_obj_fun(x):
        return np.dot(np.dot(x, c_m), x) - c * sum(np.log(x))

    def log_barrier_risk_parity_gradient(x):
        return np.multiply(2, np.dot(c_m, x)) - np.multiply(c, np.reciprocal(x))

    def log_barrier_risk_parity_optimizer():
        optimization_res = sc_opt.minimize(log_barrier_risk_parity_obj_fun, current_weight, method='L-BFGS-B',
                                           jac=log_barrier_risk_parity_gradient, bounds=log_rp_bnds)
        ensure_success(optimization_res, 'Risk parity')
        # The barrier solution is not normalised; rescale so the weights sum to one.
        return optimization_res.x / sum(optimization_res.x)

    # Risk parity with constraints model
    def risk_parity_with_cons_obj_fun(x):
        # Sum of pairwise distances between x_i * (c_m x)_i terms.
        temp1 = np.multiply(x, np.dot(c_m, x))
        contributions = temp1[:, None]
        return np.sum(scsp.distance.pdist(contributions, "euclidean"))

    def risk_parity_with_cons_optimizer():
        optimization_res = sc_opt.minimize(risk_parity_with_cons_obj_fun, current_weight, method='SLSQP',
                                           bounds=general_bnds, constraints=general_cons,
                                           options={"ftol": fun_tol, 'maxiter': max_iteration})
        ensure_success(optimization_res, 'Risk parity with constraints')
        return optimization_res.x

    # Min variance model
    def min_variance_obj_fun(x):
        return np.dot(np.dot(x, c_m), x)

    def min_variance_gradient(x):
        return np.multiply(2, np.dot(c_m, x))

    def min_variance_optimizer():
        optimization_res = sc_opt.minimize(min_variance_obj_fun, current_weight, method='SLSQP',
                                           jac=min_variance_gradient, bounds=general_bnds, constraints=general_cons,
                                           options={"ftol": fun_tol, 'maxiter': max_iteration})
        ensure_success(optimization_res, 'Min variance')
        return optimization_res.x

    # Mean variance model
    if expected_return is None:
        expected_return = period_daily_return_pct_change.mean()
    if expected_return_covar is None:
        expected_return_covar = c_m

    def mean_variance_obj_fun(x):
        return (np.multiply(risk_aversion_coefficient/2, np.dot(np.dot(x, expected_return_covar), x)) -
                np.dot(x, expected_return))

    def mean_variance_gradient(x):
        # np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float) is the equivalent conversion.
        gradient = (np.multiply(risk_aversion_coefficient, np.dot(x, expected_return_covar)).transpose()
                    - expected_return)
        return np.asarray(gradient, dtype=float).flatten()

    def mean_variance_optimizer():
        optimization_res = sc_opt.minimize(mean_variance_obj_fun, current_weight, method='SLSQP',
                                           jac=mean_variance_gradient, bounds=general_bnds,
                                           constraints=general_cons,
                                           options={"ftol": fun_tol, "maxiter": max_iteration})
        ensure_success(optimization_res, 'Mean variance')
        return optimization_res.x

    opt_dict = {'risk_parity': log_barrier_risk_parity_optimizer,
                'min_variance': min_variance_optimizer,
                'mean_variance': mean_variance_optimizer,
                'risk_parity_with_cons': risk_parity_with_cons_optimizer,
                'all': [log_barrier_risk_parity_optimizer, min_variance_optimizer, risk_parity_with_cons_optimizer]}

    if method != 'all':
        return pd.DataFrame(opt_dict[method](), index=clean_period_prices.columns.values, columns=[method]), \
               c_m, data_after_processing[1]

    # "all": one column per model, filled in the same order as the optimizers listed in opt_dict['all'].
    temp1 = pd.DataFrame(index=clean_period_prices.columns.values, columns=['risk_parity', 'min_variance',
                                                                            "risk_parity_with_cons"])
    for n, f in enumerate(opt_dict[method]):
        temp1.iloc[:, n] = f()
    return temp1, c_m, data_after_processing[1]
示例#24
0
def _sales_per_share_at_year_end(query, stock, report_quarter):
    """Return revenue divided by total shares for one annual report of ``stock``.

    Revenue is fetched with ``rqdatac.get_financials`` for ``report_quarter``; total shares are
    taken on the trading date returned by ``rqdatac.get_previous_trading_date`` for the day after
    December 31 of the report's year (the first four characters of ``report_quarter``).
    """
    sales = rqdatac.get_financials(query, report_quarter, '1q')

    year_end = datetime.strptime(report_quarter[:4] + '-12-31', '%Y-%m-%d')
    latest_trading_date = str(
        rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))

    shares = rqdatac.get_shares(
        stock,
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        fields='total')

    return sales.values / shares.values


def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the sales-growth (``SGRO``) factor for stocks listed for more than 1825 days.

    For every qualifying stock the sales per share of the five annual reports returned by
    ``last_five_annual_report`` are regressed on ``year``; the factor value is the regression
    coefficient divided by the mean absolute sales per share. The raw factor is then passed
    through ``winsorization_and_market_cap_weighed_standardization``.

    :param date: str, analysis date formatted as ``%Y-%m-%d``.
    :param year: regressor for the five observations; must support ``reshape(-1, 1)``
        (presumably a numpy array ordered newest-first like the reports - TODO confirm).
    :param market_cap_on_current_day: market capitalisation forwarded to the standardisation helper.
    :return: the standardised factor returned by the standardisation helper.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])

    # Annual reports ordered from the most recent one to the one four years earlier.
    reports_newest_first = (
        annual_report, annual_report_last_year, annual_report_2_year_ago,
        annual_report_3_year_ago, annual_report_4_year_ago,
    )

    # Compute each stock's sales per share for each of the past five years from annual-report data.
    for stock in growth_qualified_stocks:

        query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _sales_per_share_at_year_end(query, stock, reports[stock])
            for reports in reports_newest_first
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)

        # Use .loc instead of chained indexing: factor['SGRO'][stock] = ... does not write
        # through to `factor` when pandas Copy-on-Write is active.
        factor.loc[stock, 'SGRO'] = float(
            regression.coef_) / abs(sales_per_share).mean()

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)

    return sale_growth
示例#25
0
def get_customized_factor_return(date, universe, options, method):
    """
    Compute factor returns on a user-specified stock universe.

    PARAMETERS
    ----------
    date: str
         Analysis date, formatted as "%Y-%m-%d".

    universe: list
         Stock pool specified by the user.

    options: dict
         Additional filters:
         drop_st_stock: boolean, whether to drop ST stocks;
         drop_new_stock: int, minimum number of calendar days since listing (no limit when absent);
         drop_suspended_stock: boolean, whether to drop stocks with zero trading volume on ``date``.

    method: str
         "implicit" estimates the factor return from the exposures of the filtered pool; any other
         value (e.g. "explicit") uses ``get_explicit_factor_returns``.

    RETURN
    ----------
    factor_return: Series, factor returns computed on the filtered stock pool.
    """

    latest_trading_date = str(
        rqdatac.get_previous_trading_date(
            datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)))

    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # Start from the full universe so the later filters work even when ST filtering is disabled
    # (previously stock_list was only bound inside the drop_st_stock branch -> NameError).
    stock_list = universe

    # Drop ST stocks if requested.
    if options.get('drop_st_stock'):

        is_st_df = rqdatac.is_st_stock(universe,
                                       start_date=date,
                                       end_date=date)

        is_st_df.index = is_st_df.index.astype(str)

        st_flags_on_date = is_st_df.loc[date]
        # Element-wise comparison on the underlying array, hence "== False".
        stock_list = st_flags_on_date[st_flags_on_date.values == False].index.tolist()  # noqa: E712

    # Drop suspended stocks (no volume on the analysis date) if requested.
    if options.get('drop_suspended_stock'):

        trading_volume = rqdatac.get_price(stock_list,
                                           start_date=date,
                                           end_date=date,
                                           frequency='1d',
                                           fields='volume',
                                           country='cn')

        volume_on_date = trading_volume.loc[date]
        stock_list = volume_on_date[volume_on_date.values > 0].index.tolist()

    # Drop newly listed stocks according to the user's listing-age limit.
    drop_new_stock = options.get('drop_new_stock')
    if drop_new_stock is None:
        threshold = latest_trading_date
    else:
        threshold = str(
            datetime.strptime(latest_trading_date, "%Y-%m-%d") -
            timedelta(days=drop_new_stock))

    # listed_date and threshold are compared as strings.
    stock_list = [
        stock for stock in stock_list
        if rqdatac.instruments(stock).listed_date <= threshold
    ]

    # Exposures of the filtered pool on the previous trading date.
    factor_exposure = get_exposure(stock_list, str(previous_trading_date))

    # Estimate factor returns from those exposures, or fall back to the explicit computation.
    if method == 'implicit':
        factor_return = customized_factor_return_estimation(
            date, factor_exposure, stock_list)
    else:
        factor_return = get_explicit_factor_returns(date, stock_list)

    return factor_return
def input_validation(order_book_ids, rebalancing_date, asset_type, method, window, bnds,
                     cons, cov_shrinkage, benchmark, expected_return, industry_matching, risk_aversion_coefficient):
    """Validate optimizer inputs and report the first problem found.

    Checks run in a fixed order and stop at the first failure.

    :param order_book_ids: list of asset identifiers.
    :param rebalancing_date: str, compared as a string against "2005-07-01".
    :param asset_type: 'fund' or 'stock'.
    :param method: 'risk_parity', 'min_variance', 'mean_variance' or 'min_TE'.
    :param window: int, covariance sample length; values below 66 are rejected.
    :param bnds: not inspected by this function.
    :param cons: not inspected by this function.
    :param cov_shrinkage: bool.
    :param benchmark: 'equal_weight' is rejected for 'min_TE' and when ``industry_matching`` is True.
    :param expected_return: None or pandas.Series whose index must be contained in ``order_book_ids``;
        only checked when ``method`` is 'mean_variance'.
    :param industry_matching: bool.
    :param risk_aversion_coefficient: number, must not be negative.
    :return: an error message (str) describing the first failed check, or ``0`` when all checks pass.
    """

    if rebalancing_date < "2005-07-01":
        return '调仓日期(rebalancing_date)不能早于2005年7月1日。'

    if asset_type not in ('fund', 'stock'):
        return '资产类型(asset_type)必须为股票或基金。'

    if method not in ('risk_parity', 'min_variance', 'mean_variance', 'min_TE'):
        return '请选择合适的优化算法(method)。'

    # Check the type first so that a non-numeric window yields the message instead of a TypeError.
    if not isinstance(window, int) or window < 66:
        return '协方差估计样本长度(window) 必须大于66 (不少于66个交易日) ,且必须为整数。'

    if not isinstance(cov_shrinkage, bool):
        return 'cov_shrinkage 为布尔类型变量,请选择 True 或者 False。'

    if method == 'min_TE' and benchmark == 'equal_weight':
        return 'min_TE 方法需要传入指数型 benchmark。'

    if benchmark == 'equal_weight' and industry_matching == True:  # noqa: E712
        return '行业配齐需要传入指数型benchmark。'

    if method == 'mean_variance' and expected_return is not None:
        # A forecast that is not a Series is a type error; a Series must only reference selected assets.
        if not isinstance(expected_return, pd.Series):
            return '预期收益预测(expected_return)的类型应为 pandas.Series。'
        missing_asset = [asset for asset in expected_return.index if asset not in order_book_ids]
        if missing_asset:
            return '预期收益预测(expected_return)和所选合约(order_book_ids)不一致。'

    if risk_aversion_coefficient < 0:
        return '风险厌恶系数(risk_aversion_coefficient)不能小于0。'

    if asset_type == 'stock':

        asset_list = rqdatac.instruments(order_book_ids)

        # Collect the type tag of stock-like assets (exchange-traded funds, structured funds etc. have a
        # type other than "CS"; off-exchange funds return None; neither ends up in the list).
        asset_type_list = [asset.type for asset in asset_list if asset.type == 'CS']

        if len(asset_type_list) != len(order_book_ids):
            return '传入的合约(order_book_ids)中包含非股票类合约。'

    if asset_type == 'fund':

        asset_list = rqdatac.fund.instruments(order_book_ids)

        # Collect the type tag of public-fund assets.
        asset_type_list = [asset.type for asset in asset_list if asset.type == 'PublicFund']

        if len(asset_type_list) != len(order_book_ids):
            return '传入的合约(order_book_ids)中包含非基金类合约(目前仅支持公募基金)。'

    return 0