def index_components(order_book_id, date=None):
    # type: (str, Optional[Union[str, datetime.date]]) -> List[str]
    """
    Return the constituent stocks of an index, optionally as of a past date.

    :param order_book_id: index code; normalised through
        ``assure_order_book_id`` before the query
    :param date: query date; defaults to the strategy's current trading date.
        When given, it must not be later than that date.
    :return: the result of ``rqdatac.index_components`` for the resolved index
        and date (annotated as a list of constituent order_book_ids)
    :raises RQInvalidArgument: if ``date`` is later than the strategy's
        current trading date

    :example:

    Constituents of the SSE Composite index on the strategy's current date:

    ..  code-block:: python3
        :linenos:

        index_components('000001.XSHG')
        #[Out]['600000.XSHG', '600004.XSHG', ...]
    """
    today = Environment.get_instance().trading_dt.date()

    if date is not None:
        query_date = to_date(date)
        # Looking past the current strategy date is rejected outright.
        if query_date > today:
            raise RQInvalidArgument(
                _('in index_components, date {} is no earlier than test date {}'
                  ).format(query_date, today))
    else:
        query_date = today

    resolved_id = assure_order_book_id(order_book_id)
    return rqdatac.index_components(resolved_id, date=query_date)
def init(context):
    """
    Strategy initialisation: pick a random test universe and a rebalance schedule.

    Stores on ``context``:

    * ``s1`` - 100 names drawn without replacement from the components of
      '000300.XSHG' as of 2014-01-01.
    * ``rebalance_time`` - 12 dates: the result of ``get_next_trading_date``
      for 2014-01-01, followed by 11 more, each obtained by adding 15 weeks to
      the previous entry and passing the result to ``get_next_trading_date``.
    """
    # Keep the sampled universe on the context
    hs300_members = rqdatac.index_components("000300.XSHG", "2014-01-01")
    context.s1 = list(
        np.random.choice(hs300_members, size=100, replace=False))

    weeks_between_rebalances = 15
    step = datetime.timedelta(weeks=weeks_between_rebalances)

    schedule = [get_next_trading_date(datetime.date(2014, 1, 1))]
    context.rebalance_time = schedule
    for _ in range(11):
        schedule.append(get_next_trading_date(schedule[-1] + step))

    # Log the run configuration
    logger.info("RunInfo: {}".format(context.run_info))
def get_idx_cons(self, idx_id=None, ticker=None, index_date=None):
    '''
    Return index constituents from the data source selected by ``self.source``.

    :param idx_id: index id (1: SSE Composite; 1782: CSI 300). Not used by the
        current implementation.
    :param ticker: index ticker, forwarded to ``DataAPI.IdxConsGet``
        (source 0 only)
    :param index_date: forwarded as ``intoDate`` to ``DataAPI.IdxConsGet``
        (source 0 only)
    :return: source 0 - list of '<consTickerSymbol>.<consExchangeCD>' strings;
        source 1 - the result of ``rqdatac.index_components('000016.XSHG')``;
        any other source - ``None``
    '''
    source = self.source

    if source == 1:
        # TODO to be added for rqdata when the proxy for SDK is solved
        return rqdatac.index_components('000016.XSHG')

    if source != 0:
        return None

    frame = DataAPI.IdxConsGet(ticker=ticker,
                               isNew=u"",
                               intoDate=index_date,
                               field=["consTickerSymbol", 'consExchangeCD'],
                               pandas="1")
    rows = list(frame.values)
    # Each row is (ticker symbol, exchange code); join them with a dot
    return ['{0}.{1}'.format(row[0], row[1]) for row in rows]
def get_explicit_factor_returns(date):
    """
    Compute long-short ("explicit") style factor returns for four universes.

    All data are taken as of the trading day before ``date``. For each
    universe the stocks are first bucketed on ``size`` and ``beta`` (three
    equal-width bins each, via ``pd.cut``). Within every (size, beta) bucket
    each factor splits the stocks at the bucket median; the factor return is
    the mean one-day price change of the above-median stocks minus that of
    the at-or-below-median stocks, pooled over all buckets.

    :param date: reference date; the previous trading date is used for all
        queries
    :return: pandas.DataFrame with one row per factor-exposure column and the
        columns ``whole_market``, ``csi_300``, ``csi_500``, ``csi_800``
    """
    previous_trading_date = rqdatac.get_previous_trading_date(date)
    all_a_stocks = rqdatac.all_instruments(
        type="CS", date=previous_trading_date).order_book_id.tolist()
    filtered_stocks = noisy_stocks_filter(all_a_stocks, previous_trading_date)

    factor_exposures = rqdatac.get_style_factor_exposure(
        all_a_stocks, previous_trading_date, previous_trading_date,
        "all").sort_index()
    # Drop the second index level so rows can be selected by stock id alone
    # (presumably the dropped level is the date - confirm against rqdatac).
    factor_exposures.index = factor_exposures.index.droplevel(1)

    close_price = rqdatac.get_price(
        all_a_stocks,
        rqdatac.get_previous_trading_date(previous_trading_date),
        previous_trading_date,
        fields="close")
    # Last row of the percentage change = return over the final day fetched
    price_change = close_price.pct_change().iloc[-1]

    index_mapping = {
        "csi_300": '000300.XSHG',
        "csi_500": "000905.XSHG",
        "csi_800": "000906.XSHG",
    }
    all_stocks = {
        name: rqdatac.index_components(code, date=previous_trading_date)
        for name, code in index_mapping.items()
    }
    all_stocks['whole_market'] = filtered_stocks

    def _calc_single_explicit_returns(_factor_exposure,
                                      market_neutralize_stocks):
        # Long-short return of one factor, pooled over the (size, beta) buckets.
        #
        # The halves are accumulated into sets directly. The previous
        # implementation went through np.array(<ragged nested lists>), which
        # raises on NumPy >= 1.24 and silently produced per-character unions
        # whenever the nested lists happened to be rectangular.
        short_stocks = set()
        long_stocks = set()
        for neutralized_stks in market_neutralize_stocks:
            bucket = _factor_exposure[neutralized_stks]
            median = bucket.median()
            short_stocks.update(bucket[bucket <= median].index.tolist())
            long_stocks.update(bucket[bucket > median].index.tolist())
        # Sorted so the result does not depend on set iteration order
        return (price_change[sorted(long_stocks)].mean()
                - price_change[sorted(short_stocks)].mean())

    def _calc_explicitReturns_with_stocksList(stocksList):
        # Factor returns for one stock universe.
        size_beta = factor_exposures[['size', 'beta']].loc[stocksList]
        # Bin labels 1..3 per column; reset_index gives positional row numbers
        bins = size_beta.apply(
            lambda col: pd.cut(col, bins=3, labels=False) + 1).reset_index()
        # For every (size, beta) bin pair: positional row numbers of its members
        positions_by_bucket = bins.groupby(['size', 'beta']).apply(
            lambda grp: grp.index.tolist())
        # Map the positions back to stock ids (row order follows stocksList)
        ids_by_position = pd.Series(stocksList)
        market_neutralize_stocks = positions_by_bucket.apply(
            lambda pos: ids_by_position.loc[pos].values.tolist()
        ).values.tolist()
        return factor_exposures.loc[stocksList].apply(
            lambda col: _calc_single_explicit_returns(
                col, market_neutralize_stocks))

    results = {
        key: _calc_explicitReturns_with_stocksList(stocks)
        for key, stocks in all_stocks.items()
    }
    return pd.DataFrame(results)[[
        'whole_market', 'csi_300', 'csi_500', 'csi_800'
    ]]
def get_index_component_industry_and_marketcap(matching_index, matching_date):
    """
    Build a per-component table of industry, market cap and cap weight.

    :param matching_index: index code passed to ``rqdatac.index_components``
    :param matching_date: date used for the component list, the Shenwan
        industry lookup and as ``entry_date`` of the fundamentals query
    :return: DataFrame indexed by component with columns ``industry``,
        ``market_cap`` and ``percent`` (market cap divided by the total),
        sorted by ``industry`` then ``market_cap``
    """
    # Index components and their industry
    i_c = rqdatac.index_components(matching_index, matching_date)
    matching_index_df = pd.DataFrame(index=i_c)
    matching_index_df['industry'] = [
        rqdatac.shenwan_instrument_industry(s, matching_date)
        for s in matching_index_df.index
    ]

    # Market cap of every component
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(
                matching_index_df.index)),
        entry_date=matching_date)
    # NOTE(review): `.items` is the legacy pandas Panel accessor - confirm the
    # installed rqdatac/pandas combination still returns a Panel here.
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()

    # Join industry and market cap side by side
    matching_index_cap_df = pd.concat([matching_index_df, market_cap_], axis=1)

    # Rename the second column by assigning a fresh column list. Writing into
    # `columns.values` mutates an immutable Index behind pandas' back and is
    # not reliable.
    column_names = list(matching_index_cap_df.columns)
    column_names[1] = 'market_cap'
    matching_index_cap_df.columns = column_names

    # Weight of each component by market cap
    total_market_cap = sum(matching_index_cap_df.market_cap)
    matching_index_cap_df['percent'] = (
        matching_index_cap_df.market_cap / total_market_cap)

    # Sort by industry, then market cap
    res = matching_index_cap_df.sort_values(['industry', 'market_cap'])
    return res
def factor_return_estimation(latest_trading_date, factor_exposure):
    """
    Estimate daily factor returns by constrained weighted least squares.

    The regression is run four times: on every stock with a market cap
    (``whole_market``) and on the components of CSI 300, CSI 500 and CSI 800
    that have a market cap. Regression weights are proportional to the square
    root of market cap. Industry factor returns are constrained using the
    per-industry market-cap totals; the 10 style factors are unconstrained.

    :param latest_trading_date: date whose close-to-close return is explained
    :param factor_exposure: DataFrame of exposures indexed by stock; must
        contain the columns ``size``, ``non_linear_size`` and the Shenwan
        industry columns listed below
    :return: DataFrame with columns ``whole_market``, ``csi_300``,
        ``csi_500``, ``csi_800``; NaN entries are replaced by 0
    """
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)
    stock_ids = factor_exposure.index.tolist()

    # Daily return in excess of the daily risk-free rate
    daily_return = rqdatac.get_price(
        order_book_ids=stock_ids,
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    # 1.0 / 252 keeps this a true division on Python 2 as well
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return)**(1.0 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Square root of market cap is the weight of the weighted least squares
    market_cap = rqdatac.get_factor(
        id_or_symbols=stock_ids,
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)
    missing_mask = market_cap.isnull()
    missing_market_cap_stock = market_cap[missing_mask].index.tolist()
    if missing_market_cap_stock:
        # Fall back to close price * total A shares where the factor is missing
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T
        market_cap[missing_mask] = (price * shares)[previous_trading_date]
    sqrt_market_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_market_cap / sqrt_market_cap.sum()

    # Industry columns differ before and after the 2014 reclassification.
    # NOTE(review): this is a string comparison; it assumes
    # str(previous_trading_date) is formatted 'YYYY-MM-DD...' - confirm.
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
            '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
            '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
            '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
            '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]
    # The 10 style factors named in the original (disabled) list were: beta,
    # momentum, earnings_yield, residual_volatility, growth, book_to_price,
    # leverage, liquidity, size, non_linear_size.

    # Total market cap per industry, used for the industry-return constraint
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    def _benchmark_factor_return(index_code):
        # Constrained WLS restricted to one index's components.
        components = rqdatac.index_components(index_name=index_code,
                                              date=previous_trading_date)
        # Only components with a market cap can enter the regression
        components = list(
            set(market_cap.index.tolist()).intersection(set(components)))

        # Per-industry market cap inside the benchmark
        benchmark_industry_cap = market_cap[components].dot(
            factor_exposure[industry_factors].loc[components])
        # An industry whose total is below 100 is treated as absent
        missing_industry = benchmark_industry_cap[
            benchmark_industry_cap < 100].index
        benchmark_industry_cap = benchmark_industry_cap.drop(missing_industry)

        # Re-orthogonalise the non-linear size exposure against size within
        # this universe. The explicit copy keeps the caller's frame untouched
        # and avoids pandas' chained-assignment warning.
        benchmark_exposure = factor_exposure.loc[components].copy()
        sqrt_cap = np.sqrt(market_cap[components])
        benchmark_exposure['non_linear_size'] = orthogonalize(
            target_variable=np.power(benchmark_exposure['size'], 3),
            reference_variable=benchmark_exposure['size'],
            regression_weight=sqrt_cap / sqrt_cap.sum())

        return constrainted_weighted_least_square(
            Y=daily_excess_return[factor_exposure.index][components].values[0],
            X=benchmark_exposure.drop(missing_industry, axis=1),
            weight=normalized_regression_weight[
                factor_exposure.index][components],
            industry_total_market_cap=benchmark_industry_cap,
            unconstrained_variables=10,
            constrained_variables=len(benchmark_industry_cap))

    factor_return_series = pd.DataFrame()
    # Style factors are unconstrained; every industry factor is constrained
    factor_return_series['whole_market'] = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    benchmarks = (
        ('csi_300', '000300.XSHG'),  # CSI 300
        ('csi_500', '000905.XSHG'),  # CSI 500
        ('csi_800', '000906.XSHG'),  # CSI 800
    )
    for column, index_code in benchmarks:
        factor_return_series[column] = _benchmark_factor_return(index_code)

    # An industry with no stocks in the index gets a factor return of 0
    return factor_return_series.replace(np.nan, 0)
def get_factor(order_book_ids,
               factor,
               start_date=None,
               end_date=None,
               universe=None,
               expect_df=False,
               **kwargs):
    """Fetch factor values.

    :param order_book_ids: stock code or list of codes; duplicates are
        dropped and first-seen order is kept
    :param factor: factor name or list of names, e.g. 'total_income'
    :param date: (keyword only, via ``kwargs``) single query date such as
        '2015-01-05'. Only read when neither ``start_date`` nor ``end_date``
        is given; defaults to the previous trading day.
    :param start_date: start date, e.g. '2015-01-05'. Values earlier than
        2000-01-04 are raised to 2000-01-04 with a warning.
    :param end_date: end date; must be given together with ``start_date``
    :param universe: stock pool: ``None``, "all", or an index code. Stocks
        outside the index's components trigger a warning.
    :param expect_df: if true, always return a DataFrame with an
        (order_book_id, date) MultiIndex (Default value = False)
    :returns: ``None`` when the store returns no data. A MultiIndex DataFrame
        (columns = factors) when ``expect_df`` is true or several factors are
        requested. Otherwise a date x order_book_id DataFrame, reduced to a
        Series when it has a single row or a single column.
    :raises ValueError: if only one of ``start_date`` / ``end_date`` is
        given, or if unknown keyword arguments remain
    """
    order_book_ids = ensure_order_book_ids(order_book_ids, type="CS")
    # De-duplicate while keeping input order. A plain set() makes the order
    # of the result's rows/columns depend on string hashing, i.e. vary
    # between interpreter runs.
    order_book_ids = list(OrderedDict.fromkeys(order_book_ids))

    factor = ensure_list_of_string(factor)
    factor = list(OrderedDict.fromkeys(factor))

    if start_date and end_date:
        start_date, end_date = ensure_date_range(start_date, end_date,
                                                 datetime.timedelta(days=15))
        if start_date < 20000104:
            warnings.warn(
                "start_date is earlier than 2000-01-04, adjusted to 2000-01-04"
            )
            start_date = 20000104
    elif start_date:
        raise ValueError("Expect end_date")
    elif end_date:
        raise ValueError("Expect start_date")
    else:
        date = kwargs.pop("date", None)
        date = ensure_date_int(
            date or get_previous_trading_date(datetime.date.today()))
        start_date = end_date = date

    if kwargs:
        raise ValueError('unknown kwargs: {}'.format(kwargs))

    if universe is not None:
        universe = ensure_string(universe, "universe")
        if universe != "all":
            universe = ensure_order_book_id(universe, type="INDX")
            from rqdatac import index_components
            allowed_order_book_ids = set(
                index_components(universe, date=end_date) or [])
            not_permit_order_book_ids = [
                order_book_id for order_book_id in order_book_ids
                if order_book_id not in allowed_order_book_ids
            ]
            if not_permit_order_book_ids:
                warnings.warn(
                    "%s not in universe pool, value of those order_book_ids will always be NaN"
                    % not_permit_order_book_ids)

    data = get_client().execute("get_factor_from_store",
                                order_book_ids,
                                factor,
                                start_date,
                                end_date,
                                universe=universe)
    if not data:
        return

    # Each record is (order_book_id, factor_name, values); the length of the
    # first record's values is taken as the number of dates actually covered.
    factor_value_length = len(data[0][2])
    if factor_value_length == 0:
        return

    dates = pd.to_datetime(get_trading_dates(start_date, end_date))
    if len(dates) > factor_value_length:
        # The store covers fewer days than requested: warn and truncate
        _get_factor_warning_msg(dates[factor_value_length], dates[-1])
        dates = dates[0:factor_value_length]

    order_book_id_index_map = {o: i for i, o in enumerate(order_book_ids)}
    n_dates = len(dates)

    if expect_df or len(factor) > 1:
        factor_index_map = {f: i for i, f in enumerate(factor)}
        # Rows are laid out as consecutive per-stock blocks of n_dates rows,
        # matching MultiIndex.from_product([order_book_ids, dates]).
        arr = np.full((len(order_book_ids) * n_dates, len(factor)), np.nan)
        for order_book_id, factor_name, values in data:
            order_book_id_index = order_book_id_index_map[order_book_id]
            factor_index = factor_index_map[factor_name]
            slice_ = slice(order_book_id_index * n_dates,
                           (order_book_id_index + 1) * n_dates, None)
            arr[slice_, factor_index] = values
        multi_index = pd.MultiIndex.from_product(
            [order_book_ids, dates], names=["order_book_id", "date"])
        df = pd.DataFrame(index=multi_index, columns=factor, data=arr)
        return df

    # Single factor: dates x order_book_ids
    arr = np.full((n_dates, len(order_book_ids)), np.nan)
    for order_book_id, _, values in data:
        arr[:, order_book_id_index_map[order_book_id]] = values
    df = pd.DataFrame(index=dates, columns=order_book_ids, data=arr)

    if len(df.index) == 1:
        return df.iloc[0]
    if len(df.columns) == 1:
        return df[df.columns[0]]
    return df
cons = [ rqoptimizer2.WildcardIndustryConstraint( lower_limit=-0.01, upper_limit=0.01, relative=True, hard=False, classification=rqoptimizer2.IndustryClassification.ZX), rqoptimizer2.WildcardStyleConstraint(lower_limit=-0.3, upper_limit=0.3, relative=True, hard=False) ] # 获取前一交易日中证800成分股的净利润增长率(TTM) previous_date = rqdatac.get_previous_trading_date(date) index_component = rqdatac.index_components('000906.XSHG', previous_date) indicator_series = rqdatac.get_factor(index_component, 'net_profit_growth_ratio_ttm', previous_date, previous_date).dropna() selected_stock = generate_stock_pool(previous_date, indicator_series, stock_number=5) # 个股指标得分范围调整至0.1-1.1,避免权重过分集中于部分指标得分较大的个股 adjusted_series = ((indicator_series.loc[selected_stock] - indicator_series.loc[selected_stock].min()) / (indicator_series.loc[selected_stock].max() - indicator_series.loc[selected_stock].min())) + 0.1 portfolio_weight = rqoptimizer2.portfolio_optimize( selected_stock, date, bnds=bounds,
def index_compose(self, code):
    """Return the result of ``rq.index_components`` for index ``code``."""
    components = rq.index_components(code)
    return components
def factor_return_estimation(stock_list, date, factor_exposure):
    """
    Estimate daily factor returns by constrained weighted least squares.

    The regression is run on the whole ``factor_exposure`` universe
    (``whole_market``) and on the components of CSI 300, CSI 500 and CSI 800.
    Regression weights are proportional to the square root of market cap;
    industry factor returns are constrained using per-industry market-cap
    totals.

    :param stock_list: order_book_ids used for the price and market-cap queries
    :param date: 'YYYY-MM-DD' string; the latest trading date is the trading
        day preceding ``date`` + 1 day
    :param factor_exposure: DataFrame of exposures indexed by stock; must
        contain the ``CNE5S_*`` industry columns listed below
    :return: DataFrame with columns ``whole_market``, ``csi_300``,
        ``csi_500``, ``csi_800``; NaN entries are replaced by 0
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Daily return in excess of the daily risk-free rate
    daily_return = rqdatac.get_price(order_book_ids=stock_list,
                                     start_date=previous_trading_date,
                                     end_date=latest_trading_date,
                                     fields='close').pct_change()[-1:].T
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']
    # 1.0 / 252 keeps this a true division on Python 2 as well
    daily_risk_free_return = (
        ((1 + compounded_risk_free_return)**(1.0 / 252)) - 1)
    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Square root of market cap is the weight of the weighted least squares
    market_cap = rqdatac.get_factor(id_or_symbols=stock_list,
                                    factor='a_share_market_val',
                                    start_date=latest_trading_date,
                                    end_date=latest_trading_date)
    sqrt_market_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_market_cap / sqrt_market_cap.sum()

    # Industry columns whose market-cap totals feed the constraint
    industry_factors = [
        'CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN',
        'CNE5S_MATERIAL', 'CNE5S_AERODEF', 'CNE5S_BLDPROD', 'CNE5S_CNSTENG',
        'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH', 'CNE5S_TRDDIST',
        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN',
        'CNE5S_AUTO', 'CNE5S_HOUSEDUR', 'CNE5S_LEISLUX', 'CNE5S_CONSSERV',
        'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',
        'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE'
    ]
    industry_total_market_cap = market_cap.dot(
        factor_exposure[industry_factors])

    def _benchmark_factor_return(index_code):
        # Constrained WLS restricted to one index's components.
        components = rqdatac.index_components(index_name=index_code,
                                              date=previous_trading_date)
        # Per-industry market cap inside the benchmark
        benchmark_industry_cap = market_cap[components].dot(
            factor_exposure[industry_factors].loc[components])
        # An industry whose total is below 100 is treated as absent
        missing_industry = benchmark_industry_cap[
            benchmark_industry_cap < 100].index
        benchmark_industry_cap = benchmark_industry_cap.drop(missing_industry)

        return constrainted_weighted_least_square(
            Y=daily_excess_return[factor_exposure.index][components].values[0],
            X=factor_exposure.drop(missing_industry,
                                   axis=1).loc[components],
            weight=normalized_regression_weight[
                factor_exposure.index][components],
            industry_total_market_cap=benchmark_industry_cap,
            unconstrained_variables=10,
            constrained_variables=len(benchmark_industry_cap))

    factor_return_series = pd.DataFrame()
    # 10 style factors unconstrained, 32 industry factors constrained.
    # NOTE(review): `industry_factors` above lists 27 columns while
    # constrained_variables is fixed at 32 - confirm which one is intended.
    factor_return_series['whole_market'] = constrainted_weighted_least_square(
        Y=daily_excess_return[factor_exposure.index].values[0],
        X=factor_exposure,
        weight=normalized_regression_weight[factor_exposure.index],
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=32)

    benchmarks = (
        ('csi_300', '000300.XSHG'),  # CSI 300
        ('csi_500', '000905.XSHG'),  # CSI 500
        ('csi_800', '000906.XSHG'),  # CSI 800
    )
    for column, index_code in benchmarks:
        factor_return_series[column] = _benchmark_factor_return(index_code)

    # An industry with no stocks in the index gets a factor return of 0
    return factor_return_series.replace(np.nan, 0)