def fetch_risk_model(
        self,
        ref_date: str,
        codes: Iterable[int],
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the factor covariance and the risk exposures for one trade date.

    Args:
        ref_date: trade date both queries are filtered on.
        codes: codes whose exposure rows are requested (``code IN codes``).
        risk_model: key handed to ``_map_risk_model_table`` to pick the
            covariance table and the extra risk column to select.
        excluded: factor names to leave out of the exposure columns. The
            covariance query always selects every name in
            ``total_risk_factors``.

    Returns:
        ``(risk_cov, risk_exp)`` where ``risk_cov`` is sorted by
        ``FactorID`` and ``risk_exp`` holds the distinct exposure rows.
    """
    risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    cov_query = select(
        [risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols
    ).where(risk_cov_table.trade_date == ref_date)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values('FactorID')

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        FullFactor.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    exp_query = select(
        [FullFactor.code, special_risk_col] + risk_exposure_cols
    ).where(
        and_(FullFactor.trade_date == ref_date, FullFactor.code.in_(codes))
    ).distinct()
    risk_exp = pd.read_sql(exp_query, self.engine)

    return risk_cov, risk_exp
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load factor covariances and universe risk exposures over several dates.

    Args:
        universe: supplies the membership condition through
            ``_query_statements`` and, when ``is_filtered`` is true, the
            code list through ``query``.
        start_date: lower bound used when ``dates`` is empty or ``None``.
        end_date: upper bound used when ``dates`` is empty or ``None``.
        dates: explicit trade dates; when truthy the covariance query uses
            ``trade_date IN dates`` instead of the start/end range.
        risk_model: key handed to ``_map_risk_model_table``.
        excluded: factor names to leave out of the exposure columns.

    Returns:
        ``(risk_cov, risk_exp)``; ``risk_cov`` is sorted by
        ``trade_date`` and ``FactorID``.
    """
    risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    if dates:
        cov_cond = risk_cov_table.trade_date.in_(dates)
    else:
        cov_cond = risk_cov_table.trade_date.between(start_date, end_date)
    cov_query = select(
        [risk_cov_table.trade_date,
         risk_cov_table.FactorID,
         risk_cov_table.Factor] + cov_risk_cols
    ).where(cov_cond)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values(
        ['trade_date', 'FactorID'])

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        FullFactor.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    universe_cond = universe._query_statements(start_date, end_date, dates)
    big_table = join(
        FullFactor,
        UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code,
             universe_cond))

    exp_query = select(
        [FullFactor.trade_date, FullFactor.code, special_risk_col]
        + risk_exposure_cols
    ).select_from(big_table).distinct()
    risk_exp = pd.read_sql(exp_query, self.engine)

    if universe.is_filtered:
        # A filtered universe provides its own (trade_date, code) rows;
        # keep only the exposures that match them.
        codes = universe.query(self, start_date, end_date, dates)
        risk_exp = pd.merge(
            risk_exp, codes, how='inner', on=['trade_date', 'code']
        ).sort_values(['trade_date', 'code'])

    return risk_cov, risk_exp
def fetch_risk_model(
        self,
        ref_date: str,
        codes: Iterable[int],
        risk_model: str = 'short',
        excluded: Iterable[str] = None,
        model_type: str = None
) -> Union[Tuple[pd.DataFrame, pd.DataFrame],
           Tuple[FactorRiskModel, pd.DataFrame, pd.DataFrame]]:
    """Load the factor covariance and the risk exposures for one trade date.

    Args:
        ref_date: trade date both queries are filtered on.
        codes: codes whose exposure rows are requested (``code IN codes``).
        risk_model: key handed to ``_map_risk_model_table`` to pick the
            covariance table and the table providing ``SRISK``.
        excluded: factor names to leave out of the exposure columns. The
            covariance query always selects every name in
            ``total_risk_factors``.
        model_type: falsy to get the raw frames, ``'factor'`` to
            additionally build a ``FactorRiskModel``.

    Returns:
        ``(risk_cov, risk_exp)`` when ``model_type`` is falsy, otherwise
        ``(FactorRiskModel, risk_cov, risk_exp)``.

    Raises:
        ValueError: if ``model_type`` is neither falsy nor ``'factor'``.
    """
    # Reject unknown model types before touching the database; previously
    # such a call ran both queries and then fell through, returning None.
    if model_type and model_type != 'factor':
        raise ValueError(f"unsupported model_type: {model_type!r}")

    risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    cov_query = select(
        [risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols
    ).where(risk_cov_table.trade_date == ref_date)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values('FactorID')

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        RiskExposure.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    big_table = join(
        RiskExposure,
        special_risk_table,
        and_(RiskExposure.code == special_risk_table.code,
             RiskExposure.trade_date == special_risk_table.trade_date))

    exp_query = select(
        [RiskExposure.code.label("code"),
         special_risk_table.SRISK.label('srisk')] + risk_exposure_cols
    ).select_from(big_table).where(
        and_(RiskExposure.trade_date == ref_date,
             RiskExposure.code.in_(codes),
             RiskExposure.flag == 1))

    risk_exp = pd.read_sql(exp_query, self.engine)
    # Drop incomplete rows, then keep the first row per code.
    risk_exp = risk_exp.dropna().drop_duplicates(subset=["code"])

    if not model_type:
        return risk_cov, risk_exp

    # model_type == 'factor'
    # NOTE(review): factor_names comes from the covariance frame, which is
    # not affected by ``excluded``; confirm the loading lookup below behaves
    # as intended when factors are excluded from the exposures.
    factor_names = risk_cov.Factor.tolist()
    cov_by_factor = risk_cov.set_index('Factor')
    factor_cov = cov_by_factor.loc[factor_names, factor_names] / 10000.
    exp_by_code = risk_exp.set_index('code')
    factor_loading = exp_by_code.loc[:, factor_names]
    idsync = exp_by_code['srisk'] * exp_by_code['srisk'] / 10000
    return (FactorRiskModel(factor_cov, factor_loading, idsync),
            risk_cov,
            risk_exp)
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load factor covariances and universe risk exposures over several dates.

    Args:
        universe: supplies the membership condition through
            ``_query_statements``.
        start_date: lower bound used when ``dates`` is empty or ``None``.
        end_date: upper bound used when ``dates`` is empty or ``None``.
        dates: explicit trade dates; when truthy the covariance query uses
            ``trade_date IN dates`` instead of the start/end range.
        risk_model: key handed to ``_map_risk_model_table`` to pick the
            covariance table and the table providing ``SRISK``.
        excluded: factor names to leave out of the exposure columns.

    Returns:
        ``(risk_cov, risk_exp)``; ``risk_cov`` is sorted by ``trade_date``
        and ``FactorID``, ``risk_exp`` by ``trade_date`` and ``code`` with
        rows containing missing values removed.
    """
    risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    if dates:
        cov_cond = risk_cov_table.trade_date.in_(dates)
    else:
        cov_cond = risk_cov_table.trade_date.between(start_date, end_date)
    cov_query = select(
        [risk_cov_table.trade_date,
         risk_cov_table.FactorID,
         risk_cov_table.Factor] + cov_risk_cols
    ).where(cov_cond)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values(
        ['trade_date', 'FactorID'])

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        RiskExposure.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    universe_cond = universe._query_statements(start_date, end_date, dates)
    exposure_universe = join(
        RiskExposure,
        UniverseTable,
        and_(RiskExposure.trade_date == UniverseTable.trade_date,
             RiskExposure.code == UniverseTable.code,
             universe_cond))
    big_table = join(
        special_risk_table,
        exposure_universe,
        and_(RiskExposure.code == special_risk_table.code,
             RiskExposure.trade_date == special_risk_table.trade_date))

    exp_query = select(
        [RiskExposure.trade_date,
         RiskExposure.code,
         special_risk_table.SRISK.label('srisk')] + risk_exposure_cols
    ).select_from(big_table).distinct()

    risk_exp = pd.read_sql(exp_query, self.engine)
    risk_exp = risk_exp.sort_values(['trade_date', 'code']).dropna()

    return risk_cov, risk_exp
def fetch_risk_model(
        self,
        ref_date: str,
        codes: Iterable[int],
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the factor covariance and the risk exposures for one trade date.

    Args:
        ref_date: trade date both queries are filtered on.
        codes: codes whose exposure rows are requested (``code IN codes``).
        risk_model: key handed to ``_map_risk_model_table`` to pick the
            covariance table and the table providing ``SRISK``.
        excluded: factor names to leave out of the exposure columns. The
            covariance query always selects every name in
            ``total_risk_factors``.

    Returns:
        ``(risk_cov, risk_exp)`` where ``risk_cov`` is sorted by
        ``FactorID`` and ``risk_exp`` has no missing values and at most one
        row per ``code``.
    """
    risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    cov_query = select(
        [risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols
    ).where(risk_cov_table.trade_date == ref_date)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values('FactorID')

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        RiskExposure.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    big_table = join(
        RiskExposure,
        special_risk_table,
        and_(RiskExposure.code == special_risk_table.code,
             RiskExposure.trade_date == special_risk_table.trade_date))

    exp_query = select(
        [RiskExposure.code, special_risk_table.SRISK.label('srisk')]
        + risk_exposure_cols
    ).select_from(big_table).where(
        and_(RiskExposure.trade_date == ref_date,
             RiskExposure.code.in_(codes))
    ).distinct()

    risk_exp = pd.read_sql(exp_query, self.engine).dropna()

    # DISTINCT only removes fully identical rows; keep the first row per code.
    return risk_cov, risk_exp.drop_duplicates(['code'])
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None,
        model_type: str = None
) -> Union[Tuple[pd.DataFrame, pd.DataFrame],
           Tuple[pd.Series, pd.DataFrame, pd.DataFrame]]:
    """Load factor covariances and universe risk exposures over several dates.

    Args:
        universe: supplies the membership condition through
            ``_query_statements``.
        start_date: lower bound used when ``dates`` is empty or ``None``.
        end_date: upper bound used when ``dates`` is empty or ``None``.
        dates: explicit trade dates; when truthy the covariance query uses
            ``trade_date IN dates`` instead of the start/end range.
        risk_model: key handed to ``_map_risk_model_table`` to pick the
            covariance table and the table providing ``SRISK``.
        excluded: factor names to leave out of the exposure columns.
        model_type: falsy to get the raw frames, ``'factor'`` to
            additionally build one ``FactorRiskModel`` per trade date.

    Returns:
        ``(risk_cov, risk_exp)`` when ``model_type`` is falsy, otherwise
        ``(models, risk_cov, risk_exp)`` where ``models`` is a
        ``pd.Series`` of ``FactorRiskModel`` keyed by trade date. In both
        frames ``trade_date`` is converted with ``pd.to_datetime``.

    Raises:
        ValueError: if ``model_type`` is neither falsy nor ``'factor'``.
    """
    # Reject unknown model types before touching the database; previously
    # such a call ran both queries and then fell through, returning None.
    if model_type and model_type != 'factor':
        raise ValueError(f"unsupported model_type: {model_type!r}")

    risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    if dates:
        cov_cond = risk_cov_table.trade_date.in_(dates)
    else:
        cov_cond = risk_cov_table.trade_date.between(start_date, end_date)
    cov_query = select(
        [risk_cov_table.trade_date,
         risk_cov_table.FactorID,
         risk_cov_table.Factor] + cov_risk_cols
    ).where(cov_cond)
    risk_cov = pd.read_sql(cov_query, self.engine).sort_values(
        ['trade_date', 'FactorID'])
    risk_cov["trade_date"] = pd.to_datetime(risk_cov["trade_date"])

    # Build the exclusion set exactly once: rebuilding it per factor is
    # wasteful and silently stops excluding anything after the first factor
    # when ``excluded`` is a one-shot iterable such as a generator.
    excluded_names = set(excluded) if excluded else set()
    risk_exposure_cols = [
        RiskExposure.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_names
    ]

    universe_cond = universe._query_statements(start_date, end_date, dates)
    exposure_universe = join(
        RiskExposure,
        UniverseTable,
        and_(RiskExposure.trade_date == UniverseTable.trade_date,
             RiskExposure.code == UniverseTable.code,
             RiskExposure.flag == 1,
             universe_cond))
    big_table = join(
        special_risk_table,
        exposure_universe,
        and_(RiskExposure.code == special_risk_table.code,
             RiskExposure.trade_date == special_risk_table.trade_date))

    exp_query = select(
        [RiskExposure.trade_date,
         RiskExposure.code.label("code"),
         special_risk_table.SRISK.label('srisk')] + risk_exposure_cols
    ).select_from(big_table).distinct()

    risk_exp = pd.read_sql(exp_query, self.engine)
    risk_exp = risk_exp.sort_values(['trade_date', 'code'])
    # Drop incomplete rows, then keep the first row per (trade_date, code).
    risk_exp = risk_exp.dropna().drop_duplicates(["trade_date", "code"])
    risk_exp["trade_date"] = pd.to_datetime(risk_exp["trade_date"])

    if not model_type:
        return risk_cov, risk_exp

    # model_type == 'factor': build one model per covariance date.
    cov_groups = risk_cov.set_index('Factor').groupby('trade_date')
    exp_groups = risk_exp.set_index('code').groupby('trade_date')

    models = {}
    for ref_date, cov_g in cov_groups:
        # NOTE: get_group raises KeyError when a covariance date has no
        # exposure rows.
        exp_g = exp_groups.get_group(ref_date)
        factor_names = cov_g.index.tolist()
        factor_cov = cov_g.loc[factor_names, factor_names] / 10000.
        factor_loading = exp_g.loc[:, factor_names]
        idsync = exp_g['srisk'] * exp_g['srisk'] / 10000
        models[ref_date] = FactorRiskModel(factor_cov, factor_loading, idsync)

    return pd.Series(models), risk_cov, risk_exp