def fetch_trade_status_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             offset=0):
    """Fetch the look-ahead ``is_open`` flag for every universe member.

    ``Market.isOpen`` is read through a ``LEAD(isOpen, offset)`` window that
    is partitioned by ``Market.code`` and ordered by ``Market.trade_date``;
    the resulting column is labelled ``is_open``.

    Args:
        universe: Universe whose ``query`` method yields the
            ``trade_date`` / ``code`` pairs to keep.
        start_date: Lower bound of the date range (used when ``dates`` is
            not given or empty).
        end_date: Upper bound of the date range (used when ``dates`` is not
            given or empty).
        dates: Explicit dates. When non-empty, its first and last elements
            replace ``start_date`` and ``end_date``.
        offset: Number of rows to look ahead in the window function; also
            used to push the upper date bound forward.

    Returns:
        The query result merged (pandas default inner merge) with the
        universe's ``trade_date`` / ``code`` columns.
    """
    codes = universe.query(self, start_date, end_date, dates)

    # Explicit None/length test instead of ``if dates:`` so array-like inputs
    # (numpy arrays, pandas Index/Series) do not raise on truthiness. For
    # lists and tuples the outcome is identical to the truthiness test.
    if dates is not None and len(dates) > 0:
        start_date = dates[0]
        end_date = dates[-1]

    # Move the upper bound forward by the period string '<offset>b' on the
    # 'china.sse' calendar -- presumably so the look-ahead rows fall inside
    # the selected range (TODO confirm against advanceDateByCalendar).
    end_date = advanceDateByCalendar(
        'china.sse', end_date, str(offset) + 'b').strftime('%Y-%m-%d')

    stats = func.lead(Market.isOpen, offset).over(
        partition_by=Market.code,
        order_by=Market.trade_date).label('is_open')

    cte = select([Market.trade_date, Market.code, stats]).where(
        and_(
            Market.trade_date.between(start_date, end_date),
            Market.code.in_(codes.code.unique().tolist())
        )
    ).cte('cte')

    query = select([cte]).select_from(cte).order_by(
        cte.columns['trade_date'], cte.columns['code'])
    df = pd.read_sql(query, self.engine)

    # Keep only the (trade_date, code) pairs that belong to the universe.
    return pd.merge(df, codes[['trade_date', 'code']], on=['trade_date', 'code'])
def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
    """Return the sorted, de-duplicated codes selected by ``universe``.

    ``universe.query(ref_date)`` supplies the WHERE condition applied to a
    distinct ``(trade_date, code)`` selection from ``UniverseTable``.
    """
    universe_filter = universe.query(ref_date)
    stmt = select(
        [UniverseTable.trade_date, UniverseTable.code]
    ).distinct().where(universe_filter)

    rows = self.engine.execute(stmt).fetchall()

    # Column 1 of every row is the code; a set removes repeats.
    unique_codes = set()
    for row in rows:
        unique_codes.add(row[1])

    ordered = list(unique_codes)
    ordered.sort()
    return ordered
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw',
                         level: int = 1):
    """Fetch the industry classification of universe members.

    Args:
        universe: Universe supplying the join condition
            (``_query_statements``) and, when ``is_filtered`` is set, the
            membership frame used for a final inner merge.
        start_date: Range start forwarded to the universe.
        end_date: Range end forwarded to the universe.
        dates: Explicit dates forwarded to the universe.
        category: Industry category key, translated by
            ``_map_industry_category``.
        level: Selects the ``industryID<level>`` / ``industryName<level>``
            columns of ``Industry``.

    Returns:
        DataFrame with ``trade_date``, ``code``, ``industry_code`` and
        ``industry`` (plus whatever the universe merge contributes), with
        rows containing NaN dropped and one row per (trade_date, code).
    """
    classification = _map_industry_category(category)
    universe_cond = universe._query_statements(start_date, end_date, dates)

    join_cond = and_(
        Industry.trade_date == UniverseTable.trade_date,
        Industry.code == UniverseTable.code,
        Industry.industry == classification,
        universe_cond,
    )
    joined = join(Industry, UniverseTable, join_cond)

    # Column names depend on the requested classification level.
    level_suffix = str(level)
    id_column = getattr(Industry, 'industryID' + level_suffix).label('industry_code')
    name_column = getattr(Industry, 'industryName' + level_suffix).label('industry')

    stmt = select(
        [Industry.trade_date, Industry.code, id_column, name_column]
    ).select_from(joined).distinct()

    frame = pd.read_sql(stmt, self.engine)
    frame = frame.dropna()

    if universe.is_filtered:
        members = universe.query(self, start_date, end_date, dates)
        frame = pd.merge(frame, members, how='inner', on=['trade_date', 'code'])
        frame = frame.sort_values(['trade_date', 'code'])

    return frame.drop_duplicates(['trade_date', 'code'])
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Fetch transformed factor values plus ``chgPct`` for a universe.

    Args:
        universe: Universe queried for the (trade_date, code) membership.
        factors: A ``Transformer`` or the raw input used to build one.
        start_date: Range start (used when ``dates`` is ``None``).
        end_date: Range end (used when ``dates`` is ``None``).
        dates: Explicit dates; takes precedence over the range when not
            ``None``.
        external_data: Optional frame merged on ``trade_date`` / ``code``;
            rows with NaN are dropped after that merge.
        used_factor_tables: Tables searched for the factor columns; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        The transformer output with a ``chgPct`` column, restricted to the
        universe members and de-duplicated on (trade_date, code).
    """
    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
    dependency = transformer.dependency

    source_tables = used_factor_tables if used_factor_tables else factor_tables
    factor_cols = _map_factors(dependency, source_tables)

    def _date_clause():
        # Build a fresh date filter on Market each time it is needed.
        if dates is not None:
            return Market.trade_date.in_(dates)
        return Market.trade_date.between(start_date, end_date)

    # Outer-join every distinct factor table onto Market exactly once.
    big_table = Market
    joined_tables = {Market.__table__.name}
    for t in set(factor_cols.values()):
        table_name = t.__table__.name
        if table_name in joined_tables:
            continue
        on_clause = and_(Market.trade_date == t.trade_date,
                         Market.code == t.code,
                         _date_clause())
        big_table = outerjoin(big_table, t, on_clause)
        joined_tables.add(table_name)

    universe_df = universe.query(self, start_date, end_date, dates)

    selected = [Market.trade_date, Market.code, Market.chgPct] + list(factor_cols.keys())
    row_filter = and_(
        Market.code.in_(universe_df.code.unique().tolist()),
        _date_clause()
    )
    query = select(selected).select_from(big_table).where(row_filter).distinct()

    raw = pd.read_sql(query, self.engine)
    df = raw.replace([-np.inf, np.inf], np.nan)

    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

    df = df.sort_values(['trade_date', 'code'])
    df = df.set_index('trade_date')

    res = transformer.transform('code', df)
    res = res.replace([-np.inf, np.inf], np.nan)
    res['chgPct'] = df['chgPct']
    res = res.reset_index()

    merged = pd.merge(res, universe_df[['trade_date', 'code']], how='inner')
    return merged.drop_duplicates(['trade_date', 'code'])
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch the risk covariance table and the universe's risk exposures.

    Args:
        universe: Universe supplying the join condition
            (``_query_statements``) and, when ``is_filtered`` is set, the
            membership frame used for a final inner merge.
        start_date: Range start (used when ``dates`` is falsy).
        end_date: Range end (used when ``dates`` is falsy).
        dates: Explicit dates for the covariance query.
        risk_model: Key passed to ``_map_risk_model_table`` to pick the
            covariance table and the special-risk column.
        excluded: Names from ``total_risk_factors`` to leave out of the
            exposure columns.

    Returns:
        ``(risk_cov, risk_exp)``: the covariance rows sorted by
        ``trade_date`` / ``FactorID``, and the exposure rows
        (``trade_date``, ``code``, special risk column, remaining factors).
    """
    risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]

    # NOTE(review): truthiness test -- an empty ``dates`` falls back to the
    # start/end range here.
    cond = risk_cov_table.trade_date.in_(dates) if dates \
        else risk_cov_table.trade_date.between(start_date, end_date)

    query = select([
        risk_cov_table.trade_date,
        risk_cov_table.FactorID,
        risk_cov_table.Factor
    ] + cov_risk_cols).where(cond)

    risk_cov = pd.read_sql(query, self.engine).sort_values(
        ['trade_date', 'FactorID'])

    # Materialise the exclusion set once. Rebuilding it per factor is wasted
    # work, and with a one-shot iterable only the first factor would ever be
    # compared against the exclusions.
    excluded_names = set(excluded) if excluded else set()

    risk_exposure_cols = [
        FullFactor.__table__.columns[f] for f in total_risk_factors
        if f not in excluded_names
    ]

    cond = universe._query_statements(start_date, end_date, dates)

    big_table = join(
        FullFactor, UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code,
             cond))

    query = select(
        [FullFactor.trade_date, FullFactor.code, special_risk_col]
        + risk_exposure_cols
    ).select_from(big_table).distinct()

    risk_exp = pd.read_sql(query, self.engine)

    if universe.is_filtered:
        codes = universe.query(self, start_date, end_date, dates)
        risk_exp = pd.merge(
            risk_exp, codes, how='inner', on=['trade_date', 'code']
        ).sort_values(['trade_date', 'code'])

    return risk_cov, risk_exp
def fetch_factor_range_forward(self,
                               universe: Universe,
                               factors: Union[Transformer, object],
                               start_date: str = None,
                               end_date: str = None,
                               dates: Iterable[str] = None):
    """Fetch the next-row value of the first mapped factor as column ``dx``.

    The first key of the factor-column mapping is read through
    ``LAG(<factor>, -1)`` partitioned by ``Market.code`` and ordered by
    ``Market.trade_date``.

    Args:
        universe: Universe queried for the (trade_date, code) membership.
        factors: A ``Transformer`` or the raw input used to build one.
        start_date: Range start (used when ``dates`` is ``None``).
        end_date: Range end (used when ``dates`` is ``None``).
        dates: Explicit dates; takes precedence in the join condition when
            not ``None``.

    Returns:
        DataFrame of ``trade_date``, ``code``, ``chgPct`` and ``dx``,
        restricted to universe members and de-duplicated on
        (trade_date, code).
    """
    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
    dependency = transformer.dependency
    factor_cols = _map_factors(dependency, factor_tables)

    codes = universe.query(self, start_date, end_date, dates)
    total_codes = codes.code.unique().tolist()
    total_dates = codes.trade_date.astype(str).unique().tolist()

    def _date_clause():
        # Build a fresh date filter on Market for each join.
        if dates is not None:
            return Market.trade_date.in_(dates)
        return Market.trade_date.between(start_date, end_date)

    # Outer-join every distinct factor table onto Market exactly once.
    big_table = Market
    joined_tables = {Market.__table__.name}
    for t in set(factor_cols.values()):
        table_name = t.__table__.name
        if table_name in joined_tables:
            continue
        on_clause = and_(Market.trade_date == t.trade_date,
                         Market.code == t.code,
                         _date_clause())
        big_table = outerjoin(big_table, t, on_clause)
        joined_tables.add(table_name)

    # LAG with a negative offset reads the following row in each partition.
    first_factor = list(factor_cols.keys())[0]
    window = func.lag(first_factor, -1).over(
        partition_by=Market.code,
        order_by=Market.trade_date)
    stats = window.label('dx')

    row_filter = and_(Market.trade_date.in_(total_dates),
                      Market.code.in_(total_codes))
    query = select(
        [Market.trade_date, Market.code, Market.chgPct, stats]
    ).select_from(big_table).where(row_filter)

    df = pd.read_sql(query, self.engine)
    df = df.replace([-np.inf, np.inf], np.nan)
    df = df.sort_values(['trade_date', 'code'])

    merged = pd.merge(df, codes[['trade_date', 'code']], how='inner')
    return merged.drop_duplicates(['trade_date', 'code'])
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Return whatever ``universe.query`` yields for the given date spec.

    This object is handed to the universe as its first argument, followed
    by ``start_date``, ``end_date`` and ``dates`` unchanged.
    """
    members = universe.query(self, start_date, end_date, dates)
    return members
def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
    """Return the universe's codes for ``ref_date`` in ascending order.

    The universe is queried with ``ref_date`` as both range bounds; the
    ``code`` column of the result is returned as a sorted list (repeated
    values, if any, are kept).
    """
    members = universe.query(self, ref_date, ref_date)
    codes = members.code.tolist()
    codes.sort()
    return codes
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Fetch raw factor columns plus transformed factors and ``isOpen``.

    Args:
        universe: Universe queried for the (trade_date, code) membership.
        factors: A ``Transformer`` or the raw input used to build one.
        start_date: Range start (used when ``dates`` is ``None``).
        end_date: Range end (used when ``dates`` is ``None``).
        dates: Explicit dates; takes precedence over the range when not
            ``None``.
        external_data: Optional frame merged on ``trade_date`` / ``code``;
            rows with NaN are dropped after that merge.
        used_factor_tables: Tables searched for the factor columns; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        The queried frame, extended with every transformer output column it
        did not already contain, with ``isOpen`` cast to ``bool`` and rows
        restricted to the universe members.
    """
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    dependency = transformer.dependency

    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)

    def _date_clause():
        # Build a fresh date filter on FullFactor each time it is needed.
        if dates is not None:
            return FullFactor.trade_date.in_(dates)
        return FullFactor.trade_date.between(start_date, end_date)

    # Outer-join every distinct factor table onto FullFactor exactly once.
    big_table = FullFactor
    joined_tables = {FullFactor.__table__.name}
    for t in set(factor_cols.values()):
        if t.__table__.name not in joined_tables:
            big_table = outerjoin(
                big_table, t,
                and_(FullFactor.trade_date == t.trade_date,
                     FullFactor.code == t.code,
                     _date_clause()))
            joined_tables.add(t.__table__.name)

    universe_df = universe.query(self, start_date, end_date, dates)

    query = select(
        [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen]
        + list(factor_cols.keys())
    ).select_from(big_table).where(
        and_(
            FullFactor.code.in_(universe_df.code.unique().tolist()),
            _date_clause()
        )
    ).distinct()

    df = pd.read_sql(query, self.engine)

    if universe.is_filtered:
        # Reuse the membership frame fetched above instead of issuing the
        # identical universe query a second time.
        df = pd.merge(df, universe_df, how='inner', on=['trade_date', 'code'])

    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

    df.sort_values(['trade_date', 'code'], inplace=True)
    df.set_index('trade_date', inplace=True)
    res = transformer.transform('code', df)

    # Copy over only the transformer outputs that are genuinely new; the key
    # column and the open flag are never overwritten.
    protected = {'code', 'isOpen'}
    for col in res.columns:
        if col not in protected and col not in df.columns:
            # Positional assignment: ``.values`` bypasses index alignment.
            df[col] = res[col].values

    df['isOpen'] = df.isOpen.astype(bool)
    df = df.reset_index()
    return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')