示例#1
0
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw'):
        """Fetch the industry classification of every code in *universe*.

        Args:
            universe: universe whose constituents are queried.
            start_date: inclusive range start; ignored when ``dates`` is given.
            end_date: inclusive range end; ignored when ``dates`` is given.
            dates: explicit trade dates; takes precedence over the range.
            category: industry classification scheme key (e.g. ``'sw'``).

        Returns:
            pd.DataFrame with columns ``trade_date``, ``code``,
            ``industry_code`` and ``industry``.
        """
        industry_category_name = _map_industry_category(category)
        cond = universe.query_range(start_date, end_date, dates)

        # Build the date predicate once instead of duplicating the whole
        # join in two branches. Keeps the original truthiness test
        # (`if dates:`), so an empty list falls back to between() as before.
        if dates:
            date_cond = Industry.trade_date.in_(dates)
        else:
            date_cond = Industry.trade_date.between(start_date, end_date)

        big_table = join(
            Industry, UniverseTable,
            and_(Industry.trade_date == UniverseTable.trade_date,
                 Industry.code == UniverseTable.code,
                 Industry.industry == industry_category_name,
                 date_cond, cond))

        query = select([
            Industry.trade_date, Industry.code,
            Industry.industryID1.label('industry_code'),
            Industry.industryName1.label('industry')
        ]).select_from(big_table).distinct()
        return pd.read_sql(query, self.engine)
示例#2
0
 def fetch_codes_range(self,
                       universe: Universe,
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None) -> pd.DataFrame:
     """Return the distinct (trade_date, code) pairs of *universe* within
     the given date range, or on the explicit ``dates`` when provided."""
     range_cond = universe.query_range(start_date, end_date, dates)
     columns = [UniverseTable.trade_date, UniverseTable.code]
     stmt = select(columns).distinct().where(range_cond)
     return pd.read_sql(stmt, self.engine)
示例#3
0
    def fetch_risk_model_range(
            self,
            universe: Universe,
            start_date: str = None,
            end_date: str = None,
            dates: Iterable[str] = None,
            risk_model: str = 'short',
            excluded: Iterable[str] = None
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Fetch a risk model's factor covariance and per-code exposures.

        Args:
            universe: universe whose constituents receive exposure rows.
            start_date: inclusive range start; ignored when ``dates`` is given.
            end_date: inclusive range end; ignored when ``dates`` is given.
            dates: explicit trade dates; takes precedence over the range.
            risk_model: model horizon key mapped to a covariance table.
            excluded: factor names to drop from the exposure columns.

        Returns:
            ``(risk_cov, risk_exp)``: covariance rows sorted by
            ('trade_date', 'FactorID'), and the distinct exposure frame
            joined against *universe*.
        """
        risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

        # --- covariance query: all risk-factor columns of the chosen model.
        cov_cols = [risk_cov_table.__table__.columns[name]
                    for name in total_risk_factors]
        date_filter = (risk_cov_table.trade_date.in_(dates)
                       if dates
                       else risk_cov_table.trade_date.between(start_date,
                                                              end_date))
        cov_query = select(
            [risk_cov_table.trade_date,
             risk_cov_table.FactorID,
             risk_cov_table.Factor] + cov_cols).where(date_filter)
        risk_cov = pd.read_sql(cov_query, self.engine) \
            .sort_values(['trade_date', 'FactorID'])

        # --- exposure query: same factors, minus any explicitly excluded.
        excluded_set = set(excluded) if excluded else set()
        exposure_cols = [FullFactor.__table__.columns[name]
                         for name in total_risk_factors
                         if name not in excluded_set]

        universe_cond = universe.query_range(start_date, end_date, dates)
        joined = join(
            FullFactor, UniverseTable,
            and_(FullFactor.trade_date == UniverseTable.trade_date,
                 FullFactor.code == UniverseTable.code, universe_cond))
        exp_query = select(
            [FullFactor.trade_date, FullFactor.code, special_risk_col]
            + exposure_cols).select_from(joined).distinct()
        risk_exp = pd.read_sql(exp_query, self.engine)

        return risk_cov, risk_exp
示例#4
0
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:
        """Fetch raw factor columns for *universe*, run the transformer
        pipeline on them and return the combined frame.

        Args:
            universe: universe whose constituents are queried.
            factors: a ready ``Transformer`` or an iterable of factor
                expressions from which one is built.
            start_date: inclusive range start; ignored when ``dates`` is given.
            end_date: inclusive range end; ignored when ``dates`` is given.
            dates: explicit trade dates; takes precedence over the range.
            external_data: optional extra columns inner-merged on
                ('trade_date', 'code'); rows with NaN after the merge drop.
            used_factor_tables: optional override of the factor-table map.

        Returns:
            pd.DataFrame sorted by ('trade_date', 'code') containing the
            raw columns, the transformed columns and a boolean ``isOpen``.
        """
        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency
        # Falsy override falls back to the module-level table map, matching
        # the original `if used_factor_tables:` test.
        factor_cols = _map_factors(dependency,
                                   used_factor_tables or factor_tables)

        cond = universe.query_range(start_date, end_date, dates)

        # The date filter is identical for every joined factor table, so
        # build it once instead of re-creating it inside the loop.
        # NOTE: preserves the original `dates is not None` test — an empty
        # list still uses in_() here (unlike fetch_industry_range).
        if dates is not None:
            date_cond = FullFactor.trade_date.in_(dates)
        else:
            date_cond = FullFactor.trade_date.between(start_date, end_date)

        big_table = FullFactor
        for t in set(factor_cols.values()):
            if t.__table__.name != FullFactor.__table__.name:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code,
                         date_cond))

        big_table = join(
            big_table, UniverseTable,
            and_(FullFactor.trade_date == UniverseTable.trade_date,
                 FullFactor.code == UniverseTable.code, cond))

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen]
            + list(factor_cols.keys())) \
            .select_from(big_table).distinct()

        df = pd.read_sql(query,
                         self.engine).sort_values(['trade_date', 'code'])

        if external_data is not None:
            # Inner merge: rows absent from external_data disappear, and
            # any remaining NaN rows are dropped as well.
            df = pd.merge(df, external_data, on=['trade_date',
                                                 'code']).dropna()

        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df)

        # Append only newly computed columns; never clobber raw inputs.
        for col in res.columns:
            if col not in set(['code', 'isOpen']) and col not in df.columns:
                df[col] = res[col].values

        df['isOpen'] = df.isOpen.astype(bool)
        return df.reset_index()