Пример #1
0
 def ingest(
     environ: Mapping,
     asset_db_writer: AssetDBWriter,
     minute_bar_writer: BcolzMinuteBarWriter,
     daily_bar_writer: BcolzDailyBarWriter,
     adjustment_writer: SQLiteAdjustmentWriter,
     calendar: TradingCalendar,
     start_session: pd.Timestamp,
     end_session: pd.Timestamp,
     cache: dataframe_cache,
     show_progress: bool,
     output_dir: Text,
 ) -> None:
     """Write futures metadata, minute bars and daily bars for ``symbols``.

     ``symbols`` is read from the enclosing scope. Each symbol is paired
     with its position in that sequence, and the resulting pairs are handed
     to the metadata and bar helpers.

     Parameters
     ----------
     asset_db_writer
         Receives the futures metadata and a single-row exchanges frame
         (``'bitmex'`` / ``'UTC'``).
     minute_bar_writer, daily_bar_writer
         Receive the minute and daily bars respectively.
     start_session, end_session, cache
         Forwarded unchanged to the bar helpers.
     show_progress
         Forwarded to both bar writers.
     environ, adjustment_writer, calendar, output_dir
         Accepted but not used by this function.
     """
     # (index, symbol) pairs; the index comes from the symbol's position.
     sid_map = list(enumerate(symbols))

     exchanges = pd.DataFrame(
         data=[['bitmex', 'UTC']],
         columns=['exchange', 'timezone'],
     )
     asset_db_writer.write(
         futures=_get_metadata(sid_map),
         exchanges=exchanges,
     )

     minute_bar_writer.write(
         _get_minute_bars(sid_map, start_session, end_session, cache),
         show_progress=show_progress,
     )
     daily_bar_writer.write(
         _get_daily_bars(sid_map, start_session, end_session, cache),
         show_progress=show_progress,
     )
Пример #2
0
 def init_instance_fixtures(self):
     """Create the writer on an ``assets.db`` file in the instance tmpdir."""
     super(TestWrite, self).init_instance_fixtures()
     db_file = os.path.join(self.instance_tmpdir.path, 'assets.db')
     self.assets_db_path = db_file
     self.writer = AssetDBWriter(db_file)
Пример #3
0
    def test_v5_to_v4_selects_most_recent_ticker(self):
        """Downgrade to version 4 and compare the resulting equities rows.

        Four rows are written, two of which share index 2; after the
        downgrade the ``equities`` table is expected to hold three rows.
        """
        ts = pd.Timestamp
        rows = [
            ['A', 'A', ts('2014-01-01'), ts('2014-01-02')],
            ['B', 'B', ts('2014-01-01'), ts('2014-01-02')],
            # these two are both ticker sid 2
            ['B', 'C', ts('2014-01-03'), ts('2014-01-04')],
            ['C', 'C', ts('2014-01-01'), ts('2014-01-02')],
        ]
        equities = pd.DataFrame(
            rows,
            index=[0, 1, 2, 2],
            columns=['symbol', 'asset_name', 'start_date', 'end_date'],
        )
        AssetDBWriter(self.engine).write(equities=equities)

        downgrade(self.engine, 4)
        metadata = sa.MetaData(self.engine)
        metadata.reflect()

        day_1 = ts('2014-01-01').value
        day_2 = ts('2014-01-02').value
        day_4 = ts('2014-01-04').value
        expected_data = {
            (0, 'A', 'A', day_1, day_2),
            (1, 'B', 'B', day_1, day_2),
            (2, 'B', 'C', day_1, day_4),
        }

        result = sa.select(metadata.tables['equities'].c).execute()
        actual_data = {
            (r.sid, r.symbol, r.asset_name, r.start_date, r.end_date)
            for r in result
        }

        assert_equal(expected_data, actual_data)
Пример #4
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 trading_schedule=default_nyse_schedule,
                 asset_db_path=':memory:'):
        """Load benchmark/treasury data and set up the asset database.

        Parameters
        ----------
        load : callable, optional
            Called with the schedule's ``day``, its ``schedule.index`` and
            the benchmark symbol; must return a
            ``(benchmark_returns, treasury_curves)`` pair. Falls back to
            ``load_market_data`` when falsy.
        bm_symbol : str, optional
            Benchmark symbol passed to ``load``.
        exchange_tz : str, optional
            Stored on the instance as ``exchange_tz``.
        trading_schedule : optional
            Source of the ``day`` and ``schedule.index`` loader arguments.
        asset_db_path : str or engine or None, optional
            A string is turned into a ``sqlite:///`` engine; anything else
            is used as the engine directly. ``None`` disables the asset
            finder.
        """
        self.bm_symbol = bm_symbol
        loader = load if load else load_market_data

        market_data = loader(
            trading_schedule.day,
            trading_schedule.schedule.index,
            self.bm_symbol,
        )
        self.benchmark_returns, self.treasury_curves = market_data

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///%s' % asset_db_path)
        else:
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            # Make sure the schema exists before a finder is attached.
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(engine)
Пример #5
0
    def write_data(self, **kwargs):
        """Write the given frames to the asset db behind ``self.engine``.

        Parameters
        ----------
        **kwargs
            Passed through unchanged to ``AssetDBWriter.write``.
        """
        writer = AssetDBWriter(self.engine)
        writer.write(**kwargs)
Пример #6
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 min_date=None,
                 max_date=None,
                 env_trading_calendar=tradingcalendar,
                 asset_db_path=':memory:'):
        """Build calendar attributes, load market data, set up the asset db.

        Parameters
        ----------
        load : callable, optional
            Called with ``(trading_day, trading_days, bm_symbol)`` and must
            return ``(benchmark_returns, treasury_curves)``. Falls back to
            ``load_market_data`` when falsy.
        bm_symbol : str, optional
            Benchmark symbol passed to ``load``.
        exchange_tz : str, optional
            Stored on the instance as ``exchange_tz``.
        min_date, max_date : optional
            Bounds used to slice the calendar's trading days. A truthy
            ``max_date`` also masks the treasury curves.
        env_trading_calendar : optional
            Object providing ``trading_day``, ``trading_days``,
            ``get_early_closes`` and ``open_and_closes``.
        asset_db_path : str or engine or None, optional
            A string is turned into a ``sqlite:///`` engine; anything else
            is used as the engine directly. ``None`` disables the asset
            finder.
        """
        calendar = env_trading_calendar
        self.trading_day = calendar.trading_day.copy()

        # Restrict the calendar's trading days to [min_date, max_date].
        all_days = calendar.trading_days
        window = all_days.slice_indexer(min_date, max_date)
        self.trading_days = all_days[window]

        self.first_trading_day = self.trading_days[0]
        self.last_trading_day = self.trading_days[-1]

        self.early_closes = calendar.get_early_closes(
            self.first_trading_day,
            self.last_trading_day,
        )
        self.open_and_closes = calendar.open_and_closes.loc[self.trading_days]

        self.bm_symbol = bm_symbol
        loader = load if load else load_market_data

        benchmark, curves = loader(
            self.trading_day,
            self.trading_days,
            self.bm_symbol,
        )
        self.benchmark_returns = benchmark
        self.treasury_curves = curves

        if max_date:
            # Mask the treasury curves down to the current date.
            # In the case of live trading, the last date in the treasury
            # curves would be the day before the date considered to be
            # 'today'.
            self.treasury_curves = curves[curves.index <= max_date]

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///%s' % asset_db_path)
        else:
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            # Make sure the schema exists before a finder is attached.
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(engine)
Пример #7
0
    def __init__(
        self,
        load=None,
        bm_symbol='SPY',
        exchange_tz="US/Eastern",
        trading_calendar=None,
        trading_day=None,
        trading_days=None,
        asset_db_path=':memory:',
        future_chain_predicates=CHAIN_PREDICATES,
        environ=None,
    ):
        """Load benchmark/treasury data and set up the asset database.

        Parameters
        ----------
        load : callable, optional
            Called with ``(trading_day, trading_days, bm_symbol)`` and must
            return ``(benchmark_returns, treasury_curves)``. When falsy,
            ``load_market_data`` bound to ``environ`` is used.
        bm_symbol : str, optional
            Benchmark symbol passed to ``load``.
        exchange_tz : str, optional
            Stored on the instance as ``exchange_tz``.
        trading_calendar : optional
            Supplies ``day`` / ``schedule.index`` for whichever of
            ``trading_day`` / ``trading_days`` is ``None``. The "NYSE"
            calendar is fetched when this is falsy and needed.
        trading_day, trading_days : optional
            Explicit loader arguments; each defaults from the calendar.
        asset_db_path : str or engine or None, optional
            A string is turned into a ``sqlite:///`` engine; anything else
            is used as the engine directly. ``None`` disables the asset
            finder.
        future_chain_predicates : optional
            Forwarded to ``AssetFinder``.
        environ : optional
            Only used to build the default loader.
        """
        self.bm_symbol = bm_symbol
        loader = load or partial(load_market_data, environ=environ)

        # The calendar is only needed when at least one of the two
        # schedule arguments has to be derived from it.
        needs_calendar = trading_day is None or trading_days is None
        if needs_calendar and not trading_calendar:
            trading_calendar = get_calendar("NYSE")
        if trading_day is None:
            trading_day = trading_calendar.day
        if trading_days is None:
            trading_days = trading_calendar.schedule.index

        market_data = loader(trading_day, trading_days, self.bm_symbol)
        self.benchmark_returns, self.treasury_curves = market_data

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///' + asset_db_path)
        else:
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            # Make sure the schema exists before a finder is attached.
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(
                engine,
                future_chain_predicates=future_chain_predicates,
            )
Пример #8
0
    def __init__(
        self,
        load=None,
        bm_symbol=None,
        exchange_tz="Asia/Shanghai",
        trading_calendar=None,
        asset_db_path=':memory:',
        future_chain_predicates=CHAIN_PREDICATES,
        environ=None,
    ):
        """Load benchmark/treasury data and set up the asset database.

        Parameters
        ----------
        load : callable, optional
            Called with the calendar's ``day``, its ``schedule.index`` and
            ``bm_symbol``; must return
            ``(benchmark_returns, treasury_curves)``. When falsy, the
            loader is chosen from ``bm_symbol``: ``load_dump_data`` if it is
            ``None``, ``load_market_data`` otherwise.
        bm_symbol : str or None, optional
            Benchmark symbol passed to ``load``.
        exchange_tz : str, optional
            Stored on the instance as ``exchange_tz``.
        trading_calendar : optional
            Defaults to the "SHSZ" calendar when falsy.
        asset_db_path : str or engine or None, optional
            A string is turned into a ``sqlite:///`` engine; anything else
            is used as the engine directly. ``None`` disables the asset
            finder.
        future_chain_predicates : optional
            Forwarded to ``AssetFinder``.
        environ : optional
            Accepted but not used by this constructor.
        """
        self.bm_symbol = bm_symbol

        if not load:
            # The previous check read `not self.bm_symbol is not None`,
            # which is equivalent to `self.bm_symbol is None`; with a
            # benchmark symbol and no loader, `load` stayed None and the
            # call below raised TypeError.
            # NOTE(review): `environ` is not forwarded to either loader.
            if self.bm_symbol is None:
                load = load_dump_data
            else:
                load = load_market_data

        if not trading_calendar:
            trading_calendar = get_calendar("SHSZ")

        self.benchmark_returns, self.treasury_curves = load(
            trading_calendar.day,
            trading_calendar.schedule.index,
            self.bm_symbol,
        )

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            asset_db_path = 'sqlite:///' + asset_db_path
            self.engine = engine = create_engine(asset_db_path)
        else:
            self.engine = engine = asset_db_path

        if engine is not None:
            # Make sure the schema exists before a finder is attached.
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(
                engine, future_chain_predicates=future_chain_predicates)
        else:
            self.asset_finder = None
Пример #9
0
    def ingest(name, environ=os.environ, timestamp=None, show_progress=False):
        """Run the ingest function of a registered bundle.

        Parameters
        ----------
        name : str
            The name of the bundle.
        environ : mapping, optional
            The environment variables. By default this is os.environ.
        timestamp : datetime, optional
            The timestamp to use for the load.
            By default this is the current time.
        show_progress : bool, optional
            Tell the ingest function to display the progress where possible.

        Raises
        ------
        UnknownBundle
            If ``name`` is not a key of ``bundles``.
        """
        try:
            bundle = bundles[name]
        except KeyError:
            raise UnknownBundle(name)

        when = pd.Timestamp.utcnow() if timestamp is None else timestamp
        when = when.tz_convert('utc').tz_localize(None)
        timestr = to_bundle_ingest_dirname(when)

        cachepath = cache_path(name, environ=environ)
        pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
        pth.ensure_directory(cachepath)

        # `clean_on_failure=False` keeps the cache directory around when the
        # load fails in the middle.
        with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
                ExitStack() as stack:
            # Bundles that do not ask for writers get None for all of them.
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

            if bundle.create_writers:
                wd = stack.enter_context(
                    working_dir(pth.data_path([], environ=environ)))

                daily_bars_path = wd.ensure_dir(
                    *daily_equity_relative(name, timestr, environ=environ))
                daily_bar_writer = BcolzDailyBarWriter(
                    daily_bars_path,
                    bundle.calendar,
                    bundle.start_session,
                    bundle.end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the dividends.
                daily_bar_writer.write(())

                minute_bars_path = wd.ensure_dir(
                    *minute_equity_relative(name, timestr, environ=environ))
                minute_bar_writer = BcolzMinuteBarWriter(
                    minute_bars_path,
                    bundle.calendar,
                    bundle.start_session,
                    bundle.end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )

                asset_db_path = wd.getpath(
                    *asset_db_relative(name, timestr, environ=environ))
                asset_db_writer = AssetDBWriter(asset_db_path)

                adjustment_db_path = wd.getpath(
                    *adjustment_db_relative(name, timestr, environ=environ))
                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        adjustment_db_path,
                        BcolzDailyBarReader(daily_bars_path),
                        bundle.calendar.all_sessions,
                        overwrite=True,
                    ))

            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                bundle.calendar,
                bundle.start_session,
                bundle.end_session,
                cache,
                show_progress,
                pth.data_path([name, timestr], environ=environ),
            )
Пример #10
0
 def __enter__(self):
     """Create the engine, write the stored frames to it and return it."""
     engine = create_engine(self._url)
     self._eng = engine
     AssetDBWriter(engine).write(**self._frames)
     return engine
Пример #11
0
class TestWrite(WithInstanceTmpDir, ZiplineTestCase):
    """Write with ``AssetDBWriter`` and read back with ``AssetFinder``."""

    def init_instance_fixtures(self):
        """Create the writer on an ``assets.db`` file in the instance tmpdir."""
        super(TestWrite, self).init_instance_fixtures()
        db_file = os.path.join(self.instance_tmpdir.path, 'assets.db')
        self.assets_db_path = db_file
        self.writer = AssetDBWriter(db_file)

    def new_asset_finder(self):
        """Return a new ``AssetFinder`` on the test's database path."""
        finder = AssetFinder(self.assets_db_path)
        return finder

    def test_write_multiple_exchanges(self):
        """Each sid should report the exchange of its last mapping period."""
        # Incrementing by two so that start and end dates for each
        # generated Asset don't overlap (each Asset's end_date is the
        # day after its start date).
        dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
        sids = list(range(5))
        one_day = timedelta(days=1)

        records = []
        for period, start in enumerate(dates):
            for sid in sids:
                records.append({
                    'sid': sid,
                    'real_sid': str(sid),
                    'currency': 'USD',
                    'symbol': str(sid),
                    'start_date': start.value,
                    'end_date': (start + one_day).value,

                    # Change the exchange with each mapping period. We don't
                    # currently support point in time exchange information,
                    # so we just take the most recent by end date.
                    'exchange': 'EXCHANGE-%d-%d' % (sid, period),
                })
        self.writer.write(equities=pd.DataFrame.from_records(records))

        finder = self.new_asset_finder()
        last_period = len(dates) - 1

        for equity in finder.retrieve_all(finder.sids):
            assert_equal(
                equity.exchange,
                'EXCHANGE-%d-%d' % (equity.sid, last_period),
            )

    def test_write_direct(self):
        """Frames given to ``write_direct`` should round-trip via the finder."""
        # don't include anything with a default to test that those work.
        equities_frame = pd.DataFrame({
            'sid': [0, 1],
            'real_sid': ['0', '1'],
            'currency': ['USD', 'CAD'],
            'asset_name': ['Ayy Inc.', 'Lmao LP'],
            # the full exchange name
            'exchange': ['NYSE', 'TSE'],
        })
        symbol_mappings_frame = pd.DataFrame({
            'sid': [0, 1],
            'symbol': ['AYY', 'LMAO'],
            'company_symbol': ['AYY', 'LMAO'],
            'share_class_symbol': ['', ''],
        })
        exchanges_frame = pd.DataFrame({
            'exchange': ['NYSE', 'TSE'],
            'country_code': ['US', 'JP'],
        })

        self.writer.write_direct(
            equities=equities_frame,
            equity_symbol_mappings=symbol_mappings_frame,
            exchanges=exchanges_frame,
        )

        finder = self.new_asset_finder()
        actual_equities = finder.retrieve_all(finder.sids)

        # Field values that are identical for both expected equities.
        shared_fields = dict(
            start_date=pd.Timestamp(0, tz='UTC'),
            end_date=pd.Timestamp.max.tz_localize('UTC'),
            first_traded=None,
            auto_close_date=None,
            tick_size=0.01,
            multiplier=1.0,
        )
        expected_equities = [
            Equity(
                0,
                '0',
                ExchangeInfo('NYSE', 'NYSE', 'US'),
                currency='USD',
                symbol='AYY',
                asset_name='Ayy Inc.',
                **shared_fields
            ),
            Equity(
                1,
                '1',
                ExchangeInfo('TSE', 'TSE', 'JP'),
                currency='CAD',
                symbol='LMAO',
                asset_name='Lmao LP',
                **shared_fields
            ),
        ]
        assert_equal(actual_equities, expected_equities)

        expected_exchange_info = {
            'NYSE': ExchangeInfo('NYSE', 'NYSE', 'US'),
            'TSE': ExchangeInfo('TSE', 'TSE', 'JP'),
        }
        assert_equal(finder.exchange_info, expected_exchange_info)
Пример #12
0
    def init_instance_fixtures(self):
        """Attach a writer and a finder to one shared empty assets db."""
        super(AssetFinderMultipleCountries, self).init_instance_fixtures()

        # Writer and finder are given the same connection object.
        db = self.enter_instance_context(empty_assets_db())
        writer = AssetDBWriter(db)
        self._asset_writer = writer
        self.asset_finder = AssetFinder(db)
Пример #13
0
    def init_instance_fixtures(self):
        """Attach a writer and a finder to one shared empty assets db.

        The finder is built with ``self.asset_finder_type``.
        """
        super(AssetFinderTestCase, self).init_instance_fixtures()

        # Writer and finder are given the same connection object.
        db = self.enter_instance_context(empty_assets_db())
        writer = AssetDBWriter(db)
        self._asset_writer = writer
        self.asset_finder = self.asset_finder_type(db)
Пример #14
0
    def ingest(name,
               environ=os.environ,
               timestamp=None,
               assets_versions=(),
               show_progress=False):
        """Ingest data for a given bundle.

        Parameters
        ----------
        name : str
            The name of the bundle.
        environ : mapping, optional
            The environment variables. By default this is os.environ.
        timestamp : datetime, optional
            The timestamp to use for the load.
            By default this is the current time.
        assets_versions : Iterable[int], optional
            Versions of the assets db to which to downgrade.
        show_progress : bool, optional
            Tell the ingest function to display the progress where possible.

        Raises
        ------
        UnknownBundle
            If ``name`` is not a key of ``bundles``.
        ValueError
            If ``assets_versions`` is non-empty but the bundle does not
            create writers.
        """
        try:
            bundle = bundles[name]
        except KeyError:
            raise UnknownBundle(name)

        calendar = get_calendar(bundle.calendar_name)

        start_session = bundle.start_session
        end_session = bundle.end_session

        # Clamp the bundle's session range to what the calendar covers; a
        # missing bound falls back to the calendar's own first/last session.
        if start_session is None or start_session < calendar.first_session:
            start_session = calendar.first_session

        if end_session is None or end_session > calendar.last_session:
            end_session = calendar.last_session

        if timestamp is None:
            timestamp = pd.Timestamp.utcnow()
        # Convert to UTC, then drop the timezone information.
        timestamp = timestamp.tz_convert('utc').tz_localize(None)

        timestr = to_bundle_ingest_dirname(timestamp)
        cachepath = cache_path(name, environ=environ)
        pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
        pth.ensure_directory(cachepath)
        with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
                ExitStack() as stack:
            # we use `clean_on_failure=False` so that we don't purge the
            # cache directory if the load fails in the middle
            if bundle.create_writers:
                wd = stack.enter_context(
                    working_dir(pth.data_path([], environ=environ)))
                daily_bars_path = wd.ensure_dir(
                    *daily_equity_relative(name, timestr))
                daily_bar_writer = BcolzDailyBarWriter(
                    daily_bars_path,
                    calendar,
                    start_session,
                    end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the dividends.

                daily_bar_writer.write(())
                minute_bar_writer = BcolzMinuteBarWriter(
                    wd.ensure_dir(*minute_equity_relative(name, timestr)),
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )
                # Kept in a local because the downgrade loop below copies
                # from this path.
                assets_db_path = wd.getpath(*asset_db_relative(name, timestr))
                asset_db_writer = AssetDBWriter(assets_db_path)

                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        wd.getpath(*adjustment_db_relative(name, timestr)),
                        BcolzDailyBarReader(daily_bars_path),
                        overwrite=True,
                    ))
            else:
                daily_bar_writer = None
                minute_bar_writer = None
                asset_db_writer = None
                adjustment_db_writer = None
                # `wd` and `assets_db_path` are not bound on this branch, so
                # the downgrade loop below must not run; fail early instead.
                if assets_versions:
                    raise ValueError('Need to ingest a bundle that creates '
                                     'writers in order to downgrade the assets'
                                     ' db.')
            log.info("Ingesting {}.", name)
            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                calendar,
                start_session,
                end_session,
                cache,
                show_progress,
                pth.data_path([name, timestr], environ=environ),
            )

            # For each distinct requested version, highest first, copy the
            # freshly written assets db and downgrade the copy.
            for version in sorted(set(assets_versions), reverse=True):
                version_path = wd.getpath(*asset_db_relative(
                    name,
                    timestr,
                    db_version=version,
                ))
                with working_file(version_path) as wf:
                    shutil.copy2(assets_db_path, wf.path)
                    downgrade(wf.path, version)
Пример #15
0
    def asset_finder(self):
        """Initialise the asset db on ``self.db_engine`` and return a finder."""
        writer = AssetDBWriter(self.db_engine)
        writer.init_db()

        finder = AssetFinder(self.db_engine)
        return finder