Example No. 1
    def test_bundle(self):
        with open(test_resource_path(
                'quandl_samples',
                'QUANDL_ARCHIVE.zip'), 'rb') as quandl_response:

            self.responses.add(
                self.responses.GET,
                'https://file_url.mock.quandl',
                body=quandl_response.read(),
                content_type='application/zip',
                status=200,
            )

        url_map = {
            format_metadata_url(self.api_key): test_resource_path(
                'quandl_samples',
                'metadata.csv.gz',
            )
        }

        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        with patch_read_csv(url_map):
            ingest('quandl', environ=environ)

        bundle = load('quandl', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.start_date, 'bfill')],
            sessions[sessions.get_loc(self.end_date, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjs_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
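
Every example here resolves fixture files through test_resource_path. A minimal sketch of such a helper, assuming the fixtures live in a resources/ directory next to the test package (the real location inside zipline may differ):

import os

# Assumed fixture root; the actual tests may compute this differently.
_RESOURCE_ROOT = os.path.join(os.path.dirname(__file__), 'resources')


def test_resource_path(*path_parts):
    # Join any number of path components onto the fixture root, e.g.
    # test_resource_path('quandl_samples', 'QUANDL_ARCHIVE.zip').
    return os.path.join(_RESOURCE_ROOT, *path_parts)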
Example No. 3
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )

        # We need to call gc.collect before tearing down our class because we
        # have a cycle between TradingAlgorithm and AlgorithmSimulator which
        # ultimately holds a reference to the pipeline engine passed to the
        # tests here.

        # This means that we're not guaranteed to have deleted our disk-backed
        # resource readers (e.g. SQLiteAdjustmentReader) before trying to
        # delete the tempdir, which causes failures on Windows because Windows
        # doesn't allow you to delete a file if someone still has an open
        # handle to that file.

        # :(
        cls.add_class_callback(gc.collect)
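
The comment above is the point of this fixture: callbacks registered with add_class_callback run when the class fixtures are torn down, and registering gc.collect last makes it run first, breaking the TradingAlgorithm/AlgorithmSimulator cycle so file handles close before the tempdir is deleted. A sketch of that callback mechanism, under the assumption that it behaves like a LIFO ExitStack (hypothetical names, not zipline's actual fixture code):

import gc
from contextlib import ExitStack


class FixtureStack:
    # Hypothetical stand-in for the class-fixture machinery. Callbacks
    # run LIFO on close, so gc.collect (registered after the tempdir's
    # cleanup) runs before the tempdir is removed.
    def __init__(self):
        self._stack = ExitStack()

    def add_class_callback(self, callback):
        self._stack.callback(callback)

    def teardown(self):
        self._stack.close()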
Example No. 4
def per_symbol(symbol):
    df = pd.read_csv(
        test_resource_path('quandl_samples', symbol + '.csv.gz'),
        parse_dates=['Date'],
        index_col='Date',
        usecols=[
            'Open',
            'High',
            'Low',
            'Close',
            'Volume',
            'Date',
            'Ex-Dividend',
            'Split Ratio',
        ],
        na_values=['NA'],
    ).rename(columns={
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
        'Date': 'date',
        'Ex-Dividend': 'ex_dividend',
        'Split Ratio': 'split_ratio',
    })
    df['sid'] = sids[symbol]
    return df
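
In the tests, per_symbol feeds the expected-data computation. A plausible way to combine the per-symbol frames into one (date, sid)-keyed DataFrame; symbols and the sids mapping here are assumptions standing in for the enclosing fixture:

import pandas as pd

symbols = ('AAPL', 'MSFT')                    # assumed fixture symbols
sids = {s: i for i, s in enumerate(symbols)}  # assumed sid assignment

# Stack every symbol's frame, then key rows by (date, sid).
raw_data = pd.concat(map(per_symbol, symbols)).set_index('sid', append=True)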
Example No. 6
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register("test", lambda *args: None)
        cls.add_class_callback(partial(unregister, "test"))

        with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath("example_data/expected_perf/%s" % pd.__version__.replace(".", "-")),
            serialization="pickle",
        )

        # We need to call gc.collect before tearing down our class because we
        # have a cycle between TradingAlgorithm and AlgorithmSimulator which
        # ultimately holds a reference to the pipeline engine passed to the
        # tests here.

        # This means that we're not guaranteed to have deleted our disk-backed
        # resource readers (e.g. SQLiteAdjustmentReader) before trying to
        # delete the tempdir, which causes failures on Windows because Windows
        # doesn't allow you to delete a file if someone still has an open
        # handle to that file.

        # :(
        cls.add_class_callback(gc.collect)
Example No. 7
    def test_bundle(self):
        environ = {
            'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')
        }

        ingest('csvdir', environ=environ)
        bundle = load('csvdir', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )

        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        assert_equal([sorted(adj.keys()) for adj in adjs_for_cols],
                     expected_adjustments)
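
The csvdir bundle reads whatever directory CSVDIR points at. Judging from the per_symbol reader in Example No. 16, which opens csvdir/daily/<symbol>.csv.gz, the fixture presumably looks like this (an inferred layout, not documented in the snippet itself):

# csvdir_samples/csvdir/
#     daily/
#         AAPL.csv.gz   # columns: date,open,high,low,close,volume,dividend,split
#         MSFT.csv.gz
#         ...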
Example No. 8
    def test_bundle(self):
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        # custom bundles need to be registered before use or they will not
        # be recognized
        register(
            'ZacksQuandl',
            from_zacks_dump(
                test_resource_path('zacks_samples', 'fictitious.csv')))
        ingest('ZacksQuandl', environ=environ)

        # load bundle now that it has been ingested
        bundle = load('ZacksQuandl', environ=environ)
        sids = 0, 1, 2

        # check sids match
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        # check asset_{start, end} is the same as {start, end}_date
        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        # get daily OHLCV data from bundle
        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )

        # get expected data from csv
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )

        # check OHLCV data matches
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
Example No. 9
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register("test", lambda *args: None)
        cls.add_class_callback(partial(unregister, "test"))

        with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath("example_data/expected_perf/%s" % pd.__version__.replace(".", "-")),
            serialization="pickle",
        )
Example No. 10
def pricing_callback(request):
    headers = {
        'content-encoding': 'gzip',
        'content-type': 'text/csv',
    }
    path = test_resource_path(
        'yahoo_samples',
        get_symbol_from_url(request.url) + '.csv.gz',
    )
    with open(path, 'rb') as f:
        return (
            200,
            headers,
            f.read(),
        )
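
pricing_callback returns the (status, headers, body) triple that the responses library expects from add_callback. A sketch of how it would be registered; the URL pattern is an assumption, since the real tests derive the symbol from the request URL via get_symbol_from_url:

import re

import responses

responses.add_callback(
    responses.GET,
    # Hypothetical pattern for the mocked Yahoo pricing endpoint.
    re.compile(r'https://.*\.yahoo\.com/table\.csv.*'),
    callback=pricing_callback,
    content_type='text/csv',
)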
Example No. 11
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )
Example No. 13
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )
Example No. 14
    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                self.asset_start,
            ).sid
            for symbol in self.symbols
        }

        # load data from CSV
        df = pd.read_csv(test_resource_path('zacks_samples', 'fictitious.csv'),
                         index_col='date',
                         parse_dates=['date'],
                         usecols=[
                             'date', 'open', 'high', 'low', 'close', 'volume',
                             'ticker'
                         ],
                         na_values=['NA'])
        # drop NA rows (non-trading days) or the loader will wipe out the
        # entire column
        df = df.dropna()

        df = df.replace({"ticker": sids})  # convert ticker to sids
        df = df.rename(columns={"ticker": "sid"})

        # Zacks data contains fractional shares; these get dropped
        df["volume"] = np.floor(df["volume"])

        # split one large DataFrame into one per sid
        # (also drops unwanted tickers)
        subs = [df[df['sid'] == sid] for sid in sorted(sids.values())]

        # Package up the data from the CSV so that it is in the same format
        # as the data coming out of the bundle: a list of five 2D arrays,
        # one for each OHLCV column.
        pricing = []
        for column in self.columns:
            vs = np.zeros((subs[0].shape[0], len(subs)))
            for i, sub in enumerate(subs):
                vs[:, i] = sub[column].values
            if column == 'volume':
                vs = np.nan_to_num(vs)
            pricing.append(vs)

        return pricing, []
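
The loop above fills one (num_sessions x num_sids) matrix per OHLCV field, one column at a time. An equivalent packing with np.column_stack, shown only to make the shapes explicit:

import numpy as np


def pack_pricing(subs, columns):
    pricing = []
    for column in columns:
        # One matrix per field: each sid's series becomes a column.
        vs = np.column_stack([sub[column].values for sub in subs])
        if column == 'volume':
            # Volume NaNs become zeros, as in the loop above.
            vs = np.nan_to_num(vs)
        pricing.append(vs)
    return pricing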
Example No. 15
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )

        market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
        for data in market_data:
            ensure_file(cls.tmpdir.getpath('example_data/root/data/' + data))
Example No. 16
def per_symbol(symbol):
    df = pd.read_csv(
        test_resource_path('csvdir_samples', 'csvdir',
                           'daily', symbol + '.csv.gz'),
        parse_dates=['date'],
        index_col='date',
        usecols=[
            'open',
            'high',
            'low',
            'close',
            'volume',
            'date',
            'dividend',
            'split',
        ],
        na_values=['NA'],
    )
    df['sid'] = sids[symbol]
    return df
Example No. 17
    def init_class_fixtures(cls):
        super().init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )

        cls.no_benchmark_expected_perf = {
            example_name:
            cls._no_benchmark_expectations_applied(expected_perf.copy())
            for example_name, expected_perf in cls.expected_perf.items()
        }
Example No. 18
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register("test", lambda *args: None)
        cls.add_class_callback(partial(unregister, "test"))

        with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                "example_data/expected_perf/%s" %
                pd.__version__.replace(".", "-"), ),
            serialization="pickle",
        )

        cls.no_benchmark_expected_perf = {
            example_name:
            cls._no_benchmark_expectations_applied(expected_perf.copy())
            for example_name, expected_perf in cls.expected_perf.items()
        }
Example No. 19
    def init_class_fixtures(cls):
        super(ExamplesTests, cls).init_class_fixtures()

        register('test', lambda *args: None)
        cls.add_class_callback(partial(unregister, 'test'))

        with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
            tar.extractall(cls.tmpdir.path)

        cls.expected_perf = dataframe_cache(
            cls.tmpdir.getpath(
                'example_data/expected_perf/%s' %
                pd.__version__.replace('.', '-'),
            ),
            serialization='pickle',
        )

        market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
        for data in market_data:
            update_modified_time(
                cls.tmpdir.getpath(
                    'example_data/root/data/' + data
                )
            )
Example No. 20
def zipfile_path(symbol):
    return test_resource_path('quandl_samples', symbol + '.csv.gz')
Example No. 21
    def test_bundle(self):
        url_map = merge(
            {
                format_wiki_url(
                    self.api_key,
                    symbol,
                    self.start_date,
                    self.end_date,
                ): test_resource_path('quandl_samples', symbol + '.csv.gz')
                for symbol in self.symbols
            },
            {
                format_metadata_url(self.api_key, n): test_resource_path(
                    'quandl_samples',
                    'metadata-%d.csv.gz' % n,
                )
                for n in (1, 2)
            },
        )
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        with patch_read_csv(url_map, strict=True):
            ingest('quandl', environ=environ)

        bundle = load('quandl', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
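
patch_read_csv is what makes this test run offline: inside the context, reads of the Quandl URLs in url_map come from local fixture files instead of the network. A minimal sketch of such a patch, assuming it monkeypatches pd.read_csv (hypothetical, not necessarily zipline's actual helper):

from contextlib import contextmanager
from unittest import mock

import pandas as pd


@contextmanager
def patch_read_csv(url_map, strict=False):
    real_read_csv = pd.read_csv

    def patched(filepath_or_buffer, *args, **kwargs):
        if filepath_or_buffer in url_map:
            # Redirect a known remote URL to its local fixture file.
            filepath_or_buffer = url_map[filepath_or_buffer]
        elif strict:
            raise AssertionError(
                'attempted to read unexpected resource: %r'
                % (filepath_or_buffer,),
            )
        return real_read_csv(filepath_or_buffer, *args, **kwargs)

    with mock.patch.object(pd, 'read_csv', patched):
        yield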
Example No. 22
    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                None,
            ).sid
            for symbol in self.symbols
        }

        # Load raw data from quandl test resources.
        data = load_data_table(
            file=test_resource_path(
                'quandl_samples',
                'QUANDL_ARCHIVE.zip'
            ),
            index_col='date'
        )
        data['sid'] = pd.factorize(data.symbol)[0]

        all_ = data.set_index(
            'sid',
            append=True,
        ).unstack()

        # fancy list comprehension with statements
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == 'volume':
                    vs = np.nan_to_num(vs)
                yield vs

        # the first index at which our written data will appear in the
        # files on disk
        start_idx = (
            self.calendar.all_sessions.get_loc(self.start_date, 'ffill') + 1
        )

        # convert an index into the raw dataframe to an index into the
        # final data
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            sid = sids[symbol]
            # .iloc replaces the long-removed DataFrame.ix API.
            return (
                1 -
                all_.iloc[idx]['ex_dividend', sid] /
                all_.iloc[idx - 1]['close', sid]
            )

        adjustments = [
            # ohlc
            {
                # dividends
                i(24): [Float64Multiply(
                    first_row=0,
                    last_row=i(24),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=expected_dividend_adjustment(24, 'AAPL'),
                )],
                i(87): [Float64Multiply(
                    first_row=0,
                    last_row=i(87),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=expected_dividend_adjustment(87, 'AAPL'),
                )],
                i(150): [Float64Multiply(
                    first_row=0,
                    last_row=i(150),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=expected_dividend_adjustment(150, 'AAPL'),
                )],
                i(214): [Float64Multiply(
                    first_row=0,
                    last_row=i(214),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=expected_dividend_adjustment(214, 'AAPL'),
                )],

                i(31): [Float64Multiply(
                    first_row=0,
                    last_row=i(31),
                    first_col=sids['MSFT'],
                    last_col=sids['MSFT'],
                    value=expected_dividend_adjustment(31, 'MSFT'),
                )],
                i(90): [Float64Multiply(
                    first_row=0,
                    last_row=i(90),
                    first_col=sids['MSFT'],
                    last_col=sids['MSFT'],
                    value=expected_dividend_adjustment(90, 'MSFT'),
                )],
                i(158): [Float64Multiply(
                    first_row=0,
                    last_row=i(158),
                    first_col=sids['MSFT'],
                    last_col=sids['MSFT'],
                    value=expected_dividend_adjustment(158, 'MSFT'),
                )],
                i(222): [Float64Multiply(
                    first_row=0,
                    last_row=i(222),
                    first_col=sids['MSFT'],
                    last_col=sids['MSFT'],
                    value=expected_dividend_adjustment(222, 'MSFT'),
                )],

                # splits
                i(108): [Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=1.0 / 7.0,
                )],
            },
        ] * (len(self.columns) - 1) + [
            # volume
            {
                i(108): [Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=7.0,
                )],
            }
        ]
        return pricing, adjustments
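
Two idioms above deserve a note. i = op.add(start_idx) only makes sense if op is a curried operator module (e.g. import toolz.curried.operator as op), so that i is a partial shifting raw-frame indices into on-disk indices; and the @list/@apply stack immediately runs the decorated generator and materializes it. A self-contained sketch of both, with apply defined locally since the snippet assumes it from zipline's utilities:

import toolz.curried.operator as op

start_idx = 3
i = op.add(start_idx)  # curried: i(24) == 27
assert i(24) == 27


def apply(f):
    # Assumed behavior of the imported helper: call f with no arguments.
    return f()


@list
@apply
def squares():
    # Equivalent to squares = list(apply(<generator function>)).
    for n in range(4):
        yield n * n


assert squares == [0, 1, 4, 9]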
Example No. 24
def adjustments_callback(request):
    path = test_resource_path(
        'yahoo_samples',
        get_symbol_from_url(request.url) + '.adjustments.gz',
    )
    return 200, {}, read_compressed(path)
Example No. 25
def main(ctx, rebuild_input):
    """Rebuild the perf data for test_examples"""
    example_path = test_resource_path("example_data.tar.gz")

    with tmp_dir() as d:
        with tarfile.open(example_path) as tar:
            tar.extractall(d.path)

        # The environ here should be the same (modulo the tempdir location)
        # as we use in test_examples.py.
        environ = {"ZIPLINE_ROOT": d.getpath("example_data/root")}

        if rebuild_input:
            raise NotImplementedError(
                "We cannot rebuild input for Yahoo because of "
                "changes Yahoo made to their API, so we cannot "
                "use Yahoo data bundles anymore. This will be fixed in "
                "a future release",
            )

        # we need to register the bundle; it is already ingested and saved in
        # the example_data.tar.gz file
        @register("test")
        def nop_ingest(*args, **kwargs):
            raise NotImplementedError("we cannot rebuild the test buindle")

        new_perf_path = d.getpath(
            "example_data/new_perf/%s" % pd.__version__.replace(".", "-"),
        )
        c = dataframe_cache(
            new_perf_path,
            serialization="pickle:2",
        )
        with c:
            for name in EXAMPLE_MODULES:
                c[name] = examples.run_example(
                    EXAMPLE_MODULES,
                    name,
                    environ=environ,
                    benchmark_returns=read_checked_in_benchmark_data(),
                )

            correct_called = [False]

            console = None

            def _exit(*args, **kwargs):
                console.raw_input = eof

            def correct():
                correct_called[0] = True
                _exit()

            expected_perf_path = d.getpath(
                "example_data/expected_perf/%s"
                % pd.__version__.replace(".", "-"),
            )

            # allow users to run some analysis to make sure that the new
            # results check out
            console = InteractiveConsole(
                {
                    "correct": correct,
                    "exit": _exit,
                    "incorrect": _exit,
                    "new": c,
                    "np": np,
                    "old": dataframe_cache(
                        expected_perf_path,
                        serialization="pickle",
                    ),
                    "pd": pd,
                    "cols_to_check": examples._cols_to_check,
                    "changed_results": changed_results,
                }
            )
            console.interact(banner)

            if not correct_called[0]:
                ctx.fail(
                    "`correct()` was not called! This means that the new"
                    " results will not be written",
                )

            # move the new results to the expected path
            shutil.rmtree(expected_perf_path)
            shutil.copytree(new_perf_path, expected_perf_path)

        # Clear out all the temporary new perf so it doesn't get added to the
        # tarball.
        shutil.rmtree(d.getpath("example_data/new_perf/"))

        with tarfile.open(example_path, "w|gz") as tar:
            tar.add(d.getpath("example_data"), "example_data")
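
The rebuild script leans on names its snippet does not define (EXAMPLE_MODULES, banner, changed_results, read_checked_in_benchmark_data, and eof). The eof helper is presumably what terminates console.interact: once _exit installs it as console.raw_input, the next prompt raises EOFError and the console returns. A sketch of that assumption:

def eof(*args, **kwargs):
    # Assumed definition: InteractiveConsole.interact returns when its
    # raw_input hook raises EOFError, so installing this as
    # console.raw_input ends the session at the next prompt.
    raise EOFError()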
Example No. 26
    def test_bundle(self):
        url_map = merge(
            {
                format_wiki_url(
                    self.api_key,
                    symbol,
                    self.start_date,
                    self.end_date,
                ): test_resource_path('quandl_samples', symbol + '.csv.gz')
                for symbol in self.symbols
            },
            {
                format_metadata_url(self.api_key, n): test_resource_path(
                    'quandl_samples',
                    'metadata-%d.csv.gz' % n,
                )
                for n in (1, 2)
            },
        )
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        with patch_read_csv(url_map, strict=True):
            ingest('quandl', environ=environ)

        bundle = load('quandl', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        cal = self.calendar
        actual = bundle.daily_bar_reader.load_raw_arrays(
            self.columns,
            cal[cal.get_loc(self.asset_start, 'bfill')],
            cal[cal.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            cal,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
Example No. 28
def zipfile_path(file_name):
    return test_resource_path('quandl_samples', file_name)
Example No. 30
    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                None,
            ).sid
            for symbol in self.symbols
        }

        # Load raw data from quandl test resources.
        data = load_data_table(
            file=test_resource_path("quandl_samples", "QUANDL_ARCHIVE.zip"),
            index_col="date",
        )
        data["sid"] = pd.factorize(data.symbol)[0]

        all_ = data.set_index(
            "sid",
            append=True,
        ).unstack()

        # fancy list comprehension with statements
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == "volume":
                    vs = np.nan_to_num(vs)
                yield vs

        # the first index at which our written data will appear in the
        # files on disk
        start_idx = self.calendar.all_sessions.get_loc(self.start_date,
                                                       "ffill") + 1

        # convert an index into the raw dataframe to an index into the
        # final data
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            sid = sids[symbol]
            return (1 - all_.iloc[idx]["ex_dividend", sid] /
                    all_.iloc[idx - 1]["close", sid])

        adjustments = [
            # ohlc
            {
                # dividends
                i(24): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(24),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=expected_dividend_adjustment(24, "AAPL"),
                    )
                ],
                i(87): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(87),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=expected_dividend_adjustment(87, "AAPL"),
                    )
                ],
                i(150): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(150),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=expected_dividend_adjustment(150, "AAPL"),
                    )
                ],
                i(214): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(214),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=expected_dividend_adjustment(214, "AAPL"),
                    )
                ],
                i(31): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(31),
                        first_col=sids["MSFT"],
                        last_col=sids["MSFT"],
                        value=expected_dividend_adjustment(31, "MSFT"),
                    )
                ],
                i(90): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(90),
                        first_col=sids["MSFT"],
                        last_col=sids["MSFT"],
                        value=expected_dividend_adjustment(90, "MSFT"),
                    )
                ],
                i(158): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(158),
                        first_col=sids["MSFT"],
                        last_col=sids["MSFT"],
                        value=expected_dividend_adjustment(158, "MSFT"),
                    )
                ],
                i(222): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(222),
                        first_col=sids["MSFT"],
                        last_col=sids["MSFT"],
                        value=expected_dividend_adjustment(222, "MSFT"),
                    )
                ],
                # splits
                i(108): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(108),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=1.0 / 7.0,
                    )
                ],
            },
        ] * (len(self.columns) - 1) + [
            # volume
            {
                i(108): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(108),
                        first_col=sids["AAPL"],
                        last_col=sids["AAPL"],
                        value=7.0,
                    )
                ],
            }
        ]
        return pricing, adjustments