Exemplo n.º 1
0
    def _run_pipeline(self, expr, deltas, expected_views, expected_output,
                      finder, calendar, start, end, window_length, compute_fn):
        """Build a pipeline from ``expr``/``deltas`` and verify its output.

        A ``CustomFactor`` is run over the blaze-backed dataset; each raw
        window it receives is checked against ``expected_views`` and the
        final pipeline frame is checked against ``expected_output``.
        """
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            deltas,
            loader=loader,
            no_deltas_rule=no_deltas_rules.raise_,
        )

        # Bind under a different name so the class body below does not
        # trip over class-scope shadowing of ``window_length``.
        bound_window_length = window_length

        class TestFactor(CustomFactor):
            inputs = (ds.value,)
            window_length = bound_window_length

            def compute(self, today, assets, out, data):
                # The raw window handed to the factor must match the
                # precomputed expectation for this session.
                assert_array_almost_equal(data, expected_views[today])
                out[:] = compute_fn(data)

        pipeline = Pipeline()
        pipeline.add(TestFactor(), 'value')

        engine = SimplePipelineEngine(loader, calendar, finder)
        result = engine.run_pipeline(pipeline, start, end)

        assert_frame_equal(
            result,
            expected_output,
            check_dtype=False,
        )
Exemplo n.º 2
0
    def test_custom_query_time_tz(self):
        """Rows timestamped after the data query cutoff surface a day later.

        The timestamps are shifted so that some fall just before and some
        just after the 08:45 EST ``data_query_time``; rows past the cutoff
        must only become visible on the next session.
        """
        df = self.df.copy()
        # Shift timestamps by 8h44m relative to EST, then store as naive UTC.
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
        # ``.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
        # equivalent (inclusive endpoints on this default integer index).
        df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.Data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]', )
        # Rows past the query cutoff are pushed to the next trading day.
        expected.loc[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Exemplo n.º 3
0
    def test_id_macro_dataset(self):
        """A macro (sid-less) dataset broadcasts each value to every asset."""
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            # Use the enum constant instead of a bare string for consistency
            # with the other tests in this suite.
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        # Each macro value (0, 1, 2) is repeated once per asset per session.
        nassets = len(asset_info)
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        assert_frame_equal(result, expected, check_dtype=False)
Exemplo n.º 4
0
    def test_id(self):
        """The ``latest`` value of each (timestamp, sid) row is surfaced as-is."""
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            # Use the enum constant instead of a bare string for consistency
            # with the other tests in this suite.
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        # Expected output is the input, indexed by (timestamp, asset).
        expected = self.df.drop('asof_date',
                                axis=1).set_index(['timestamp', 'sid'], )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Exemplo n.º 5
0
    def init_class_fixtures(cls):
        """Create the shared frames, dshapes, and loader used by the tests."""
        super(BlazeToPipelineTestCase, cls).init_class_fixtures()
        cls.dates = pd.date_range('2014-01-01', '2014-01-03')
        # One row per sid for each of the three sessions.
        repeated_dates = cls.dates.repeat(3)
        cls.df = pd.DataFrame({
            'sid': cls.ASSET_FINDER_EQUITY_SIDS * 3,
            'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
            'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        cls.dshape = dshape("""
        var * {
            sid: ?int64,
            value: ?float64,
            int_value: ?int64,
            asof_date: datetime,
            timestamp: datetime
        }
        """)
        # Macro (sid-less) variant: keep only sid 65's rows, drop the column.
        cls.macro_df = cls.df[cls.df.sid == 65].drop('sid', axis=1)
        macro_fields = OrderedDict(cls.dshape.measure.fields)
        del macro_fields['sid']
        cls.macro_dshape = var * Record(macro_fields)

        cls.garbage_loader = BlazeLoader()
        cls.missing_values = {'int_value': 0}
Exemplo n.º 6
0
    def setUpClass(cls):
        """Build the shared frames, dshapes, and loader for the test class."""
        cls.dates = pd.date_range('2014-01-01', '2014-01-03')
        # One row per sid for each of the three sessions.
        repeated_dates = cls.dates.repeat(3)
        # Sids are the code points of 'A', 'B', 'C' (65, 66, 67).
        cls.sids = sids = ord('A'), ord('B'), ord('C')
        cls.df = pd.DataFrame({
            'sid': sids * 3,
            'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
            'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        cls.dshape = dshape("""
        var * {
            sid: ?int64,
            value: ?float64,
            int_value: ?int64,
            asof_date: datetime,
            timestamp: datetime
        }
        """)
        # Macro (sid-less) variant: keep only sid 65's rows, drop the column.
        cls.macro_df = cls.df[cls.df.sid == 65].drop('sid', axis=1)
        macro_fields = OrderedDict(cls.dshape.measure.fields)
        del macro_fields['sid']
        cls.macro_dshape = var * Record(macro_fields)

        cls.garbage_loader = BlazeLoader()
        cls.missing_values = {'int_value': 0}
Exemplo n.º 7
0
    def _test_id(self, df, dshape, expected, finder, add):
        """Run a ``latest`` pipeline over ``df`` and compare to ``expected``.

        Parameters
        ----------
        df : pd.DataFrame
            Raw data to wrap in a blaze expression.
        dshape : dshape
            Datashape describing ``df``.
        expected : pd.DataFrame
            Frame the pipeline is expected to produce (index level 0 is
            UTC-localized before comparison).
        finder : AssetFinder
            Asset finder used to resolve sids.
        add : iterable[str]
            Names of dataset columns to add to the pipeline.
        """
        expr = bz.Data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        # BUG FIX: the ``finder`` argument was previously shadowed by a
        # throwaway ``with tmp_asset_finder() as finder:`` block, silently
        # ignoring the caller-provided finder. Use the passed finder.
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
Exemplo n.º 8
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        # Each (value, other) pair is repeated once per asset per session;
        # sort columns once here so the compare below is order-insensitive.
        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        # ``expected`` is already column-sorted above; no need to re-sort
        # inside the assertion (the old code sorted it twice).
        assert_frame_equal(
            result,
            expected,
            check_dtype=False,
        )
Exemplo n.º 9
0
 def test_auto_deltas_fail_raise(self):
     """The ``raise_`` rule must raise ValueError when no deltas are found."""
     loader = BlazeLoader()
     expr = bz.Data(self.df, dshape=self.dshape)
     with self.assertRaises(ValueError) as cm:
         from_blaze(
             expr,
             loader=loader,
             no_deltas_rule=no_deltas_rules.raise_,
         )
     # The error message should name the offending expression.
     self.assertIn(str(expr), str(cm.exception))
Exemplo n.º 10
0
 def test_auto_deltas_fail_warn(self):
     """The ``warn`` rule must emit exactly one NoDeltasWarning."""
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter('always')
         loader = BlazeLoader()
         expr = bz.Data(self.df, dshape=self.dshape)
         from_blaze(
             expr,
             loader=loader,
             no_deltas_rule=no_deltas_rules.warn,
         )
     self.assertEqual(len(caught), 1)
     warning = caught[0].message
     self.assertIsInstance(warning, NoDeltasWarning)
     # The warning text should name the offending expression.
     self.assertIn(str(expr), str(warning))
Exemplo n.º 11
0
 def test_auto_deltas(self):
     """from_blaze should auto-discover the sibling ``<name>_deltas`` field."""
     expr = bz.Data(
         {
             'ds': self.df,
             'ds_deltas': pd.DataFrame(columns=self.df.columns)
         },
         dshape=var * Record((
             ('ds', self.dshape.measure),
             ('ds_deltas', self.dshape.measure),
         )),
     )
     loader = BlazeLoader()
     ds = from_blaze(expr.ds, loader=loader)
     # Exactly one dataset should have been registered with the loader,
     # wired to both the base expression and its discovered deltas.
     self.assertEqual(len(loader), 1)
     registered = loader[ds]
     self.assertTrue(registered.expr.isidentical(expr.ds))
     self.assertTrue(registered.deltas.isidentical(expr.ds_deltas))
Exemplo n.º 12
0
    extensions=[],
    strict=True,
    environ=os.environ,
)


# Set-Up Pricing Data Access
trading_calendar = get_calendar('NYSE')
bundle = 'sharadar-prices' #'quandl'
bundle_data = bundles.load(bundle)


# Maps pipeline columns to the loader that serves them; presumably
# populated elsewhere in this file (not visible here) — verify.
loaders = {}

# Create an empty BlazeLoader for blaze-backed datasets.
blaze_loader = BlazeLoader()

def my_dispatcher(column):
    # Look up the registered loader for ``column``; raises KeyError if
    # the column was never registered in ``loaders``.
    return loaders[column]

# Loader for the built-in US equity pricing columns, backed by the bundle's
# daily bar and adjustment readers.
pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader,
    bundle_data.adjustment_reader,
)

def choose_loader(column):
    if column in USEquityPricing.columns:
        return pipeline_loader
    try:
        return my_dispatcher(column)
    except: