示例#1
0
    def test_tabular(self):
        name = "expr"
        expr = bz.Data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr, loader=self.garbage_loader, no_deltas_rule=no_deltas_rules.ignore, missing_values=self.missing_values
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))

        self.assertIs(ds.value.dtype, float64_dtype)
        self.assertIs(ds.int_value.dtype, int64_dtype)

        self.assertTrue(np.isnan(ds.value.missing_value))
        self.assertEqual(ds.int_value.missing_value, 0)

        invalid_type_fields = ("asof_date",)

        for field in invalid_type_fields:
            with self.assertRaises(AttributeError) as e:
                getattr(ds, field)
            self.assertIn("'%s'" % field, str(e.exception))
            self.assertIn("'datetime'", str(e.exception))

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            ds,
        )
示例#2
0
    def test_tabular(self):
        name = 'expr'
        expr = bz.data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))

        self.assertIs(ds.value.dtype, float64_dtype)
        self.assertIs(ds.int_value.dtype, int64_dtype)

        self.assertTrue(np.isnan(ds.value.missing_value))
        self.assertEqual(ds.int_value.missing_value, 0)

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            ds,
        )
示例#3
0
    def test_tabular(self):
        name = 'expr'
        expr = bz.Data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))
        self.assertEqual(
            {c.name: c.dtype
             for c in ds.columns},
            {
                'sid': np.int64,
                'value': np.float64
            },
        )

        for field in ('timestamp', 'asof_date'):
            with self.assertRaises(AttributeError) as e:
                getattr(ds, field)
            self.assertIn("'%s'" % field, str(e.exception))
            self.assertIn("'datetime'", str(e.exception))

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
            ),
            ds,
        )
示例#4
0
    def test_tabular(self):
        name = 'expr'
        expr = bz.Data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))
        self.assertEqual(
            {c.name: c.dtype for c in ds.columns},
            {'sid': np.int64, 'value': np.float64},
        )

        for field in ('timestamp', 'asof_date'):
            with self.assertRaises(AttributeError) as e:
                getattr(ds, field)
            self.assertIn("'%s'" % field, str(e.exception))
            self.assertIn("'datetime'", str(e.exception))

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
            ),
            ds,
        )
示例#5
0
    def test_tabular(self):
        name = 'expr'
        expr = bz.data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))

        self.assertIs(ds.value.dtype, float64_dtype)
        self.assertIs(ds.int_value.dtype, int64_dtype)

        self.assertTrue(np.isnan(ds.value.missing_value))
        self.assertEqual(ds.int_value.missing_value, 0)

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            ds,
        )
示例#6
0
 def test_auto_deltas_fail_raise(self):
     loader = BlazeLoader()
     expr = bz.Data(self.df, dshape=self.dshape)
     with self.assertRaises(ValueError) as e:
         from_blaze(
             expr,
             loader=loader,
             no_deltas_rule=no_deltas_rules.raise_,
         )
     self.assertIn(str(expr), str(e.exception))
示例#7
0
 def test_auto_deltas_fail_warn(self):
     with warnings.catch_warnings(record=True) as ws:
         warnings.simplefilter("always")
         loader = BlazeLoader()
         expr = bz.Data(self.df, dshape=self.dshape)
         from_blaze(expr, loader=loader, no_deltas_rule=no_deltas_rules.warn, missing_values=self.missing_values)
     self.assertEqual(len(ws), 1)
     w = ws[0].message
     self.assertIsInstance(w, NoDeltasWarning)
     self.assertIn(str(expr), str(w))
示例#8
0
 def test_auto_deltas_fail_raise(self):
     loader = BlazeLoader()
     expr = bz.Data(self.df, dshape=self.dshape)
     with self.assertRaises(ValueError) as e:
         from_blaze(
             expr,
             loader=loader,
             no_deltas_rule=no_deltas_rules.raise_,
         )
     self.assertIn(str(expr), str(e.exception))
示例#9
0
 def test_auto_deltas_fail_warn(self):
     with warnings.catch_warnings(record=True) as ws:
         warnings.simplefilter('always')
         loader = BlazeLoader()
         expr = bz.Data(self.df, dshape=self.dshape)
         from_blaze(
             expr,
             loader=loader,
             no_deltas_rule=no_deltas_rules.warn,
         )
     self.assertEqual(len(ws), 1)
     w = ws[0].message
     self.assertIsInstance(w, NoDeltasWarning)
     self.assertIn(str(expr), str(w))
示例#10
0
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.Data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
示例#11
0
    def test_id(self):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
示例#12
0
    def test_id_macro_dataset(self):
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        nassets = len(asset_info)
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        assert_frame_equal(result, expected, check_dtype=False)
示例#13
0
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
示例#14
0
    def test_missing_asof(self):
        expr = bz.Data(
            self.df.loc[:, ["sid", "value", "timestamp"]],
            name="expr",
            dshape="""
            var * {
                sid: ?int64,
                value: float64,
                timestamp: datetime,
            }""",
        )

        with self.assertRaises(TypeError) as e:
            from_blaze(expr, loader=self.garbage_loader, no_deltas_rule=no_deltas_rules.ignore)
        self.assertIn("'asof_date'", str(e.exception))
        self.assertIn(repr(str(expr.dshape.measure)), str(e.exception))
示例#15
0
    def _run_pipeline(
        self, expr, deltas, expected_views, expected_output, finder, calendar, start, end, window_length, compute_fn
    ):
        loader = BlazeLoader()
        ds = from_blaze(
            expr, deltas, loader=loader, no_deltas_rule=no_deltas_rules.raise_, missing_values=self.missing_values
        )
        p = Pipeline()

        # prevent unbound locals issue in the inner class
        window_length_ = window_length

        class TestFactor(CustomFactor):
            inputs = (ds.value,)
            window_length = window_length_

            def compute(self, today, assets, out, data):
                assert_array_almost_equal(data, expected_views[today])
                out[:] = compute_fn(data)

        p.add(TestFactor(), "value")

        result = SimplePipelineEngine(loader, calendar, finder).run_pipeline(p, start, end)

        assert_frame_equal(result, _utc_localize_index_level_0(expected_output), check_dtype=False)
示例#16
0
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df["timestamp"] = (
            (pd.DatetimeIndex(df["timestamp"], tz="EST") + timedelta(hours=8, minutes=44))
            .tz_convert("utc")
            .tz_localize(None)
        )
        df.ix[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45")
        expr = bz.Data(df, name="expr", dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST")
        ds = from_blaze(expr, loader=loader, no_deltas_rule=no_deltas_rules.ignore, missing_values=self.missing_values)
        p = Pipeline()
        p.add(ds.value.latest, "value")
        p.add(ds.int_value.latest, "int_value")
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(loader, dates, finder).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop("asof_date", axis=1)
        expected["timestamp"] = expected["timestamp"].dt.normalize().astype("datetime64[ns]").dt.tz_localize("utc")
        expected.ix[3:5, "timestamp"] += timedelta(days=1)
        expected.set_index(["timestamp", "sid"], inplace=True)
        expected.index = pd.MultiIndex.from_product(
            (expected.index.levels[0], finder.retrieve_all(expected.index.levels[1]))
        )
        assert_frame_equal(result, expected, check_dtype=False)
示例#17
0
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
        df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.Data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]', )
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
示例#18
0
    def test_id_macro_dataset(self):
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        nassets = len(asset_info)
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        assert_frame_equal(result, expected, check_dtype=False)
示例#19
0
    def test_id(self):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = self.df.drop('asof_date',
                                axis=1).set_index(['timestamp', 'sid'], )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
示例#20
0
    def _run_pipeline(self, expr, deltas, expected_views, expected_output,
                      finder, calendar, start, end, window_length, compute_fn):
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            deltas,
            loader=loader,
            no_deltas_rule=no_deltas_rules.raise_,
        )
        p = Pipeline()

        # prevent unbound locals issue in the inner class
        window_length_ = window_length

        class TestFactor(CustomFactor):
            inputs = ds.value,
            window_length = window_length_

            def compute(self, today, assets, out, data):
                assert_array_almost_equal(data, expected_views[today])
                out[:] = compute_fn(data)

        p.add(TestFactor(), 'value')

        result = SimplePipelineEngine(
            loader,
            calendar,
            finder,
        ).run_pipeline(p, start, end)

        assert_frame_equal(
            result,
            expected_output,
            check_dtype=False,
        )
示例#21
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = pd.DataFrame(
            np.array([[0, 1],
                      [1, 2],
                      [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
示例#22
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
示例#23
0
    def test_column(self):
        exprname = 'expr'
        expr = bz.data(self.df, name=exprname, dshape=self.dshape)
        value = from_blaze(
            expr.value,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        self.assertEqual(value.name, 'value')
        self.assertIsInstance(value, BoundColumn)
        self.assertIs(value.dtype, float64_dtype)

        # test memoization
        self.assertIs(
            from_blaze(
                expr.value,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            value,
        )
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ).value,
            value,
        )

        # test the walk back up the tree
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            value.dataset,
        )
        self.assertEqual(value.dataset.__name__, exprname)
示例#24
0
    def test_column(self):
        exprname = 'expr'
        expr = bz.data(self.df, name=exprname, dshape=self.dshape)
        value = from_blaze(
            expr.value,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        self.assertEqual(value.name, 'value')
        self.assertIsInstance(value, BoundColumn)
        self.assertIs(value.dtype, float64_dtype)

        # test memoization
        self.assertIs(
            from_blaze(
                expr.value,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            value,
        )
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ).value,
            value,
        )

        # test the walk back up the tree
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            value.dataset,
        )
        self.assertEqual(value.dataset.__name__, exprname)
示例#25
0
 def test_auto_deltas(self):
     expr = bz.Data(
         {"ds": self.df, "ds_deltas": pd.DataFrame(columns=self.df.columns)},
         dshape=var * Record((("ds", self.dshape.measure), ("ds_deltas", self.dshape.measure))),
     )
     loader = BlazeLoader()
     ds = from_blaze(expr.ds, loader=loader, missing_values=self.missing_values)
     self.assertEqual(len(loader), 1)
     exprdata = loader[ds]
     self.assertTrue(exprdata.expr.isidentical(expr.ds))
     self.assertTrue(exprdata.deltas.isidentical(expr.ds_deltas))
示例#26
0
    def test_missing_asof(self):
        expr = bz.Data(
            self.df.loc[:, ['sid', 'value', 'timestamp']],
            name='expr',
            dshape="""
            var * {
                sid: ?int64,
                value: float64,
                timestamp: datetime,
            }""",
        )

        with self.assertRaises(TypeError) as e:
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
            )
        self.assertIn("'asof_date'", str(e.exception))
        self.assertIn(repr(str(expr.dshape.measure)), str(e.exception))
示例#27
0
def gen_data_set(table_name):
    """读取bcolz格式的数据,生成DataSet"""
    expr = _gen_expr(table_name)
    return from_blaze(
        expr,
        # loader=None,
        no_deltas_rule='ignore',
        no_checkpoints_rule='ignore',
        # odo_kwargs=gen_odo_kwargs(expr, utc=False),
        missing_values=fillvalue_for_expr(expr),
        domain=CN_EQUITIES,
    )
示例#28
0
def gen_data_set(table_name):
    """读取bcolz格式的数据,生成DataSet"""
    expr = _gen_expr(table_name)
    return from_blaze(
        expr,
        # loader=global_loader,
        no_deltas_rule='ignore',
        no_checkpoints_rule='ignore',
        # odo_kwargs=gen_odo_kwargs(expr),
        missing_values=fillvalue_for_expr(expr),
        domain=CN_EQUITIES,
    )
示例#29
0
 def test_non_numpy_field(self):
     expr = bz.Data(
         [],
         dshape="""
         var * {
              a: datetime,
              asof_date: datetime,
              timestamp: datetime,
         }""",
     )
     ds = from_blaze(expr, loader=self.garbage_loader, no_deltas_rule=no_deltas_rules.ignore)
     with self.assertRaises(AttributeError):
         ds.a
     self.assertIsInstance(object.__getattribute__(ds, "a"), NonNumpyField)
示例#30
0
    def test_tabular(self):
        name = 'expr'
        expr = bz.Data(self.df, name=name, dshape=self.dshape)
        ds = from_blaze(
            expr,
            loader=self.garbage_loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        self.assertEqual(ds.__name__, name)
        self.assertTrue(issubclass(ds, DataSet))

        self.assertIs(ds.value.dtype, float64_dtype)
        self.assertIs(ds.int_value.dtype, int64_dtype)

        self.assertTrue(np.isnan(ds.value.missing_value))
        self.assertEqual(ds.int_value.missing_value, 0)

        invalid_type_fields = ('asof_date', )

        for field in invalid_type_fields:
            with self.assertRaises(AttributeError) as e:
                getattr(ds, field)
            self.assertIn("'%s'" % field, str(e.exception))
            self.assertIn("'datetime'", str(e.exception))

        # test memoization
        self.assertIs(
            from_blaze(
                expr,
                loader=self.garbage_loader,
                no_deltas_rule=no_deltas_rules.ignore,
                missing_values=self.missing_values,
            ),
            ds,
        )
示例#31
0
 def test_auto_deltas(self):
     expr = bz.Data(
         {'ds': self.df,
          'ds_deltas': pd.DataFrame(columns=self.df.columns)},
         dshape=var * Record((
             ('ds', self.dshape.measure),
             ('ds_deltas', self.dshape.measure),
         )),
     )
     loader = BlazeLoader()
     ds = from_blaze(expr.ds, loader=loader)
     self.assertEqual(len(loader), 1)
     exprdata = loader[ds]
     self.assertTrue(exprdata.expr.isidentical(expr.ds))
     self.assertTrue(exprdata.deltas.isidentical(expr.ds_deltas))
示例#32
0
 def test_non_pipeline_field(self):
     # NOTE: This test will fail if we ever allow string types in
     # the Pipeline API. If this happens, change the dtype of the `a` field
     # of expr to another type we don't allow.
     expr = bz.Data(
         [],
         dshape="""
         var * {
              a: string,
              asof_date: datetime,
              timestamp: datetime,
         }""",
     )
     ds = from_blaze(expr, loader=self.garbage_loader, no_deltas_rule=no_deltas_rules.ignore)
     with self.assertRaises(AttributeError):
         ds.a
     self.assertIsInstance(object.__getattribute__(ds, "a"), NonPipelineField)
示例#33
0
 def test_auto_deltas(self):
     expr = bz.Data(
         {
             'ds': self.df,
             'ds_deltas': pd.DataFrame(columns=self.df.columns)
         },
         dshape=var * Record((
             ('ds', self.dshape.measure),
             ('ds_deltas', self.dshape.measure),
         )),
     )
     loader = BlazeLoader()
     ds = from_blaze(expr.ds, loader=loader)
     self.assertEqual(len(loader), 1)
     exprdata = loader[ds]
     self.assertTrue(exprdata.expr.isidentical(expr.ds))
     self.assertTrue(exprdata.deltas.isidentical(expr.ds_deltas))
示例#34
0
 def test_non_numpy_field(self):
     expr = bz.Data(
         [],
         dshape="""
         var * {
              a: datetime,
              asof_date: datetime,
              timestamp: datetime,
         }""",
     )
     ds = from_blaze(
         expr,
         loader=self.garbage_loader,
         no_deltas_rule=no_deltas_rules.ignore,
     )
     with self.assertRaises(AttributeError):
         ds.a
     self.assertIsInstance(object.__getattribute__(ds, 'a'), NonNumpyField)
示例#35
0
    def test_complex_expr(self):
        expr = bz.data(self.df, dshape=self.dshape)
        # put an Add in the table
        expr_with_add = bz.transform(expr, value=expr.value + 1)

        # Test that we can have complex expressions with no deltas
        from_blaze(
            expr_with_add,
            deltas=None,
            loader=self.garbage_loader,
            missing_values=self.missing_values,
        )

        with self.assertRaises(TypeError):
            from_blaze(
                expr.value + 1,  # put an Add in the column
                deltas=None,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )

        deltas = bz.data(
            pd.DataFrame(columns=self.df.columns),
            dshape=self.dshape,
        )
        with self.assertRaises(TypeError):
            from_blaze(
                expr_with_add,
                deltas=deltas,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )

        with self.assertRaises(TypeError):
            from_blaze(
                expr.value + 1,
                deltas=deltas,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )
示例#36
0
    def test_complex_expr(self):
        expr = bz.data(self.df, dshape=self.dshape)
        # put an Add in the table
        expr_with_add = bz.transform(expr, value=expr.value + 1)

        # Test that we can have complex expressions with no deltas
        from_blaze(
            expr_with_add,
            deltas=None,
            loader=self.garbage_loader,
            missing_values=self.missing_values,
        )

        with self.assertRaises(TypeError):
            from_blaze(
                expr.value + 1,  # put an Add in the column
                deltas=None,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )

        deltas = bz.data(
            pd.DataFrame(columns=self.df.columns),
            dshape=self.dshape,
        )
        with self.assertRaises(TypeError):
            from_blaze(
                expr_with_add,
                deltas=deltas,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )

        with self.assertRaises(TypeError):
            from_blaze(
                expr.value + 1,
                deltas=deltas,
                loader=self.garbage_loader,
                missing_values=self.missing_values,
            )
示例#37
0
 def test_non_pipeline_field(self):
     expr = bz.data(
         [],
         dshape="""
         var * {
              a: complex,
              asof_date: datetime,
              timestamp: datetime,
         }""",
     )
     ds = from_blaze(
         expr,
         loader=self.garbage_loader,
         no_deltas_rule=no_deltas_rules.ignore,
     )
     with self.assertRaises(AttributeError):
         ds.a
     self.assertIsInstance(
         object.__getattribute__(ds, 'a'),
         NonPipelineField,
     )
示例#38
0
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)
        ).tz_convert('utc').tz_localize(None)
        df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.int_value.latest, 'int_value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
示例#39
0
 def test_non_pipeline_field(self):
     # NOTE: This test will fail if we ever allow string types in
     # the Pipeline API. If this happens, change the dtype of the `a` field
     # of expr to another type we don't allow.
     expr = bz.Data(
         [],
         dshape="""
         var * {
              a: string,
              asof_date: datetime,
              timestamp: datetime,
         }""",
     )
     ds = from_blaze(
         expr,
         loader=self.garbage_loader,
         no_deltas_rule=no_deltas_rules.ignore,
     )
     with self.assertRaises(AttributeError):
         ds.a
     self.assertIsInstance(
         object.__getattribute__(ds, 'a'),
         NonPipelineField,
     )
示例#40
0
from datashape import dshape

from alphatools.research import loaders, blaze_loader
from zipline.data import bundles
from zipline.utils.calendars import get_calendar
from zipline.pipeline.loaders.blaze import from_blaze

from os import path

this_file = path.dirname(__file__)

with open(path.join(this_file, 'factory/data_sources.json')) as f:
    data_sources = json.load(f)

Factory = {}

for source in data_sources.keys():
    loc = data_sources[source]['url']
    shape = dshape(data_sources[source]['schema'])

    loc = path.expandvars(loc)

    expr = bz.data(loc, dshape=shape)

    # create the DataSet
    ds = from_blaze(expr,
                    no_deltas_rule='ignore',
                    no_checkpoints_rule='ignore',
                    loader=blaze_loader)
    Factory = {source: ds}