Exemplo n.º 1
0
    def test_bad_input(self):
        """
        Check that requesting the wrong column, or more than one column,
        makes ``load_adjusted_array`` raise ``ValueError``.
        """
        frame = DataFrame(
            arange(100).reshape(self.ndates, self.nsids),
            index=self.dates,
            columns=self.sids,
        )
        loader = DataFrameLoader(USEquityPricing.close, frame)

        invalid_requests = (
            # Wrong column.
            [USEquityPricing.open],
            # Too many columns.
            [USEquityPricing.open, USEquityPricing.close],
        )
        for requested in invalid_requests:
            with self.assertRaises(ValueError):
                loader.load_adjusted_array(
                    US_EQUITIES,
                    requested,
                    self.dates,
                    self.sids,
                    self.mask,
                )
Exemplo n.º 2
0
    def test_bad_input(self):
        """
        Verify that ``load_adjusted_array`` rejects bad column requests.

        The loader is built for ``USEquityPricing.close``; asking it for a
        different column, or for two columns at once, must raise
        ``ValueError``.
        """
        values = arange(100).reshape(self.ndates, self.nsids)
        loader = DataFrameLoader(
            USEquityPricing.close,
            DataFrame(values, index=self.dates, columns=self.sids),
        )

        wrong_column = [USEquityPricing.open]
        too_many_columns = [USEquityPricing.open, USEquityPricing.close]

        for bad_columns in (wrong_column, too_many_columns):
            with self.assertRaises(ValueError):
                loader.load_adjusted_array(
                    US_EQUITIES,
                    bad_columns,
                    self.dates,
                    self.sids,
                    self.mask,
                )
Exemplo n.º 3
0
    def _load_events(self, name_map, indexer, columns, dates, sids, mask):
        """
        Produce one loaded array per requested column from ``self.events``.

        ``indexer`` must have shape ``(len(dates), len(sids))``. Each entry
        selects the event whose value belongs at that date/sid location;
        negative entries mark locations with no known value, which are filled
        with the column's ``missing_value``.

        Returns a dict keyed by column, holding the entry that
        ``DataFrameLoader.load_adjusted_array`` produced for that column.
        """
        assert indexer.shape == (len(dates), len(sids))

        results = {}
        for column in columns:
            # Every event value we hold for this column.
            event_values = self.events[name_map[column]]

            if len(event_values):
                # Place event values at their date/sid locations. The result
                # has the same 2D shape as `indexer`.
                values = event_values[indexer]

                # Locations without a known value are marked by a negative
                # index; overwrite them with the column's missing value.
                values[indexer < 0] = column.missing_value
            else:
                # With no events at all, indexing with -1 would fail, so the
                # all-missing output is built directly.
                values = np.full(
                    (len(dates), len(sids)),
                    column.missing_value,
                    dtype=column.dtype,
                )

            # DataFrameLoader owns the actual array formatting logic.
            frame = pd.DataFrame(values, index=dates, columns=sids)
            loader = DataFrameLoader(column, frame, adjustments=None)
            loaded = loader.load_adjusted_array([column], dates, sids, mask)
            results[column] = loaded[column]
        return results
    def load_adjusted_array(self, domain, columns, dates, sids, mask):
        """
        Load each requested column through a per-column ``DataFrameLoader``.

        For every column, the frame returned by
        ``self.fundamentals_reader.read(column.name, dates, sids)`` is wrapped
        in a ``DataFrameLoader`` and loaded on its own. The per-column results
        are merged into a single dict, which is returned.
        """
        results = {}
        for col in columns:
            frame = self.fundamentals_reader.read(col.name, dates, sids)
            per_column = DataFrameLoader(col, frame).load_adjusted_array(
                domain,
                [col],
                dates,
                sids,
                mask,
            )
            results.update(per_column)

        return results
Exemplo n.º 5
0
    def _load_events(self, name_map, indexer, columns, dates, sids, mask):
        """
        Load one array per requested column from the stored event data.

        Parameters
        ----------
        name_map : mapping
            Maps each requested column to its key in ``self.events``.
        indexer : integer array
            For each location, the position of the event whose value belongs
            there, or -1 where no value is known. Presumably shaped
            ``(len(dates), len(sids))`` -- confirm against callers.
        columns : iterable
            Columns to load. Each must expose ``missing_value`` and ``dtype``.
        dates, sids
            Labels for the rows and columns of the intermediate frame; also
            forwarded to ``DataFrameLoader.load_adjusted_array``.
        mask
            Forwarded unchanged to ``DataFrameLoader.load_adjusted_array``.

        Returns
        -------
        dict
            Maps each column to the entry produced for it by
            ``DataFrameLoader.load_adjusted_array``.
        """
        # Imported locally so this method does not depend on a module-level
        # `np` binding being present.
        import numpy as np

        def to_frame(array):
            return pd.DataFrame(array, index=dates, columns=sids)

        out = {}
        for c in columns:
            # Array holding the value for column `c` for every event we have.
            col_array = self.events[name_map[c]]

            if not len(col_array):
                # There are no events at all. The normal branch relies on
                # being able to index with -1 for missing values, which fails
                # on an empty array, so build the all-missing output directly.
                raw = np.full(
                    (len(dates), len(sids)),
                    c.missing_value,
                    dtype=c.dtype,
                )
            else:
                raw = col_array[indexer]
                # indexer will be -1 for locations where we don't have a known
                # value.
                raw[indexer < 0] = c.missing_value

            # Delegate the actual array formatting logic to a DataFrameLoader.
            loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
            out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
        return out
Exemplo n.º 6
0
    def _load_events(self, name_map, indexer, columns, dates, sids, mask):
        """
        Build the loaded array for every requested column from event data.

        Parameters
        ----------
        name_map : mapping
            Gives, for each requested column, its key in ``self.events``.
        indexer : integer array
            Position of the event to show at each location, with -1 marking
            locations that have no known value. Presumably shaped
            ``(len(dates), len(sids))`` -- confirm against callers.
        columns : iterable
            Columns to load. ``missing_value`` and ``dtype`` are read from
            each one.
        dates, sids
            Used as the index and columns of the intermediate frame and passed
            on to ``DataFrameLoader.load_adjusted_array``.
        mask
            Passed on unchanged to ``DataFrameLoader.load_adjusted_array``.

        Returns
        -------
        dict
            One entry per column, taken from the result of
            ``DataFrameLoader.load_adjusted_array``.
        """
        # Local import: the method must not assume the module binds `np`.
        import numpy as np

        def to_frame(array):
            return pd.DataFrame(array, index=dates, columns=sids)

        out = {}
        for c in columns:
            # Values of column `c` for every event we hold.
            col_array = self.events[name_map[c]]

            if len(col_array):
                raw = col_array[indexer]
                # indexer will be -1 for locations where we don't have a known
                # value.
                raw[indexer < 0] = c.missing_value
            else:
                # No events exist, so indexing with -1 would fail on the empty
                # array. Return the missing value everywhere instead.
                raw = np.full(
                    (len(dates), len(sids)),
                    c.missing_value,
                    dtype=c.dtype,
                )

            # Delegate the actual array formatting logic to a DataFrameLoader.
            loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
            out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
        return out
Exemplo n.º 7
0
    def test_baseline(self):
        """
        Check that windows traversed from the loaded array match the
        corresponding rows of the sliced baseline frame.
        """
        baseline = DataFrame(
            arange(100).reshape(self.ndates, self.nsids),
            index=self.dates,
            columns=self.sids,
        )
        loader = DataFrameLoader(USEquityPricing.close, baseline)

        # Request the first ten dates and the sids at positions 1 and 2.
        date_window = slice(None, 10, None)
        sid_window = slice(1, 3, None)
        [adj_array] = loader.load_adjusted_array(
            [USEquityPricing.close],
            self.dates[date_window],
            self.sids[sid_window],
            self.mask[date_window, sid_window],
        )

        windows = adj_array.traverse(window_length=3)
        for start, window in enumerate(windows):
            requested = baseline.values[date_window, sid_window]
            assert_array_equal(window, requested[start:start + 3])
Exemplo n.º 8
0
    def test_baseline(self):
        """
        Check that each length-3 window traversed from the single loaded
        array equals the matching rows of the sliced baseline frame.
        """
        baseline = DataFrame(
            arange(100).reshape(self.ndates, self.nsids),
            index=self.dates,
            columns=self.sids,
        )
        loader = DataFrameLoader(USEquityPricing.close, baseline)

        # Request the first ten dates and the sids at positions 1 and 2.
        date_window = slice(None, 10, None)
        sid_window = slice(1, 3, None)
        loaded = loader.load_adjusted_array(
            [USEquityPricing.close],
            self.dates[date_window],
            self.sids[sid_window],
            self.mask[date_window, sid_window],
        )
        # Exactly one array is expected in the result.
        [adj_array] = loaded.values()

        windows = adj_array.traverse(window_length=3)
        for start, window in enumerate(windows):
            requested = baseline.values[date_window, sid_window]
            assert_array_equal(window, requested[start:start + 3])
Exemplo n.º 9
0
    def _load_events(self,
                     name_map,
                     indexer,
                     domain,
                     columns,
                     dates,
                     sids,
                     mask):
        """
        Produce one loaded array per requested column from ``self.events``.

        ``indexer`` must have shape ``(len(dates), len(sids))``. Each entry
        selects the event whose value belongs at that date/sid location;
        negative entries mark locations with no known value, which are filled
        with the column's ``missing_value``. ``domain`` is forwarded to
        ``DataFrameLoader.load_adjusted_array``.

        Returns a dict keyed by column, holding the entry that
        ``DataFrameLoader.load_adjusted_array`` produced for that column.
        """
        assert indexer.shape == (len(dates), len(sids))

        results = {}
        for column in columns:
            # Every event value we hold for this column.
            event_values = self.events[name_map[column]]

            if len(event_values):
                # Place event values at their date/sid locations. The result
                # has the same 2D shape as `indexer`.
                values = event_values[indexer]

                # Locations without a known value are marked by a negative
                # index; overwrite them with the column's missing value.
                values[indexer < 0] = column.missing_value
            else:
                # With no events at all, indexing with -1 would fail, so the
                # all-missing output is built directly.
                values = np.full(
                    (len(dates), len(sids)),
                    column.missing_value,
                    dtype=column.dtype,
                )

            # DataFrameLoader owns the actual array formatting logic.
            frame = pd.DataFrame(values, index=dates, columns=sids)
            loader = DataFrameLoader(column, frame, adjustments=None)
            loaded = loader.load_adjusted_array(
                domain, [column], dates, sids, mask,
            )
            results[column] = loaded[column]
        return results
Exemplo n.º 10
0
    def test_adjustments(self):
        """
        Check adjustment formatting and hand-off for a sliced date/sid window.

        A loader is built with adjustments that should and should not affect
        the requested window. The test asserts that ``format_adjustments``
        returns exactly the expected entries, then that
        ``load_adjusted_array`` calls the patched ``adjusted_array`` once with
        the sliced baseline values, the sliced mask and those adjustments.
        """
        def record(sid, start_date, end_date, apply_date, value, kind):
            # Keys are inserted in a fixed order for every record.
            return {
                'sid': sid,
                'start_date': start_date,
                'end_date': end_date,
                'apply_date': apply_date,
                'value': value,
                'kind': kind,
            }

        dates = self.dates
        baseline = DataFrame(
            arange(100).reshape(self.ndates, self.nsids),
            index=dates,
            columns=self.sids,
        )

        # Use the dates from index 10 on and sids 1-3.
        dates_slice = slice(10, None, None)
        sids_slice = slice(1, 4, None)

        # Adjustments that should actually affect the output.
        relevant_adjustments = [
            record(1, None, dates[15], dates[16], 0.5, MULTIPLY),
            record(2, dates[5], dates[15], dates[16], 1.0, ADD),
            record(2, dates[15], dates[16], dates[17], 1.0, ADD),
            record(3, dates[16], dates[17], dates[18], 99.0, OVERWRITE),
        ]

        # These adjustments shouldn't affect the output.
        irrelevant_adjustments = [
            # Sid Not Requested
            record(0, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Sid Unknown
            record(9999, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Date Not Requested
            record(2, dates[1], dates[2], dates[3], -9999.0, OVERWRITE),
            record(  # Date Before Known Data
                2,
                dates[0] - (2 * trading_day),
                dates[0] - trading_day,
                dates[0] - trading_day,
                -9999.0,
                OVERWRITE,
            ),
            record(  # Date After Known Data
                2,
                dates[-1] + trading_day,
                dates[-1] + (2 * trading_day),
                dates[-1] + (3 * trading_day),
                -9999.0,
                OVERWRITE,
            ),
        ]

        loader = DataFrameLoader(
            USEquityPricing.close,
            baseline,
            adjustments=DataFrame(
                relevant_adjustments + irrelevant_adjustments
            ),
        )

        expected_baseline = baseline.iloc[dates_slice, sids_slice]

        formatted_adjustments = loader.format_adjustments(
            self.dates[dates_slice],
            self.sids[sids_slice],
        )
        expected_formatted_adjustments = {
            6: [
                Float64Multiply(
                    first_row=0, last_row=5,
                    first_col=0, last_col=0,
                    value=0.5,
                ),
                Float64Add(
                    first_row=0, last_row=5,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            7: [
                Float64Add(
                    first_row=5, last_row=6,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            8: [
                Float64Overwrite(
                    first_row=6, last_row=7,
                    first_col=2, last_col=2,
                    value=99.0,
                ),
            ],
        }
        self.assertEqual(formatted_adjustments, expected_formatted_adjustments)

        mask = self.mask[dates_slice, sids_slice]
        with patch('zipline.pipeline.loaders.frame.adjusted_array') as m:
            loader.load_adjusted_array(
                columns=[USEquityPricing.close],
                dates=self.dates[dates_slice],
                assets=self.sids[sids_slice],
                mask=mask,
            )

        self.assertEqual(m.call_count, 1)

        # Only the keyword arguments of the single call are inspected.
        _, call_kwargs = m.call_args
        assert_array_equal(call_kwargs['data'], expected_baseline.values)
        assert_array_equal(call_kwargs['mask'], mask)
        self.assertEqual(
            call_kwargs['adjustments'],
            expected_formatted_adjustments,
        )
Exemplo n.º 11
0
    def test_adjustments(self):
        """
        Check adjustment formatting and hand-off for a sliced date/sid window.

        A loader is built with adjustments that should and should not affect
        the requested window. The test asserts that ``format_adjustments``
        returns exactly the expected entries, then that
        ``load_adjusted_array`` calls the patched ``AdjustedArray`` once with
        the sliced baseline values, the sliced mask and those adjustments.
        """
        def record(sid, start_date, end_date, apply_date, value, kind):
            # Keys are inserted in a fixed order for every record.
            return {
                'sid': sid,
                'start_date': start_date,
                'end_date': end_date,
                'apply_date': apply_date,
                'value': value,
                'kind': kind,
            }

        dates = self.dates
        baseline = DataFrame(
            arange(100).reshape(self.ndates, self.nsids),
            index=dates,
            columns=self.sids,
        )

        # Use the dates from index 10 on and sids 1-3.
        dates_slice = slice(10, None, None)
        sids_slice = slice(1, 4, None)

        # Adjustments that should actually affect the output.
        relevant_adjustments = [
            record(1, None, dates[15], dates[16], 0.5, MULTIPLY),
            record(2, dates[5], dates[15], dates[16], 1.0, ADD),
            record(2, dates[15], dates[16], dates[17], 1.0, ADD),
            record(3, dates[16], dates[17], dates[18], 99.0, OVERWRITE),
        ]

        # These adjustments shouldn't affect the output.
        irrelevant_adjustments = [
            # Sid Not Requested
            record(0, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Sid Unknown
            record(9999, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Date Not Requested
            record(2, dates[1], dates[2], dates[3], -9999.0, OVERWRITE),
            record(  # Date Before Known Data
                2,
                dates[0] - (2 * trading_day),
                dates[0] - trading_day,
                dates[0] - trading_day,
                -9999.0,
                OVERWRITE,
            ),
            record(  # Date After Known Data
                2,
                dates[-1] + trading_day,
                dates[-1] + (2 * trading_day),
                dates[-1] + (3 * trading_day),
                -9999.0,
                OVERWRITE,
            ),
        ]

        loader = DataFrameLoader(
            USEquityPricing.close,
            baseline,
            adjustments=DataFrame(
                relevant_adjustments + irrelevant_adjustments
            ),
        )

        expected_baseline = baseline.iloc[dates_slice, sids_slice]

        formatted_adjustments = loader.format_adjustments(
            self.dates[dates_slice],
            self.sids[sids_slice],
        )
        expected_formatted_adjustments = {
            6: [
                Float64Multiply(
                    first_row=0, last_row=5,
                    first_col=0, last_col=0,
                    value=0.5,
                ),
                Float64Add(
                    first_row=0, last_row=5,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            7: [
                Float64Add(
                    first_row=5, last_row=6,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            8: [
                Float64Overwrite(
                    first_row=6, last_row=7,
                    first_col=2, last_col=2,
                    value=99.0,
                ),
            ],
        }
        self.assertEqual(formatted_adjustments, expected_formatted_adjustments)

        mask = self.mask[dates_slice, sids_slice]
        with patch('zipline.pipeline.loaders.frame.AdjustedArray') as m:
            loader.load_adjusted_array(
                columns=[USEquityPricing.close],
                dates=self.dates[dates_slice],
                assets=self.sids[sids_slice],
                mask=mask,
            )

        self.assertEqual(m.call_count, 1)

        # Only the keyword arguments of the single call are inspected.
        _, call_kwargs = m.call_args
        assert_array_equal(call_kwargs['data'], expected_baseline.values)
        assert_array_equal(call_kwargs['mask'], mask)
        self.assertEqual(
            call_kwargs['adjustments'],
            expected_formatted_adjustments,
        )
Exemplo n.º 12
0
    def test_adjustments(self):
        """
        Check adjustment formatting and hand-off for a sliced date/sid window.

        A loader is built with adjustments that should and should not affect
        the requested window. The test asserts that ``format_adjustments``
        returns exactly the expected entries, then that
        ``load_adjusted_array`` calls the patched ``AdjustedArray`` once with
        the sliced baseline values and those same adjustments.
        """
        def record(sid, start_date, end_date, apply_date, value, kind):
            # Keys are inserted in a fixed order for every record.
            return {
                "sid": sid,
                "start_date": start_date,
                "end_date": end_date,
                "apply_date": apply_date,
                "value": value,
                "kind": kind,
            }

        dates = self.dates
        baseline = pd.DataFrame(
            np.arange(100).reshape(self.ndates, self.nsids),
            index=dates,
            columns=self.sids,
        )

        # Use the dates from index 10 on and sids 1-3.
        dates_slice = slice(10, None, None)
        sids_slice = slice(1, 4, None)

        # Adjustments that should actually affect the output.
        relevant_adjustments = [
            record(1, None, dates[15], dates[16], 0.5, MULTIPLY),
            record(2, dates[5], dates[15], dates[16], 1.0, ADD),
            record(2, dates[15], dates[16], dates[17], 1.0, ADD),
            record(3, dates[16], dates[17], dates[18], 99.0, OVERWRITE),
        ]

        # These adjustments shouldn't affect the output.
        irrelevant_adjustments = [
            # Sid Not Requested
            record(0, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Sid Unknown
            record(9999, dates[16], dates[17], dates[18], -9999.0, OVERWRITE),
            # Date Not Requested
            record(2, dates[1], dates[2], dates[3], -9999.0, OVERWRITE),
            record(  # Date Before Known Data
                2,
                dates[0] - (2 * self.trading_day),
                dates[0] - self.trading_day,
                dates[0] - self.trading_day,
                -9999.0,
                OVERWRITE,
            ),
            record(  # Date After Known Data
                2,
                dates[-1] + self.trading_day,
                dates[-1] + (2 * self.trading_day),
                dates[-1] + (3 * self.trading_day),
                -9999.0,
                OVERWRITE,
            ),
        ]

        loader = DataFrameLoader(
            USEquityPricing.close,
            baseline,
            adjustments=pd.DataFrame(
                relevant_adjustments + irrelevant_adjustments
            ),
        )

        expected_baseline = baseline.iloc[dates_slice, sids_slice]

        formatted_adjustments = loader.format_adjustments(
            self.dates[dates_slice],
            self.sids[sids_slice],
        )
        expected_formatted_adjustments = {
            6: [
                Float64Multiply(
                    first_row=0, last_row=5,
                    first_col=0, last_col=0,
                    value=0.5,
                ),
                Float64Add(
                    first_row=0, last_row=5,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            7: [
                Float64Add(
                    first_row=5, last_row=6,
                    first_col=1, last_col=1,
                    value=1.0,
                ),
            ],
            8: [
                Float64Overwrite(
                    first_row=6, last_row=7,
                    first_col=2, last_col=2,
                    value=99.0,
                ),
            ],
        }
        assert formatted_adjustments == expected_formatted_adjustments

        mask = self.mask[dates_slice, sids_slice]
        with patch("zipline.pipeline.loaders.frame.AdjustedArray") as m:
            loader.load_adjusted_array(
                US_EQUITIES,
                columns=[USEquityPricing.close],
                dates=self.dates[dates_slice],
                sids=self.sids[sids_slice],
                mask=mask,
            )

        assert m.call_count == 1

        # Only the keyword arguments of the single call are inspected.
        _, call_kwargs = m.call_args
        assert_array_equal(call_kwargs["data"], expected_baseline.values)
        assert call_kwargs["adjustments"] == expected_formatted_adjustments