def _test_inspect(self):
        """Compare ``AdjustedArray.inspect()`` output against a fixed rendering.

        A 5x3 float array holding 0..14 is wrapped in an ``AdjustedArray``
        carrying one ``Float64Multiply`` registered under key 4, and the text
        returned by ``inspect()`` must equal the literal below exactly.

        NOTE(review): the leading underscore presumably keeps test runners
        from collecting this method -- confirm whether it is disabled on
        purpose (the expected text depends on numpy's array repr format).
        """
        baseline = arange(15, dtype=float).reshape(5, 3)
        adjustments = {4: [Float64Multiply(2, 3, 0, 0, 4.0)]}
        adjusted = AdjustedArray(baseline, NOMASK, adjustments, float('nan'))

        got = adjusted.inspect()

        # The backslash inside the literal joins the two physical lines of the
        # "Adjustments" entry into one logical line before dedenting.
        expected = dedent("""\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """)
        self.assertEqual(expected, got)
    def expected_adjustments(self, start_date, end_date):
        """Build the price and volume adjustments expected for a date range.

        Every row of ``SPLITS``, ``MERGERS`` and ``DIVIDENDS_EXPECTED`` whose
        effective date lies within ``[start_date, end_date]`` contributes a
        ``Float64Multiply`` to the price adjustments.  Rows coming from
        ``SPLITS`` additionally contribute the reciprocal ratio to the volume
        adjustments.

        Parameters
        ----------
        start_date, end_date
            Inclusive bounds of the query; compared directly against the
            UTC ``Timestamp`` built from each row's effective date.

        Returns
        -------
        (price_adjustments, volume_adjustments) : tuple of dict
            Each dict maps the offset of the effective date from
            ``start_date`` (in calendar days of the query) to a list of
            ``Float64Multiply`` objects.
        """
        sessions = self.calendar_days_between(start_date, end_date)
        first_loc = sessions.get_loc(start_date)

        def multiply_through(row, column, value):
            # One-column adjustment covering rows 0 through ``row``.
            return Float64Multiply(
                first_row=0,
                last_row=row,
                first_col=column,
                last_col=column,
                value=value,
            )

        price_adjustments, volume_adjustments = {}, {}

        for table in (SPLITS, MERGERS, DIVIDENDS_EXPECTED):
            # Volume is *inversely* affected by *splits only*.
            affects_volume = table is SPLITS

            for record in table.itertuples(index=False):
                eff_date_secs, ratio, sid = record
                eff_date = Timestamp(eff_date_secs, unit='s', tz='UTC')

                # Ignore adjustments outside the query bounds.
                if not (start_date <= eff_date <= end_date):
                    continue

                # Position of the effective date relative to the query start;
                # used both as the dict key and as the last adjusted row.
                offset = sessions.get_loc(eff_date) - first_loc
                column = sid - 1

                price_adjustments.setdefault(offset, []).append(
                    multiply_through(offset, column, ratio))

                if affects_volume:
                    volume_adjustments.setdefault(offset, []).append(
                        multiply_through(offset, column, 1.0 / ratio))

        return price_adjustments, volume_adjustments
示例#3
0
 def split_adjustment(sid, volume):
     """Return the expected split adjustment for column ``sid``.

     The split is keyed at index ``252 // 2`` and covers rows 0 through that
     index.  The multiplier is derived from ``sid + 2``: it is passed through
     ``identity`` when ``volume`` is truthy and through ``op.truediv(1)``
     otherwise.

     NOTE(review): ``op.truediv(1)`` is presumably a curried ``1 / x`` --
     confirm against the ``op`` import.  The previous docstring described the
     ratio as ``(sid + 1):1`` while the code uses ``sid + 2``; verify which is
     intended.
     """
     idx = 252 // 2
     transform = identity if volume else op.truediv(1)
     adjustment = Float64Multiply(
         first_row=0,
         last_row=idx,
         first_col=sid,
         last_col=sid,
         value=transform(sid + 2),
     )
     return {idx: [adjustment]}
示例#4
0
        def dividend_adjustment(sid, which):
            """Return the expected dividend adjustment for column ``sid``.

            ``which == 'first'`` selects the dividend at index ``252 // 4``;
            any other value selects the one at ``3 * 252 // 4``.  The cash
            amount is ``(sid + 1) / 10``, or ``(sid + 2) / 10`` when ``which``
            is ``'second'``, and the multiplier is
            ``1 - amount / (idx - 1 + sid * 10000 + 2000)``.

            NOTE(review): the denominator presumably reproduces the synthetic
            close on the row before the dividend -- confirm against the
            fixture that generates the bar data.
            """
            idx = 252 // 4 if which == 'first' else 3 * 252 // 4

            # ``which == 'second'`` is a bool and adds 1 for the second payout.
            cash_amount = (sid + 1 + (which == 'second')) / 10
            denominator = idx - 1 + sid * 10000 + 2000
            ratio = float(1 - cash_amount / denominator)

            adjustment = Float64Multiply(
                first_row=0,
                last_row=idx,
                first_col=sid,
                last_col=sid,
                value=ratio,
            )
            return {idx: [adjustment]}
    def test_adjustments(self):
        """Check how ``DataFrameLoader`` formats and forwards adjustments.

        A baseline frame and a frame of adjustment records are handed to the
        loader.  The test then verifies that

        * ``format_adjustments`` returns only the adjustments relevant to the
          requested dates/sids, with rows and columns expressed as positions
          inside the requested slices, and
        * ``load_adjusted_array`` constructs ``AdjustedArray`` exactly once
          with the sliced baseline, the mask and those formatted adjustments.
        """
        data = arange(100).reshape(self.ndates, self.nsids)
        baseline = DataFrame(data, index=self.dates, columns=self.sids)

        # Use the dates from index 10 on and sids 1-3.
        dates_slice = slice(10, None, None)
        sids_slice = slice(1, 4, None)

        # Adjustments that should actually affect the output.
        relevant_adjustments = [
            {
                'sid': 1,
                'start_date': None,
                'end_date': self.dates[15],
                'apply_date': self.dates[16],
                'value': 0.5,
                'kind': MULTIPLY,
            },
            {
                'sid': 2,
                'start_date': self.dates[5],
                'end_date': self.dates[15],
                'apply_date': self.dates[16],
                'value': 1.0,
                'kind': ADD,
            },
            {
                'sid': 2,
                'start_date': self.dates[15],
                'end_date': self.dates[16],
                'apply_date': self.dates[17],
                'value': 1.0,
                'kind': ADD,
            },
            {
                'sid': 3,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': 99.0,
                'kind': OVERWRITE,
            },
        ]

        # These adjustments shouldn't affect the output.
        irrelevant_adjustments = [
            {  # Sid Not Requested
                'sid': 0,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Sid Unknown
                'sid': 9999,
                'start_date': self.dates[16],
                'end_date': self.dates[17],
                'apply_date': self.dates[18],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date Not Requested
                'sid': 2,
                'start_date': self.dates[1],
                'end_date': self.dates[2],
                'apply_date': self.dates[3],
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date Before Known Data
                'sid': 2,
                'start_date': self.dates[0] - (2 * self.trading_day),
                'end_date': self.dates[0] - self.trading_day,
                'apply_date': self.dates[0] - self.trading_day,
                'value': -9999.0,
                'kind': OVERWRITE,
            },
            {  # Date After Known Data
                'sid': 2,
                'start_date': self.dates[-1] + self.trading_day,
                'end_date': self.dates[-1] + (2 * self.trading_day),
                'apply_date': self.dates[-1] + (3 * self.trading_day),
                'value': -9999.0,
                'kind': OVERWRITE,
            },
        ]

        adjustments = DataFrame(relevant_adjustments + irrelevant_adjustments)
        loader = DataFrameLoader(
            USEquityPricing.close,
            baseline,
            adjustments=adjustments,
        )

        expected_baseline = baseline.iloc[dates_slice, sids_slice]

        formatted_adjustments = loader.format_adjustments(
            self.dates[dates_slice],
            self.sids[sids_slice],
        )
        # Keys and row/column bounds below are positions within the requested
        # slices: dates_slice starts at index 10, so apply_date dates[16]
        # becomes key 6 and end_date dates[15] becomes last_row 5; sids_slice
        # starts at sid 1, so sid 1 becomes column 0.  Start dates that are
        # missing (None) or earlier than the slice map to first_row 0.
        expected_formatted_adjustments = {
            6: [
                Float64Multiply(
                    first_row=0,
                    last_row=5,
                    first_col=0,
                    last_col=0,
                    value=0.5,
                ),
                Float64Add(
                    first_row=0,
                    last_row=5,
                    first_col=1,
                    last_col=1,
                    value=1.0,
                ),
            ],
            7: [
                Float64Add(
                    first_row=5,
                    last_row=6,
                    first_col=1,
                    last_col=1,
                    value=1.0,
                ),
            ],
            8: [
                Float64Overwrite(
                    first_row=6,
                    last_row=7,
                    first_col=2,
                    last_col=2,
                    value=99.0,
                )
            ],
        }
        self.assertEqual(formatted_adjustments, expected_formatted_adjustments)

        mask = self.mask[dates_slice, sids_slice]
        # Replace AdjustedArray in the loader module with a mock so the
        # arguments the loader passes to it can be inspected afterwards.
        with patch('catalyst.pipeline.loaders.frame.AdjustedArray') as m:
            loader.load_adjusted_array(
                columns=[USEquityPricing.close],
                dates=self.dates[dates_slice],
                assets=self.sids[sids_slice],
                mask=mask,
            )

        self.assertEqual(m.call_count, 1)

        # The loader is expected to pass everything by keyword.
        args, kwargs = m.call_args
        assert_array_equal(kwargs['data'], expected_baseline.values)
        assert_array_equal(kwargs['mask'], mask)
        self.assertEqual(kwargs['adjustments'], expected_formatted_adjustments)
示例#6
0
    def _get_adjustments_in_range(self, asset, dts, field):
        """
        Get the Float64Multiply objects to pass to an AdjustedArrayWindow.

        For the use of AdjustedArrayWindow in the loader, which looks back
        from current simulation time back to a window of data the dictionary is
        structured with:
        - the key into the dictionary for adjustments is the location of the
          day from which the window is being viewed.
        - the start of all multiply objects is always 0 (in each window all
          adjustments are overlapping)
        - the end of the multiply object is the location before the calendar
          location of the adjustment action, making all days before the event
          adjusted.

        Mergers and dividends are skipped for the ``'volume'`` field; splits
        always apply, with the reciprocal of the stored ratio for
        ``'volume'``.

        Parameters
        ----------
        asset : Asset
            The asset for which to get adjustments.
        dts : iterable of datetime64-like
            The dts for which adjustment data is needed.
        field : str
            OHLCV field for which to get the adjustments.

        Returns
        -------
        out : dict[loc -> list[Float64Multiply]]
            The adjustments as a dict of loc -> list of Float64Multiply.
            Within each list, mergers come first, then dividends, then
            splits.
        """
        sid = int(asset)
        start = normalize_date(dts[0])
        end = normalize_date(dts[-1])

        is_volume = field == 'volume'
        # Volume is only affected by splits; prices by all three kinds.  The
        # order here fixes the order in which the reader is queried and in
        # which adjustments sharing a location are appended.
        if is_volume:
            kinds = ('splits',)
        else:
            kinds = ('mergers', 'dividends', 'splits')

        adjs = {}
        for kind in kinds:
            rows = self._adjustments_reader.get_adjustments_for_sid(kind, sid)
            for row in rows:
                dt = row[0]
                # Only events strictly after the first dt and up to and
                # including the last dt fall inside the window.
                if not (start < dt <= end):
                    continue
                # ``is_volume`` implies ``kind == 'splits'``, so only split
                # ratios are ever inverted.
                ratio = 1.0 / row[1] if is_volume else row[1]
                end_loc = dts.searchsorted(dt)
                adjs.setdefault(end_loc, []).append(
                    Float64Multiply(0, end_loc - 1, 0, 0, ratio))
        return adjs
示例#7
0
    def test_ingest(self):
        """Round-trip a synthetic bundle through ingest and load.

        Registers a bundle whose ingest function writes generated equities,
        minute bars, daily bars and two splits, ingests it, loads it back and
        checks that the asset finder, both bar readers and the adjustment
        reader return what was written.
        """
        calendar = get_calendar('NYSE')
        sessions = calendar.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = calendar.minutes_for_sessions_in_range(
            self.START_DATE,
            self.END_DATE,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids,
            self.START_DATE,
            self.END_DATE,
        )

        daily_bar_data = make_bar_data(equities, sessions)
        minute_bar_data = make_bar_data(equities, minutes)
        first_split_ratio = 0.5
        second_split_ratio = 0.1
        # One split for sid 0 and one for sid 1; sid 2 has no adjustments.
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds('2014-01-08'),
                'ratio': first_split_ratio,
                'sid': 0,
            },
            {
                'effective_date': str_to_seconds('2014-01-09'),
                'ratio': second_split_ratio,
                'sid': 1,
            },
        ])

        @self.register(
            'bundle',
            calendar_name='NYSE',
            start_session=self.START_DATE,
            end_session=self.END_DATE,
        )
        def bundle_ingest(environ, asset_db_writer, minute_bar_writer,
                          daily_bar_writer, adjustment_writer, calendar,
                          start_session, end_session, cache, show_progress,
                          output_dir):
            # Ingest callback: writes the fixtures built above through the
            # supplied writers and sanity-checks the objects it was handed.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bar_data)
            daily_bar_writer.write(daily_bar_data)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        # 'volume' is deliberately last: the adjustment checks below rely on
        # it being the final column.
        columns = 'open', 'high', 'low', 'close', 'volume'

        actual = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )

        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(minutes, equities, colname),
                msg=colname,
            )

        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns,
            self.START_DATE,
            self.END_DATE,
            sids,
        )
        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(sessions, equities, colname),
                msg=colname,
            )
        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                {
                    2: [
                        Float64Multiply(
                            first_row=0,
                            last_row=2,
                            first_col=0,
                            last_col=0,
                            value=first_split_ratio,
                        )
                    ],
                    3: [
                        Float64Multiply(
                            first_row=0,
                            last_row=3,
                            first_col=1,
                            last_col=1,
                            value=second_split_ratio,
                        )
                    ],
                },
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            {
                2: [
                    Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=1 / first_split_ratio,
                    )
                ],
                3: [
                    Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=1 / second_split_ratio,
                    )
                ],
            },
            msg='volume',
        )
示例#8
0
    def _expected_data(self, asset_finder):
        """Build the pricing arrays and adjustments expected from the samples.

        Reads the gzipped sample CSV of every symbol in ``self.symbols``,
        combines them into one frame keyed by date with (field, sid) columns,
        and derives from it the per-column value arrays plus the hand-listed
        dividend and split adjustments.

        Parameters
        ----------
        asset_finder
            Object whose ``lookup_symbol(symbol, self.asset_start)`` result
            exposes the ``sid`` assigned to each symbol.

        Returns
        -------
        (pricing, adjustments) : tuple of list
            ``pricing`` holds one 2-D array per entry of ``self.columns``;
            ``adjustments`` holds one dict per column (the same dict for all
            but the last column, then a volume-specific dict).
        """
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                self.asset_start,
            ).sid
            for symbol in self.symbols
        }

        def per_symbol(symbol):
            # Load one symbol's sample file, normalise the column names and
            # tag every row with the symbol's sid.
            df = pd.read_csv(
                test_resource_path('quandl_samples', symbol + '.csv.gz'),
                parse_dates=['Date'],
                index_col='Date',
                usecols=[
                    'Open',
                    'High',
                    'Low',
                    'Close',
                    'Volume',
                    'Date',
                    'Ex-Dividend',
                    'Split Ratio',
                ],
                na_values=['NA'],
            ).rename(
                columns={
                    'Open': 'open',
                    'High': 'high',
                    'Low': 'low',
                    'Close': 'close',
                    'Volume': 'volume',
                    'Date': 'date',
                    'Ex-Dividend': 'ex_dividend',
                    'Split Ratio': 'split_ratio',
                })
            df['sid'] = sids[symbol]
            return df

        # Stack all symbols, then pivot sid into the columns so the frame is
        # indexed by date with (field, sid) column pairs.
        all_ = pd.concat(map(per_symbol, self.symbols)).set_index(
            'sid',
            append=True,
        ).unstack()

        # fancy list comprehension with statements
        # NOTE(review): presumably ``apply`` calls the generator function
        # immediately and ``list`` materialises it, leaving ``pricing`` bound
        # to a list of arrays -- confirm against the ``apply`` import.
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == 'volume':
                    # Missing volume is represented as 0 rather than NaN.
                    vs = np.nan_to_num(vs)
                yield vs

        # the first index our written data will appear in the files on disk
        start_idx = (
            self.calendar.all_sessions.get_loc(self.asset_start, 'ffill') + 1)

        # convert an index into the raw dataframe into an index into the
        # final data
        # NOTE(review): relies on ``op.add`` being curried so that
        # ``i(n) == start_idx + n`` -- confirm against the ``op`` import.
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            # Ratio is 1 - dividend / close on the previous row of ``all_``.
            # NOTE(review): ``DataFrame.ix`` is deprecated and was removed in
            # pandas 1.0; this only runs on older pandas versions.
            sid = sids[symbol]
            return (1 -
                    all_.ix[idx,
                            ('ex_dividend', sid)] / all_.ix[idx - 1,
                                                            ('close', sid)])

        # NOTE: list repetition below reuses the *same* dict object for every
        # non-volume column; mutating one entry would affect all of them.
        adjustments = [
            # ohlc
            {
                # dividends
                i(24): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(24),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=expected_dividend_adjustment(24, 'AAPL'),
                    )
                ],
                i(87): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(87),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=expected_dividend_adjustment(87, 'AAPL'),
                    )
                ],
                i(150): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(150),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=expected_dividend_adjustment(150, 'AAPL'),
                    )
                ],
                i(214): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(214),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=expected_dividend_adjustment(214, 'AAPL'),
                    )
                ],
                i(31): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(31),
                        first_col=sids['MSFT'],
                        last_col=sids['MSFT'],
                        value=expected_dividend_adjustment(31, 'MSFT'),
                    )
                ],
                i(90): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(90),
                        first_col=sids['MSFT'],
                        last_col=sids['MSFT'],
                        value=expected_dividend_adjustment(90, 'MSFT'),
                    )
                ],
                i(222): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(222),
                        first_col=sids['MSFT'],
                        last_col=sids['MSFT'],
                        value=expected_dividend_adjustment(222, 'MSFT'),
                    )
                ],

                # splits
                i(108): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(108),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=1.0 / 7.0,
                    )
                ],
            },
        ] * (len(self.columns) - 1) + [
            # volume
            # Only the split applies to volume, with the inverse ratio.
            {
                i(108): [
                    Float64Multiply(
                        first_row=0,
                        last_row=i(108),
                        first_col=sids['AAPL'],
                        last_col=sids['AAPL'],
                        value=7.0,
                    )
                ],
            }
        ]
        return pricing, adjustments