Example #1
    def test_securities_reindexed_like_intraday(self):
        """
        Tests get_securities_reindexed_like with Date and Time in the index.
        """
        closes = pd.DataFrame(np.random.rand(4, 2),
                              columns=[12345, 23456],
                              index=pd.MultiIndex.from_product(
                                  [
                                      pd.date_range(start="2018-05-01",
                                                    periods=2,
                                                    freq="D",
                                                    tz="America/New_York",
                                                    name="Date"),
                                      ["09:30:00", "09:31:00"],
                                  ],
                                  names=["Date", "Time"]))

        def mock_download_master_file(f, *args, **kwargs):
            securities = pd.DataFrame(
                dict(ConId=[12345, 23456], Symbol=["ABC", "DEF"]))
            securities.to_csv(f, index=False)
            f.seek(0)

        with patch('quantrocket.master.download_master_file',
                   new=mock_download_master_file):

            securities = get_securities_reindexed_like(closes,
                                                       domain="main",
                                                       fields="Symbol")

            securities = securities.reset_index()
            securities.loc[:, "Date"] = securities.Date.dt.strftime(
                "%Y-%m-%dT%H:%M:%S%z")

            self.assertListEqual(securities.to_dict(orient="records"),
                                 [{
                                     12345: 'ABC',
                                     23456: 'DEF',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     'Field': 'Symbol',
                                     'Time': '09:30:00'
                                 }, {
                                     12345: 'ABC',
                                     23456: 'DEF',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     'Field': 'Symbol',
                                     'Time': '09:31:00'
                                 }, {
                                     12345: 'ABC',
                                     23456: 'DEF',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     'Field': 'Symbol',
                                     'Time': '09:30:00'
                                 }, {
                                     12345: 'ABC',
                                     23456: 'DEF',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     'Field': 'Symbol',
                                     'Time': '09:31:00'
                                 }])
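A minimal usage sketch based on the test above (assumptions: `closes` is an intraday prices DataFrame with a (Date, Time) MultiIndex and conids as columns, as constructed in the test; the import path mirrors the module patched there):

    from quantrocket.master import get_securities_reindexed_like

    securities = get_securities_reindexed_like(closes, domain="main", fields="Symbol")

    # the result is indexed by (Field, Date, Time) with the same columns as closes,
    # so selecting a field yields a frame shaped like closes
    symbols = securities.loc["Symbol"]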
Example #2
    def load_adjusted_array(self, domain, columns, dates, sids, mask):

        fields = [c.name for c in columns]
        real_sids = [
            self.zipline_sids_to_real_sids[zipline_sid] for zipline_sid in sids
        ]
        reindex_like = pd.DataFrame(None, index=dates, columns=real_sids)
        reindex_like.index.name = "Date"

        securities = get_securities_reindexed_like(reindex_like, fields=fields)

        out = {}

        for column in columns:
            missing_value = MISSING_VALUES_BY_DTYPE[column.dtype]
            if column.dtype == datetime64ns_dtype:
                # pd.to_datetime handles NaNs in pandas 0.22 while .astype(column.dtype) doesn't
                values = securities.loc[column.name].apply(
                    pd.to_datetime).fillna(missing_value).values
            else:
                values = securities.loc[column.name].astype(
                    column.dtype).fillna(missing_value).values

            out[column] = AdjustedArray(values,
                                        adjustments={},
                                        missing_value=missing_value)

        return out
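A standalone sketch of the "shell frame" trick used in the loader above: when no prices DataFrame is at hand, an empty frame with the desired Date index and sid columns is enough to drive get_securities_reindexed_like (the dates and conids below are illustrative):

    import pandas as pd
    from quantrocket.master import get_securities_reindexed_like

    dates = pd.date_range(start="2018-05-01", periods=3, freq="D", name="Date")
    reindex_like = pd.DataFrame(None, index=dates, columns=[12345, 23456])

    securities = get_securities_reindexed_like(reindex_like, fields=["Symbol"])
    symbols = securities.loc["Symbol"]  # Date x conid frame of symbols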
Example #3
    def add_securities_master_features(self, prices, features):
        """
        Features from the securities master:

        - is it an ADR?
        - sector
        """
        closes = prices.loc["Close"]

        securities = get_securities_reindexed_like(
            closes, fields=["sharadar_Category", "sharadar_Sector"])

        # Is it an ADR?
        categories = securities.loc["sharadar_Category"]
        unique_categories = categories.iloc[0].unique()
        # this dataset includes several ADR classifications, all of which start with "ADR "
        features["are_adrs"] = categories.isin([
            cat for cat in unique_categories if cat.startswith("ADR ")
        ]).astype(int)

        # Which sector? (sectors must be one-hot encoded - see usage guide for more)
        sectors = securities.loc["sharadar_Sector"]
        for sector in sectors.stack().unique():
            features["sector_{}".format(sector)] = (
                sectors == sector).astype(int)
Example #4
    def test_pass_conids_and_domain_based_on_reindex_like(
            self, mock_download_master_file):
        """
        Tests that conids and domain are correctly passed to the download_master_file
        function based on reindex_like.
        """
        closes = pd.DataFrame(np.random.rand(3, 2),
                              columns=[12345, 23456],
                              index=pd.date_range(start="2018-05-01",
                                                  periods=3,
                                                  freq="D",
                                                  name="Date"))

        def _mock_download_master_file(f, *args, **kwargs):
            securities = pd.DataFrame(
                dict(ConId=[12345, 23456],
                     Symbol=["ABC", "DEF"],
                     Etf=[1, 0],
                     Delisted=[0, 1],
                     Currency=["USD", "USD"]))
            securities.to_csv(f, index=False)
            f.seek(0)

        mock_download_master_file.side_effect = _mock_download_master_file

        get_securities_reindexed_like(
            closes,
            domain="main",
            fields=["Symbol", "Etf", "Delisted", "Currency"])

        download_master_file_call = mock_download_master_file.mock_calls[0]
        _, args, kwargs = download_master_file_call
        self.assertListEqual(kwargs["conids"], [12345, 23456])
        self.assertEqual(kwargs["domain"], "main")
        self.assertListEqual(kwargs["fields"],
                             ["Symbol", "Etf", "Delisted", "Currency"])
Example #5
    def prices_to_signals(self, prices):
        closes = prices.loc["Close"]

        # Compute dollar volume mask
        dollar_volumes = prices.loc["Volume"] * closes
        avg_dollar_volumes = dollar_volumes.rolling(window=22).mean()
        are_eligible = avg_dollar_volumes >= self.MIN_DOLLAR_VOLUME

        sectypes = get_securities_reindexed_like(
            closes, "edi_SecTypeCode").loc["edi_SecTypeCode"]
        are_eligible &= sectypes == "EQS"

        if self.LIMIT_TO_CURRENCY:
            currencies = get_securities_reindexed_like(
                closes, "Currency").loc["Currency"]
            are_eligible &= currencies == self.LIMIT_TO_CURRENCY

        # Compute big losers mask
        prior_returns = (closes - closes.shift()) / closes.shift()
        big_losers = prior_returns <= -0.10

        short_signals = big_losers & are_eligible

        return -short_signals.astype(int)
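A hedged companion sketch (not part of the original excerpt): in a Moonshot-style strategy, a signals_to_target_weights hook typically follows prices_to_signals. The version below equal-weights the active short signals using plain pandas, without assuming any framework helpers:

    def signals_to_target_weights(self, signals, prices):
        # divide each day's -1/0 signals by the number of active signals,
        # so shorts are equally weighted; days with no signals become 0
        weights = signals.div(signals.abs().sum(axis=1), axis=0).fillna(0)
        return weights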
Example #6
    def load_adjusted_array(self, domain, columns, dates, sids, mask):

        fields = [c.name for c in columns]
        real_sids = [
            self.zipline_sids_to_real_sids[zipline_sid] for zipline_sid in sids
        ]
        reindex_like = pd.DataFrame(None, index=dates, columns=real_sids)
        reindex_like.index.name = "Date"

        securities = get_securities_reindexed_like(reindex_like, fields=fields)

        out = {}

        for column in columns:
            missing_value = MISSING_VALUES_BY_DTYPE[column.dtype]
            values = securities.loc[column.name].astype(
                column.dtype).fillna(missing_value).values
            out[column] = AdjustedArray(values,
                                        adjustments={},
                                        missing_value=missing_value)

        return out
Example #7
    def test_securities_reindexed_like(self):
        """
        Tests get_securities_reindexed_like.
        """
        closes = pd.DataFrame(np.random.rand(3, 2),
                              columns=[12345, 23456],
                              index=pd.date_range(start="2018-05-01",
                                                  periods=3,
                                                  freq="D",
                                                  tz="America/New_York",
                                                  name="Date"))

        def mock_download_master_file(f, *args, **kwargs):
            securities = pd.DataFrame(
                dict(ConId=[12345, 23456],
                     Symbol=["ABC", "DEF"],
                     Etf=[1, 0],
                     Delisted=[0, 1],
                     Currency=["USD", "EUR"]))
            securities.to_csv(f, index=False)
            f.seek(0)

        with patch('quantrocket.master.download_master_file',
                   new=mock_download_master_file):

            securities = get_securities_reindexed_like(
                closes,
                domain="main",
                fields=["Symbol", "Etf", "Delisted", "Currency"])

            securities = securities.reset_index()
            securities.loc[:, "Date"] = securities.Date.dt.strftime(
                "%Y-%m-%dT%H:%M:%S%z")
            self.assertListEqual(securities.to_dict(orient="records"),
                                 [{
                                     'Field': 'Currency',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     12345: 'USD',
                                     23456: 'EUR'
                                 }, {
                                     'Field': 'Currency',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     12345: 'USD',
                                     23456: 'EUR'
                                 }, {
                                     'Field': 'Currency',
                                     'Date': '2018-05-03T00:00:00-0400',
                                     12345: 'USD',
                                     23456: 'EUR'
                                 }, {
                                     'Field': 'Delisted',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     12345: False,
                                     23456: True
                                 }, {
                                     'Field': 'Delisted',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     12345: False,
                                     23456: True
                                 }, {
                                     'Field': 'Delisted',
                                     'Date': '2018-05-03T00:00:00-0400',
                                     12345: False,
                                     23456: True
                                 }, {
                                     'Field': 'Etf',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     12345: True,
                                     23456: False
                                 }, {
                                     'Field': 'Etf',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     12345: True,
                                     23456: False
                                 }, {
                                     'Field': 'Etf',
                                     'Date': '2018-05-03T00:00:00-0400',
                                     12345: True,
                                     23456: False
                                 }, {
                                     'Field': 'Symbol',
                                     'Date': '2018-05-01T00:00:00-0400',
                                     12345: 'ABC',
                                     23456: 'DEF'
                                 }, {
                                     'Field': 'Symbol',
                                     'Date': '2018-05-02T00:00:00-0400',
                                     12345: 'ABC',
                                     23456: 'DEF'
                                 }, {
                                     'Field': 'Symbol',
                                     'Date': '2018-05-03T00:00:00-0400',
                                     12345: 'ABC',
                                     23456: 'DEF'
                                 }])
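A follow-up sketch (illustrative, not from the original test): boolean fields such as Etf and Delisted come back as boolean DataFrames aligned with the prices frame, so they can be used directly as masks:

    securities = get_securities_reindexed_like(
        closes, domain="main", fields=["Delisted", "Etf"])
    delisted = securities.loc["Delisted"]
    are_etfs = securities.loc["Etf"]

    # e.g. drop prices for delisted securities and exclude ETFs
    eligible_closes = closes.where(~delisted & ~are_etfs)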