Пример #1
0
    def bcolz_exchange_daily_write_read(self, exchange_name):
        """
        Round-trip a generated daily frame through the bcolz writer/reader.

        A frame is built with ``self.generate_df`` for the range
        2017-10-01 .. today, written under sid ``1`` into ``self.root_dir``,
        read back for ``self.columns`` and compared with the original.

        Parameters
        ----------
        exchange_name: str
            Name handed to ``ExchangeBundle`` and ``self.generate_df``.

        """
        data_frequency = 'daily'
        start = pd.to_datetime('2017-10-01 00:00')
        end = pd.to_datetime('today')

        bundle = ExchangeBundle(exchange_name)
        expected = self.generate_df(exchange_name, data_frequency, start, end)

        first_session, last_session = expected.index[0], expected.index[-1]
        print(first_session, last_session)

        # The writer is bounded by what the frame actually contains rather
        # than by the requested start/end.
        writer = BcolzExchangeBarWriter(
            rootdir=self.root_dir,
            start_session=first_session,
            end_session=last_session,
            data_frequency=data_frequency,
            write_metadata=True,
        )
        writer.write([(1, expected)])

        reader = BcolzExchangeBarReader(
            rootdir=self.root_dir,
            data_frequency=data_frequency,
        )
        raw_arrays = reader.load_raw_arrays(self.columns, start, end, [1])
        calendar_periods = bundle.get_calendar_periods_range(
            start, end, data_frequency
        )
        actual = get_df_from_arrays(raw_arrays, calendar_periods)

        assert_equals(expected.equals(actual), True)
Пример #2
0
    def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
        """
        Load one bcolz chunk and return its content as a DataFrame.

        The chunk is located with ``get_bcolz_chunk``, read over the full
        range reported by its reader for ``self.columns``, and indexed by
        the calendar periods of the exchange bundle.

        Parameters
        ----------
        exchange_name: str
        symbol: str
            Symbol resolved to an asset through the exchange.
        data_frequency: str
            ``'daily'`` triggers the end-date adjustment below; other values
            are passed through unchanged.
        period: str
            Chunk period identifier forwarded to ``get_bcolz_chunk``.

        Returns
        -------
        The value produced by ``get_df_from_arrays`` for the chunk.

        """
        asset = get_exchange(exchange_name).get_asset(symbol)

        chunk_dir = get_bcolz_chunk(
            exchange_name=exchange_name,
            symbol=symbol,
            data_frequency=data_frequency,
            period=period,
        )
        reader = BcolzExchangeBarReader(
            rootdir=chunk_dir,
            data_frequency=data_frequency,
        )

        start = reader.first_trading_day
        end = reader.last_available_dt
        if data_frequency == 'daily':
            # NOTE(review): presumably last_available_dt sits at 23:59 of the
            # last session and this rewinds it to midnight - confirm.
            end -= pd.Timedelta(hours=23, minutes=59)

        print(start, end, data_frequency)

        raw_arrays = reader.load_raw_arrays(
            self.columns, start, end, [asset.sid]
        )

        bundle = ExchangeBundle(exchange_name)
        calendar_periods = bundle.get_calendar_periods_range(
            start, end, data_frequency
        )
        return get_df_from_arrays(raw_arrays, calendar_periods)
Пример #3
0
    def bcolz_exchange_daily_write_read(self, exchange_name):
        """
        Write a generated daily frame to bcolz and verify it reads back equal.

        Parameters
        ----------
        exchange_name: str
            Name handed to ``ExchangeBundle`` and ``self.generate_df``.

        """
        start = pd.to_datetime('2017-10-01 00:00')
        end = pd.to_datetime('today')
        freq = 'daily'

        bundle = ExchangeBundle(exchange_name)
        source_df = self.generate_df(exchange_name, freq, start, end)

        print(source_df.index[0], source_df.index[-1])

        # Write the frame under sid 1, bounded by the frame's own index.
        writer_kwargs = dict(
            rootdir=self.root_dir,
            start_session=source_df.index[0],
            end_session=source_df.index[-1],
            data_frequency=freq,
            write_metadata=True,
        )
        BcolzExchangeBarWriter(**writer_kwargs).write([(1, source_df)])

        # Read the same sid back over the originally requested range.
        reader = BcolzExchangeBarReader(rootdir=self.root_dir,
                                        data_frequency=freq)
        arrays = reader.load_raw_arrays(self.columns, start, end, [1])
        periods = bundle.get_calendar_periods_range(start, end, freq)
        restored_df = get_df_from_arrays(arrays, periods)

        assert_equals(source_df.equals(restored_df), True)
Пример #4
0
    def _bundle_to_csv(self,
                       asset,
                       exchange_name,
                       data_frequency,
                       filename,
                       path=None,
                       start_dt=None,
                       end_dt=None):
        """
        Dump the OHLCV data of one asset from a bundle into a CSV file.

        The file is written to
        ``<tempdir>/catalyst/<exchange_name>/<asset.symbol>/<filename>.csv``
        and the first and last 100 rows are printed.

        Parameters
        ----------
        asset:
            Object exposing ``sid`` and ``symbol``.
        exchange_name: str
        data_frequency: str
            ``'daily'`` triggers the end-date adjustment below.
        filename: str
            CSV file name without the ``.csv`` extension.
        path: str, optional
            Forwarded to ``bundle.get_reader``.
        start_dt: optional
            Defaults to the reader's ``first_trading_day``.
        end_dt: optional
            Defaults to the reader's ``last_available_dt``.

        Returns
        -------
        None
            Nothing is written when the arrays cannot be loaded.

        """
        bundle = ExchangeBundle(exchange_name)
        reader = bundle.get_reader(data_frequency, path=path)

        if start_dt is None:
            start_dt = reader.first_trading_day

        if end_dt is None:
            end_dt = reader.last_available_dt

        if data_frequency == 'daily':
            # NOTE(review): presumably rewinds a 23:59 end stamp to midnight
            # of the last session - confirm against the reader.
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        arrays = None
        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start_dt,
                end_dt=end_dt)
        except Exception as e:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, e))

        if arrays is None:
            # The warning above announces a skip; actually skip instead of
            # handing None to get_df_from_arrays.
            return

        periods = bundle.get_calendar_periods_range(start_dt, end_dt,
                                                    data_frequency)
        df = get_df_from_arrays(arrays, periods)

        folder = os.path.join(tempfile.gettempdir(), 'catalyst', exchange_name,
                              asset.symbol)
        ensure_directory(folder)

        # Use a dedicated name so the ``path`` argument is not overwritten.
        csv_path = os.path.join(folder, filename + '.csv')

        log.info('creating csv file: {}'.format(csv_path))
        print('HEAD\n{}'.format(df.head(100)))
        print('TAIL\n{}'.format(df.tail(100)))
        df.to_csv(csv_path)
Пример #5
0
    def download_from_catalyst(self, asset, data_frequency, period):
        """
        Fetch one bundle chunk for an asset and load it into a DataFrame.

        Parameters
        ----------
        asset:
            Object exposing ``sid`` and ``symbol``.
        data_frequency: str
            ``'daily'`` triggers the end-date adjustment below.
        period: str
            Chunk period identifier forwarded to ``get_bcolz_chunk``.

        Returns
        -------
        tuple or str
            ``(DataFrame, reader)`` when arrays were loaded. When loading
            failed or produced nothing, only ``reader._rootdir`` is
            returned, so callers must handle both shapes.

        Raises
        ------
        TempBundleNotFoundError
            When no reader can be obtained for the downloaded chunk.

        """
        # Download and extract the bundle
        chunk_path = get_bcolz_chunk(
            exchange_name=self.exchange_name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period,
        )

        reader = self.get_reader(data_frequency, path=chunk_path)
        if reader is None:
            # Best-effort removal of the unusable chunk before reporting it.
            try:
                log.warn('the reader is unable to use bundle: {}, '
                         'deleting it.'.format(chunk_path))
                shutil.rmtree(chunk_path)
            except Exception as error:
                log.warn('unable to remove temp bundle: {}'.format(error))

            raise TempBundleNotFoundError(path=chunk_path)

        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            end_dt -= pd.Timedelta(hours=23, minutes=59)

        ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']
        arrays = None
        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=ohlcv_fields,
                start_dt=start_dt,
                end_dt=end_dt,
            )
        except Exception as error:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, error))

        if not arrays:
            return reader._rootdir

        calendar_periods = self.get_calendar_periods_range(
            start_dt, end_dt, data_frequency
        )
        frame = get_df_from_arrays(arrays, calendar_periods)
        return frame, reader
Пример #6
0
    def ingest_ctable(self, asset, data_frequency, period,
                      writer, empty_rows_behavior='strip',
                      duplicates_threshold=100, cleanup=False):
        """
        Merge a ctable bundle chunk into the main bundle for the exchange.

        Parameters
        ----------
        asset: TradingPair
        data_frequency: str
            ``'daily'`` triggers the end-date adjustment below.
        period: str
            Chunk period identifier forwarded to ``get_bcolz_chunk``.
        writer:
            Forwarded unchanged to ``ingest_df``.
        empty_rows_behavior: str
            Ensure that the bundle does not have any missing data.
            Forwarded unchanged to ``ingest_df``.
        duplicates_threshold: int
            Forwarded unchanged to ``ingest_df``.
        cleanup: bool
            Remove the temp bundle directory after ingestion. Not applied
            when the chunk yields no data and ingestion is skipped.

        Returns
        -------
        list[str]
            A list of problems which occurred during ingestion. Empty when
            the chunk could not be loaded or contained no data.

        Raises
        ------
        TempBundleNotFoundError
            When no reader can be obtained for the downloaded chunk.

        """
        problems = []

        # Download and extract the bundle
        path = get_bcolz_chunk(
            exchange_name=self.exchange_name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period
        )

        reader = self.get_reader(data_frequency, path=path)
        if reader is None:
            # Best-effort removal of the unusable chunk before reporting it.
            try:
                log.warn('the reader is unable to use bundle: {}, '
                         'deleting it.'.format(path))
                shutil.rmtree(path)

            except Exception as e:
                log.warn('unable to remove temp bundle: {}'.format(e))

            raise TempBundleNotFoundError(path=path)

        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt

        if data_frequency == 'daily':
            # NOTE(review): presumably rewinds a 23:59 end stamp to midnight
            # of the last session - confirm against the reader.
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        arrays = None
        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start_dt,
                end_dt=end_dt
            )
        except Exception as e:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, e
            ))

        if not arrays:
            # Honour the documented list[str] contract. Returning the
            # rootdir string here would let a caller that extends a list
            # with the result add every character of the path as a
            # "problem".
            return problems

        periods = self.get_calendar_periods_range(
            start_dt, end_dt, data_frequency
        )
        df = get_df_from_arrays(arrays, periods)
        problems += self.ingest_df(
            ohlcv_df=df,
            data_frequency=data_frequency,
            asset=asset,
            writer=writer,
            empty_rows_behavior=empty_rows_behavior,
            duplicates_threshold=duplicates_threshold
        )

        if cleanup:
            log.debug(
                'removing bundle folder following ingestion: {}'.format(
                    reader._rootdir)
            )
            shutil.rmtree(reader._rootdir)

        # Materialise a real list: on Python 3 ``filter`` is a lazy,
        # single-use iterator, not the list[str] promised above.
        return [problem for problem in problems if problem is not None]