Example #1
def download_with_progress(url, chunk_size, **progress_kwargs):
    """
    Download streaming data from a URL, printing progress information to the
    terminal.

    Parameters
    ----------
    url : str
        A URL that can be understood by ``requests.get``.
    chunk_size : int
        Number of bytes to read at a time from requests.
    **progress_kwargs
        Forwarded to click.progressbar.

    Returns
    -------
    data : BytesIO
        A BytesIO containing the downloaded data.
    """
    resp = requests.get(url, stream=True)
    resp.raise_for_status()

    total_size = int(resp.headers['content-length'])
    data = BytesIO()

    progress_kwargs['length'] = total_size
    with maybe_show_progress(None, True, **progress_kwargs) as pbar:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            data.write(chunk)
            pbar.update(len(chunk))

    data.seek(0)
    return data
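
A note on the helper: every example in this listing funnels through ``maybe_show_progress``, whose definition is never shown. Below is a minimal sketch of how it can be implemented, assuming ``click.progressbar`` semantics (modeled on ``zipline.utils.cli``; treat it as illustrative, not the verbatim source):

from contextlib import contextmanager

import click


@contextmanager
def maybe_show_progress(it, show_progress, **kwargs):
    """Optionally wrap ``it`` in a click progress bar."""
    if show_progress:
        # ``it`` may be None (as in download_with_progress above); click then
        # yields a bare bar that the caller advances manually via update().
        with click.progressbar(it, **kwargs) as pbar:
            yield pbar
    else:
        # no progress requested: hand back the iterable unchanged
        yield it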
Example #2
    def write(self,
              data,
              length=None,
              show_progress=False,
              invalid_data_behavior='warn'):
        """Write a stream of minute data.

        Parameters
        ----------
        data : iterable[(int, pd.DataFrame)]
            The data to write. Each element should be a tuple of sid, data
            where data has the following format:
              columns : ('open', 'high', 'low', 'close', 'volume')
                  open : float64
                  high : float64
                  low  : float64
                  close : float64
                  volume : float64|int64
              index : DatetimeIndex of market minutes.
            A given sid may appear more than once in ``data``; however,
            the dates must be strictly increasing.
        length : int, optional
            The number of elements in ``data``, if known ahead of time; used
            to size the progress bar.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
            What to do when invalid data is encountered.
        """
        with maybe_show_progress(
                data,
                length=length,
                show_percent=False,
                show_progress=show_progress,
                item_show_func=item_show_count(length),
                label='Compiling five-minute data',
        ) as it:
            write_sid = self.write_sid
            for e in it:
                write_sid(*e, invalid_data_behavior=invalid_data_behavior)
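
``item_show_count`` is likewise not shown in this listing. A plausible reconstruction, assuming it builds the ``item_show_func`` callback that click invokes while rendering (hypothetical; the real helper may differ in detail):

from itertools import count


def item_show_count(total=None):
    """Build an ``item_show_func`` that renders a running item count."""
    counter = count(1)

    def item_show(item):
        if item is None:
            # click may call the show func with None while finishing a render
            return None
        if total is None:
            return str(next(counter))
        return '{0}/{1}'.format(next(counter), total)

    return item_show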
Example #3
    def write(self, data, show_progress=False, invalid_data_behavior='warn'):
        """Write a stream of minute data.

        Parameters
        ----------
        data : iterable[(int, pd.DataFrame)]
            The data to write. Each element should be a tuple of sid, data
            where data has the following format:
              columns : ('open', 'high', 'low', 'close', 'volume')
                  open : float64
                  high : float64
                  low  : float64
                  close : float64
                  volume : float64|int64
              index : DatetimeIndex of market minutes.
            A given sid may appear more than once in ``data``; however,
            the dates must be strictly increasing.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        """
        ctx = maybe_show_progress(
            data,
            show_progress=show_progress,
            item_show_func=lambda e: e if e is None else str(e[0]),
            label="Merging minute equity files:",
        )
        write_sid = self.write_sid
        with ctx as it:
            for e in it:
                write_sid(*e, invalid_data_behavior=invalid_data_behavior)
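
Given the frame format documented above, a hypothetical caller could feed ``write`` like this (``writer``, the sid, and the timestamps are invented for illustration):

import pandas as pd

minutes = pd.date_range('2016-01-04 14:31', periods=3, freq='T', tz='UTC')
frame = pd.DataFrame(
    {
        'open': [10.0, 10.1, 10.2],
        'high': [10.1, 10.2, 10.3],
        'low': [9.9, 10.0, 10.1],
        'close': [10.1, 10.2, 10.3],
        'volume': [1000, 1500, 900],
    },
    index=minutes,
)

# ``writer`` is an instance of the class above, constructed elsewhere;
# each element of ``data`` is a (sid, frame) tuple.
writer.write([(1, frame)], show_progress=True)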
Example #4
    def ingest_assets(self,
                      assets,
                      start_dt,
                      end_dt,
                      data_frequency,
                      show_progress=False):
        """
        Determine if data is missing from the bundle and attempt to ingest it.

        :param assets:
        :param start_dt:
        :param end_dt:
        :return:
        """
        writer = self.get_writer(start_dt, end_dt, data_frequency)
        chunks = self.prepare_chunks(assets=assets,
                                     data_frequency=data_frequency,
                                     start_dt=start_dt,
                                     end_dt=end_dt)
        with maybe_show_progress(
                chunks,
                show_progress,
                label='Fetching {exchange} {frequency} candles: '.format(
                    exchange=self.exchange.name,
                    frequency=data_frequency)) as it:
            for chunk in it:
                self.ingest_ctable(asset=chunk['asset'],
                                   data_frequency=data_frequency,
                                   period=chunk['period'],
                                   start_dt=chunk['period_start'],
                                   end_dt=chunk['period_end'],
                                   writer=writer,
                                   empty_rows_behavior='strip')
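
``prepare_chunks`` is not shown; judging only from how the loop consumes each element, a chunk is presumably a dict shaped roughly like this (field values invented):

import pandas as pd

chunk = {
    'asset': None,  # a TradingPair instance in practice
    'period': '2017-06',  # label for the period being ingested
    'period_start': pd.Timestamp('2017-06-01', tz='utc'),
    'period_end': pd.Timestamp('2017-06-30', tz='utc'),
}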
Example #5
    def _post_process_metadata(self, metadata, cache, show_progress=False):
        # Create empty data frame using target metadata column names and dtypes
        final_metadata = pd.DataFrame(
            columns=self.md_column_names,
            index=metadata.index,
        )

        # Iterate over the available symbols, loading the asset's raw symbol
        # data from the cache.  The final metadata is computed and recorded in
        # the appropriate row depending on the asset's id.
        with maybe_show_progress(
                metadata.symbol.iteritems(),
                show_progress,
                label='Post-processing symbol metadata',
                item_show_func=item_show_count(len(metadata)),
                length=len(metadata),
                show_percent=False,
        ) as symbols_map:
            for asset_id, symbol in symbols_map:
                # Attempt to load the data from the cache; it should have an
                # entry for each symbol at this point of the execution. If one
                # does not exist, we fail.
                key = '{sym}.daily.frame'.format(sym=symbol)
                try:
                    raw_data = cache[key]
                except KeyError:
                    raise ValueError(
                        'Unable to find cached data for symbol: {0}'.format(
                            symbol))

                # Perform any required post-processing of the metadata.
                final_symbol_metadata = self.post_process_symbol_metadata(
                    asset_id,
                    metadata.iloc[asset_id],
                    raw_data,
                )

                # Record symbol's final metadata.
                final_metadata.iloc[asset_id] = final_symbol_metadata

            # Register all assets with the bundle's default exchange.
            final_metadata['exchange'] = self.exchange

        return final_metadata
Example #6
    def write(self,
              data,
              assets=None,
              show_progress=False,
              invalid_data_behavior='warn'):
        """
        Parameters
        ----------
        data : iterable[tuple[int, pandas.DataFrame or bcolz.ctable]]
            The data chunks to write. Each chunk should be a tuple of sid
            and the data for that asset.
        assets : set[int], optional
            The assets that should be in ``data``. If this is provided
            we will check ``data`` against the assets and provide better
            progress information.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
            What to do when data is encountered that is outside the range of
            a uint64.

        Returns
        -------
        table : bcolz.ctable
            The newly-written table.
        """
        total = None if assets is None else len(assets)
        ctx = maybe_show_progress(
            (
                (sid, self.to_ctable(df, invalid_data_behavior))
                for sid, df in data
            ),
            show_progress=show_progress,
            label=self.progress_bar_message,
            item_show_func=item_show_count(total),
            length=total,
            show_percent=False,
        )
        with ctx as it:
            return self._write_internal(it, assets)
Example #7
        def _pricing_iter():
            sid = 0
            with maybe_show_progress(
                    symbols,
                    show_progress,
                    label='Downloading Yahoo pricing data: ') as it, \
                    requests.Session() as session:
                for symbol in it:
                    path = _cachpath(symbol, 'ohlcv')
                    try:
                        df = cache[path]
                    except KeyError:
                        df = cache[path] = DataReader(
                            symbol,
                            'yahoo',
                            start,
                            end,
                            session=session,
                        ).sort_index()

                    # the start date is the date of the first trade and
                    # the end date is the date of the last trade
                    start_date = df.index[0]
                    end_date = df.index[-1]
                    # The auto_close date is the day after the last trade.
                    ac_date = end_date + pd.Timedelta(days=1)
                    metadata.iloc[sid] = start_date, end_date, ac_date, symbol

                    df.rename(
                        columns={
                            'Open': 'open',
                            'High': 'high',
                            'Low': 'low',
                            'Close': 'close',
                            'Volume': 'volume',
                        },
                        inplace=True,
                    )
                    yield sid, df
                    sid += 1
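
The ``try/except KeyError`` around ``cache[path]`` above is a memoization pattern: ``cache`` acts as a mutable mapping persisted between runs, so an interrupted ingestion can resume without re-downloading. A stripped-down illustration with a plain dict and a stub download standing in for the real pieces:

cache = {}  # stand-in for the persistent dataframe cache


def expensive_download(symbol):
    print('downloading', symbol)
    return {'symbol': symbol}  # placeholder for the real DataFrame


def fetch_ohlcv(symbol):
    key = '{0}.ohlcv'.format(symbol)
    try:
        return cache[key]
    except KeyError:
        # chained assignment stores the result and binds it in one step
        df = cache[key] = expensive_download(symbol)
        return df


fetch_ohlcv('AAPL')  # downloads
fetch_ohlcv('AAPL')  # served from the cache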
Example #8
    def _fetch_metadata_frame(self,
                              api_key,
                              cache,
                              retries=DEFAULT_RETRIES,
                              environ=None,
                              show_progress=False):

        # Set up the raw metadata iterator to fetch pages as necessary.
        raw_iter = self._fetch_metadata_iter(api_key, cache, retries, environ)

        # Concatenate all frames in the iterator into a single metadata frame.
        with maybe_show_progress(
                raw_iter,
                show_progress,
                label='Fetching symbol metadata',
                item_show_func=item_show_count(),
                length=3,
                show_percent=False,
        ) as blocks:
            metadata = pd.concat(blocks, ignore_index=True)

        return metadata
Example #9
    def ingest_assets(self,
                      assets,
                      data_frequency,
                      start_dt=None,
                      end_dt=None,
                      show_progress=False,
                      show_breakdown=False,
                      show_report=False):
        """
        Determine if data is missing from the bundle and attempt to ingest it.

        Parameters
        ----------
        assets: list[TradingPair]
        data_frequency: str
        start_dt: pd.Timestamp
        end_dt: pd.Timestamp
        show_progress: bool
        show_breakdown: bool

        """
        if start_dt is None:
            start_dt = self.calendar.first_session

        if end_dt is None:
            end_dt = pd.Timestamp.utcnow()

        get_start_end = get_month_start_end \
            if data_frequency == 'minute' else get_year_start_end

        # Snap start_dt and end_dt to the first and last day of their periods
        start_dt, _ = get_start_end(start_dt)
        _, end_dt = get_start_end(end_dt)

        chunks = self.prepare_chunks(assets=assets,
                                     data_frequency=data_frequency,
                                     start_dt=start_dt,
                                     end_dt=end_dt)

        problems = []
        # This is the common writer for the entire exchange bundle, so we
        # want to give it an end_date far in the future
        writer = self.get_writer(start_dt, end_dt, data_frequency)
        if show_breakdown:
            for asset in chunks:
                with maybe_show_progress(
                        chunks[asset],
                        show_progress,
                        label='Ingesting {frequency} price data for '
                        '{symbol} on {exchange}'.format(
                            exchange=self.exchange_name,
                            frequency=data_frequency,
                            symbol=asset.symbol)) as it:
                    for chunk in it:
                        problems += self.ingest_ctable(
                            asset=chunk['asset'],
                            data_frequency=data_frequency,
                            period=chunk['period'],
                            writer=writer,
                            empty_rows_behavior='strip',
                            cleanup=True)
        else:
            all_chunks = list(chain.from_iterable(itervalues(chunks)))

            # We sort the chunks by period so that the most recent data is
            # ingested first
            all_chunks.sort(
                key=lambda chunk: pd.to_datetime(chunk['period']),
                reverse=True,
            )
            with maybe_show_progress(
                    all_chunks,
                    show_progress,
                    label='Ingesting {frequency} price data on '
                    '{exchange}'.format(
                        exchange=self.exchange_name,
                        frequency=data_frequency,
                    )) as it:
                for chunk in it:
                    problems += self.ingest_ctable(
                        asset=chunk['asset'],
                        data_frequency=data_frequency,
                        period=chunk['period'],
                        writer=writer,
                        empty_rows_behavior='strip',
                        cleanup=True)

        if show_report and len(problems) > 0:
            log.info('Problems during ingestion:\n{}'.format(
                '\n'.join(problems)))
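
``get_month_start_end`` and ``get_year_start_end`` are assumed to snap a timestamp to the boundaries of its containing period. A hedged sketch of the monthly variant, matching only how it is used above (hypothetical, not the library's code):

import pandas as pd


def get_month_start_end(dt):
    """Return the first and last day of the month containing ``dt``."""
    start = dt.normalize().replace(day=1)
    end = start + pd.offsets.MonthEnd(1)
    return start, end


get_month_start_end(pd.Timestamp('2017-06-15 13:37', tz='utc'))
# -> (Timestamp('2017-06-01 00:00:00+00:00'),
#     Timestamp('2017-06-30 00:00:00+00:00'))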
Example #10
def fetch_symbol_metadata_frame(api_key,
                                cache,
                                retries=5,
                                environ=None,
                                show_progress=False):
    """
    Download Quandl symbol metadata.

    Parameters
    ----------
    api_key : str
        The Quandl API key to use. If this is None then no API key will be
        sent.
    cache : DataFrameCache
        The cache to use for persisting the intermediate data.
    retries : int, optional
        The number of times to retry each request before failing.
    environ : mapping[str -> str], optional
        The environment to use to find the catalyst home. By default this
        is ``os.environ``.
    show_progress : bool, optional
        Show a progress bar for the download of this data.

    Returns
    -------
    metadata_frame : pd.DataFrame
        A dataframe with the following columns:
          symbol: the asset's symbol
          name: the full name of the asset
          start_date: the first date of data for this asset
          end_date: the last date of data for this asset
          auto_close_date: end_date + one day
          exchange: the exchange for the asset; this is always 'quandl'
        The index of the dataframe will be used for symbol->sid mappings but
        otherwise does not have specific meaning.
    """
    raw_iter = _fetch_raw_metadata(api_key, cache, retries, environ)

    def item_show_func(_, _it=iter(count())):
        return 'Downloading page: %d' % next(_it)

    with maybe_show_progress(raw_iter,
                             show_progress,
                             item_show_func=item_show_func,
                             label='Downloading WIKI metadata: ') as blocks:
        data = pd.concat(blocks, ignore_index=True).rename(
            columns={
                'dataset_code': 'symbol',
                'name': 'asset_name',
                'oldest_available_date': 'start_date',
                'newest_available_date': 'end_date',
            }).sort_values('symbol')

    data = data[~data.symbol.isin(excluded_symbols)]
    # cut out all the other stuff in the name column
    # we need to escape the paren because it is actually splitting on a regex
    data.asset_name = data.asset_name.str.split(r' \(', 1).str.get(0)
    data['exchange'] = 'QUANDL'

    data['start_date'] = data['start_date'].astype(datetime)
    data['end_date'] = data['end_date'].astype(datetime)

    data['auto_close_date'] = data['end_date'] + pd.Timedelta(days=1)
    return data
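
The ``_it=iter(count())`` default argument above is a small trick: the iterator is created once, at function definition time, giving the callback private state that persists across calls. Illustration:

from itertools import count


def item_show_func(_, _it=iter(count())):
    return 'Downloading page: %d' % next(_it)


print(item_show_func(None))  # Downloading page: 0
print(item_show_func(None))  # Downloading page: 1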
Example #11
    def ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,  # unused
            daily_bar_writer,
            adjustment_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            output_dir,
            # pass these as defaults to make them 'nonlocal' in py2
            start=start,
            end=end):
        if start is None:
            start = start_session
        if end is None:
            # leaving ``end`` as None makes DataReader default to fetching
            # data up to the current day
            end = None

        metadata = pd.DataFrame(
            np.empty(len(symbols),
                     dtype=[
                         ('start_date', 'datetime64[ns]'),
                         ('end_date', 'datetime64[ns]'),
                         ('auto_close_date', 'datetime64[ns]'),
                         ('symbol', 'object'),
                     ]))

        def _pricing_iter():
            sid = 0
            with maybe_show_progress(
                    symbols,
                    show_progress,
                    label='Downloading Yahoo pricing data: ') as it, \
                    requests.Session() as session:
                for symbol in it:
                    path = _cachpath(symbol, 'ohlcv')
                    try:
                        df = cache[path]
                    except KeyError:
                        df = cache[path] = DataReader(
                            symbol,
                            'yahoo',
                            start,
                            end,
                            session=session,
                        ).sort_index()

                    # the start date is the date of the first trade and
                    # the end date is the date of the last trade
                    start_date = df.index[0]
                    end_date = df.index[-1]
                    # The auto_close date is the day after the last trade.
                    ac_date = end_date + pd.Timedelta(days=1)
                    metadata.iloc[sid] = start_date, end_date, ac_date, symbol

                    df.rename(
                        columns={
                            'Open': 'open',
                            'High': 'high',
                            'Low': 'low',
                            'Close': 'close',
                            'Volume': 'volume',
                        },
                        inplace=True,
                    )
                    yield sid, df
                    sid += 1

        daily_bar_writer.write(_pricing_iter(), show_progress=show_progress)

        symbol_map = pd.Series(metadata.symbol.index, metadata.symbol)

        # Hardcode the exchange to "YAHOO" for all assets and (elsewhere)
        # register "YAHOO" to resolve to the NYSE calendar, because these are
        # all equities and thus can use the NYSE calendar.
        metadata['exchange'] = "YAHOO"
        asset_db_writer.write(equities=metadata)

        adjustments = []
        with maybe_show_progress(
                symbols,
                show_progress,
                label='Downloading Yahoo adjustment data: ') as it, \
                requests.Session() as session:
            for symbol in it:
                path = _cachpath(symbol, 'adjustment')
                try:
                    df = cache[path]
                except KeyError:
                    df = cache[path] = DataReader(
                        symbol,
                        'yahoo-actions',
                        start,
                        end,
                        session=session,
                    ).sort_index()

                df['sid'] = symbol_map[symbol]
                adjustments.append(df)

        adj_df = pd.concat(adjustments)
        adj_df.index.name = 'date'
        adj_df.reset_index(inplace=True)

        splits = adj_df[adj_df.action == 'SPLIT']
        splits = splits.rename(columns={
            'value': 'ratio',
            'date': 'effective_date'
        }, )
        splits.drop('action', axis=1, inplace=True)

        dividends = adj_df[adj_df.action == 'DIVIDEND']
        dividends = dividends.rename(columns={
            'value': 'amount',
            'date': 'ex_date'
        }, )
        dividends.drop('action', axis=1, inplace=True)
        # we do not have this data in the yahoo dataset
        dividends['record_date'] = pd.NaT
        dividends['declared_date'] = pd.NaT
        dividends['pay_date'] = pd.NaT

        adjustment_writer.write(splits=splits, dividends=dividends)
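
The ``symbol_map`` line above inverts the ``metadata.symbol`` column: a sid-indexed series of symbols becomes a symbol-indexed series of sids. A tiny standalone illustration (symbols invented):

import pandas as pd

symbols = pd.Series(['AAPL', 'MSFT', 'IBM'])  # index 0..2 plays the sid role
symbol_map = pd.Series(symbols.index, symbols)

print(symbol_map['IBM'])  # 2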