def write(self, data, length=None, show_progress=False,
          invalid_data_behavior='warn'):
    """Write a stream of minute data.

    Parameters
    ----------
    data : iterable[(int, pd.DataFrame)]
        The data to write. Each element should be a tuple of sid, data
        where data has the following format:
          columns : ('open', 'high', 'low', 'close', 'volume')
            open : float64
            high : float64
            low : float64
            close : float64
            volume : float64|int64
          index : DatetimeIndex of market minutes.
        A given sid may appear more than once in ``data``; however,
        the dates must be strictly increasing.
    length : int, optional
        The number of elements in ``data``. Only used to size the
        progress bar; ``data`` itself may be a generator of unknown
        length.
    show_progress : bool, optional
        Whether or not to show a progress bar while writing.
    invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
        Forwarded unchanged to ``write_sid`` for each element; controls
        what happens when invalid data is encountered.
    """
    with maybe_show_progress(
        data,
        length=length,
        show_percent=False,
        show_progress=show_progress,
        item_show_func=item_show_count(length),
        label='Compiling five-minute data',
    ) as it:
        # Hoist the bound method lookup out of the loop.
        write_sid = self.write_sid
        for e in it:
            write_sid(*e, invalid_data_behavior=invalid_data_behavior)
def _post_process_metadata(self, metadata, cache, show_progress=False):
    """Compute the final metadata frame for all symbols in ``metadata``.

    Parameters
    ----------
    metadata : pd.DataFrame
        Raw metadata; must have a ``symbol`` column. Rows are addressed
        positionally by asset id.
    cache : mapping
        Cache holding a ``'<symbol>.daily.frame'`` entry for every
        symbol in ``metadata``.
    show_progress : bool, optional
        Whether or not to show a progress bar while post-processing.

    Returns
    -------
    pd.DataFrame
        Frame with columns ``self.md_column_names`` plus ``exchange``.

    Raises
    ------
    ValueError
        If a symbol has no cached daily frame.
    """
    # Create empty data frame using target metadata column names and
    # dtypes.
    final_metadata = pd.DataFrame(
        columns=self.md_column_names,
        index=metadata.index,
    )

    # Iterate over the available symbols, loading the asset's raw symbol
    # data from the cache. The final metadata is computed and recorded
    # in the appropriate row depending on the asset's id.
    with maybe_show_progress(
        # Series.items(): iteritems() was deprecated and removed in
        # pandas 2.0; items() is the equivalent, long-supported spelling.
        metadata.symbol.items(),
        show_progress,
        label='Post-processing symbol metadata',
        item_show_func=item_show_count(len(metadata)),
        length=len(metadata),
        show_percent=False,
    ) as symbols_map:
        for asset_id, symbol in symbols_map:
            # Attempt to load data from disk, the cache should have an
            # entry for each symbol at this point of the execution. If
            # one does not exist, we should fail.
            key = '{sym}.daily.frame'.format(sym=symbol)
            try:
                raw_data = cache[key]
            except KeyError:
                raise ValueError(
                    'Unable to find cached data for symbol: {0}'.format(
                        symbol))

            # Perform and require post-processing of metadata.
            # NOTE(review): indexing with .iloc[asset_id] assumes asset
            # ids are contiguous positional indices — confirm upstream.
            final_symbol_metadata = self.post_process_symbol_metadata(
                asset_id,
                metadata.iloc[asset_id],
                raw_data,
            )

            # Record symbol's final metadata.
            final_metadata.iloc[asset_id] = final_symbol_metadata

    # Register all assets with the bundle's default exchange.
    final_metadata['exchange'] = self.exchange

    return final_metadata
def _post_process_metadata(self, metadata, cache, show_progress=False):
    """Build the final per-asset metadata frame from cached symbol data.

    Parameters
    ----------
    metadata : pd.DataFrame
        Raw metadata with a ``symbol`` column; rows are addressed
        positionally by asset id.
    cache : mapping
        Must contain a ``'<symbol>.daily.frame'`` entry for every symbol.
    show_progress : bool, optional
        Whether or not to show a progress bar while post-processing.

    Returns
    -------
    pd.DataFrame
        Frame with columns ``self.md_column_names`` plus ``exchange``.

    Raises
    ------
    ValueError
        If a symbol's daily frame is missing from ``cache``.
    """
    # Create empty data frame using target metadata column names and
    # dtypes.
    final_metadata = pd.DataFrame(
        columns=self.md_column_names,
        index=metadata.index,
    )

    # Iterate over the available symbols, loading the asset's raw symbol
    # data from the cache. The final metadata is computed and recorded
    # in the appropriate row depending on the asset's id.
    with maybe_show_progress(
        # Series.items(): iteritems() was deprecated and removed in
        # pandas 2.0; items() is the equivalent, long-supported spelling.
        metadata.symbol.items(),
        show_progress,
        label='Post-processing symbol metadata',
        item_show_func=item_show_count(len(metadata)),
        length=len(metadata),
        show_percent=False,
    ) as symbols_map:
        for asset_id, symbol in symbols_map:
            # Attempt to load data from disk, the cache should have an
            # entry for each symbol at this point of the execution. If
            # one does not exist, we should fail.
            key = '{sym}.daily.frame'.format(sym=symbol)
            try:
                raw_data = cache[key]
            except KeyError:
                raise ValueError(
                    'Unable to find cached data for symbol:'
                    ' {0}'.format(symbol))

            # Perform and require post-processing of metadata.
            # NOTE(review): indexing with .iloc[asset_id] assumes asset
            # ids are contiguous positional indices — confirm upstream.
            final_symbol_metadata = self.post_process_symbol_metadata(
                asset_id,
                metadata.iloc[asset_id],
                raw_data,
            )

            # Record symbol's final metadata.
            final_metadata.iloc[asset_id] = final_symbol_metadata

    # Register all assets with the bundle's default exchange.
    final_metadata['exchange'] = self.exchange

    return final_metadata
def write(self, data, assets=None, show_progress=False,
          invalid_data_behavior='warn'):
    """Write chunks of asset data to the underlying bcolz table.

    Parameters
    ----------
    data : iterable[tuple[int, pandas.DataFrame or bcolz.ctable]]
        The data chunks to write. Each chunk should be a tuple of sid
        and the data for that asset.
    assets : set[int], optional
        The assets that should be in ``data``. If this is provided we
        will check ``data`` against the assets and provide better
        progress information.
    show_progress : bool, optional
        Whether or not to show a progress bar while writing.
    invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
        What to do when data is encountered that is outside the range
        of a uint64.

    Returns
    -------
    table : bcolz.ctable
        The newly-written table.
    """
    # Only size the progress bar when the caller told us the asset set.
    if assets is None:
        total = None
    else:
        total = len(assets)

    def converted(pairs):
        # Lazily convert each frame as it is consumed by the writer.
        for sid, frame in pairs:
            yield sid, self.to_ctable(frame, invalid_data_behavior)

    progress_ctx = maybe_show_progress(
        converted(data),
        show_progress=show_progress,
        label=self.progress_bar_message,
        item_show_func=item_show_count(total),
        length=total,
        show_percent=False,
    )
    with progress_ctx as iterator:
        return self._write_internal(iterator, assets)
def _fetch_metadata_frame(self,
                          api_key,
                          cache,
                          retries=DEFAULT_RETRIES,
                          environ=None,
                          show_progress=False):
    """Fetch all raw metadata pages and combine them into one frame.

    Parameters
    ----------
    api_key : str
        API key forwarded to the metadata iterator.
    cache : mapping
        Cache forwarded to the metadata iterator.
    retries : int, optional
        Number of fetch retries; defaults to ``DEFAULT_RETRIES``.
    environ : mapping, optional
        Environment forwarded to the metadata iterator.
    show_progress : bool, optional
        Whether or not to show a progress bar while fetching.

    Returns
    -------
    pd.DataFrame
        All metadata pages concatenated with a fresh integer index.
    """
    # Set up the raw metadata iterator, fetching pages as needed.
    raw_pages = self._fetch_metadata_iter(api_key, cache, retries, environ)

    # Concatenate every page yielded by the iterator into a single
    # metadata frame.
    with maybe_show_progress(
        raw_pages,
        show_progress,
        label='Fetching symbol metadata',
        item_show_func=item_show_count(),
        # NOTE(review): 3 looks like a hardcoded page-count estimate
        # used only to size the progress bar — confirm.
        length=3,
        show_percent=False,
    ) as pages:
        return pd.concat(pages, ignore_index=True)