def ffill_digest_frame_from_prior_values(freq, field, digest_frame, pv_frame, raw=False): """ Forward-fill a digest frame, falling back to the last known prior values if necessary. """ # convert to ndarray if necessary values = digest_frame if raw and isinstance(digest_frame, pd.DataFrame): values = digest_frame.values nan_sids = pd.isnull(values[0]) if np.any(nan_sids): # If we have any leading nans in the frame, use values from pv_frame to # seed values for those sids. key_loc = pv_frame.index.get_loc((freq.freq_str, field)) filler = pv_frame.values[key_loc, nan_sids] values[0, nan_sids] = filler if raw: filled = ffill(values) return filled return digest_frame.ffill()
def frame_to_series(self, field, frame, columns=None): """ Convert a frame with a DatetimeIndex and sid columns into a series with a sid index, using the aggregator defined by the given field. """ if isinstance(frame, pd.DataFrame): columns = frame.columns frame = frame.values if not len(frame): return pd.Series( data=(0 if field == 'volume' else np.nan), index=columns, ).values if field in ['price', 'close']: # shortcircuit for full last row vals = frame[-1] if np.all(~np.isnan(vals)): return vals return ffill(frame)[-1] elif field == 'open': return bfill(frame)[0] elif field == 'volume': return np.nansum(frame, axis=0) elif field == 'high': return np.nanmax(frame, axis=0) elif field == 'low': return np.nanmin(frame, axis=0) else: raise ValueError("Unknown field {}".format(field))
def frame_to_series(self, field, frame, columns=None): """ Convert a frame with a DatetimeIndex and sid columns into a series with a sid index, using the aggregator defined by the given field. """ if isinstance(frame, pd.DataFrame): columns = frame.columns frame = frame.values if not len(frame): return pd.Series( data=(0 if field == 'volume' else np.nan), index=columns, ).values if field in ['price', 'close_price']: # shortcircuit for full last row vals = frame[-1] if np.all(~np.isnan(vals)): return vals return ffill(frame)[-1] elif field == 'open_price': return bfill(frame)[0] elif field == 'volume': return np.nansum(frame, axis=0) elif field == 'high': return np.nanmax(frame, axis=0) elif field == 'low': return np.nanmin(frame, axis=0) else: raise ValueError("Unknown field {}".format(field))
def test_ffill(): # test ndim=1 N = 100 s = pd.Series(np.random.randn(N)) mask = random.sample(range(N), 10) s.iloc[mask] = np.nan correct = s.ffill().values test = ffill(s.values) assert_almost_equal(correct, test) # test ndim=2 df = pd.DataFrame(np.random.randn(N, N)) df.iloc[mask] = np.nan correct = df.ffill().values test = ffill(df.values) assert_almost_equal(correct, test)
def ffill_buffer_from_prior_values(freq, field, buffer_frame, digest_frame, pv_frame, raw=False): """ Forward-fill a buffer frame, falling back to the end-of-period values of a digest frame if the buffer frame has leading NaNs. """ # convert to ndarray if necessary digest_values = digest_frame if raw and isinstance(digest_frame, pd.DataFrame): digest_values = digest_frame.values buffer_values = buffer_frame if raw and isinstance(buffer_frame, pd.DataFrame): buffer_values = buffer_frame.values nan_sids = pd.isnull(buffer_values[0]) if np.any(nan_sids) and len(digest_values): # If we have any leading nans in the buffer and we have a non-empty # digest frame, use the oldest digest values as the initial buffer # values. buffer_values[0, nan_sids] = digest_values[-1, nan_sids] nan_sids = pd.isnull(buffer_values[0]) if np.any(nan_sids): # If we still have leading nans, fall back to the last known values # from before the digest. key_loc = pv_frame.index.get_loc((freq.freq_str, field)) filler = pv_frame.values[key_loc, nan_sids] buffer_values[0, nan_sids] = filler if raw: filled = ffill(buffer_values) return filled return buffer_frame.ffill()