def calculate_enhanced_meta(serie: pd.Series, periodicity: str) -> dict:
    """Compute the enhanced metadata of the given series.

    The name of the series MUST be the ID of the series in the database.

    Args:
        serie: Series whose index labels expose ``.date()`` (the first and
            last valid labels are converted with it).
        periodicity: Periodicity identifier; stored as-is in the result and
            forwarded to the period/update helpers.

    Returns:
        Dict keyed by ``meta_keys`` constants. ``SECOND_TO_LAST_VALUE`` and
        ``LAST_PCT_CHANGE`` are ``None`` when there is no observation before
        the last valid one.
    """
    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days

    # Work with positions explicitly (``.iloc``) instead of relying on
    # integer keys being interpreted positionally on a non-integer index.
    last_index = serie.index.get_loc(serie.last_valid_index())
    last = serie.iloc[last_index]
    # Position 0 has no predecessor; ``last_index - 1`` would wrap around to
    # the end of the series.
    second_to_last = serie.iloc[last_index - 1] if last_index > 0 else None
    # A percentage change cannot be computed without a previous value.
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Calculations
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
        meta_keys.SIGNIFICANT_FIGURES: significant_figures(serie.values),
    }
    return meta
def test_first_last_valid(self):
    """Check ``first_valid_index``/``last_valid_index`` on padded, all-NaN and empty Series.

    Reads the ``self.ts`` fixture; the index arithmetic below assumes it has
    more than ten entries (TODO confirm against the fixture definition).
    """
    ts = self.ts.copy()

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    ts[:5] = np.nan
    index = ts.first_valid_index()
    assert index == ts.index[5]

    ts[-5:] = np.nan
    index = ts.last_valid_index()
    assert index == ts.index[-6]

    # No valid entries at all.
    ts[:] = np.nan
    assert ts.last_valid_index() is None
    assert ts.first_valid_index() is None

    ser = Series([], index=[])
    assert ser.last_valid_index() is None
    assert ser.first_valid_index() is None

    # GH12800
    empty = Series()
    assert empty.last_valid_index() is None
    assert empty.first_valid_index() is None

    # GH20499: it preserves freq with holes
    ts.index = date_range("20110101", periods=len(ts), freq="B")
    ts.iloc[1] = 1
    ts.iloc[-2] = 1
    assert ts.first_valid_index() == ts.index[1]
    assert ts.last_valid_index() == ts.index[-2]
    assert ts.first_valid_index().freq == ts.index.freq
    assert ts.last_valid_index().freq == ts.index.freq
def drift(x: Series, h: int) -> Series:
    """Forecast ``h`` future values with the drift method.

    Equation: ``Y[t+h|t] = Y[t] + h * ((Y[t] - Y[1]) / (t - 1))`` where
    ``Y[1]`` / ``Y[t]`` are the first / last valid values of ``x``.

    Args:
        x: Time series data.
        h: Number of future predictions.

    Returns:
        Series of length ``h`` (empty when ``h <= 0``) with a default integer
        index; element ``i`` is the forecast ``i + 1`` steps ahead.
    """
    last_value = x.get(x.last_valid_index())
    first_value = x.get(x.first_valid_index())

    # Average change per step. The denominator is the full length of ``x``
    # (NaN entries included), as in the original implementation.
    diff_rate = (last_value - first_value) / (len(x.values) - 1)

    steps = np.arange(1, h + 1)
    return Series(last_value + steps * diff_rate)
class Player:
    """A player together with the home runs (HRs) recorded for them.

    Attributes are plain instance fields:
      * ``hr_series`` maps each date to the HRs recorded on that date.
      * ``hr_total_series`` maps each date to the running total as of the
        latest update made for that date.
    """

    def __init__(self, first_name, last_name, id):
        self.first_name = first_name
        self.last_name = last_name
        self.id = id
        self.hrs = [0, 0, 0, 0, 0, 0]  # One for each month of the game
        self.hr_total = 0
        self.hr_series = Series()
        self.hr_total_series = Series()

    def __str__(self):
        return '{0} : {1}'.format(self.id, self.last_name)

    def __repr__(self):
        return self.__str__()

    def add_hrs(self, count, date):
        """Record ``count`` home runs on ``date``.

        Counts for a date that is already present are accumulated, whether or
        not that date is the most recent entry, so ``hr_series.sum()`` always
        matches ``hr_total``.
        """
        self.hr_total += count
        # ``hr_series`` has not been updated yet, so add ``count`` explicitly.
        self.hr_total_series[date] = self.hr_series.sum() + count
        self.hr_series[date] = self.hr_series.get(date, 0) + count

    def name(self):
        """Return the full name as "<first_name> <last_name>"."""
        return self.first_name + " " + self.last_name

    def get_player_hr_dataframe(self):
        """Return per-date HRs as a one-column frame named after the player."""
        return self.hr_series.to_frame(self.name())

    def get_player_hr_total_dataframe(self):
        """Return running HR totals as a one-column frame named after the player."""
        return self.hr_total_series.to_frame(self.name())
def generate_ema_list(
    closing_prices: pd.Series, sma_list: pd.Series, duration: int = 10
) -> pd.Series:
    """Return the Exponential Moving Average list for a series of closing prices.

    The data is walked from integer key ``len(closing_prices) - duration``
    down to ``0`` (the original comment says the most-current price is on
    top). Each step applies
    ``ema = (price - previous_ema) * weight + previous_ema`` with
    ``weight = 2 / (duration + 1)``; the first step uses the last valid
    simple moving average as ``previous_ema``.

    Args:
        closing_prices: Closing prices, addressed with integer keys.
        sma_list: Simple moving averages; when ``None`` it is computed with
            ``generate_sma_list(closing_prices, duration)``.
        duration: Averaging period.

    Returns:
        Series of ``len(closing_prices) - duration + 1`` EMA values, in the
        same orientation as ``closing_prices``, with a default integer index.
    """
    weight = 2 / (duration + 1)
    if sma_list is None:
        sma_list = generate_sma_list(closing_prices, duration)

    last_valid_sma_idx = sma_list.last_valid_index()
    oldest_sma = sma_list[last_valid_sma_idx]  # given most-current on top

    # Key of the oldest price that gets an EMA; hoisted out of the loop.
    oldest_pos = len(closing_prices) - duration

    # The first EMA reference point is seeded from the simple moving average.
    ema = (closing_prices[oldest_pos] - oldest_sma) * weight + oldest_sma
    emas_oldest_first = [ema]
    for offset in range(1, oldest_pos + 1):
        ema = (closing_prices[oldest_pos - offset] - ema) * weight + ema
        emas_oldest_first.append(ema)

    # Append + one reverse is O(n); inserting at the front each step is O(n^2).
    emas_oldest_first.reverse()
    return pd.Series(emas_oldest_first)
class Player:
    """A player together with the home runs (HRs) recorded for them.

    Attributes are plain instance fields:
      * ``hr_series`` maps each date to the HRs recorded on that date.
      * ``hr_total_series`` maps each date to the running total as of the
        latest update made for that date.
    """

    def __init__(self, first_name, last_name, id):
        self.first_name = first_name
        self.last_name = last_name
        self.id = id
        self.hrs = [0, 0, 0, 0, 0, 0]  # One for each month of the game
        self.hr_total = 0
        self.hr_series = Series()
        self.hr_total_series = Series()

    def __str__(self):
        return '{0} : {1}'.format(self.id, self.last_name)

    def __repr__(self):
        return self.__str__()

    def add_hrs(self, count, date):
        """Record ``count`` home runs on ``date``.

        Counts for a date that is already present are accumulated, whether or
        not that date is the most recent entry, so ``hr_series.sum()`` always
        matches ``hr_total``.
        """
        self.hr_total += count
        # ``hr_series`` has not been updated yet, so add ``count`` explicitly.
        self.hr_total_series[date] = self.hr_series.sum() + count
        self.hr_series[date] = self.hr_series.get(date, 0) + count

    def name(self):
        """Return the full name as "<first_name> <last_name>"."""
        return self.first_name + " " + self.last_name

    def get_player_hr_dataframe(self):
        """Return per-date HRs as a one-column frame named after the player."""
        return self.hr_series.to_frame(self.name())

    def get_player_hr_total_dataframe(self):
        """Return running HR totals as a one-column frame named after the player."""
        return self.hr_total_series.to_frame(self.name())
def update_enhanced_meta(serie: pd.Series, catalog_id: str, distribution_id: str):
    """Create or update the enhanced metadata of the given series.

    The name of the series MUST be the ID of the series in the database: it
    is used, together with the catalog and distribution identifiers, to look
    up the ``Field`` whose ``enhanced_meta`` entries are written.

    Args:
        serie: Series whose index labels expose ``.date()``.
        catalog_id: Identifier of the catalog that owns the distribution.
        distribution_id: Identifier of the distribution that owns the field.
    """
    field = Field.objects.get(distribution__dataset__catalog__identifier=catalog_id,
                              distribution__identifier=distribution_id,
                              identifier=serie.name)
    periodicity = meta_keys.get(field.distribution, meta_keys.PERIODICITY)
    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days

    # Positional access made explicit with ``.iloc``.
    last = serie.iloc[-1]
    second_to_last = serie.iloc[-2] if serie.index.size > 1 else None
    # A one-element series has no previous value, so there is no percentage
    # change to compute (dividing by None would raise TypeError).
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Calculations
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
    }

    # One enhanced-meta row per key, created or overwritten.
    for meta_key, value in meta.items():
        field.enhanced_meta.update_or_create(key=meta_key, defaults={'value': value})
def smooth_with_rolling_average(
    series: pd.Series,
    window: int = 7,
    include_trailing_zeros: bool = True,
    exclude_negatives: bool = True,
):
    """Smooth a series with a rolling mean that needs only one observation.

    Series must have a datetime index.

    Rolling reference:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.rolling.html
    Port of Projections.ts:
    https://github.com/covid-projections/covid-projections/blob/master/src/common/models/Projection.ts#L715

    Args:
        series: Series with datetime index to smooth.
        window: Size of the sliding window to average.
        include_trailing_zeros: When True the rolling average is returned
            as-is. When False, every entry more than one day after the last
            non-zero value is set to NaN.
        exclude_negatives: When True, negative values are replaced with NaN
            (on a copy) before averaging.

    Returns:
        Smoothed series. When ``include_trailing_zeros`` is False and the
        series has no non-zero value, the trimmed (unsmoothed) series is
        returned instead.
    """
    # Ignore trailing NaNs so days without data yet are not smoothed.
    trimmed = series.loc[:series.last_valid_index()]

    if exclude_negatives:
        trimmed = trimmed.copy()
        trimmed.loc[trimmed < 0] = None

    def _window_mean(values):
        """Mean of the window, or NaN when its most recent value is NaN."""
        return np.nan if np.isnan(values.iloc[-1]) else values.mean()

    # Custom aggregation applied to each rolling window:
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html
    smoothed = trimmed.rolling(window, min_periods=1).apply(_window_mean)

    if include_trailing_zeros:
        return smoothed

    last_nonzero = trimmed.replace(0, np.nan).last_valid_index()
    if not last_nonzero:
        # Entirely empty series.
        return trimmed

    smoothed[last_nonzero + timedelta(days=1):] = np.nan
    return smoothed
def test_first_last_valid(self):
    """Check ``first_valid_index``/``last_valid_index`` on padded, all-NaN and empty Series.

    Reads the ``self.ts`` fixture; the index arithmetic below assumes it has
    more than ten entries (TODO confirm against the fixture definition).
    """
    ts = self.ts.copy()

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    ts[:5] = np.nan
    index = ts.first_valid_index()
    self.assertEqual(index, ts.index[5])

    ts[-5:] = np.nan
    index = ts.last_valid_index()
    self.assertEqual(index, ts.index[-6])

    # No valid entries at all.
    ts[:] = np.nan
    self.assertIsNone(ts.last_valid_index())
    self.assertIsNone(ts.first_valid_index())

    ser = Series([], index=[])
    self.assertIsNone(ser.last_valid_index())
    self.assertIsNone(ser.first_valid_index())

    # GH12800
    empty = Series()
    self.assertIsNone(empty.last_valid_index())
    self.assertIsNone(empty.first_valid_index())
def test_first_last_valid(self):
    """Check ``first_valid_index``/``last_valid_index`` on padded, all-NaN and empty Series.

    Reads the ``self.ts`` fixture; the index arithmetic below assumes it has
    more than ten entries (TODO confirm against the fixture definition).
    """
    ts = self.ts.copy()

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    ts[:5] = np.nan
    index = ts.first_valid_index()
    assert index == ts.index[5]

    ts[-5:] = np.nan
    index = ts.last_valid_index()
    assert index == ts.index[-6]

    # No valid entries at all.
    ts[:] = np.nan
    assert ts.last_valid_index() is None
    assert ts.first_valid_index() is None

    ser = Series([], index=[])
    assert ser.last_valid_index() is None
    assert ser.first_valid_index() is None

    # GH12800
    empty = Series()
    assert empty.last_valid_index() is None
    assert empty.first_valid_index() is None
def write_serie(self, serie: pd.Series, periodicity: str, fields: dict, writer: csv.writer):
    """Write one CSV row per entry of ``serie`` through ``writer``.

    Leading and trailing NaNs are dropped first. Each remaining
    (index, value) pair is turned into a row by ``self.rows``, which also
    receives ``self.fields_data``, the field id looked up in ``fields`` by
    the series name, and ``periodicity``.
    """
    field_id = fields[serie.name]

    # NaN filtering: keep only the span between the first and last valid value.
    first_valid = serie.first_valid_index()
    last_valid = serie.last_valid_index()
    serie = serie[first_valid:last_valid]

    extra_args = (self.fields_data, field_id, periodicity)
    built_rows = serie.reset_index().apply(self.rows, axis=1, args=extra_args)

    rows_by_label = pd.Series(built_rows.values, index=serie.index)
    for csv_row in rows_by_label:
        writer.writerow(csv_row)
def interpolate_stalled_and_missing_values(series: pd.Series) -> pd.Series:
    """Interpolate spans where values stopped increasing or are missing.

    Works on a copy of the input. Between the first and last valid entries,
    every value equal to its predecessor is blanked out, the blanks and
    pre-existing gaps are filled by time-weighted interpolation, and the
    result is floored. Leading and trailing NaNs are left untouched.

    Args:
        series: Series with a datetime index.

    Returns:
        A new series with the interior span interpolated.
    """
    result = series.copy()
    first_valid = result.first_valid_index()
    last_valid = result.last_valid_index()

    observed = result.loc[first_valid:last_valid]
    stalled = observed.diff() == 0
    observed[stalled] = None

    # Interpolating on the index keeps breaks between data points from being
    # filled as if the observations were evenly spaced.
    filled = observed.interpolate(method="time")
    result.loc[first_valid:last_valid] = filled.apply(np.floor)
    return result
def _get_range(x: pd.Series): """Get a range of values so that there are no NaNs in the sequence.""" first_idx = x.first_valid_index() last_idx = x.last_valid_index() subset = x.loc[first_idx:last_idx] while subset.isnull().values.any() and \ (first_idx is not None or last_idx is not None): idx = subset.isna().idxmax() first_idx = subset.loc[idx:last_idx].first_valid_index() subset = x.loc[first_idx:last_idx] if first_idx is None or last_idx is None: return None, None return first_idx, last_idx
def test_first_last_valid(self):
    """Check ``first_valid_index``/``last_valid_index`` on padded, all-NaN and empty Series.

    Reads the ``self.ts`` fixture; the index arithmetic below assumes it has
    more than ten entries (TODO confirm against the fixture definition).
    """
    ts = self.ts.copy()

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    ts[:5] = np.nan
    index = ts.first_valid_index()
    self.assertEqual(index, ts.index[5])

    ts[-5:] = np.nan
    index = ts.last_valid_index()
    self.assertEqual(index, ts.index[-6])

    # ``assertIsNone`` replaces the ``assert_`` alias, which was deprecated
    # and later removed from ``unittest.TestCase``.
    ts[:] = np.nan
    self.assertIsNone(ts.last_valid_index())
    self.assertIsNone(ts.first_valid_index())

    ser = Series([], index=[])
    self.assertIsNone(ser.last_valid_index())
    self.assertIsNone(ser.first_valid_index())
def fill_gaps(time_series: pd.Series) -> pd.Series:
    """
    Fill gaps in a time series (i.e. value equals to NaN) inside the time-series (leading and ending
    missing values are untouched).

    Each interior NaN is replaced by the value found at the same weekday and hour on the closest
    (in whole days) timestamp that has data. ``time_series`` is modified in place and also returned.

    Parameters
    ----------
    time_series: pd.Series
        Time-series of load (can be NaNs) indexed with datetime indexes.

    Returns
    -------
    time_series: pd.Series
        Corrected time series (the same object that was passed in)

    Raises
    ------
    ValueError
        Raised by ``np.argmin`` when a gap has no valid entry with the same weekday and hour.
    """
    # First remove starting and ending nans. Work on a copy so the fills below
    # are not chained assignments on a slice of the input.
    trimmed = time_series.loc[
        time_series.first_valid_index():time_series.last_valid_index()].copy()

    # Candidate replacement values: every interior timestamp that has data.
    donors = trimmed.dropna()
    # Computed once; comparing these arrays replaces a per-gap Python-level
    # scan of the whole index.
    donor_weekday = donors.index.weekday
    donor_hour = donors.index.hour

    gap_indexes = trimmed.index[trimmed.isna()]
    for gap in gap_indexes:
        # Get all elements which are on the same weekday, same hour
        similar_hours = donors[(donor_weekday == gap.weekday()) & (donor_hour == gap.hour)]

        # Find closest valid hour (ties resolve to the earliest candidate)
        closest_valid_hour_index = similar_hours.index[
            np.argmin(abs((similar_hours.index - gap).days))]

        trimmed[gap] = donors[closest_valid_hour_index]

    time_series[trimmed.index] = trimmed.values
    return time_series
def _assert_single_contiguous_dense_sequence( _series: pd.Series) -> None: """ Assert that the input series has no Null values after removing leading and trailing Nulls. An motivating example for this requirement is a ForecastCheck, which might have a main value series that ends with trailing Nulls, and a forecast series that begins with leading nulls, but the actual and forecast periods should have no nulls. This is a strong assertion, and I'm not 100% sure it's the right one, but I'm putting it in because I'd rather start out with more constraints. However, we can revisit this design choice. """ assert is_numeric_dtype(_series), 'The "Single Contiguous Dense Sequence" constraint should only be ' \ 'applied to numeric Series' assert (not _series.loc[_series.first_valid_index( ):_series.last_valid_index()].isnull().values.any()), ( 'Numeric series may have leading or trailing null values to represent missing or non-applicable ' 'data points. However, values for the series should otherwise be non-Null.' )
def generate_field_summary(series: pd.Series) -> pd.Series:
    """Summarise one field (column) as a Series of descriptive statistics.

    The result always has the keys ``has_value``, ``min_date``, ``max_date``,
    ``max_value``, ``min_value``, ``latest_value``, ``num_observations``,
    ``largest_delta`` and ``largest_delta_date``, in that order. When the
    input has no non-null value, ``has_value`` is False,
    ``num_observations`` is 0 and every other entry is None.
    """
    summary = {
        "has_value": not series.isnull().all(),
        "min_date": None,
        "max_date": None,
        "max_value": None,
        "min_value": None,
        "latest_value": None,
        "num_observations": 0,
        "largest_delta": None,
        "largest_delta_date": None,
    }

    if summary["has_value"]:
        observed = series[series.notnull()]
        abs_deltas = series.diff().abs()

        summary["min_date"] = series.first_valid_index()
        summary["max_date"] = series.last_valid_index()
        summary["max_value"] = series.max()
        summary["min_value"] = series.min()
        summary["latest_value"] = observed.iloc[-1]
        summary["num_observations"] = len(observed)
        summary["largest_delta"] = abs_deltas.max()
        # A delta date only exists when at least one consecutive pair of
        # entries produced a non-null difference.
        if len(abs_deltas.dropna()):
            summary["largest_delta_date"] = abs_deltas.idxmax()

    return pd.Series(summary)
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', limit_area=None, fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues : 1-d array-likes of the same length
    method : str
        'linear', 'time', 'index' and 'values' are handled with
        ``np.interp``; the names in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``.
    limit : int or None
        None means no limit; otherwise must be an integer >= 1.
    limit_direction : {'forward', 'backward', 'both'}
        Compared case-insensitively.
    limit_area : {None, 'inside', 'outside'}
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no missing entry; otherwise a
    copy of the y values with missing entries filled, except those listed in
    ``preserve_nans``. Nothing is returned explicitly (i.e. None) when
    ``method`` is in neither method list.

    Raises
    ------
    ValueError
        For method 'time' when ``xvalues.is_all_dates`` is missing or falsy,
        or for an invalid ``limit_direction``, ``limit_area`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    # Nothing to interpolate.
    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # From here on 'time' is processed as 'values'.
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(msg.format(valid=valid_limit_directions,
                                    invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every missing slot first, then blank out again the ones that
        # must stay NaN.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[preserve_nans] = np.nan
        return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues : 1-d array-likes of the same length
    method : str
        'linear', 'time', 'index' and 'values' are handled with
        ``np.interp``; the names in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``.
    limit : int or None
        None means no limit; otherwise must be an integer >= 1.
    limit_direction : {'forward', 'backward', 'both'}
        Compared case-insensitively. Only consulted when ``limit`` is given.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no missing entry; otherwise a
    copy of the y values with missing entries filled, except those listed in
    ``violate_limit``. Nothing is returned explicitly (i.e. None) when
    ``method`` is in neither method list.

    Raises
    ------
    ValueError
        For method 'time' when ``xvalues.is_all_dates`` is missing or falsy,
        or for an invalid ``limit_direction`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    # Nothing to interpolate.
    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # From here on 'time' is processed as 'values'.
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        # A missing position is yielded when the whole window reaching
        # fw_limit positions back and bw_limit positions ahead is missing.
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    # Positions of the NaNs before the first / after the last valid value.
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    # NOTE: the direction branches below only run when a limit is given, so
    # with limit=None only the leading NaNs are preserved, whatever
    # limit_direction is.
    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans |
                                   set(_interp_limit(invalid, limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans |
                                   set(_interp_limit(invalid, 0, limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every missing slot first, then blank out again the ones that
        # must stay NaN.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues : 1-d array-likes of the same length
    method : str
        'linear', 'time', 'index' and 'values' are handled with
        ``np.interp``; the names in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``.
    limit : int or None
        None means no limit; otherwise must be an integer >= 1.
    limit_direction : {'forward', 'backward', 'both'}
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no missing entry; otherwise a
    copy of the y values with missing entries filled, except those listed in
    ``violate_limit``. Nothing is returned explicitly (i.e. None) when
    ``method`` is in neither method list.

    Raises
    ------
    ValueError
        For method 'time' when ``xvalues.is_all_dates`` is missing or falsy,
        or for an invalid ``limit_direction`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    # Nothing to interpolate.
    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # From here on 'time' is processed as 'values'.
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    # Positions of the NaNs before the first / after the last valid value.
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans |
                               set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans |
                               set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        # 'both' without a limit: every NaN may be filled.
        violate_limit = []

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every missing slot first, then blank out again the ones that
        # must stay NaN.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[violate_limit] = np.nan
        return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', limit_area=None, fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues : 1-d array-likes of the same length
    method : str
        'linear', 'time', 'index' and 'values' are handled with
        ``np.interp``; the names in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``.
    limit : int or None
        None means no limit; otherwise must be an integer >= 1.
    limit_direction : {'forward', 'backward', 'both'}
        Compared case-insensitively.
    limit_area : {None, 'inside', 'outside'}
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no missing entry; otherwise a
    copy of the y values with missing entries filled, except those listed in
    ``preserve_nans``. Nothing is returned explicitly (i.e. None) when
    ``method`` is in neither method list.

    Raises
    ------
    ValueError
        For method 'time' when ``xvalues.is_all_dates`` is missing or falsy,
        or for an invalid ``limit_direction``, ``limit_area`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    # Nothing to interpolate.
    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # From here on 'time' is processed as 'values'.
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(
            msg.format(valid=valid_limit_directions, invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every missing slot first, then blank out again the ones that
        # must stay NaN.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[preserve_nans] = np.nan
        return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward',
                   fill_value=None, bounds_error=False, order=None, **kwargs):
    """
    Fill the missing entries of a 1-d array of values by interpolation.

    ``xvalues`` and ``yvalues`` are expected to be 1-d arrays of the same
    length; the result is 1-d as well.

    Parameters
    ----------
    xvalues : array-like
        Coordinates of the points.
    yvalues : array-like
        Values to interpolate; its null entries are the ones being filled.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are computed with
        ``np.interp``.  'time' requires ``xvalues.is_all_dates`` to be truthy
        and is then handled as 'values'.  The scipy-backed names (see
        ``scipy_methods`` below) are delegated to
        ``_interpolate_scipy_wrapper``.
    limit : int, optional
        When truthy, a missing value is left as NaN if no valid value lies
        within ``limit`` positions in the direction(s) selected by
        ``limit_direction``.  When falsy (``None`` or ``0``) only the leading
        NaNs are left untouched, whatever ``limit_direction`` is.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``; the numpy-based
        methods do not take them.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when nothing in
    ``yvalues`` is valid, ``yvalues`` itself when nothing is missing,
    otherwise a filled copy of the values.  ``None`` when ``method`` belongs
    to neither group of methods.

    Raises
    ------
    ValueError
        For 'time' interpolation without an all-dates ``xvalues``, or for an
        unknown ``limit_direction``.
    """
    def _beyond_limit(mask, fw_limit, bw_limit):
        # Positions of missing values whose whole window (fw_limit entries
        # back, bw_limit entries ahead) is missing as well, i.e. values that
        # have no valid neighbour within the limits.
        return [pos for pos in np.where(mask)[0]
                if mask[max(0, pos - fw_limit):pos + bw_limit + 1].all()]

    missing = com.isnull(yvalues)
    present = ~missing

    if not present.any():
        # np.asarray is needed because xvalues may be an Index, which cannot
        # be mutated.
        return np.full_like(np.asarray(xvalues), np.nan, dtype=np.float64)

    if present.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    allowed_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in allowed_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (allowed_directions, limit_direction))

    from pandas import Series
    as_series = Series(yvalues)
    leading_nans = set(range(as_series.first_valid_index()))
    trailing_nans = set(range(1 + as_series.last_valid_index(),
                              len(present)))

    # keep_as_nan lists the positions that np.interp / scipy will fill but
    # that must be reset to NaN afterwards:
    #   * no limit            -> the leading run of NaNs (see issues #9218
    #                            and #10420), regardless of direction;
    #   * limit + 'forward'   -> leading NaNs plus anything further than
    #                            `limit` after the previous valid value;
    #   * limit + 'backward'  -> trailing NaNs plus anything further than
    #                            `limit` before the next valid value;
    #   * limit + 'both'      -> anything further than `limit` from a valid
    #                            value on both sides.
    if not limit:
        keep_as_nan = sorted(leading_nans)
    elif limit_direction == 'forward':
        keep_as_nan = sorted(
            leading_nans | set(_beyond_limit(missing, limit, 0)))
    elif limit_direction == 'backward':
        keep_as_nan = sorted(
            trailing_nans | set(_beyond_limit(missing, 0, limit)))
    else:
        keep_as_nan = sorted(_beyond_limit(missing, limit, limit))

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    numpy_methods = ['linear', 'time', 'index', 'values']
    scipy_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                     'barycentric', 'krogh', 'spline', 'polynomial',
                     'piecewise_polynomial', 'pchip', 'akima']

    if method in numpy_methods:
        if method in ('values', 'index'):
            coords = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if issubclass(coords.dtype.type, np.datetime64):
                coords = coords.view(np.int64)
            if coords.dtype == np.object_:
                coords = lib.maybe_convert_objects(coords)
        else:
            coords = xvalues
        filled = np.interp(coords[missing], coords[present],
                           yvalues[present])
    elif method in scipy_methods:
        coords = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(coords.dtype.type, np.datetime64):
            coords = coords.view(np.int64)
        filled = _interpolate_scipy_wrapper(
            coords[present], yvalues[present], coords[missing],
            method=method, fill_value=fill_value,
            bounds_error=bounds_error, order=order, **kwargs)
    else:
        # Unknown method: nothing is computed.
        return None

    result[missing] = filled
    result[keep_as_nan] = np.nan
    return result
def _get_index_size(serie: pd.Series) -> int:
    """Return how many index entries lie between the first and last valid values.

    Leading and trailing missing values are excluded; missing values located
    between two valid ones are counted.  The span is computed from positions
    rather than by slicing with the index labels, so the result does not
    depend on the kind of index: slicing a series with integer bounds is
    positional and end-exclusive, which dropped the last valid value, and an
    all-missing series produced ``None`` bounds, i.e. the full length.

    Args:
        serie: series whose valid span is measured.

    Returns:
        The number of entries from the first to the last non-missing value,
        both included, or 0 when the series is empty or has no valid value.
    """
    valid_positions = serie.notna().values.nonzero()[0]
    if valid_positions.size == 0:
        # Empty or all-missing series: there is no valid span at all.
        return 0
    return int(valid_positions[-1] - valid_positions[0]) + 1
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Interpolate the missing entries of a 1-d array of values.

    ``xvalues`` and ``yvalues`` are expected to be 1-d arrays of the same
    length; the result is 1-d as well.

    Parameters
    ----------
    xvalues : array-like
        Coordinates of the points.
    yvalues : array-like
        Values to interpolate; its null entries are the ones being filled.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' go through ``np.interp``
        ('time' requires ``xvalues.is_all_dates`` to be truthy and is then
        handled as 'values').  The scipy-backed names (see ``scipy_methods``
        below) are delegated to ``_interpolate_scipy_wrapper``.
    limit : int, optional
        Maximum distance, in positions, between a filled value and the valid
        value it is filled from.  ``None`` means unlimited (GH #16282).
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``; the numpy-based
        methods do not take them.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when nothing in
    ``yvalues`` is valid, ``yvalues`` itself when nothing is missing,
    otherwise a filled copy of the values.  ``None`` when ``method`` belongs
    to neither group of methods.

    Raises
    ------
    ValueError
        For 'time' interpolation without an all-dates ``xvalues``, for an
        unknown ``limit_direction``, or when ``limit`` is given but is not
        an integer greater than 0.
    """
    missing = isnull(yvalues)
    present = ~missing

    if not present.any():
        # np.asarray is needed because xvalues may be an Index, which cannot
        # be mutated.
        all_nan = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        all_nan.fill(np.nan)
        return all_nan

    if present.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    known_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in known_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (known_directions, limit_direction))

    from pandas import Series
    as_series = Series(yvalues)
    leading_nans = set(range(as_series.first_valid_index()))
    trailing_nans = set(range(1 + as_series.last_valid_index(),
                              len(present)))

    # A missing limit means "unlimited" (GH #16282); anything else has to be
    # a strictly positive integer.
    limited = limit is not None
    if limited:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')

    # keep_as_nan lists the positions that are currently NaN and must still
    # be NaN once the interpolation is done:
    #   'forward'  -> the leading NaNs, plus (with a limit) the NaNs more
    #                 than `limit` away from the prior valid value;
    #   'backward' -> the trailing NaNs, plus (with a limit) the NaNs more
    #                 than `limit` away from the subsequent valid value;
    #   'both'     -> with a limit, the NaNs more than `limit` away from
    #                 any valid value; without one, nothing.
    if limit_direction == 'forward':
        if limited:
            keep_as_nan = sorted(
                leading_nans | set(_interp_limit(missing, limit, 0)))
        else:
            keep_as_nan = sorted(leading_nans)
    elif limit_direction == 'backward':
        if limited:
            keep_as_nan = sorted(
                trailing_nans | set(_interp_limit(missing, 0, limit)))
        else:
            keep_as_nan = sorted(trailing_nans)
    elif limited:
        keep_as_nan = sorted(_interp_limit(missing, limit, limit))
    else:
        keep_as_nan = []

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    numpy_methods = ['linear', 'time', 'index', 'values']
    scipy_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                     'barycentric', 'krogh', 'spline', 'polynomial',
                     'from_derivatives', 'piecewise_polynomial', 'pchip',
                     'akima']

    if method in numpy_methods:
        if method in ('values', 'index'):
            coords = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(coords.dtype.type):
                coords = coords.view(np.int64)
            if coords.dtype == np.object_:
                coords = lib.maybe_convert_objects(coords)
        else:
            coords = xvalues
        filled = np.interp(coords[missing], coords[present],
                           yvalues[present])
    elif method in scipy_methods:
        coords = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(coords.dtype.type, np.datetime64):
            coords = coords.view(np.int64)
        filled = _interpolate_scipy_wrapper(
            coords[present], yvalues[present], coords[missing],
            method=method, fill_value=fill_value,
            bounds_error=bounds_error, order=order, **kwargs)
    else:
        # Unknown method: nothing is computed.
        return None

    result[missing] = filled
    result[keep_as_nan] = np.nan
    return result