def test_pi_sub_period(self):
    # GH#13071
    idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                      freq='M', name='idx')
    per = pd.Period('2012-01', freq='M')
    off = idx.freq

    # PeriodIndex - Period yields an Index of offset multiples;
    # the operator and the numpy ufunc must agree.
    expected = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off],
                        name='idx')
    tm.assert_index_equal(idx - per, expected)
    tm.assert_index_equal(np.subtract(idx, per), expected)

    # Reversed subtraction flips every sign.
    expected = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name='idx')
    tm.assert_index_equal(per - idx, expected)
    tm.assert_index_equal(np.subtract(per, idx), expected)

    # NaT propagates to every element as an all-NaT TimedeltaIndex.
    nat = pd.Period('NaT', freq='M')
    expected = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan],
                                 name='idx')
    tm.assert_index_equal(idx - nat, expected)
    tm.assert_index_equal(nat - idx, expected)
def mu_max(curves, norm_eqs=None, time_range=['2.5 hours', '15 hours'], blank='BLK'): if norm_eqs: for strain, norm_eq in norm_eqs.items(): try: curves.update( curves.assign(OD595norm=norm_eq(curves[ curves.name.str.contains(strain)].OD595))) except (Exception): curves = curves.assign(OD595norm=norm_eq(curves[ curves.name.str.contains(strain)].OD595)) curves.OD595 = curves.OD595norm #curves = curves.reset_index() #curves.rename(columns={'index': 'indexer'}) window_size = 12 curves.Time = pd.TimedeltaIndex(curves.Time, unit='h').round('T') data = curves.set_index(['Time', 'name', 'well']).unstack([1, 2]).resample('5T').mean() blank_val = data['OD595'][blank].mean() try: blank_val = blank_val.mean() except: pass data = data - blank_val rolling = data.rolling(window_size) growth_rates = (rolling.apply(lambda x: np.log(x[-1]/x[0])).OD595[time_range[0]:time_range[-1]]\ .max()/(window_size/12)).reset_index() # growth_rates = growth_rates.assign(Strain = growth_rates.name.apply(lambda x: str(x).split(' ')[0]), # Treatment = growth_rates.name.apply(lambda x: ' '.join(str(x).split(' ')[1:]))) # #data.pH = pd.to_numeric(data.pH, errors='ignore') # data = growth_rates.rename(columns={0: 'Max growth rate'}) # sns.barplot(data=growth_rates, y='Treatment', x='Max growth rate', hue='Strain') return growth_rates
def _nonempty_index(idx):
    """Return a small (2-element) index with the same type/metadata as *idx*.

    Preserves ``name``, and where applicable ``freq``, ``tz``,
    categories/orderedness, and MultiIndex structure, so the result can
    stand in for *idx* as a non-empty sample.

    NOTE(review): this targets an older pandas API surface --
    ``pd.Int64Index``/``pd.Float64Index`` (removed in pandas 2.0),
    ``DatetimeIndex``/``TimedeltaIndex``/``PeriodIndex`` construction via
    ``start``/``periods`` (removed in favour of the ``*_range`` helpers),
    and ``MultiIndex(labels=...)`` (renamed ``codes=``).  Confirm against
    the pandas version this project pins before modernising.
    """
    typ = type(idx)
    if typ is pd.RangeIndex:
        return pd.RangeIndex(2, name=idx.name)
    elif typ in (pd.Int64Index, pd.Float64Index):
        return typ([1, 2], name=idx.name)
    elif typ is pd.Index:
        return pd.Index(['a', 'b'], name=idx.name)
    elif typ is pd.DatetimeIndex:
        start = '1970-01-01'
        # With a freq, let the constructor generate 2 periods; otherwise
        # pass two identical timestamps explicitly.
        data = [start, start] if idx.freq is None else None
        return pd.DatetimeIndex(data, start=start, periods=2, freq=idx.freq,
                                tz=idx.tz, name=idx.name)
    elif typ is pd.PeriodIndex:
        return pd.PeriodIndex(start='1970-01-01', periods=2, freq=idx.freq,
                              name=idx.name)
    elif typ is pd.TimedeltaIndex:
        start = np.timedelta64(1, 'D')
        # Same freq-vs-data split as the DatetimeIndex branch above.
        data = [start, start] if idx.freq is None else None
        return pd.TimedeltaIndex(data, start=start, periods=2, freq=idx.freq,
                                 name=idx.name)
    elif typ is pd.CategoricalIndex:
        if len(idx.categories):
            # Reuse the first category twice so the dtype survives intact.
            data = [idx.categories[0]] * 2
            cats = idx.categories
        else:
            # No categories to sample from: recurse on the categories
            # index itself to get placeholder values.
            data = _nonempty_index(idx.categories)
            cats = None
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        # Rebuild each level non-empty, and point both rows at label 0.
        levels = [_nonempty_index(i) for i in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))
def day_counts(index):
    """Days between DatetimeIndex values as a :any:`pandas.Series`.

    Parameters
    ----------
    index : :any:`pandas.DatetimeIndex`
        The index for which to get day counts.

    Returns
    -------
    day_counts : :any:`pandas.Series`
        A :any:`pandas.Series` with (possibly fractional) counts of days
        between consecutive values. Counts are given on start dates of
        periods; the final entry is ``NaN`` because it has no successor.
    """
    if len(index) == 0:
        # Explicit dtype keeps the empty result consistent with the
        # float64 series returned for non-empty input (and avoids the
        # pandas warning for a dtype-less empty Series).
        return pd.Series([], index=index, dtype='float64')
    # Consecutive differences, padded with a trailing NaT so the result
    # aligns one-to-one with the full index.
    timedeltas = (index[1:] - index[:-1]).append(pd.TimedeltaIndex([pd.NaT]))
    timedelta_days = timedeltas.total_seconds() / (60 * 60 * 24)
    return pd.Series(timedelta_days, index=index)
def test_pi_sub_period(self):
    # GH#13071
    idx = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"],
                      freq="M", name="idx")
    off = idx.freq
    other = pd.Period("2012-01", freq="M")

    exp_fwd = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off],
                       name="idx")
    exp_rev = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name="idx")

    # the operator and the numpy ufunc must produce identical results
    for result in (idx - other, np.subtract(idx, other)):
        tm.assert_index_equal(result, exp_fwd)
    for result in (other - idx, np.subtract(other, idx)):
        tm.assert_index_equal(result, exp_rev)

    # subtraction involving NaT yields an all-NaT TimedeltaIndex
    exp_nat = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan],
                                name="idx")
    tm.assert_index_equal(idx - pd.Period("NaT", freq="M"), exp_nat)
    tm.assert_index_equal(pd.Period("NaT", freq="M") - idx, exp_nat)
def calc_baseline_dumb(training_data, similar_moments, prediction_window):
    """Average several historical windows into a 1-minute baseline.

    Parameters
    ----------
    training_data : pandas.DataFrame
        Time-indexed (DatetimeIndex) data with a single value column.
        NOTE(review): a single-column DataFrame is assumed -- a Series
        would broadcast differently against the (49, 1) accumulator.
    similar_moments : list
        Timestamps marking the start of each similar historical window.
    prediction_window : datetime.timedelta or int
        Length of each window; an int is interpreted as minutes.

    Returns
    -------
    numpy.ndarray
        Baseline at 1-minute resolution: the 15-minute average of the
        selected windows, forward-filled, with the last value repeated
        once so the closing minute is covered.
    """
    if not isinstance(prediction_window, timedelta):
        prediction_window = timedelta(minutes=prediction_window)

    k = len(similar_moments)
    accumulator = np.zeros((49, 1))
    for moment in similar_moments:
        # 15-minute mean of this window, truncated to the 49 samples
        # that cover a 12-hour horizon.
        window = (1 / k) * training_data[
            moment:moment + prediction_window].resample(
            timedelta(minutes=15)).mean()
        accumulator += window[0:49].to_numpy()

    baseline = np.squeeze(accumulator)
    # Upsample the 15-minute baseline to 1-minute resolution.
    frame = pd.DataFrame(baseline).set_index(
        pd.timedelta_range(start=0, periods=49, freq='15min'))
    frame = frame.resample(timedelta(minutes=1)).ffill()
    baseline = np.squeeze(frame.to_numpy())
    # Repeat the final sample so the series covers the closing minute.
    baseline = np.concatenate((baseline, np.atleast_1d(baseline[-1])))
    return baseline
def test_timedelta_other_units(self):
    idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
    expected = np.array([False, True, False])

    # the index itself and its backing ndarray behave alike
    tm.assert_numpy_array_equal(isnull(idx), expected)
    tm.assert_numpy_array_equal(notnull(idx), ~expected)
    tm.assert_numpy_array_equal(isnull(idx.values), expected)
    tm.assert_numpy_array_equal(notnull(idx.values), ~expected)

    units = ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
             'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
             'timedelta64[ns]']
    for dtype in units:
        values = idx.values.astype(dtype)

        arr_exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(values), arr_exp)
        tm.assert_numpy_array_equal(notnull(values), ~arr_exp)

        # both the native-dtype and object-dtype Series must agree
        ser_exp = pd.Series([False, True, False])
        for ser in (pd.Series(values), pd.Series(values, dtype=object)):
            tm.assert_series_equal(isnull(ser), ser_exp)
            tm.assert_series_equal(notnull(ser), ~ser_exp)
def test_construction_discrete(data, time, interpolation, shape_exp):
    """Test the construction of the TimeSeries class."""
    # pint quantities are interpreted as seconds when building the
    # expected time index; anything else passes through unchanged
    time_exp = (pd.TimedeltaIndex(time.magnitude, unit="s")
                if isinstance(time, pint.Quantity) else time)

    ts = TimeSeries(data=data, time=time, interpolation=interpolation)

    # top-level accessors reflect the constructor arguments
    assert np.all(ts.data == data)
    assert np.all(ts.time == time_exp)
    assert ts.interpolation == interpolation
    assert ts.shape == shape_exp
    assert data.check(UREG.get_dimensionality(ts.units))

    # the backing DataArray carries the same payload and metadata
    assert np.all(ts.data_array.data == data)
    assert ts.data_array.attrs["interpolation"] == interpolation
    if time_exp is None:
        assert "time" not in ts.data_array
    else:
        assert np.all(ts.data_array.time == time_exp)
def test_add_dti_td(self):
    # GH 17558
    # Check that tz-aware DatetimeIndex + np.array(dtype="timedelta64")
    # and DatetimeIndex + TimedeltaIndex work as expected
    dti = pd.DatetimeIndex([pd.Timestamp("2017/01/01")],
                           name="x").tz_localize('US/Eastern')
    expected = pd.DatetimeIndex([pd.Timestamp("2017/01/01 01:00")],
                                name="x").tz_localize('US/Eastern')

    td_np = np.array([np.timedelta64(1, 'h')], dtype="timedelta64[ns]")
    addends = [
        td_np,                                 # numpy array
        td_np.astype(dtype="timedelta64[m]"),  # coarser resolution
        pd.TimedeltaIndex(td_np, name=dti.name),
        td_np[0],                              # timedelta scalar
        pd.to_timedelta(td_np[0]),
    ]
    for other in addends:
        tm.assert_index_equal(dti + other, expected)

    # adding a float ndarray is undefined and must raise
    errmsg = r"cannot add DatetimeIndex and np.ndarray\[float64\]"
    with tm.assert_raises_regex(TypeError, errmsg):
        dti + np.array([0.1], dtype=np.float64)
def mean_std_model(data):
    """Build a per-time-of-day mean and standard deviation model.

    args :
        data : onehot encoded dataframe with a datetime column ``'D'``

    return :
        df : dataframe with one row per (hour, minute) of the day holding
        the mean and standard deviation of every column, plus a ``'time'``
        column anchored to today's date.  The mean serves as the
        prediction.
    """
    df = data.copy()
    df["minute"] = df['D'].map(lambda x: x.minute)
    df["hour"] = df['D'].map(lambda x: x.hour)

    # Mean and standard deviation of each column per minute-of-day.
    # NOTE(review): ``std`` is a file-level name not visible here --
    # confirm whether it is numpy's std (ddof=0) or pandas' (ddof=1).
    df_g = df.groupby(['hour', 'minute']).agg(['mean', std])
    df = df_g.reset_index()

    # Anchor each (hour, minute) slot to a concrete timestamp on today's
    # date so downstream consumers get a real datetime axis.
    df['minutes'] = df['minute'] + df['hour'] * 60
    df['time'] = datetime.combine(
        date.today(), datetime.min.time()) + pd.to_timedelta(
        df['minutes'], unit='m')
    df.drop(['minutes', 'hour', 'minute'], axis=1, inplace=True)
    return df