Exemplo n.º 1
0
    def test_pi_sub_period(self):
        # GH#13071
        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                          freq='M',
                          name='idx')

        result = idx - pd.Period('2012-01', freq='M')
        off = idx.freq
        exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name='idx')
        tm.assert_index_equal(result, exp)

        result = np.subtract(idx, pd.Period('2012-01', freq='M'))
        tm.assert_index_equal(result, exp)

        result = pd.Period('2012-01', freq='M') - idx
        exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name='idx')
        tm.assert_index_equal(result, exp)

        result = np.subtract(pd.Period('2012-01', freq='M'), idx)
        tm.assert_index_equal(result, exp)

        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx')
        tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp)
        tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp)
Exemplo n.º 2
0
def mu_max(curves,
           norm_eqs=None,
           time_range=['2.5 hours', '15 hours'],
           blank='BLK'):
    """Return the maximum rolling log-ratio of blank-corrected OD595 values.

    Parameters
    ----------
    curves : table with ``Time``, ``name``, ``well`` and ``OD595`` columns
        Assumed to be a pandas DataFrame -- TODO confirm. It is modified in
        place: ``Time`` is overwritten, and when ``norm_eqs`` is given
        ``OD595`` is overwritten as well.
    norm_eqs : mapping, optional
        Maps a strain pattern to a callable. The callable receives the
        ``OD595`` values of the rows whose ``name`` contains the pattern and
        its result is stored as ``OD595norm``.
    time_range : sequence
        Its first and last items are used as slice bounds on the time index.
        The default is a list, but it is only read here, never mutated.
    blank : label
        Column label selecting the blank measurements inside ``OD595``.

    Returns
    -------
    The per-column maximum of the rolling log-ratio within ``time_range``,
    after ``reset_index()``.
    """
    if norm_eqs:
        for strain, norm_eq in norm_eqs.items():
            try:
                # First attempt: merge the normalised values into `curves`
                # in place.
                curves.update(
                    curves.assign(OD595norm=norm_eq(curves[
                        curves.name.str.contains(strain)].OD595)))
            except (Exception):
                # Fallback on any error: rebind `curves` to a new frame that
                # carries the OD595norm column.
                # NOTE(review): catching Exception hides the real failure
                # reason -- confirm which error is expected here.
                curves = curves.assign(OD595norm=norm_eq(curves[
                    curves.name.str.contains(strain)].OD595))
        # From here on OD595 holds the normalised readings.
        curves.OD595 = curves.OD595norm
    #curves = curves.reset_index()
    #curves.rename(columns={'index': 'indexer'})
    # Rolling window length in rows; rows are 5-minute bins after the
    # resample below, so 12 rows span one hour.
    window_size = 12
    # Time values are interpreted as hours and rounded to whole minutes.
    curves.Time = pd.TimedeltaIndex(curves.Time, unit='h').round('T')
    # Index by (Time, name, well), move name and well into the columns, then
    # average onto a regular 5-minute grid.
    data = curves.set_index(['Time', 'name',
                             'well']).unstack([1, 2]).resample('5T').mean()
    # Mean over time of the blank column(s).
    blank_val = data['OD595'][blank].mean()

    try:
        # Collapse to a single number when several blank columns exist.
        blank_val = blank_val.mean()
    except:
        # NOTE(review): bare except silently ignores every error, including
        # KeyboardInterrupt -- presumably meant for a scalar without .mean().
        pass
    # Blank correction: the blank value is subtracted from every column.
    data = data - blank_val
    rolling = data.rolling(window_size)
    # Per window: log(last / first). The OD595 result is sliced to
    # time_range and reduced with max(). The divisor window_size/12 equals 1
    # for the current window_size -- presumably it normalises the rate to
    # "per hour"; verify before changing window_size.
    growth_rates = (rolling.apply(lambda x: np.log(x[-1]/x[0])).OD595[time_range[0]:time_range[-1]]\
    .max()/(window_size/12)).reset_index()
    # growth_rates = growth_rates.assign(Strain = growth_rates.name.apply(lambda x: str(x).split(' ')[0]),
    #                    Treatment = growth_rates.name.apply(lambda x: ' '.join(str(x).split(' ')[1:])))
    # #data.pH = pd.to_numeric(data.pH, errors='ignore')
    # data = growth_rates.rename(columns={0: 'Max growth rate'})
    # sns.barplot(data=growth_rates, y='Treatment', x='Max growth rate', hue='Strain')
    return growth_rates
Exemplo n.º 3
0
def _nonempty_index(idx):
    """Return a two-element index of exactly the same type as ``idx``.

    The result carries over the name(s) of ``idx`` and, where the branch
    reads them, its ``freq``, ``tz``, ``categories`` and ``ordered``
    attributes. The values themselves are fixed placeholders.

    Raises
    ------
    TypeError
        If ``type(idx)`` is none of the index types handled below. The check
        is on the exact type, so subclasses are not matched.
    """
    kind = type(idx)

    if kind is pd.RangeIndex:
        return pd.RangeIndex(2, name=idx.name)

    if kind in (pd.Int64Index, pd.Float64Index):
        return kind([1, 2], name=idx.name)

    if kind is pd.Index:
        return pd.Index(['a', 'b'], name=idx.name)

    if kind is pd.DatetimeIndex:
        origin = '1970-01-01'
        # Explicit values are passed only when there is no freq.
        values = None if idx.freq is not None else [origin, origin]
        return pd.DatetimeIndex(values, start=origin, periods=2,
                                freq=idx.freq, tz=idx.tz, name=idx.name)

    if kind is pd.PeriodIndex:
        return pd.PeriodIndex(start='1970-01-01', periods=2, freq=idx.freq,
                              name=idx.name)

    if kind is pd.TimedeltaIndex:
        origin = np.timedelta64(1, 'D')
        values = None if idx.freq is not None else [origin, origin]
        return pd.TimedeltaIndex(values, start=origin, periods=2,
                                 freq=idx.freq, name=idx.name)

    if kind is pd.CategoricalIndex:
        if len(idx.categories):
            # Repeat the first known category and keep the category set.
            values, cats = [idx.categories[0]] * 2, idx.categories
        else:
            # No categories: use placeholder values of the categories' own
            # index type and pass no explicit category set.
            values, cats = _nonempty_index(idx.categories), None
        return pd.CategoricalIndex(values, categories=cats,
                                   ordered=idx.ordered, name=idx.name)

    if kind is pd.MultiIndex:
        # Every level gets its own placeholder; both rows point at code 0.
        levels = [_nonempty_index(level) for level in idx.levels]
        labels = [[0, 0] for _ in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)

    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))
Exemplo n.º 4
0
def day_counts(index):
    """Days between DatetimeIndex values as a :any:`pandas.Series`.

    Parameters
    ----------
    index : :any:`pandas.DatetimeIndex`
        The index for which to get day counts.

    Returns
    -------
    day_counts : :any:`pandas.Series`
        A float :any:`pandas.Series` with counts of days between periods.
        Counts are given on start dates of periods; the last entry has no
        following period and is therefore ``NaN``. An empty index yields an
        empty float series.
    """
    # Work on a copy so the returned series never shares the caller's index.
    index = index.copy()

    if len(index) == 0:
        # Explicit dtype: without it the empty result is object-typed on
        # recent pandas, unlike the float result of the non-empty path.
        return pd.Series([], index=index, dtype=float)

    # Gap from each timestamp to the next one, padded with NaT for the last.
    timedeltas = (index[1:] - index[:-1]).append(pd.TimedeltaIndex([pd.NaT]))
    timedelta_days = timedeltas.total_seconds() / (60 * 60 * 24)

    return pd.Series(timedelta_days, index=index)
Exemplo n.º 5
0
    def test_pi_sub_period(self):
        # GH#13071
        idx = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"],
                          freq="M",
                          name="idx")

        result = idx - pd.Period("2012-01", freq="M")
        off = idx.freq
        exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name="idx")
        tm.assert_index_equal(result, exp)

        result = np.subtract(idx, pd.Period("2012-01", freq="M"))
        tm.assert_index_equal(result, exp)

        result = pd.Period("2012-01", freq="M") - idx
        exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name="idx")
        tm.assert_index_equal(result, exp)

        result = np.subtract(pd.Period("2012-01", freq="M"), idx)
        tm.assert_index_equal(result, exp)

        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx")
        tm.assert_index_equal(idx - pd.Period("NaT", freq="M"), exp)
        tm.assert_index_equal(pd.Period("NaT", freq="M") - idx, exp)
Exemplo n.º 6
0
def calc_baseline_dumb(training_data, similar_moments, prediction_window):
    """Average the windows that follow each similar moment into a baseline.

    For every entry of ``similar_moments`` the slice
    ``training_data[moment:moment + prediction_window]`` is resampled to
    15-minute means, and its first 49 rows are added to the accumulator with
    weight ``1 / len(similar_moments)``. The 49-point average is then
    forward-filled onto a 1-minute grid, and its last value is repeated once
    more at the end.

    Parameters
    ----------
    training_data : pandas object sliceable by time labels
        Presumably a single-column DataFrame with a DatetimeIndex, since the
        accumulator has shape (49, 1) -- TODO confirm against callers.
    similar_moments : sized iterable
        Start labels of the windows to average. An empty collection yields
        an all-zero baseline.
    prediction_window : datetime.timedelta or number
        Window length; a non-timedelta value is interpreted as minutes.

    Returns
    -------
    numpy.ndarray
        One-dimensional baseline on a 1-minute grid.
    """
    # isinstance also accepts timedelta subclasses such as pandas.Timedelta.
    if not isinstance(prediction_window, timedelta):
        prediction_window = timedelta(minutes=prediction_window)

    n_slots = 49
    slot = timedelta(minutes=15)

    k = len(similar_moments)

    r = np.zeros((n_slots, 1))
    for i in similar_moments:
        similar_day = training_data[i:i + prediction_window].resample(
            slot).mean()
        # Add raw values only: the windows start at different timestamps, so
        # the sum must be positional and never aligned on index labels.
        # 1.0 / k keeps the weight fractional even under integer division.
        r += (1.0 / k) * np.asarray(similar_day[0:n_slots])

    baseline = np.squeeze(r)

    # timedelta_range and .values replace the TimedeltaIndex(start=...)
    # constructor form and DataFrame.as_matrix(), both removed from pandas.
    b = pd.DataFrame(baseline).set_index(
        pd.timedelta_range(start=timedelta(0), periods=n_slots,
                           freq=slot)).resample(timedelta(minutes=1)).ffill()
    baseline = np.squeeze(b.values)
    # Repeat the final value so the result is one sample longer than the grid.
    baseline = np.concatenate((baseline, np.atleast_1d(baseline[-1])))

    return baseline
Exemplo n.º 7
0
    def test_timedelta_other_units(self):
        idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)
        tm.assert_numpy_array_equal(isnull(idx.values), exp)
        tm.assert_numpy_array_equal(notnull(idx.values), ~exp)

        for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
                      'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
                      'timedelta64[ns]']:
            values = idx.values.astype(dtype)

            exp = np.array([False, True, False])
            tm.assert_numpy_array_equal(isnull(values), exp)
            tm.assert_numpy_array_equal(notnull(values), ~exp)

            exp = pd.Series([False, True, False])
            s = pd.Series(values)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
            s = pd.Series(values, dtype=object)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
Exemplo n.º 8
0
    def test_construction_discrete(data, time, interpolation, shape_exp):
        """Check that a TimeSeries built from discrete data exposes its inputs."""
        # Expected time: a pint quantity is compared as a TimedeltaIndex
        # built from its magnitude in seconds; anything else is compared
        # unchanged.
        time_exp = (
            pd.TimedeltaIndex(time.magnitude, unit="s")
            if isinstance(time, pint.Quantity)
            else time
        )

        # object under test
        ts = TimeSeries(data=data, time=time, interpolation=interpolation)

        # attributes exposed directly on the TimeSeries
        assert np.all(ts.data == data)
        assert np.all(ts.time == time_exp)
        assert ts.interpolation == interpolation
        assert ts.shape == shape_exp
        assert data.check(UREG.get_dimensionality(ts.units))

        # the same information as seen through the data_array attribute
        assert np.all(ts.data_array.data == data)
        assert ts.data_array.attrs["interpolation"] == interpolation
        if time_exp is not None:
            assert np.all(ts.data_array.time == time_exp)
        else:
            assert "time" not in ts.data_array
Exemplo n.º 9
0
    def test_add_dti_td(self):
        """Adding timedelta-like operands to a tz-aware DatetimeIndex.

        Every one-hour operand must shift the index by one hour; adding a
        float64 array must raise TypeError.
        """
        # GH 17558
        # Check that tz-aware DatetimeIndex + np.array(dtype="timedelta64")
        # and DatetimeIndex + TimedeltaIndex work as expected
        start = pd.DatetimeIndex([pd.Timestamp("2017/01/01")], name="x")
        dti = start.tz_localize('US/Eastern')

        shifted = pd.DatetimeIndex([pd.Timestamp("2017/01/01 01:00")],
                                   name="x")
        expected = shifted.tz_localize('US/Eastern')

        td_np = np.array([np.timedelta64(1, 'h')], dtype="timedelta64[ns]")
        addends = [
            td_np,  # numpy array
            td_np.astype(dtype="timedelta64[m]"),  # coarser resolution
            pd.TimedeltaIndex(td_np, name=dti.name),
            td_np[0],  # timedelta scalar
            pd.to_timedelta(td_np[0]),
        ]
        # Compute every sum first, then compare each one.
        sums = [dti + addend for addend in addends]
        for actual in sums:
            tm.assert_index_equal(actual, expected)

        errmsg = r"cannot add DatetimeIndex and np.ndarray\[float64\]"
        with tm.assert_raises_regex(TypeError, errmsg):
            dti + np.array([0.1], dtype=np.float64)
Exemplo n.º 10
0
def mean_std_model(data):
    """
    Build a per-time-of-day mean and standard deviation model.

    Rows are grouped by the hour and minute read from column 'D', and every
    column is aggregated with 'mean' and ``std``.

    NOTE(review): ``std`` is a bare name resolved from module scope; its
    definition is not visible here -- confirm which function it is.

    args :
        data : onehot encoded dataframe with a 'D' column whose values
               expose ``.hour`` and ``.minute``
    return :
        df : dataframe containing mean and standard deviation per
             (hour, minute) slot, plus a 'time' column that places each
             slot on today's date
    """
    frame = data.copy()
    stamps = frame['D']
    frame["minute"] = stamps.map(lambda ts: ts.minute)
    frame["hour"] = stamps.map(lambda ts: ts.hour)

    # mean and standard deviation of each column per time-of-day slot
    stats = frame.groupby(['hour', 'minute']).agg(['mean', std])
    frame = stats.reset_index()

    # rebuild a timestamp for each slot: today's midnight plus the slot's
    # offset in minutes
    frame['minutes'] = frame['minute'] + frame['hour'] * 60
    midnight_today = datetime.combine(date.today(), datetime.min.time())
    frame['time'] = midnight_today + pd.TimedeltaIndex(frame['minutes'],
                                                       unit='m')

    # the helper columns are no longer needed once 'time' exists
    frame.drop(['minutes', 'hour', 'minute'], axis=1, inplace=True)

    return frame