Пример #1
0
    def interpolate(x: pd.Series, dates: Union[List[date], List[time], pd.Series] = None,
                    method: Interpolate = Interpolate.INTERSECT) -> pd.Series:
        """
        Interpolate a series over specified dates or times.

        :param x: timeseries to interpolate
        :param dates: list of dates/times, or another series whose index supplies the
            requested dates; when omitted, ``x`` itself is used
        :param method: how to produce values on the requested dates (default: intersect)
        :return: ``x`` aligned to the requested dates according to ``method``
        :raises MqValueError: if ``method`` is not one of the handled ``Interpolate`` members

        Intersect a series with the dates of a second series:

        >>> a = generate_series(100)
        >>> b = generate_series(100)
        >>> interpolate(a, b, Interpolate.INTERSECT)

        """
        if dates is None:
            dates = x

        # Only the index of align_series is used below; its values are placeholders.
        if isinstance(dates, pd.Series):
            align_series = dates
        else:
            align_series = pd.Series(np.nan, dates)

        if method == Interpolate.INTERSECT:  # Only returns a value for dates present on both sides
            return x.align(align_series, 'inner')[0]
        if method == Interpolate.NAN:  # Value will be NaN for dates not present in the series
            return x.align(align_series, 'right')[0]
        if method == Interpolate.ZERO:  # Value will be zero for dates not present in the series
            # Align against align_series directly: rebuilding pd.Series(0.0, dates) would index
            # by the *values* of `dates` whenever a series (rather than a list) was passed.
            return x.align(align_series, 'right', fill_value=0)[0]
        if method == Interpolate.STEP:  # Value of the previous valid point
            return __interpolate_step(x, align_series)
        # str() is required: concatenating the enum member itself raises TypeError
        # before the intended MqValueError can be constructed.
        raise MqValueError('Unknown intersection type: ' + str(method))
Пример #2
0
def test_align_multiindex():
    """Align a three-level MultiIndex series with a series indexed by level "b" only."""
    # GH 10665
    level_names = ("a", "b", "c")
    full_index = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=level_names)
    multi = Series(np.arange(12, dtype="int64"), index=full_index)
    single = Series(np.arange(2, dtype="int64"),
                    index=pd.Index(range(2), name="b"))

    # Keeping every row of `multi`: the same pair must come back whichever
    # side the call is made from (only the tuple order is flipped).
    multi_kept, single_spread = multi.align(single, join="left")
    single_spread_flipped, multi_kept_flipped = single.align(multi, join="right")

    tm.assert_series_equal(multi, multi_kept)
    tm.assert_series_equal(multi, multi_kept_flipped)
    want_single = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=full_index)
    tm.assert_series_equal(want_single, single_spread)
    tm.assert_series_equal(want_single, single_spread_flipped)

    # Keeping only the "b" labels known to `single`.
    multi_cut, single_spread = multi.align(single, join="right")
    single_spread_flipped, multi_cut_flipped = single.align(multi, join="left")

    reduced_index = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=level_names)
    want_multi = Series([0, 1, 2, 3, 6, 7, 8, 9], index=reduced_index)
    tm.assert_series_equal(want_multi, multi_cut)
    tm.assert_series_equal(want_multi, multi_cut_flipped)
    want_single = Series([0, 0, 1, 1] * 2, index=reduced_index)
    tm.assert_series_equal(want_single, single_spread)
    tm.assert_series_equal(want_single, single_spread_flipped)
Пример #3
0
class Align(object):
    """Benchmarks for aligning two large series with sorted int64 indexes."""

    def setup(self):
        """Build two random series whose indexes are sorted samples of one pool."""
        n_points = 5 * 10**5
        offsets = np.arange(0, 10**13, 10**7)
        pool = np.datetime64('now').view('i8') + offsets
        # Each index is an independent sample (without replacement) of the pool.
        left_index, right_index = (
            np.sort(np.random.choice(pool, n_points, replace=False))
            for _ in range(2)
        )
        self.ts1 = Series(np.random.randn(n_points), left_index)
        self.ts2 = Series(np.random.randn(n_points), right_index)

    def time_series_align_int64_index(self):
        """Time the implicit alignment performed by adding the two series."""
        self.ts1 + self.ts2

    def time_series_align_left_monotonic(self):
        """Time an explicit left-join align of the two series."""
        self.ts1.align(self.ts2, join='left')
Пример #4
0
    def test_subtracting_two_series_with_unordered_index_and_all_nan_index(
        self, data_result, data_expected
    ):
        # GH 38439
        a_index_result = MultiIndex.from_tuples(data_result[0])
        b_index_result = MultiIndex.from_tuples(data_result[1])
        a_series_result = Series(data_result[2], index=a_index_result)
        b_series_result = Series(data_result[3], index=b_index_result)
        result = a_series_result.align(b_series_result)

        a_index_expected = MultiIndex.from_tuples(data_expected[0])
        b_index_expected = MultiIndex.from_tuples(data_expected[1])
        a_series_expected = Series(data_expected[2], index=a_index_expected)
        b_series_expected = Series(data_expected[3], index=b_index_expected)
        a_series_expected.index = a_series_expected.index.set_levels(
            [
                a_series_expected.index.levels[0].astype("float"),
                a_series_expected.index.levels[1].astype("float"),
            ]
        )
        b_series_expected.index = b_series_expected.index.set_levels(
            [
                b_series_expected.index.levels[0].astype("float"),
                b_series_expected.index.levels[1].astype("float"),
            ]
        )

        tm.assert_series_equal(result[0], a_series_expected)
        tm.assert_series_equal(result[1], b_series_expected)
Пример #5
0
class Align(object):
    """Timing harness: align two 500k-row series drawn from a shared set of stamps."""

    def setup(self):
        """Create ``ts1`` and ``ts2`` with sorted, randomly sampled int64 indexes."""
        count = 5 * 10**5
        base = np.datetime64('now').view('i8')
        stamps = base + np.arange(0, 10**13, 10**7)

        def sample_index():
            # Sorted sample of `count` distinct stamps.
            return np.sort(np.random.choice(stamps, count, replace=False))

        first_index = sample_index()
        second_index = sample_index()
        self.ts1 = Series(np.random.randn(count), first_index)
        self.ts2 = Series(np.random.randn(count), second_index)

    def time_series_align_int64_index(self):
        """Addition aligns both operands on their int64 indexes."""
        self.ts1 + self.ts2

    def time_series_align_left_monotonic(self):
        """Left-join alignment of the two series."""
        self.ts1.align(self.ts2, join='left')
Пример #6
0
def test_align_with_dataframe_method(method):
    # GH31788
    ser = Series(range(3), index=range(3))
    df = pd.DataFrame(0.0, index=range(3), columns=range(3))

    result_ser, result_df = ser.align(df, method=method)
    tm.assert_series_equal(result_ser, ser)
    tm.assert_frame_equal(result_df, df)
Пример #7
0
    def test_align_series(self):
        """Adding period-indexed series aligns them; mismatched frequencies raise."""
        years = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(years)), index=years)

        # Positions missing from the every-other-year slice become NaN in the sum.
        summed = ts + ts[::2]
        doubled = ts + ts
        doubled[1::2] = np.nan
        tm.assert_series_equal(summed, doubled)

        # The order of the right-hand operand must not matter.
        shuffled_sum = ts + _permute(ts[::2])
        tm.assert_series_equal(shuffled_sum, doubled)

        # it works!
        for how in ('inner', 'outer', 'left', 'right'):
            ts.align(ts[::2], join=how)

        daily = None  # built inside the context so the failure is attributed to the addition
        msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
            ts + ts.asfreq('D', how="end")
Пример #8
0
    def test_align_series(self):
        """Check implicit and explicit alignment of annual PeriodIndex series."""
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        values = np.random.randn(len(rng))
        ts = Series(values, index=rng)

        result = ts + ts[::2]
        # Expected: doubled values where both sides have data, NaN elsewhere.
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(result, expected)

        result = ts + _permute(ts[::2])
        tm.assert_series_equal(result, expected)

        # it works!
        join_kinds = ['inner', 'outer', 'left', 'right']
        for join_kind in join_kinds:
            ts.align(ts[::2], join=join_kind)

        # Adding a daily-frequency series to the annual one must be rejected.
        expected_message = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with tm.assert_raises_regex(period.IncompatibleFrequency, expected_message):
            ts + ts.asfreq('D', how="end")
Пример #9
0
    def test_series_align_aware(self):
        """Aligning series in different time zones gives UTC indexes on both sides."""
        eastern_index = date_range('2001', periods=5, freq='H', tz='US/Eastern')
        eastern = Series(np.random.randn(len(eastern_index)), index=eastern_index)
        central = eastern.tz_convert('US/Central')

        # different timezones convert to UTC
        aligned_eastern, aligned_central = eastern.align(central)
        for aligned in (aligned_eastern, aligned_central):
            assert aligned.index.tz == pytz.UTC
Пример #10
0
    def test_series_align_aware(self):
        """Both results of aligning US/Eastern with US/Central data carry a UTC index."""
        hours = date_range("2001", periods=5, freq="H", tz="US/Eastern")
        values = np.random.randn(len(hours))
        ser = Series(values, index=hours)
        ser_central = ser.tz_convert("US/Central")

        # different timezones convert to UTC
        aligned_pair = ser.align(ser_central)
        first, second = aligned_pair
        assert first.index.tz == pytz.UTC
        assert second.index.tz == pytz.UTC
Пример #11
0
def test_align_left_fewer_levels():
    """Align when the left MultiIndex has a subset of the right one's levels."""
    # GH#45224
    two_level = pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"])
    three_level = pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    left = Series([2], index=two_level)
    right = Series([1], index=three_level)

    result_left, result_right = left.align(right)

    # Both sides are expected on the same index, with level "b" moved last.
    joined_index = pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    tm.assert_series_equal(result_left, Series([2], index=joined_index))
    tm.assert_series_equal(result_right, Series([1], index=joined_index))
Пример #12
0
def test_str_cat_align_indexed(index_or_series, join):
    """``str.cat(..., join=join)`` must match concatenating manually aligned inputs."""
    # https://github.com/pandas-dev/pandas/issues/18657
    caller = Series(["a", "b", "c", "d"], index=["a", "b", "c", "d"])
    other = Series(["D", "A", "E", "B"], index=["d", "a", "e", "b"])

    # result after manual alignment of inputs
    caller_aligned, other_aligned = caller.align(other, join=join)
    expected = caller_aligned.str.cat(other_aligned, na_rep="-")

    # When boxing as Index, both the caller and the expectation are converted.
    if index_or_series == Index:
        caller = Index(caller)
        expected = Index(expected)

    result = caller.str.cat(other, join=join, na_rep="-")
    assert_series_or_index_equal(result, expected)
Пример #13
0
def align_method_SERIES(left: Series, right, align_asobject: bool = False):
    """
    Align the left and right operands of a Series operation.

    Alignment only happens when ``right`` is a Series whose index differs
    from ``left``'s; in every other case both operands are returned as given.
    When ``align_asobject`` is true, both are cast to object dtype before
    aligning.
    """
    # ToDo: Different from align_method_FRAME, list, tuple and ndarray
    # are not coerced here
    # because Series has inconsistencies described in #13637

    # Equal indexes need no work, which also avoids repeated alignment.
    must_align = isinstance(right, ABCSeries) and not left.index.equals(right.index)
    if not must_align:
        return left, right

    if align_asobject:
        # to keep original value's dtype for bool ops
        left, right = left.astype(object), right.astype(object)

    aligned_left, aligned_right = left.align(right, copy=False)
    return aligned_left, aligned_right
Пример #14
0
    def __interpolate_step(x: pd.Series, dates: pd.Series = None) -> pd.Series:
        """
        Step-interpolate ``x`` onto the index of ``dates``.

        Every requested date missing from ``x`` takes the value of the previous
        requested date that had one. Requested dates before any such value take a
        seed: the value of ``x`` at or before the first requested date, or the first
        value of ``x`` when the first requested date precedes the whole series.

        :param x: series to interpolate; must not be empty
        :param dates: series whose index supplies the requested dates; defaults to ``x``
        :return: series indexed by the requested dates
        :raises MqValueError: if ``x`` is empty
        """
        if x.empty:
            raise MqValueError('Cannot perform step interpolation on an empty series')

        if dates is None:
            # The signature advertises None as the default, so fall back to x's own dates.
            dates = x

        curve = x.align(dates, 'right')[0]  # only need values from dates
        if curve.empty:
            # No requested dates: nothing to fill, and dates.index[0] below would fail.
            return curve

        first_date = pd.Timestamp(dates.index[0]) if isinstance(x.index[0], pd.Timestamp) else dates.index[0]

        # Seed value: previous valid point of x, or the first value when none precedes.
        if first_date < x.index[0]:
            seed = x.iloc[0]
        else:
            # get_indexer(..., method='pad') stands in for Index.get_loc(..., 'pad'),
            # whose `method` argument was removed in pandas 2.0.
            seed = x.iloc[x.index.get_indexer([first_date], method='pad')[0]]

        # Forward-fill carries each known value to the following gaps; gaps before
        # the first known value receive the seed.
        return curve.ffill().fillna(seed)
Пример #15
0
    def test_align_series_combinations(self):
        df = DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE"))
        s = Series([1, 2, 4], index=list("ABD"), name="x")

        # frame + series
        res1, res2 = df.align(s, axis=0)
        exp1 = DataFrame(
            {
                "a": [1, np.nan, 3, np.nan, 5],
                "b": [1, np.nan, 3, np.nan, 5]
            },
            index=list("ABCDE"),
        )
        exp2 = Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x")

        tm.assert_frame_equal(res1, exp1)
        tm.assert_series_equal(res2, exp2)

        # series + frame
        res1, res2 = s.align(df)
        tm.assert_series_equal(res1, exp2)
        tm.assert_frame_equal(res2, exp1)
Пример #16
0
def pairs_selection_test(price1: pd.Series, price2: pd.Series, max_lag: int,
                         convenient_periods: int) -> bool:
    """
    Run a sequence of statistical filters on a candidate pair of securities.

    The pair is rejected as soon as one filter fails: cointegration,
    Hurst exponent of the log spread, half-life of the spread, and
    frequency of mean crossings.

    Parameters
    ----------
    price1 : pd.Series
        Price series of 1st security. Not modified.
    price2 : pd.Series
        Price series of 2nd security. Not modified.
    max_lag: int
        Exclusive upper bound of the lags (starting at 2) used to estimate
        the Hurst exponent of the spread.
    convenient_periods: int
        Specify the trading period. This will be used to filter out pairs
        whose half-life is not coherent with the trading period.

    Returns
    -------
    bool
        whether the pair passes the selection test.
    """
    # Clean copies instead of in-place edits, so the caller's series are untouched.
    price1 = price1.replace([np.inf, -np.inf], np.nan).dropna()
    price2 = price2.replace([np.inf, -np.inf], np.nan).dropna()
    price1, price2 = price1.align(price2, join='inner')

    # Cointegrated pairs; Propose that the Engle-Granger test is run for the
    # two possible selections of the dependent variable and that the combination
    # that generated the lowest t-statistic is selected.
    _, pval1, _ = coint(price1, price2)
    _, pval2, _ = coint(price2, price1)
    if min(pval1, pval2) >= .01:
        return False

    spread = np.log(price1 / price2)

    # Mean-reverting Hurst exponent; It aims to constrain false positives,
    # possibly arising as an effect of the multiple comparisons problem. The
    # condition imposed is that the Hurst exponent associated with the spread
    # of a given pair is enforced to be smaller than 0.5, assuring the process
    # leans towards mean-reversion.
    #
    # The lagged difference is taken on the raw array: subtracting two slices
    # of the Series would be aligned on index labels by pandas and would not
    # produce spread[t + lag] - spread[t].
    spread_values = spread.values
    lags = np.arange(2, max_lag)
    tau = [
        np.sqrt(np.std(spread_values[lag:] - spread_values[:-lag]))
        for lag in lags
    ]
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    hurst_exponent = poly[0] * 2
    if hurst_exponent >= 0.5:
        return False

    # Suitable half-life; the value of theta which is obtained by running a
    # linear regression on the difference between mean of spread and spread,
    # and the difference between tomorrow's value of spread and today's value
    model = sm.OLS((np.mean(spread) - spread).iloc[:-1],
                   (spread.shift(-1) - spread).iloc[:-1])
    results = model.fit()
    # np.asarray gives positional access whether params is an ndarray or a
    # label-indexed Series.
    theta = np.asarray(results.params)[0]
    half_life = -np.log(2) / theta
    if half_life > convenient_periods:
        return False

    # Mean crossing; the spread is compared with its monthly mean, and every
    # calendar year must contain at least 12 sign changes of that difference.
    spread_mu = spread.resample('MS').transform('mean')
    delta_sign = (np.sign(spread - spread_mu).diff().dropna() != 0).astype(int)
    num_of_cross_per_year = delta_sign.resample('Y').sum()
    if not (num_of_cross_per_year >= 12).all():
        return False
    return True
Пример #17
0
    def test_align_broadcast_axis(self):
        # GH 13194
        # First four tests for DataFrame.align(Index)
        # For 'right' join
        df = DataFrame(np.array([[1., 2.], [3., 4.]]), columns=list('AB'))
        ts = Series([5., 6., 7.])

        result = df.align(ts, join='right', axis=0, broadcast_axis=1)
        expected1 = DataFrame(np.array([[1., 2.], [3., 4.],
                                        [pd.np.nan, pd.np.nan]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)

        # For 'right' join on different index
        result = df.align(ts, join='right', axis=1, broadcast_axis=1)
        expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[5., 5.], [6., 6.],
                                        [7., 7.]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)

        # For 'left' join
        result = df.align(ts, join='left', axis=0, broadcast_axis=1)
        expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[5., 5.], [6., 6.]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)

        # For 'left' join on different axis
        result = df.align(ts, join='left', axis=1, broadcast_axis=1)
        expected1 = DataFrame(np.array([[1., 2.], [3., 4.]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)

        # Series.align(DataFrame) tests, 'outer' join
        result = ts.align(df, join='outer', axis=0, broadcast_axis=1)
        expected1 = DataFrame(np.array([[5., 5.], [6., 6.], [7., 7.]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[1., 2.], [3., 4.],
                                        [pd.np.nan, pd.np.nan]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)

        # Series.align(DataFrame) tests, 'inner' join
        result = ts.align(df, join='inner', axis=0, broadcast_axis=1)
        expected1 = DataFrame(np.array([[5., 5.], [6., 6.]]),
                              columns=list('AB'))
        expected2 = DataFrame(np.array([[1., 2.], [3., 4.]]),
                              columns=list('AB'))
        assert_frame_equal(result[0], expected1)
        assert_frame_equal(result[1], expected2)
Пример #18
0
def interpolate(x: pd.Series, dates: Union[List[date], List[time], pd.Series] = None,
                method: Interpolate = Interpolate.INTERSECT) -> pd.Series:
    """
    Interpolate over specified dates or times

    :param x: timeseries to interpolate
    :param dates: array of dates/times or another series to interpolate
    :param method: interpolation method (default: intersect)
    :return: timeseries with specified dates
    :raises MqValueError: if the interpolation method is not recognised

    **Usage**

    Interpolate the series X over the dates specified by the dates parameter. This can be an array of dates or another
    series, in which case the index of the series will be used to specify dates. When no dates are given, the dates
    of X itself are used.

    Interpolation methods:

    =========   ========================================================================
    Type        Behavior
    =========   ========================================================================
    intersect   Resultant series only has values on the intersection of dates / times.
                Will only contain intersection of valid dates / times in the series
    nan         Resultant series has values on all requested dates / times. Value
                will be NaN for dates not present in the series
    zero        Resultant series has values on all requested dates / times. The series
                will have a value of zero where the requested date or time was not
                present in the series
    step        Resultant series has values on all requested dates / times. The series
                will use the value of the previous valid point if requested date does
                not exist. Values prior to the first date will be equivalent to the
                first available value
    =========   ========================================================================

    **Examples**

    Intersect a series with the dates of a second series:

    >>> a = generate_series(100)
    >>> b = generate_series(100)
    >>> interpolate(a, b, Interpolate.INTERSECT)

    **See also**

    :func:`sub`
    """
    if dates is None:
        dates = x

    # Only the index of align_series is used below; its values are placeholders.
    if isinstance(dates, pd.Series):
        align_series = dates
    else:
        align_series = pd.Series(np.nan, dates)

    if method == Interpolate.INTERSECT:
        return x.align(align_series, 'inner')[0]
    if method == Interpolate.NAN:
        return x.align(align_series, 'right')[0]
    if method == Interpolate.ZERO:
        # Align against align_series directly: rebuilding pd.Series(0.0, dates) would index
        # by the *values* of `dates` whenever a series (rather than a list) was passed.
        return x.align(align_series, 'right', fill_value=0)[0]
    if method == Interpolate.STEP:
        return __interpolate_step(x, align_series)
    # str() is required: concatenating the enum member itself raises TypeError
    # before the intended MqValueError can be constructed.
    raise MqValueError('Unknown intersection type: ' + str(method))
Пример #19
0
    def test_align_series(self, join_type):
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)
Пример #20
0
    def test_align_series(self, join_type):
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)
Пример #21
0
# Notebook export demonstrating `align`. The objects `s`, `df` and `df2` are
# created in earlier cells that are not part of this excerpt.
# NOTE(review): presumably `s` is a pandas Series and `df`/`df2` are DataFrames - confirm.

# In[11]:

# Slice of `s` up to position/label 4, then shown as the cell output.
s1 = s[:4]
s1


# In[10]:

# Slice of `s` from position/label 1 onward, then shown as the cell output.
s2 = s[1:]
s2


# In[12]:

# Align the two overlapping slices with the default join.
s1.align(s2)


# In[13]:

# Align `df` with `df2`, requesting an inner join.
df.align(df2, join ='inner')


# In[ ]:




# In[ ]:

#filter and column selection in single statement
Пример #22
0
def test_align_periodindex(join_type):
    rng = period_range("1/1/2000", "1/1/2010", freq="A")
    ts = Series(np.random.randn(len(rng)), index=rng)

    # TODO: assert something?
    ts.align(ts[::2], join=join_type)