Exemplo n.º 1
0
    def testFiltering(self):
        """Check the observations kept in ``_x`` and ``_x_filtered`` by ``ols``.

        Fits ``ols`` on ``self.panel_y2`` / ``self.panel_x2`` (the call is
        expected to emit a ``FutureWarning``) and verifies the index levels
        and the values of the matrices stored on the result.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # The assertion has to be *called*: a bare ``self.assertTrue`` followed
        # by a parenthesised expression on the next line checks nothing.
        self.assertTrue(exp_index.equals(index))

        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(["A", "B"])
        self.assertTrue(exp_index.equals(index))

        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3), datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(index))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        exp_x = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1], [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        # First level of the filtered index must match the fitted values' index.
        self.assertTrue(result._x_filtered.index.levels[0].equals(result.y_fitted.index))
Exemplo n.º 2
0
    def test_basic_index(self):
        """Round-trip several indexes through ``self.encode_decode``.

        Every index must compare equal (via ``Index.equals``) to the object
        returned by ``self.encode_decode``.
        """
        # all indexes stored in the ``self.d`` mapping
        for _key, original in self.d.items():
            restored = self.encode_decode(original)
            self.assertTrue(original.equals(restored))

        # datetime with no freq (GH5506)
        stamps = [Timestamp('20130101'), Timestamp('20130103')]
        no_freq = Index(stamps)
        no_freq_restored = self.encode_decode(no_freq)
        self.assertTrue(no_freq.equals(no_freq_restored))

        # datetime with timezone
        naive = Index([Timestamp('20130101 9:00:00'),
                       Timestamp('20130103 11:00:00')])
        tz_aware = naive.tz_localize('US/Eastern')
        tz_aware_restored = self.encode_decode(tz_aware)
        self.assertTrue(tz_aware.equals(tz_aware_restored))
Exemplo n.º 3
0
    def test_constructor_range(self):
        """Exercise building a ``RangeIndex`` from builtin ``range`` objects."""
        # The plain constructor is expected to reject a range object.
        with self.assertRaises(TypeError):
            RangeIndex(range(1, 5, 2))

        # (input range, (start, stop, step) of the expected RangeIndex)
        from_range_cases = [
            (range(1, 5, 2), (1, 5, 2)),
            (range(5, 6), (5, 6, 1)),
            # an invalid range
            (range(5, 1), (0, 0, 1)),
            (range(5), (0, 5, 1)),
        ]
        for rng, (start, stop, step) in from_range_cases:
            built = RangeIndex.from_range(rng)
            wanted = RangeIndex(start, stop, step)
            self.assertTrue(built.equals(wanted))

        # The generic Index constructor applied to a range.
        via_index = Index(range(1, 5, 2))
        wanted = RangeIndex(1, 5, 2)
        self.assertTrue(via_index.equals(wanted))

        # ... but not together with a float dtype.
        with self.assertRaises(TypeError):
            Index(range(1, 5, 2), dtype='float64')
Exemplo n.º 4
0
    def testWithXEffects(self):
        """Fit ``ols`` with ``x_effects=['x1']`` and check ``_y`` / ``_x``."""
        fit = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])

        assert_almost_equal(fit._y.values.flat, [1, 4, 5])

        # One row per observation; columns follow ``expected_items`` below.
        expected_rows = [
            [0, 0, 14, 1],
            [0, 1, 17, 1],
            [1, 0, 48, 1],
        ]
        assert_almost_equal(fit._x.values, expected_rows)

        expected_items = Index(['x1_30', 'x1_9', 'x2', 'intercept'])
        self.assertTrue(expected_items.equals(fit._x.items))
Exemplo n.º 5
0
    def testWithXEffectsAndConversionAndDroppedDummies(self):
        """Fit ``ols`` with two x-effects and an explicit dropped dummy for ``x2``."""
        fit = ols(
            y=self.panel_y3,
            x=self.panel_x3,
            x_effects=["x1", "x2"],
            dropped_dummies={"x2": "foo"},
        )

        assert_almost_equal(fit._y.values.flat, [1, 2, 3, 4])

        # One row per observation; columns follow ``expected_columns`` below.
        expected_rows = [
            [0, 0, 0, 0, 1],
            [1, 0, 1, 0, 1],
            [0, 1, 0, 1, 1],
            [0, 0, 0, 0, 1],
        ]
        assert_almost_equal(fit._x.values, expected_rows)

        expected_columns = Index(["x1_B", "x1_C", "x2_bar", "x2_baz", "intercept"])
        self.assertTrue(expected_columns.equals(fit._x.columns))
Exemplo n.º 6
0
    def testWithXEffectsAndConversion(self):
        """Fit ``ols`` with x-effects on ``x1`` and ``x2`` and check ``_y`` / ``_x``.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        warning_check = tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
        with warning_check:
            fit = ols(y=self.panel_y3, x=self.panel_x3, x_effects=["x1", "x2"])

        assert_almost_equal(fit._y.values.flat, [1, 2, 3, 4])

        # One row per observation; columns follow ``expected_columns`` below.
        expected_rows = [
            [0, 0, 0, 1, 1],
            [1, 0, 0, 0, 1],
            [0, 1, 1, 0, 1],
            [0, 0, 0, 1, 1],
        ]
        assert_almost_equal(fit._x.values, expected_rows)

        expected_columns = Index(["x1_B", "x1_C", "x2_baz", "x2_foo", "intercept"])
        self.assertTrue(expected_columns.equals(fit._x.columns))
Exemplo n.º 7
0
    def testWithXEffectsAndConversion(self):
        """Fit ``ols`` with x-effects on ``x1`` and ``x2`` and check ``_y`` / ``_x``."""
        fit = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'])

        assert_almost_equal(fit._y.values.flat, [1, 2, 3, 4])

        # One row per observation; columns follow ``expected_items`` below.
        expected_rows = [
            [0, 0, 0, 1, 1],
            [1, 0, 0, 0, 1],
            [0, 1, 1, 0, 1],
            [0, 0, 0, 1, 1],
        ]
        assert_almost_equal(fit._x.values, expected_rows)

        expected_items = Index(['x1_B', 'x1_C', 'x2_baz', 'x2_foo', 'intercept'])
        self.assertTrue(expected_items.equals(fit._x.items))
Exemplo n.º 8
0
    def testWithXEffectsAndConversionAndDroppedDummies(self):
        """Fit ``ols`` with two x-effects and an explicit dropped dummy for ``x2``."""
        fit = ols(
            y=self.panel_y3,
            x=self.panel_x3,
            x_effects=['x1', 'x2'],
            dropped_dummies={'x2': 'foo'},
        )

        assert_almost_equal(fit._y.values.flat, [1, 2, 3, 4])

        # One row per observation; columns follow ``expected_columns`` below.
        expected_rows = [
            [0, 0, 0, 0, 1],
            [1, 0, 1, 0, 1],
            [0, 1, 0, 1, 1],
            [0, 0, 0, 0, 1],
        ]
        assert_almost_equal(fit._x.values, expected_rows)

        expected_columns = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
        self.assertTrue(expected_columns.equals(fit._x.columns))
Exemplo n.º 9
0
    def test_missing_values(self, closed):
        """Check NaN handling when building interval indexes.

        An ``Index`` built from ``[nan, Interval, Interval]`` must equal the
        ``IntervalIndex`` built from arrays with NaN on both sides, a NaN on
        only one side must raise ``ValueError``, and ``isna`` must flag only
        the first entry.
        """
        entries = [
            np.nan,
            Interval(0, 1, closed=closed),
            Interval(1, 2, closed=closed),
        ]
        idx = Index(entries)

        left_edges = [np.nan, 0, 1]
        right_edges = [np.nan, 1, 2]
        idx2 = IntervalIndex.from_arrays(left_edges, right_edges, closed=closed)
        assert idx.equals(idx2)

        # NaN on the left side only is rejected.
        with pytest.raises(ValueError):
            IntervalIndex.from_arrays(
                [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed)

        expected_mask = np.array([True, False, False])
        tm.assert_numpy_array_equal(isna(idx), expected_mask)
Exemplo n.º 10
0
    def testFiltering(self):
        """Check the observations kept in ``_x`` and ``_x_filtered`` by ``ols``.

        Axis values are looked up through the ``major_labels`` /
        ``minor_labels`` positions of each matrix.
        """
        def _sorted_unique(axis, labels):
            # Distinct axis values referenced by ``labels``, in sorted order.
            return Index(sorted({axis[pos] for pos in labels}))

        fit = ols(y=self.panel_y2, x=self.panel_x2)

        design = fit._x
        majors = _sorted_unique(design.major_axis, design.major_labels)
        wanted = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        self.assertTrue(wanted.equals(majors))

        minors = _sorted_unique(design.minor_axis, design.minor_labels)
        wanted = Index(['A', 'B'])
        self.assertTrue(wanted.equals(minors))

        filtered = fit._x_filtered
        majors = _sorted_unique(filtered.major_axis, filtered.major_labels)
        wanted = Index([
            datetime(2000, 1, 1),
            datetime(2000, 1, 3),
            datetime(2000, 1, 4),
        ])
        self.assertTrue(wanted.equals(majors))

        assert_almost_equal(fit._y.values.flat, [1, 4, 5])

        expected_rows = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(expected_rows, fit._x.values)

        expected_filtered_rows = [
            [6, 14, 1],
            [9, 17, 1],
            [30, 48, 1],
            [11, 20, 1],
            [12, 21, 1],
        ]
        assert_almost_equal(expected_filtered_rows, fit._x_filtered.values)

        # Major axis of the filtered matrix must match the fitted values' index.
        fitted_index = fit.y_fitted.index
        self.assertTrue(fit._x_filtered.major_axis.equals(fitted_index))
Exemplo n.º 11
0
    def testIndex(self):
        """Round-trip a named integer ``Index`` through ujson for each ``orient``.

        Every orientation is decoded both with and without ``numpy=True`` and
        the rebuilt ``Index`` must equal the original.  For ``orient="split"``
        the decoded dict is passed to ``Index`` as keyword arguments and the
        index name is compared as well.

        Uses ``assertTrue`` / ``assertEqual`` throughout; the ``assert_`` alias
        is deprecated and no longer exists on recent Python versions.
        """
        i = Index([23, 45, 18, 98, 43, 11], name="index")

        # column indexed
        outp = Index(ujson.decode(ujson.encode(i)))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i), numpy=True))
        self.assertTrue(i.equals(outp))

        # "split" carries the name, so it is rebuilt from keyword arguments.
        dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split")))
        outp = Index(**dec)
        self.assertTrue(i.equals(outp))
        self.assertEqual(i.name, outp.name)

        dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"),
                          numpy=True))
        outp = Index(**dec)
        self.assertTrue(i.equals(outp))
        self.assertEqual(i.name, outp.name)

        outp = Index(ujson.decode(ujson.encode(i, orient="values")))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i, orient="values"), numpy=True))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i, orient="records")))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i, orient="records"), numpy=True))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i, orient="index")))
        self.assertTrue(i.equals(outp))

        outp = Index(ujson.decode(ujson.encode(i, orient="index"), numpy=True))
        self.assertTrue(i.equals(outp))
Exemplo n.º 12
0
    def testFiltering(self):
        """Check the observations kept in ``_x`` and ``_x_filtered`` by ``ols``.

        Fits ``ols`` on ``self.panel_y2`` / ``self.panel_x2`` and verifies the
        index levels and the values of the matrices stored on the result.
        """
        result = ols(y=self.panel_y2, x=self.panel_x2)

        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # The assertion has to be *called*: ``self.assertTrue;(...)`` is two
        # separate no-op statements and checks nothing.
        self.assertTrue(exp_index.equals(index))

        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(['A', 'B'])
        self.assertTrue(exp_index.equals(index))

        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1),
                           datetime(2000, 1, 3),
                           datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(index))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        exp_x = [[6, 14, 1],
                 [9, 17, 1],
                 [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1],
                          [9, 17, 1],
                          [30, 48, 1],
                          [11, 20, 1],
                          [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        # First level of the filtered index must match the fitted values' index.
        self.assertTrue(result._x_filtered.index.levels[0].equals(
            result.y_fitted.index))
Exemplo n.º 13
0
def test_equals(idx):
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))

    assert not idx.equals(list(idx))
    assert not idx.equals(np.array(idx))

    same_values = Index(idx, dtype=object)
    assert idx.equals(same_values)
    assert same_values.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(pd.Series(idx))
Exemplo n.º 14
0
    def test_equals(self):
        """Check ``Index.equals`` for every index in ``self.indices``."""
        for _name, idx in compat.iteritems(self.indices):
            # Equal to itself, to a copy and to its object-dtype conversion.
            for equivalent in (idx, idx.copy(), idx.astype(object)):
                self.assertTrue(idx.equals(equivalent))

            # Plain containers holding the same values are never "equal".
            for non_index in (list(idx), np.array(idx)):
                self.assertFalse(idx.equals(non_index))

            # Cannot pass in non-int64 dtype to RangeIndex
            if not isinstance(idx, RangeIndex):
                as_object = Index(idx, dtype=object)
                self.assertTrue(idx.equals(as_object))
                self.assertTrue(as_object.equals(idx))

            # do not test MultiIndex
            if idx.nlevels == 1:
                self.assertFalse(idx.equals(pd.Series(idx)))
Exemplo n.º 15
0
    def test_equals(self, indices):
        if isinstance(indices, IntervalIndex):
            # IntervalIndex tested separately
            return

        assert indices.equals(indices)
        assert indices.equals(indices.copy())
        assert indices.equals(indices.astype(object))

        assert not indices.equals(list(indices))
        assert not indices.equals(np.array(indices))

        # Cannot pass in non-int64 dtype to RangeIndex
        if not isinstance(indices, (RangeIndex, CategoricalIndex)):
            # TODO: CategoricalIndex can be re-allowed following GH#32167
            same_values = Index(indices, dtype=object)
            assert indices.equals(same_values)
            assert same_values.equals(indices)

        if indices.nlevels == 1:
            # do not test MultiIndex
            assert not indices.equals(Series(indices))
Exemplo n.º 16
0
    def test_equals(self, index):
        if isinstance(index, IntervalIndex):
            # IntervalIndex tested separately, the index.equals(index.astype(object))
            #  fails for IntervalIndex
            return

        assert index.equals(index)
        assert index.equals(index.copy())
        assert index.equals(index.astype(object))

        assert not index.equals(list(index))
        assert not index.equals(np.array(index))

        # Cannot pass in non-int64 dtype to RangeIndex
        if not isinstance(index, RangeIndex):
            same_values = Index(index, dtype=object)
            assert index.equals(same_values)
            assert same_values.equals(index)

        if index.nlevels == 1:
            # do not test MultiIndex
            assert not index.equals(Series(index))
Exemplo n.º 17
0
 def test_equals(self):
     """``self.index`` and its object-dtype copy must be equal in both directions."""
     as_object = Index(self.index, dtype=object)
     forward = self.index.equals(as_object)
     self.assertTrue(forward)
     backward = as_object.equals(self.index)
     self.assertTrue(backward)
Exemplo n.º 18
0
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_less_precise: bool | int | NoDefault = no_default,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Check that left and right Index are equal.

    The checks run in a fixed order: instance validation, class / dtype
    comparison, number of levels, length, (optionally order-insensitive)
    values, and finally metadata such as names, freq and categories.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Int64Index as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_less_precise : bool or int, default False
        Specify comparison precision. Only used when check_exact is False.
        5 digits (False) or 3 digits (True) after decimal points are compared.
        If int, then specify the digits to compare.
        When passed, a FutureWarning is emitted and the derived tolerance
        replaces both `rtol` and `atol`.

        .. deprecated:: 1.1.0
           Use `rtol` and `atol` instead to define relative/absolute
           tolerance, respectively. Similar to :func:`math.isclose`.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
        When False, the exact ``equals`` comparison of the values is skipped
        and the tolerance-based comparison is used instead.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.

        .. versionadded:: 1.2.0
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _check_types(left, right, obj="Index") -> None:
        """Compare class, inferred_type and dtype; a no-op when `exact` is falsy."""
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        # Skip exact dtype checking when `check_categorical` is False
        if is_categorical_dtype(left.dtype) and is_categorical_dtype(
                right.dtype):
            if check_categorical:
                assert_attr_equal("dtype", left, right, obj=obj)
                assert_index_equal(left.categories,
                                   right.categories,
                                   exact=exact)
            # Both sides are categorical: the generic dtype check below is
            # never reached for this pair.
            return

        assert_attr_equal("dtype", left, right, obj=obj)

    def _get_ilevel_values(index, level):
        """Build the values of one MultiIndex level from its levels and codes."""
        # accept level number only
        unique = index.levels[level]
        level_codes = index.codes[level]
        # `fill_value` is the level's NA value; presumably used by take_nd for
        # missing codes -- confirm against take_nd.
        filled = take_nd(unique._values,
                         level_codes,
                         fill_value=unique._na_value)
        return unique._shallow_copy(filled, name=index.names[level])

    # Deprecated keyword: warn, then let it override both tolerances.
    if check_less_precise is not no_default:
        warnings.warn(
            "The 'check_less_precise' keyword in testing.assert_*_equal "
            "is deprecated and will be removed in a future version. "
            "You can stop passing 'check_less_precise' to silence this warning.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # https://github.com/python/mypy/issues/7642
        # error: Argument 1 to "_get_tol_from_less_precise" has incompatible
        # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
        rtol = atol = _get_tol_from_less_precise(
            check_less_precise  # type: ignore[arg-type]
        )

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        msg1 = f"{obj} levels are different"
        msg2 = f"{left.nlevels}, {left}"
        msg3 = f"{right.nlevels}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{obj} length are different"
        msg2 = f"{len(left)}, {left}"
        msg3 = f"{len(right)}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # If order doesn't matter then sort the index entries
    # (both sides are re-wrapped in a fresh Index after sorting)
    if not check_order:
        left = Index(safe_sort(left))
        right = Index(safe_sort(right))

    # MultiIndex special comparison for little-friendly error messages
    if left.nlevels > 1:
        left = cast(MultiIndex, left)
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            # cannot use get_level_values here because it can change dtype
            llevel = _get_ilevel_values(left, level)
            rlevel = _get_ilevel_values(right, level)

            # Recurse per level so a failure names the offending level.
            lobj = f"MultiIndex level [{level}]"
            assert_index_equal(
                llevel,
                rlevel,
                exact=exact,
                check_names=check_names,
                check_exact=check_exact,
                rtol=rtol,
                atol=atol,
                obj=lobj,
            )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=obj)

    # skip exact index checking when `check_categorical` is False
    if check_exact and check_categorical:
        if not left.equals(right):
            # Report the share of element-wise mismatches as a percentage.
            mismatch = left._values != right._values

            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right)
    else:

        # if we have "equiv", this becomes True
        exact_bool = bool(exact)
        # Tolerance-based comparison of the underlying values.
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=exact_bool,
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    # freq / interval checks run when either side is of the respective type
    if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex):
        assert_attr_equal("freq", left, right, obj=obj)
    if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical:
        if is_categorical_dtype(left.dtype) or is_categorical_dtype(
                right.dtype):
            assert_categorical_equal(left._values,
                                     right._values,
                                     obj=f"{obj} category")
Exemplo n.º 19
0
def test_explicit_table_indexing(tmpdir, first_sub_index, sub_per_sample,
                                 main_index_column, subs_index_column):
    """Check that explicitly requested index columns are used for both tables.

    Writes a sample annotation sheet, a subannotation sheet and a YAML config
    under ``tmpdir``, builds a ``SnakeProject`` from the config with explicit
    main and subsample index keys, and compares the indexes of the resulting
    sample and subsample tables with the expected ones.
    """
    def _shift_str(s, i):
        return chr(ord(s) + i)

    def _shift_int(x, i):
        return x + i

    # Pick how to derive the i-th sub-index from the first one, by its exact type.
    step = {str: _shift_str, int: _shift_int}[type(first_sub_index)]

    main_index_data = ["testA", "testB", "testC"]
    n_main = len(main_index_data)

    # Each main name is repeated once per sub-sample.
    subs_names = [name for name in main_index_data
                  for _ in range(sub_per_sample)]
    print("SUBS NAMES: {}".format(subs_names))
    # The same run of sub-index values is generated for every main name.
    subs_index_data = [step(first_sub_index, offset)
                       for _ in main_index_data
                       for offset in range(sub_per_sample)]
    subs_vals = list(range(n_main)) * n_main

    sep = "\t"
    ext = ".tsv"
    dat_col = "data"
    annsfile = tmpdir.join("anns" + ext).strpath
    subsfile = tmpdir.join("subs" + ext).strpath
    conffile = tmpdir.join("conf.yaml").strpath

    # Header row followed by one row per sample.
    anns_rows = [(main_index_column, dat_col)]
    anns_rows.extend(zip(main_index_data, range(n_main)))
    annslines = ["{}{}{}".format(name, sep, value)
                 for name, value in anns_rows]

    # Header row followed by one row per subsample.
    subs_rows = [(main_index_column, subs_index_column, dat_col)]
    subs_rows.extend(zip(subs_names, subs_index_data, subs_vals))
    subslines = ["{n}{sep}{sub}{sep}{v}".format(n=name, sep=sep, sub=sub, v=val)
                 for name, sub, val in subs_rows]

    for path, lines in ((annsfile, annslines), (subsfile, subslines)):
        with open(path, 'w') as f:
            f.write("\n".join(lines))

    config = {
        METADATA_KEY: {
            SAMPLE_ANNOTATIONS_KEY: annsfile,
            SAMPLE_SUBANNOTATIONS_KEY: subsfile,
        }
    }
    with open(conffile, 'w') as f:
        yaml.dump(config, f)

    index_kwargs = {
        MAIN_INDEX_KEY: main_index_column,
        SUBS_INDEX_KEY: (main_index_column, subs_index_column),
    }
    p = SnakeProject(conffile, **index_kwargs)
    print("SUBS TABLE:\n{}".format(p.subsample_table))

    # Main table: plain Index named after the requested column.
    exp_main_idx = Index(main_index_data, name=main_index_column)
    assert exp_main_idx.equals(p.sample_table.index)

    # Subsample table: MultiIndex over (main column, sub column).
    obs_subs_idx = p.subsample_table.index
    assert isinstance(obs_subs_idx, MultiIndex)
    exp_subs_names = [main_index_column, subs_index_column]
    exp_subs_levels = [main_index_data, subs_index_data[:sub_per_sample]]
    print("EXP NAMES: {}".format(exp_subs_names))
    print("EXP LEVELS: {}".format(exp_subs_levels))
    print("OBS NAMES: {}".format(obs_subs_idx.names))
    print("OBS LEVELS: {}".format(obs_subs_idx.levels))
    obs_subs_levels = [list(level) for level in obs_subs_idx.levels]
    assert exp_subs_levels == obs_subs_levels
    assert exp_subs_names == obs_subs_idx.names
Exemplo n.º 20
0
def test_default_table_indexing(prj, exp_dat, observe):
    """ Check the default index that a Project gives its tables. """
    # Expected: values from ``exp_dat``, named after the default sample column.
    expected_index = Index(data=exp_dat(prj), name=SNAKEMAKE_SAMPLE_COL)
    observed_index = observe(prj)
    assert expected_index.equals(observed_index)
Exemplo n.º 21
0
 def test_equals(self):
     """``self.index`` and its object-dtype copy must be equal in both directions."""
     object_copy = Index(self.index, dtype=object)
     index_vs_copy = self.index.equals(object_copy)
     self.assertTrue(index_vs_copy)
     copy_vs_index = object_copy.equals(self.index)
     self.assertTrue(copy_vs_index)