Пример #1
0
    def test_corr(self):
        """Compare pandas-on-Spark ``corr`` results against pandas.

        Exercises DataFrame.corr and Series.corr, first with flat column
        labels and then again after relabelling the same frames with
        multi-index columns.
        """
        # corr() uses a UDT internally, which is not supported under Arrow
        # execution, so Arrow is turned off for the whole test body.
        arrow_off = {SPARK_CONF_ARROW_ENABLED: False}
        with self.sql_conf(arrow_off):
            # NaNs are not handled for now, hence the fillna(0).
            pdf = makeMissingDataframe(0.3, 42).fillna(0)
            psdf = ps.from_pandas(pdf)

            # DataFrame-level correlation matrix.
            self.assert_eq(psdf.corr(), pdf.corr(), check_exact=False)

            # Series-to-Series correlation on flat column labels.
            pandas_a, pandas_b = pdf.A, pdf.B
            spark_a, spark_b = psdf.A, psdf.B

            spark_result = spark_a.corr(spark_b)
            pandas_result = pandas_a.corr(pandas_b)
            self.assertAlmostEqual(spark_result, pandas_result)

            # Passing a DataFrame where a Series is expected must be rejected.
            with self.assertRaises(TypeError):
                spark_a.corr(psdf)

            # Relabel both frames with multi-index columns and repeat the checks.
            multi_columns = pd.MultiIndex.from_tuples(
                [("X", "A"), ("X", "B"), ("Y", "C"), ("Z", "D")]
            )
            pdf.columns = multi_columns
            psdf.columns = multi_columns

            self.assert_eq(psdf.corr(), pdf.corr(), check_exact=False)

            # Series-to-Series correlation on multi-index column labels.
            key_a = ("X", "A")
            key_b = ("X", "B")
            pandas_xa = pdf[key_a]
            pandas_xb = pdf[key_b]
            spark_xa = psdf[key_a]
            spark_xb = psdf[key_b]

            self.assert_eq(
                spark_xa.corr(spark_xb),
                pandas_xa.corr(pandas_xb),
                almost=True,
            )
Пример #2
0
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(["a", "b", "c"]),
        Series(["a", np.nan, "c"]),
        Series(["a", None, "c"]),
        Series([True, False, True]),
        Series(dtype=object),
        Index([1, 2, 3]),
        Index([True, False, True]),
        DataFrame({
            "x": ["a", "b", "c"],
            "y": [1, 2, 3]
        }),
        DataFrame(),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
        MultiIndex.from_product([
            range(5), ["foo", "bar", "baz"],
            pd.date_range("20130101", periods=2)
        ]),
        MultiIndex.from_product([pd.CategoricalIndex(list("aabc")),
                                 range(3)]),
    ],
)