示例#1
0
def test_dataframe_corr_with(setup):
    """Compare ``DataFrame.corrwith`` results with pandas.

    Random inputs come from a fixed seed, and every entry that is not above
    0.4 is replaced with NaN.  Results are compared first for frames built
    without ``chunk_size`` and then for frames and series built with
    explicit ``chunk_size`` values; in the latter case both sides are
    index-sorted before the comparison.
    """
    rng = np.random.RandomState(0)

    def _with_gaps(values):
        # Keep entries above 0.4 and turn everything else into NaN.
        return np.where(values > 0.4, values, np.nan)

    # The draws share one generator, so their order must not change.
    pd_left = pd.DataFrame(_with_gaps(rng.rand(20, 10)),
                           columns=list('ABCDEFGHIJ'))
    pd_right = pd.DataFrame(_with_gaps(rng.rand(20, 10)),
                            columns=list('ACDEGHIJKL'))
    pd_series = pd.Series(_with_gaps(rng.rand(20)))
    pd_series2 = pd.Series(_with_gaps(rng.rand(10)), index=pd_right.columns)

    left = DataFrame(pd_left)
    right = DataFrame(pd_right)

    # Frames built without chunk_size: compared without sorting.
    for kwargs in ({}, {'axis': 1}, {'method': 'kendall'}):
        actual = left.corrwith(right, **kwargs).execute().fetch()
        expected = pd_left.corrwith(pd_right, **kwargs)
        pd.testing.assert_series_equal(actual, expected)

    left = DataFrame(pd_left, chunk_size=4)
    right = DataFrame(pd_right, chunk_size=6)
    series = Series(pd_series, chunk_size=5)
    series2 = Series(pd_series2, chunk_size=5)

    # With these chunked inputs, the kendall method is expected to raise.
    with pytest.raises(Exception):
        left.corrwith(right, method='kendall').execute()

    chunked_cases = (
        (right, pd_right, {}),
        (right, pd_right, {'axis': 1}),
        (series, pd_series, {}),
        (series2, pd_series2, {'axis': 1}),
    )
    for other, pd_other, kwargs in chunked_cases:
        actual = left.corrwith(other, **kwargs).execute().fetch()
        expected = pd_left.corrwith(pd_other, **kwargs)
        pd.testing.assert_series_equal(actual.sort_index(),
                                       expected.sort_index())
    def testDataFrameCorrWith(self):
        """Compare ``DataFrame.corrwith`` results with pandas via the executor.

        Random inputs come from a fixed seed, and every entry that is not
        above 0.4 is replaced with NaN.  Frames built without ``chunk_size``
        are compared directly; frames and series built with explicit
        ``chunk_size`` values are compared after sorting both sides by index.
        """
        def fetch(tileable):
            # Run through the test executor and take the first returned item.
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        def check(frame, other, pd_other, sort, **kwargs):
            # Compute on both sides with identical keyword arguments.
            got = fetch(frame.corrwith(other, **kwargs))
            want = raw_df.corrwith(pd_other, **kwargs)
            if sort:
                got = got.sort_index()
                want = want.sort_index()
            pd.testing.assert_series_equal(got, want)

        def sparsify(values):
            # Keep entries above 0.4 and turn everything else into NaN.
            return np.where(values > 0.4, values, np.nan)

        # The draws share one generator, so their order must not change.
        rng = np.random.RandomState(0)
        raw_df = pd.DataFrame(sparsify(rng.rand(20, 10)),
                              columns=list('ABCDEFGHIJ'))
        raw_df2 = pd.DataFrame(sparsify(rng.rand(20, 10)),
                               columns=list('ACDEGHIJKL'))
        raw_s = pd.Series(sparsify(rng.rand(20)))
        raw_s2 = pd.Series(sparsify(rng.rand(10)), index=raw_df2.columns)

        # Frames built without chunk_size: compared without sorting.
        whole = DataFrame(raw_df)
        whole2 = DataFrame(raw_df2)
        check(whole, whole2, raw_df2, False)
        check(whole, whole2, raw_df2, False, axis=1)
        check(whole, whole2, raw_df2, False, method='kendall')

        chunked = DataFrame(raw_df, chunk_size=4)
        chunked2 = DataFrame(raw_df2, chunk_size=6)
        chunked_s = Series(raw_s, chunk_size=5)
        chunked_s2 = Series(raw_s2, chunk_size=5)

        # With these chunked inputs, the kendall method is expected to raise.
        # The result is deliberately not indexed here, so only the execution
        # itself can satisfy the assertion.
        with self.assertRaises(Exception):
            self.executor.execute_dataframe(
                chunked.corrwith(chunked2, method='kendall'), concat=True)

        check(chunked, chunked2, raw_df2, True)
        check(chunked, chunked2, raw_df2, True, axis=1)
        check(chunked, chunked_s, raw_s, True)
        check(chunked, chunked_s2, raw_s2, True, axis=1)