def test_series_quantile():
    """Check the result types of ``quantile`` on a chunked series.

    Called without arguments the result must be a ``Tensor``; called with a
    list of two quantiles it must be a ``Series`` of shape ``(2,)`` whose
    index equals the requested quantiles. Both results are tiled as well.
    """
    data = pd.Series(np.random.rand(10))

    # No argument: the result is expected to be a Tensor.
    single = series_from_pandas(data, chunk_size=3).quantile()
    assert isinstance(single, Tensor)
    tile(single)

    # List of quantiles: the result is expected to be a Series.
    multiple = series_from_pandas(data, chunk_size=3).quantile([0.3, 0.7])
    assert isinstance(multiple, Series)
    assert multiple.shape == (2,)
    actual_index = multiple.index_value.to_pandas()
    pd.testing.assert_index_equal(actual_index, pd.Index([0.3, 0.7]))
    tile(multiple)
def testSeriesQuantile(self):
    """Check the result types of ``quantile`` on a chunked series.

    Called without arguments the result must be a ``Tensor``; called with a
    list of two quantiles it must be a ``Series`` of shape ``(2,)`` whose
    index equals the requested quantiles. Both results are tiled as well.
    """
    data = pd.Series(np.random.rand(10))

    # No argument: the result is expected to be a Tensor.
    single = series_from_pandas(data, chunk_size=3).quantile()
    self.assertIsInstance(single, Tensor)
    single.tiles()

    # List of quantiles: the result is expected to be a Series.
    multiple = series_from_pandas(data, chunk_size=3).quantile([0.3, 0.7])
    self.assertIsInstance(multiple, Series)
    self.assertEqual(multiple.shape, (2,))
    actual_index = multiple.index_value.to_pandas()
    pd.testing.assert_index_equal(actual_index, pd.Index([0.3, 0.7]))
    multiple.tiles()
def testAppendExecution(self):
    """Compare executed ``append`` results with pandas for frames and series.

    Plain appends are executed on ``self.executor``. Appends that pass
    ``ignore_index=True`` (including the dict-row case) are executed on a
    local executor created with the context of a new session as storage.
    """
    session_executor = ExecutorForTest(storage=new_session().context)

    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # First pass: both operands share a chunk size; second pass: they differ.
    for chunk_b in (3, 2):
        mars_a = from_pandas(pdf_a, chunk_size=3)
        mars_b = from_pandas(pdf_b, chunk_size=chunk_b)

        appended = mars_a.append(mars_b)
        wanted = pdf_a.append(pdf_b)
        got = self.executor.execute_dataframe(appended, concat=True)[0]
        pd.testing.assert_frame_equal(wanted, got)

        appended = mars_a.append(mars_b, ignore_index=True)
        wanted = pdf_a.append(pdf_b, ignore_index=True)
        got = session_executor.execute_dataframe(appended, concat=True)[0]
        pd.testing.assert_frame_equal(wanted, got)

    # Append a list of frames; mars_a / mars_b still carry chunk sizes 3 / 2
    # from the last loop iteration.
    pdf_c = pd.DataFrame(np.random.rand(8, 4), columns=list('ABCD'))
    mars_c = from_pandas(pdf_c, chunk_size=3)
    wanted = pdf_a.append([pdf_b, pdf_c])
    appended = mars_a.append([mars_b, mars_c])
    got = self.executor.execute_dataframe(appended, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Append a single row given as a dict.
    appended = mars_a.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
    wanted = pdf_a.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
    got = session_executor.execute_dataframe(appended, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Same scenarios for series.
    ps_a = pd.Series(np.random.rand(10))
    ps_b = pd.Series(np.random.rand(10))

    for chunk_b in (3, 2):
        ms_a = series_from_pandas(ps_a, chunk_size=3)
        ms_b = series_from_pandas(ps_b, chunk_size=chunk_b)

        appended = ms_a.append(ms_b)
        wanted = ps_a.append(ps_b)
        got = self.executor.execute_dataframe(appended, concat=True)[0]
        pd.testing.assert_series_equal(wanted, got)

        appended = ms_a.append(ms_b, ignore_index=True)
        wanted = ps_a.append(ps_b, ignore_index=True)
        got = session_executor.execute_dataframe(appended, concat=True)[0]
        pd.testing.assert_series_equal(wanted, got)

    # Append a list of series; ms_a / ms_b keep chunk sizes 3 / 2.
    ps_c = pd.Series(np.random.rand(4))
    ms_c = series_from_pandas(ps_c, chunk_size=2)
    wanted = ps_a.append([ps_b, ps_c])
    appended = ms_a.append([ms_b, ms_c])
    got = self.executor.execute_dataframe(appended, concat=True)[0]
    pd.testing.assert_series_equal(wanted, got)
def test_concat(setup):
    """Compare executed ``concat`` results with ``pd.concat``.

    Covers DataFrame inputs (default, ``ignore_index``, ``axis=1``, three
    inputs, ``join='inner'``), Series inputs (default, ``ignore_index``,
    ``axis=1``) and mixed DataFrame/Series inputs in both orders.
    """
    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # Equal chunk sizes, default arguments.
    mars_a = from_pandas(pdf_a, chunk_size=3)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b])
    wanted = pd.concat([pdf_a, pdf_b])
    got = combined.execute().fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Different chunk sizes together with ignore_index=True.
    mars_a = from_pandas(pdf_a, chunk_size=2)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], ignore_index=True)
    wanted = pd.concat([pdf_a, pdf_b], ignore_index=True)
    got = combined.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Concatenate along the column axis.
    mars_a = from_pandas(pdf_a, chunk_size=2)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], axis=1)
    wanted = pd.concat([pdf_a, pdf_b], axis=1)
    got = combined.execute().fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # More than two DataFrames.
    combined = concat([mars_a, mars_b, mars_a])
    wanted = pd.concat([pdf_a, pdf_b, pdf_a])
    got = combined.execute().fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Frames with different column sets, joined with join='inner'.
    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 3), columns=list('ABC'))
    mars_a = from_pandas(pdf_a, chunk_size=3)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], join='inner')
    wanted = pd.concat([pdf_a, pdf_b], join='inner')
    got = combined.execute().fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Series inputs.
    ps_a = pd.Series(np.random.rand(10))
    ps_b = pd.Series(np.random.rand(10))
    ms_a = series_from_pandas(ps_a, chunk_size=3)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b])
    wanted = pd.concat([ps_a, ps_b])
    got = combined.execute().fetch()
    pd.testing.assert_series_equal(got, wanted)

    # Series with different chunk sizes and ignore_index=True.
    ms_a = series_from_pandas(ps_a, chunk_size=4)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b], ignore_index=True)
    wanted = pd.concat([ps_a, ps_b], ignore_index=True)
    got = combined.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_series_equal(got, wanted)

    # Series along the column axis; the result is compared as a DataFrame.
    ms_a = series_from_pandas(ps_a, chunk_size=3)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b], axis=1)
    wanted = pd.concat([ps_a, ps_b], axis=1)
    got = combined.execute(extra_config={'check_shape': False}).fetch()
    pd.testing.assert_frame_equal(got, wanted)

    # Mixed inputs: (DataFrame, Series) first, then (Series, DataFrame).
    mixed_cases = (
        ([mars_a, ms_b], [pdf_a, ps_b]),
        ([ms_a, mars_b], [ps_a, pdf_b]),
    )

    # Mixed inputs with ignore_index=True.
    for mars_inputs, pandas_inputs in mixed_cases:
        combined = concat(mars_inputs, ignore_index=True)
        wanted = pd.concat(pandas_inputs, ignore_index=True)
        got = combined.execute(
            extra_config={'check_index_value': False}).fetch()
        pd.testing.assert_frame_equal(got, wanted)

    # Mixed inputs along the column axis.
    for mars_inputs, pandas_inputs in mixed_cases:
        combined = concat(mars_inputs, axis=1)
        wanted = pd.concat(pandas_inputs, axis=1)
        got = combined.execute().fetch()
        pd.testing.assert_frame_equal(got, wanted)
def test_append_execution(setup):
    """Compare executed ``append`` results with pandas for frames and series.

    Appends that pass ``ignore_index=True`` (including the dict-row case)
    are executed with ``extra_config={'check_index_value': False}``; all
    other appends are executed with default settings.
    """
    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # First pass: both operands share a chunk size; second pass: they differ.
    for chunk_b in (3, 2):
        mars_a = from_pandas(pdf_a, chunk_size=3)
        mars_b = from_pandas(pdf_b, chunk_size=chunk_b)

        appended = mars_a.append(mars_b)
        wanted = pdf_a.append(pdf_b)
        got = appended.execute().fetch()
        pd.testing.assert_frame_equal(wanted, got)

        appended = mars_a.append(mars_b, ignore_index=True)
        wanted = pdf_a.append(pdf_b, ignore_index=True)
        got = appended.execute(
            extra_config={'check_index_value': False}).fetch()
        pd.testing.assert_frame_equal(wanted, got)

    # Append a list of frames; mars_a / mars_b still carry chunk sizes 3 / 2
    # from the last loop iteration.
    pdf_c = pd.DataFrame(np.random.rand(8, 4), columns=list('ABCD'))
    mars_c = from_pandas(pdf_c, chunk_size=3)
    wanted = pdf_a.append([pdf_b, pdf_c])
    appended = mars_a.append([mars_b, mars_c])
    got = appended.execute().fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Append a single row given as a dict.
    appended = mars_a.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
    wanted = pdf_a.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
    got = appended.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(wanted, got)

    # Same scenarios for series.
    ps_a = pd.Series(np.random.rand(10))
    ps_b = pd.Series(np.random.rand(10))

    for chunk_b in (3, 2):
        ms_a = series_from_pandas(ps_a, chunk_size=3)
        ms_b = series_from_pandas(ps_b, chunk_size=chunk_b)

        appended = ms_a.append(ms_b)
        wanted = ps_a.append(ps_b)
        got = appended.execute().fetch()
        pd.testing.assert_series_equal(wanted, got)

        appended = ms_a.append(ms_b, ignore_index=True)
        wanted = ps_a.append(ps_b, ignore_index=True)
        got = appended.execute(
            extra_config={'check_index_value': False}).fetch()
        pd.testing.assert_series_equal(wanted, got)

    # Append a list of series; ms_a / ms_b keep chunk sizes 3 / 2.
    ps_c = pd.Series(np.random.rand(4))
    ms_c = series_from_pandas(ps_c, chunk_size=2)
    wanted = ps_a.append([ps_b, ps_c])
    appended = ms_a.append([ms_b, ms_c])
    got = appended.execute().fetch()
    pd.testing.assert_series_equal(wanted, got)
def testConcat(self):
    """Compare executed ``concat`` results with ``pd.concat``.

    Covers DataFrame inputs (default, ``ignore_index``, ``axis=1``, three
    inputs, ``join='inner'``), Series inputs (default, ``ignore_index``,
    ``axis=1``) and mixed DataFrame/Series inputs in both orders. The
    ``ignore_index=True`` cases and all mixed-input cases run on a local
    executor created with the context of a new session as storage; the
    remaining cases run on ``self.executor``.
    """
    session_executor = ExecutorForTest(storage=new_session().context)

    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # Equal chunk sizes, default arguments.
    mars_a = from_pandas(pdf_a, chunk_size=3)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b])
    wanted = pd.concat([pdf_a, pdf_b])
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Different chunk sizes together with ignore_index=True.
    mars_a = from_pandas(pdf_a, chunk_size=2)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], ignore_index=True)
    wanted = pd.concat([pdf_a, pdf_b], ignore_index=True)
    got = session_executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Concatenate along the column axis.
    mars_a = from_pandas(pdf_a, chunk_size=2)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], axis=1)
    wanted = pd.concat([pdf_a, pdf_b], axis=1)
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # More than two DataFrames.
    combined = concat([mars_a, mars_b, mars_a])
    wanted = pd.concat([pdf_a, pdf_b, pdf_a])
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Frames with different column sets, joined with join='inner'.
    pdf_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    pdf_b = pd.DataFrame(np.random.rand(10, 3), columns=list('ABC'))
    mars_a = from_pandas(pdf_a, chunk_size=3)
    mars_b = from_pandas(pdf_b, chunk_size=3)
    combined = concat([mars_a, mars_b], join='inner')
    wanted = pd.concat([pdf_a, pdf_b], join='inner')
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(wanted, got)

    # Series inputs.
    ps_a = pd.Series(np.random.rand(10))
    ps_b = pd.Series(np.random.rand(10))
    ms_a = series_from_pandas(ps_a, chunk_size=3)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b])
    wanted = pd.concat([ps_a, ps_b])
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_series_equal(got, wanted)

    # Series with different chunk sizes and ignore_index=True.
    ms_a = series_from_pandas(ps_a, chunk_size=4)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b], ignore_index=True)
    wanted = pd.concat([ps_a, ps_b], ignore_index=True)
    got = session_executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_series_equal(got, wanted)

    # Series along the column axis; the result is compared as a DataFrame.
    ms_a = series_from_pandas(ps_a, chunk_size=3)
    ms_b = series_from_pandas(ps_b, chunk_size=3)
    combined = concat([ms_a, ms_b], axis=1)
    wanted = pd.concat([ps_a, ps_b], axis=1)
    got = self.executor.execute_dataframe(combined, concat=True)[0]
    pd.testing.assert_frame_equal(got, wanted)

    # Mixed inputs: (DataFrame, Series) first, then (Series, DataFrame).
    mixed_cases = (
        ([mars_a, ms_b], [pdf_a, ps_b]),
        ([ms_a, mars_b], [ps_a, pdf_b]),
    )

    # Mixed inputs with ignore_index=True.
    for mars_inputs, pandas_inputs in mixed_cases:
        combined = concat(mars_inputs, ignore_index=True)
        wanted = pd.concat(pandas_inputs, ignore_index=True)
        got = session_executor.execute_dataframe(combined, concat=True)[0]
        pd.testing.assert_frame_equal(got, wanted)

    # Mixed inputs along the column axis.
    for mars_inputs, pandas_inputs in mixed_cases:
        combined = concat(mars_inputs, axis=1)
        wanted = pd.concat(pandas_inputs, axis=1)
        got = session_executor.execute_dataframe(combined, concat=True)[0]
        pd.testing.assert_frame_equal(got, wanted)