def testGPUExecution(self):
    """Check GPU-backed sort_values / sort_index results against pandas.

    Each tileable is moved to the GPU with ``to_gpu()``, executed through
    ``self.executor`` and converted back with ``to_pandas()`` before being
    compared with the result of the equivalent pandas call.

    The ``PSRS_DISTINCT_COL`` environment variable is changed while the test
    runs; its previous state is restored on exit, even if an assertion
    fails, so the setting does not leak into other tests.
    """
    # Windows only exercises the '0' setting; other platforms try both.
    if sys.platform.lower().startswith('win'):
        distinct_opts = ['0']
    else:
        distinct_opts = ['0', '1']

    # Snapshot the caller's value (None means "not set") for later restore.
    previous_distinct = os.environ.get('PSRS_DISTINCT_COL')
    try:
        # test sort_values
        for add_distinct in distinct_opts:
            os.environ['PSRS_DISTINCT_COL'] = add_distinct

            # test dataframe
            raw = pd.DataFrame(np.random.rand(100, 10),
                               columns=['a' + str(i) for i in range(10)])
            mdf = DataFrame(raw, chunk_size=30).to_gpu()

            result = self.executor.execute_dataframe(
                mdf.sort_values(by='a0'), concat=True)[0]
            expected = raw.sort_values(by='a0')
            pd.testing.assert_frame_equal(result.to_pandas(), expected)

            # test series
            raw = pd.Series(np.random.rand(10))
            series = Series(raw).to_gpu()

            result = self.executor.execute_dataframe(
                series.sort_values(), concat=True)[0]
            expected = raw.sort_values()
            pd.testing.assert_series_equal(result.to_pandas(), expected)

        # The sort_index checks stay inside the ``try`` so they still run
        # with the last value assigned above, exactly as before.

        # test DataFrame.sort_index
        raw = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))
        mdf = DataFrame(raw).to_gpu()

        result = self.executor.execute_dataframe(mdf.sort_index(), concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_frame_equal(result.to_pandas(), expected)

        # test Series.sort_index
        raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))
        series = Series(raw).to_gpu()

        result = self.executor.execute_dataframe(series.sort_index(), concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_series_equal(result.to_pandas(), expected)
    finally:
        # Put the environment back the way it was found.
        if previous_distinct is None:
            os.environ.pop('PSRS_DISTINCT_COL', None)
        else:
            os.environ['PSRS_DISTINCT_COL'] = previous_distinct
def testSortIndexExecution(self):
    """Compare executor-driven ``sort_index`` results with plain pandas.

    Exercises DataFrame sorting along both axes and Series sorting, each
    with default chunking, several explicit chunk sizes, descending order
    and (for frames) ``ignore_index=True`` on a freshly created executor.
    """

    def run(tileable, executor=None):
        # Execute on the given executor (the test case's own one when none
        # is passed) and return the single concatenated result.
        runner = self.executor if executor is None else executor
        return runner.execute_dataframe(tileable, concat=True)[0]

    def sorted_ignoring_index(data, **options):
        # pandas reference result for ``ignore_index=True``.
        try:  # for python3.5
            return data.sort_index(ignore_index=True, **options)
        except TypeError:
            # The keyword was rejected: sort normally and install a fresh
            # RangeIndex by hand instead.
            plain = data.sort_index(**options)
            plain.index = pd.RangeIndex(len(plain))
            return plain

    frame = pd.DataFrame(np.random.rand(100, 20), index=np.random.rand(100))

    # default chunking
    actual = run(DataFrame(frame).sort_index())
    pd.testing.assert_frame_equal(actual, frame.sort_index())

    # in-place sort of the tiled frame
    tiled = DataFrame(frame)
    tiled.sort_index(inplace=True)
    actual = run(tiled)
    pd.testing.assert_frame_equal(actual, frame.sort_index())

    # explicit chunk sizes, ascending then descending
    for chunk_size, options in ((30, {}), (20, {'ascending': False})):
        tiled = DataFrame(frame, chunk_size=chunk_size)
        actual = run(tiled.sort_index(**options))
        pd.testing.assert_frame_equal(actual, frame.sort_index(**options))

    # ignore_index=True, run on a dedicated executor
    session_executor = ExecutorForTest(storage=new_session().context)
    tiled = DataFrame(frame, chunk_size=10)
    actual = run(tiled.sort_index(ignore_index=True), executor=session_executor)
    pd.testing.assert_frame_equal(actual, sorted_ignoring_index(frame))

    # test axis=1
    wide = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))

    actual = run(DataFrame(wide).sort_index(axis=1))
    pd.testing.assert_frame_equal(actual, wide.sort_index(axis=1))

    column_cases = (
        (3, {'axis': 1}),
        (4, {'axis': 1, 'ascending': False}),
    )
    for chunk_size, options in column_cases:
        tiled = DataFrame(wide, chunk_size=chunk_size)
        actual = run(tiled.sort_index(**options))
        pd.testing.assert_frame_equal(actual, wide.sort_index(**options))

    tiled = DataFrame(wide, chunk_size=4)
    session_executor = ExecutorForTest(storage=new_session().context)
    actual = run(tiled.sort_index(axis=1, ignore_index=True),
                 executor=session_executor)
    pd.testing.assert_frame_equal(actual, sorted_ignoring_index(wide, axis=1))

    # test series
    values = pd.Series(np.random.rand(10, ), index=np.random.rand(10))

    actual = run(Series(values).sort_index())
    pd.testing.assert_series_equal(actual, values.sort_index())

    for chunk_size, options in ((2, {}), (3, {'ascending': False})):
        tiled = Series(values, chunk_size=chunk_size)
        actual = run(tiled.sort_index(**options))
        pd.testing.assert_series_equal(actual, values.sort_index(**options))
def test_sort_index_execution(setup):
    """Verify ``sort_index`` on tiled frames and series against pandas.

    Every case builds a tileable from random pandas data, runs it with
    ``.execute().fetch()`` and asserts equality with the pandas result.
    ``setup`` is accepted as a parameter but not referenced in the body.
    """

    def check(tileable, reference, compare=pd.testing.assert_frame_equal):
        # Materialise the tileable and compare it with the pandas reference.
        compare(tileable.execute().fetch(), reference())

    def sorted_ignoring_index(data, **options):
        # pandas reference result for ``ignore_index=True``.
        try:  # for python3.5
            return data.sort_index(ignore_index=True, **options)
        except TypeError:
            # The keyword was rejected: sort normally, then replace the
            # index with a fresh RangeIndex manually.
            plain = data.sort_index(**options)
            plain.index = pd.RangeIndex(len(plain))
            return plain

    frame = pd.DataFrame(np.random.rand(100, 20), index=np.random.rand(100))

    # default chunking
    check(DataFrame(frame).sort_index(), frame.sort_index)

    # in-place sort of the tiled frame
    tiled = DataFrame(frame)
    tiled.sort_index(inplace=True)
    check(tiled, frame.sort_index)

    # explicit chunk sizes, ascending then descending
    for chunk_size, options in ((30, {}), (20, {'ascending': False})):
        tiled = DataFrame(frame, chunk_size=chunk_size)
        check(tiled.sort_index(**options),
              lambda: frame.sort_index(**options))

    # ignore_index=True
    tiled = DataFrame(frame, chunk_size=10)
    check(tiled.sort_index(ignore_index=True),
          lambda: sorted_ignoring_index(frame))

    # test axis=1
    wide = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))

    check(DataFrame(wide).sort_index(axis=1),
          lambda: wide.sort_index(axis=1))

    column_cases = (
        (3, {'axis': 1}),
        (4, {'axis': 1, 'ascending': False}),
    )
    for chunk_size, options in column_cases:
        tiled = DataFrame(wide, chunk_size=chunk_size)
        check(tiled.sort_index(**options),
              lambda: wide.sort_index(**options))

    tiled = DataFrame(wide, chunk_size=4)
    check(tiled.sort_index(axis=1, ignore_index=True),
          lambda: sorted_ignoring_index(wide, axis=1))

    # test series
    values = pd.Series(np.random.rand(10, ), index=np.random.rand(10))
    series_equal = pd.testing.assert_series_equal

    check(Series(values).sort_index(), values.sort_index, series_equal)

    for chunk_size, options in ((2, {}), (3, {'ascending': False})):
        tiled = Series(values, chunk_size=chunk_size)
        check(tiled.sort_index(**options),
              lambda: values.sort_index(**options),
              series_equal)