def test_spray_file_string(self):
    """Spray a CSV given by file path and check it reads back unchanged.

    The expected frame is written to a CSV inside a temporary directory,
    the path of that CSV is handed to ``spray_file``, and the logical file
    is then fetched with ``get_thor_file`` and compared with the frame.
    """
    logical_name = '~thor::test_spray_file_string'
    # Positional options forwarded to spray_file after the logical name.
    spray_options = (
        True,   # overwrite
        1,      # expire
        10000,  # chunk_size
        3,      # max_workers
        True,   # delete_workunit
    )
    connection = hpycc.Connection("user", test_conn=False)

    expected = pd.DataFrame({
        "a": ['1', '3', '5', '6'],
        "b": ['aa', 'ab', 'ac', 'ad'],
    })
    expected = expected.sort_values('a')

    with TemporaryDirectory() as tmp_dir:
        csv_path = os.path.join(tmp_dir, "test.csv")
        expected.to_csv(csv_path, index=False)
        spray_file(connection, csv_path, logical_name, *spray_options)

    fetched = get_thor_file(connection=connection, thor_file=logical_name)
    # Only columns 'a' and 'b' take part in the comparison.
    actual = fetched[['a', 'b']]
    pd.testing.assert_frame_equal(expected, actual)
def test_spray_file_df(self):
    """Spray an in-memory DataFrame and check it reads back unchanged.

    The frame is passed directly to ``spray_file``; the logical file is
    then fetched with ``get_thor_file`` and compared with the frame.
    """
    logical_name = '~thor::test_spray_file_df'
    # Positional options forwarded to spray_file after the logical name.
    spray_options = (
        True,   # overwrite
        1,      # expire
        10000,  # chunk_size
        3,      # max_workers
        True,   # delete_workunit
    )
    connection = hpycc.Connection("user", test_conn=False)

    expected = pd.DataFrame({
        "a": ['1', '3', '5', '6'],
        "b": ['aa', 'ab', 'ac', 'ad'],
    })
    expected = expected.sort_values('a')

    spray_file(connection, expected, logical_name, *spray_options)

    fetched = get_thor_file(connection=connection, thor_file=logical_name)
    # Only columns 'a' and 'b' take part in the comparison.
    actual = fetched[['a', 'b']]
    pd.testing.assert_frame_equal(expected, actual)
def test_spray_file_string_smallest_chunks_many_workers(self):
    """Spray with a chunk size of 1 and 100 workers, then compare contents.

    Both the expected frame and the fetched frame are sorted on column
    ``a`` and re-indexed before comparison, so the assertion does not
    depend on the order in which rows come back.

    NOTE(review): despite "string" in the name, a DataFrame (not a file
    path) is what gets passed to ``spray_file`` here - confirm intent.
    """
    logical_name = '~test_spray_file_string_smallest_chunks_many_workers'
    # Positional options forwarded to spray_file after the logical name.
    spray_options = (
        True,  # overwrite
        1,     # expire
        1,     # chunk_size
        100,   # max_workers
        True,  # delete_workunit
    )
    connection = hpycc.Connection("user", test_conn=False)

    expected = pd.DataFrame({
        "a": ['1', '3', '5', '7', '9', '11', '13'],
        "b": ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
    })
    # Values are strings, so this ordering is lexicographic on both sides.
    expected = expected.sort_values('a').reset_index(drop=True)

    spray_file(connection, expected, logical_name, *spray_options)

    fetched = get_thor_file(connection=connection, thor_file=logical_name)
    actual = fetched[['a', 'b']]
    actual = actual.sort_values('a').reset_index(drop=True)
    pd.testing.assert_frame_equal(expected, actual)