def test_mixed_inner_concat():
    """Inner-join concat over a mixed list of frames must be rejected.

    The list holds two frames produced by ``from_pandas`` plus a plain
    ``copy()`` of the first generated frame. Calling ``rdf.concat`` on it
    with ``join="inner"`` is expected to raise ``NotImplementedError``.
    """
    left, right = generate_dfs()
    plain_copy = left.copy()
    frames = [from_pandas(left, 2), from_pandas(right, 2), plain_copy]

    with pytest.raises(NotImplementedError):
        rdf.concat(frames, join="inner")
def test_ray_concat_on_column():
    """Column-wise ``pd.concat`` of converted frames must match pandas.

    The comparison is made once per spelling of the column axis:
    ``1`` and ``"columns"``.
    """
    # NOTE(review): a second function with this exact name follows in the
    # visible source. If both live in the same module, the later definition
    # replaces this one and pytest never runs this body -- confirm.
    left, right = generate_dfs()
    ray_left = from_pandas(left, 2)
    ray_right = from_pandas(right, 2)

    for axis in (1, "columns"):
        actual = pd.concat([ray_left, ray_right], axis=axis)
        expected = pandas.concat([left, right], axis=axis)
        assert ray_df_equals_pandas(actual, expected)
def test_ray_concat_on_column():
    """Column-wise ``rdf.concat`` must raise ``NotImplementedError``.

    Both spellings of the column axis are exercised: ``1`` and
    ``"columns"``.
    """
    # NOTE(review): an earlier function in the visible source carries this
    # same name. If both live in the same module, this definition replaces
    # the earlier one and only this body is collected by pytest -- confirm.
    left, right = generate_dfs()
    ray_left = from_pandas(left, 2)
    ray_right = from_pandas(right, 2)

    for axis in (1, "columns"):
        with pytest.raises(NotImplementedError):
            rdf.concat([ray_left, ray_right], axis=axis)
def test_ray_concat_on_index():
    """Row-wise ``pd.concat`` of converted frames must match pandas.

    The comparison is repeated for each spelling of the row axis, in the
    order ``'index'``, ``'rows'``, ``0``.
    """
    left, right = generate_dfs()
    ray_left = from_pandas(left, 2)
    ray_right = from_pandas(right, 2)

    for axis in ('index', 'rows', 0):
        actual = pd.concat([ray_left, ray_right], axis=axis)
        expected = pandas.concat([left, right], axis=axis)
        assert ray_df_equals_pandas(actual, expected)
def test_mixed_concat():
    """``pd.concat`` over a mixed list of frames must match pandas.

    The list passed to ``pd.concat`` holds two frames produced by
    ``from_pandas`` and one plain ``copy()`` of the first generated frame.
    The reference result concatenates the three original frames with
    ``pandas.concat``.
    """
    left, right = generate_dfs()
    plain_copy = left.copy()
    mixed = [from_pandas(left, 2), from_pandas(right, 2), plain_copy]

    actual = pd.concat(mixed)
    expected = pandas.concat([left, right, plain_copy])
    assert ray_df_equals_pandas(actual, expected)
def merge_set(ray_df):
    """Run a merge, a groupby and a boolean concat starting from ``ray_df``.

    ``ray_df`` is merged with a six-row payment lookup table, matching its
    ``payment_type`` column against the table's ``num`` column. The merged
    frame's ``tip_amount`` and ``payment_name`` columns are then used to
    build two boolean columns that are concatenated side by side.

    Every intermediate result is discarded; the function returns ``None``.
    """
    # Lookup table mapping a numeric payment code to its display name.
    lookup = pd.DataFrame({
        'num': [1, 2, 3, 4, 5, 6],
        'payment_name': ['Credit Card', 'Cash', 'No Charge',
                         'Dispute', 'Unknown', 'Voided trip'],
    })
    merged = ray_df.merge(lookup, left_on="payment_type", right_on="num")

    # Group the merged rows by payment name. Pulling a column out of the
    # groupby (e.g. tip_amount.mean()) is left out because Ray does not
    # support it.
    merged.groupby(merged.payment_name)

    # Two boolean columns, concatenated column-wise.
    zero_tip = merged.tip_amount == 0
    paid_cash = merged.payment_name == 'Cash'
    pd.concat([zero_tip, paid_cash], axis=1)
print(' new DataFrame finished. Result:') print(ray_payments) ray_df2 = ray_df.merge(ray_payments, left_on="payment_type", right_on="num") print(' merge finished. Result:') print(ray_df2.head(3)) ray2_groupby = ray_df2.groupby(ray_df2.payment_name) print(' groupby on merge finished. Result:') print(ray2_groupby.head(3)) # Ray does not support getting column from groupby below # result = ray2_groupby.tip_amount.mean() # print(' mean on groupby finished. Result:') # print(result) ray_bool1 = ray_df2.tip_amount == 0 ray_bool2 = ray_df2.payment_name == 'Cash' print(' booleans finished. Result:') print(ray_bool1.head(3)) ray_bools = pd.concat([ray_bool1, ray_bool2], axis=1) print(' concat of booleans finished. Result:') print(ray_bools.head(3)) # Ray does not support correlation # result = ray_bools.corr() # print(' correlation finished. Result:') # print(result) print(' Set of merge tests finished.')
def test_invalid_axis_errors():
    """``pd.concat`` with ``axis=2`` must raise ``ValueError``."""
    left, right = generate_dfs()
    frames = [from_pandas(left, 2), from_pandas(right, 2)]

    with pytest.raises(ValueError):
        pd.concat(frames, axis=2)
def test_ray_concat():
    """Default ``pd.concat`` of two converted frames must match pandas."""
    left, right = generate_dfs()
    ray_left = from_pandas(left, 2)
    ray_right = from_pandas(right, 2)

    actual = pd.concat([ray_left, ray_right])
    expected = pandas.concat([left, right])
    assert ray_df_equals_pandas(actual, expected)
def test_df_concat():
    """``pd.concat`` on the generated frames themselves must match pandas.

    Unlike the neighbouring tests, the frames are passed to ``pd.concat``
    exactly as returned by ``generate_dfs``, without ``from_pandas``.
    """
    left, right = generate_dfs()

    actual = pd.concat([left, right])
    expected = pandas.concat([left, right])
    assert ray_df_equals_pandas(actual, expected)