def set_index(data, user_map, item_map):
    """Replace the 'user' and 'item' columns of every shard with mapped values.

    Both mappings are wrapped in ``SharedValue`` and handed to
    ``data.transform_shard`` together with a per-shard function that looks
    each 'user' / 'item' entry up in the corresponding mapping.

    Args:
        data: Object exposing ``transform_shard(func, *args)``
            (presumably an Orca XShards of pandas DataFrames -- confirm
            against the caller).
        user_map: Mapping indexable by the raw values of the 'user' column.
        item_map: Mapping indexable by the raw values of the 'item' column.

    Returns:
        Whatever ``data.transform_shard`` returns for the re-indexed shards.

    Raises:
        KeyError: Inside the per-shard function when a 'user' or 'item'
            value has no entry in its mapping (for dict-like mappings).
    """

    def set_user_item(df, item_map, user_map):
        # The maps arrive wrapped; unwrap them once per shard.
        item_lookup = item_map.value
        user_lookup = user_map.value

        # Iterate over the column *values* instead of ``df[col][i]``: integer
        # ``i`` on a Series is a label lookup, so the old index loop broke
        # (KeyError / wrong row) on shards whose index is not 0..n-1.
        # Both lists are built before assigning so a failed lookup leaves
        # ``df`` untouched, as before.
        user_list = [user_lookup[user] for user in df['user']]
        item_list = [item_lookup[item] for item in df['item']]

        df['user'] = user_list
        df['item'] = item_list
        return df

    user_map_shared_value = SharedValue(user_map)
    item_map_shared_value = SharedValue(item_map)
    # Argument order must match set_user_item's (df, item_map, user_map).
    return data.transform_shard(set_user_item, item_map_shared_value,
                                user_map_shared_value)
def test_transform_broadcast(self):
    """Check that transform_shard passes a SharedValue through to the function.

    Reads the JSON fixture, negates the "value" column and subtracts a
    shared constant of 2 on every shard, then compares the first value of
    the first collected shard before and after the transformation.
    """

    def negative(df, column_name, minus_val):
        # Flip the sign first, then subtract the wrapped constant.
        flipped = df[column_name] * (-1)
        df[column_name] = flipped - minus_val.value
        return df

    json_dir = os.path.join(self.resource_path, "orca/data/json")
    data_shard = zoo.orca.data.pandas.read_json(json_dir, orient='columns', lines=True)

    data = data_shard.collect()
    assert data[0]["value"].values[0] > 0, "value should be positive"

    shared_minus = SharedValue(2)
    transformed = data_shard.transform_shard(negative, "value", shared_minus)
    data2 = transformed.collect()

    first_after = data2[0]["value"].values[0]
    assert first_after < 0, "value should be negative"
    # original + (-original - 2) must collapse to exactly -2.
    assert data[0]["value"].values[0] + first_after == -2, "value should be -2"