示例#1
0
def set_index(data, user_map, item_map):
    """Replace the 'user' and 'item' columns of every shard with mapped values.

    Both mappings are wrapped in ``SharedValue`` and handed to
    ``data.transform_shard`` together with the per-shard function.

    :param data: object exposing ``transform_shard``; each shard is indexed
        with the 'user' and 'item' keys (presumably a pandas DataFrame --
        confirm against the caller).
    :param user_map: lookup indexed by the raw values of the 'user' column.
    :param item_map: lookup indexed by the raw values of the 'item' column.
    :return: the result of ``data.transform_shard``.
    """
    def set_user_item(df, item_map, user_map):
        # Unwrap the shared values once per shard.
        item_lookup = item_map.value
        user_lookup = user_map.value
        # Walk the column values themselves instead of df['user'][i]:
        # integer subscripts on a column are label lookups, so the indexed
        # form breaks on any shard whose index is not exactly 0..n-1.
        user_list = [user_lookup[raw_user] for raw_user in df['user']]
        item_list = [item_lookup[raw_item] for raw_item in df['item']]
        df['user'] = user_list
        df['item'] = item_list
        return df

    user_map_shared_value = SharedValue(user_map)
    item_map_shared_value = SharedValue(item_map)
    # Argument order after the function matches set_user_item's signature:
    # item map first, then user map.
    return data.transform_shard(set_user_item, item_map_shared_value,
                                user_map_shared_value)
    def test_transform_broadcast(self):
        """transform_shard should pass a SharedValue through to the shard function.

        Reads the JSON test resources, negates the "value" column and subtracts
        the number wrapped in a SharedValue, then compares the first entry of
        the first collected shard before and after the transformation.
        """
        def negative(df, column_name, minus_val):
            # Flip the sign, then subtract the payload of the shared value.
            df[column_name] = df[column_name] * (-1) - minus_val.value
            return df

        def first_value(collected):
            # First entry of the "value" column in the first collected shard.
            return collected[0]["value"].values[0]

        json_dir = os.path.join(self.resource_path, "orca/data/json")
        source_shard = zoo.orca.data.pandas.read_json(
            json_dir, orient='columns', lines=True)
        original = source_shard.collect()
        assert first_value(original) > 0, "value should be positive"

        shared_offset = SharedValue(2)
        negated_shard = source_shard.transform_shard(negative, "value",
                                                     shared_offset)
        transformed = negated_shard.collect()
        assert first_value(transformed) < 0, "value should be negative"
        assert first_value(original) + first_value(transformed) == -2, "value should be -2"