def execute(cls, ctx, op):
    """Publish the input chunks as a ``vineyard::GlobalDataFrame``.

    Every input chunk's entry in ``ctx`` is unpacked as an
    ``(instance_id, chunk_id)`` pair.  The chunk ids are first gathered
    under a ``vineyard::ObjectSet`` metadata, which is then attached to a
    ``vineyard::GlobalDataFrame`` metadata.  The ``repr`` of the value
    returned by the second ``create_metadata`` call is written to ``ctx``
    under the key of the first output.

    Raises:
        RuntimeError: when the module-level ``vineyard`` name is ``None``.
    """
    if vineyard is None:
        raise RuntimeError('vineyard is not available')

    vy_client = vineyard.connect(op.vineyard_socket)

    # Level one: an object set referencing every chunk object.
    set_meta = vineyard.ObjectMeta()
    seen_instances, seen_chunks = set(), set()
    for position, chunk in enumerate(op.inputs):
        inst, obj = ctx[chunk.key]
        seen_instances.add(inst)
        seen_chunks.add(obj)
        set_meta.add_member('object_%d' % position, vineyard.ObjectID(obj))
    set_meta['typename'] = 'vineyard::ObjectSet'
    set_meta['num_of_instances'] = len(seen_instances)
    set_meta['num_of_objects'] = len(seen_chunks)
    object_set = vy_client.create_metadata(set_meta)

    # Level two: the global dataframe wrapping that object set.
    frame_meta = vineyard.ObjectMeta()
    frame_meta['typename'] = 'vineyard::GlobalDataFrame'
    frame_meta['partition_shape_row_'] = op.shape[0]
    frame_meta['partition_shape_column_'] = op.shape[1]
    frame_meta.add_member('objects_', object_set)
    global_frame = vy_client.create_metadata(frame_meta)
    vy_client.persist(global_frame)

    # Hand the resulting object id back through the execution context.
    ctx[op.outputs[0].key] = repr(global_frame)
def test_add_remote_placeholder(vineyard_ipc_sockets):
    """Build a ``vineyard::Tuple`` from objects put through four clients.

    The given sockets are cycled until four are available, one client is
    connected per socket, and the same array is put and persisted through
    each of them.  The tuple metadata is created and persisted via the
    first client and read back via the second.
    """
    sockets = list(itertools.islice(itertools.cycle(vineyard_ipc_sockets), 4))
    clients = [vineyard.connect(sockets[slot]) for slot in range(4)]

    payload = np.ones((1, 2, 3, 4, 5))
    object_ids = [conn.put(payload) for conn in clients]
    # Persist in the opposite order of creation (fourth client first).
    for conn, oid in zip(reversed(clients), reversed(object_ids)):
        conn.persist(oid)

    member_names = (
        '__elements_-0',
        '__elements_-1',
        '__elements_-2',
        '__elements_-3',
    )
    tuple_meta = vineyard.ObjectMeta()
    tuple_meta['typename'] = 'vineyard::Tuple'
    tuple_meta['size_'] = 4
    for member_name, oid in zip(member_names, object_ids):
        tuple_meta.add_member(member_name, oid)
    tuple_meta['__elements_-size'] = 4

    creator, reader = clients[0], clients[1]
    tuple_id = creator.create_metadata(tuple_meta)
    creator.persist(tuple_id)

    fetched = reader.get_meta(tuple_id)
    assert fetched['__elements_-size'] == 4
def test_add_remote_placeholder(vineyard_ipc_sockets):
    """Build a global ``vineyard::Tuple`` from objects of four clients.

    Four clients are obtained from ``generate_vineyard_ipc_clients``; the
    same array is put and persisted through each.  The tuple metadata is
    flagged with ``set_global(True)``, created and persisted via the first
    client, and fetched via the second client using
    ``get_meta(tup.id, True)``.
    """
    first, second, third, fourth = generate_vineyard_ipc_clients(
        vineyard_ipc_sockets, 4)
    clients = (first, second, third, fourth)

    payload = np.ones((1, 2, 3, 4, 5))
    object_ids = [conn.put(payload) for conn in clients]
    # Persist in the opposite order of creation (fourth client first).
    for conn, oid in zip(reversed(clients), reversed(object_ids)):
        conn.persist(oid)

    member_names = (
        '__elements_-0',
        '__elements_-1',
        '__elements_-2',
        '__elements_-3',
    )
    tuple_meta = vineyard.ObjectMeta()
    tuple_meta['typename'] = 'vineyard::Tuple'
    tuple_meta['size_'] = 4
    tuple_meta.set_global(True)
    for member_name, oid in zip(member_names, object_ids):
        tuple_meta.add_member(member_name, oid)
    tuple_meta['__elements_-size'] = 4

    created = first.create_metadata(tuple_meta)
    first.persist(created)

    fetched = second.get_meta(created.id, True)
    assert fetched['__elements_-size'] == 4
def global_object(vineyard_ipc_socket):
    """Create a persisted global ``vineyard::Tuple`` and return its id.

    Four clients are connected to the same socket and each puts and
    persists the same array.  The first two tuple members are added as
    metadata fetched through the first client, the last two as the values
    returned by ``put``.
    """
    clients = [vineyard.connect(vineyard_ipc_socket) for _ in range(4)]
    owner = clients[0]

    payload = np.ones((1, 2, 3, 4, 5))
    object_ids = [conn.put(payload) for conn in clients]
    # Persist in the opposite order of creation (fourth client first).
    for conn, oid in zip(reversed(clients), reversed(object_ids)):
        conn.persist(oid)

    tuple_meta = vineyard.ObjectMeta()
    tuple_meta['typename'] = 'vineyard::Tuple'
    tuple_meta['size_'] = 4
    tuple_meta.set_global(True)
    # Members 0 and 1 are attached as metadata, members 2 and 3 as ids.
    tuple_meta.add_member('__elements_-0', owner.get_meta(object_ids[0]))
    tuple_meta.add_member('__elements_-1', owner.get_meta(object_ids[1]))
    tuple_meta.add_member('__elements_-2', object_ids[2])
    tuple_meta.add_member('__elements_-3', object_ids[3])
    tuple_meta['__elements_-size'] = 4

    created = owner.create_metadata(tuple_meta)
    owner.persist(created)
    return created.id
def execute(cls, ctx, op):
    """Store the first input chunk in vineyard and output its object id.

    ``resolve_vineyard_socket`` supplies the socket and a flag telling
    whether the chunk data has to be put.  When the flag is unset, the
    chunk's ``"object_ref"`` meta entry is looked up; a missing key or an
    object the client does not report as existing falls back to putting
    the data.  Otherwise the existing metadata is copied (minus ``id``,
    ``signature`` and ``instance_id``) with a fresh ``partition_index_``.
    The persisted id is written to ``ctx`` as a one-cell DataFrame.

    Raises:
        RuntimeError: when the module-level ``vineyard`` name is ``None``.
    """
    if vineyard is None:
        raise RuntimeError("vineyard is not available")

    socket, needs_put = resolve_vineyard_socket(ctx, op)
    client = vineyard.connect(socket)
    source = op.inputs[0]

    # some op might be fused and executed twice on different workers
    if not needs_put:
        # might be fused
        try:  # pragma: no cover
            chunk_meta = ctx.get_chunks_meta([source.key])[0]
            df_id = vineyard.ObjectID(chunk_meta["object_ref"])
            needs_put = not client.exists(df_id)
        except KeyError:
            needs_put = True

    if needs_put:
        df_id = client.put(ctx[source.key], partition_index=source.index)
    else:  # pragma: no cover
        existing = client.get_meta(df_id)
        copied = vineyard.ObjectMeta()
        skipped = ["id", "signature", "instance_id"]
        for field, value in existing.items():
            if field in skipped:
                continue
            if isinstance(value, vineyard.ObjectMeta):
                copied.add_member(field, value)
            else:
                copied[field] = value
        copied["partition_index_"] = to_json(source.index)
        df_id = client.create_metadata(copied).id

    client.persist(df_id)
    ctx[op.outputs[0].key] = pd.DataFrame({0: [df_id]})
def execute(cls, ctx, op):
    """Store the first input chunk in vineyard and output its object id.

    ``resolve_vineyard_socket`` supplies the socket and a flag telling
    whether the chunk data has to be put.  When the flag is unset, the
    chunk's ``'object_ref'`` meta entry is looked up; a missing key or an
    object the client does not report as existing falls back to putting
    the data.  Otherwise the existing metadata is copied (minus ``id``,
    ``signature`` and ``instance_id``) with a fresh ``partition_index_``.
    The persisted id is written to ``ctx`` inside a one-element object
    array.

    Raises:
        RuntimeError: when the module-level ``vineyard`` name is ``None``.
    """
    if vineyard is None:
        raise RuntimeError('vineyard is not available')

    socket, needs_put = resolve_vineyard_socket(ctx, op)
    client = vineyard.connect(socket)
    source = op.inputs[0]

    # some op might be fused and executed twice on different workers
    if not needs_put:
        # might be fused
        try:  # pragma: no cover
            chunk_meta = ctx.get_chunks_meta([source.key])[0]
            tensor_id = vineyard.ObjectID(chunk_meta['object_ref'])
            needs_put = not client.exists(tensor_id)
        except KeyError:
            needs_put = True

    if needs_put:
        tensor_id = client.put(ctx[source.key], partition_index=source.index)
    else:  # pragma: no cover
        existing = client.get_meta(tensor_id)
        copied = vineyard.ObjectMeta()
        skipped = ['id', 'signature', 'instance_id']
        for field, value in existing.items():
            if field in skipped:
                continue
            if isinstance(value, vineyard.ObjectMeta):
                copied.add_member(field, value)
            else:
                copied[field] = value
        copied['partition_index_'] = to_json(source.index)
        tensor_id = client.create_metadata(copied).id

    client.persist(tensor_id)

    # Allocate the object array first, then assign the id into its cell.
    result = np.empty((1, ), dtype=object)
    result[0] = tensor_id
    ctx[op.outputs[0].key] = result
def test_persist_multiref(vineyard_client):
    """Persist a global ``vineyard::Pair`` whose two members are one object.

    The same value returned by a single ``put`` is attached as both
    ``first_`` and ``second_``; the test passes when creating and
    persisting the metadata raises nothing.
    """
    shared_id = vineyard_client.put(1.2345)

    pair_meta = vineyard.ObjectMeta()
    pair_meta['typename'] = 'vineyard::Pair'
    for member_name in ('first_', 'second_'):
        pair_meta.add_member(member_name, shared_id)
    pair_meta.set_global(True)

    created = vineyard_client.create_metadata(pair_meta)
    vineyard_client.persist(created)
def test_migrate_stream(vineyard_ipc_sockets, vineyard_endpoint, test_dataset, test_dataset_tmp):
    """Read a file as a stream, migrate it, write it out and compare files.

    The input file is opened through the first socket, its first local
    stream is migrated via a client on the second socket, wrapped in a
    ``vineyard::ParallelStream`` metadata and written back to disk.  The
    written ``p2p-31.out_0`` file must compare equal to the input.
    """
    sockets = list(itertools.islice(itertools.cycle(vineyard_ipc_sockets), 2))
    source_socket = sockets[0]
    target_socket = sockets[1]

    # Open the input as a stream; the original note here states that the
    # open API always returns a global stream.
    source_url = "file://%s/p2p-31.e" % test_dataset
    parse_options = {
        "header_row": False,
        "delimiter": " "
    }
    global_stream = vineyard.io.open(
        source_url,
        vineyard_ipc_socket=source_socket,
        vineyard_endpoint=vineyard_endpoint,
        read_options=parse_options,
    )

    # Resolve the local streams behind the opened global stream.
    source_client = vineyard.connect(source_socket)
    local_streams = source_client.get(global_stream)

    # Move the first local stream over to the other vineyardd.
    target_client = vineyard.connect(target_socket)
    migrated = target_client.migrate_stream(local_streams[0].id)

    # Wrap the migrated local stream in a global stream again so that it
    # fits what the open API expects.
    wrapper = vineyard.ObjectMeta()
    wrapper['typename'] = 'vineyard::ParallelStream'
    wrapper.set_global(True)
    wrapper['size_'] = 1
    wrapper.add_member("stream_0", migrated)
    wrapped_id = target_client.create_metadata(wrapper)
    target_client.persist(wrapped_id)

    # Write the global stream out to the temporary dataset directory.
    sink_url = "file://%s/p2p-31.out" % test_dataset_tmp
    vineyard.io.open(
        sink_url,
        wrapped_id,
        mode="w",
        vineyard_ipc_socket=target_socket,
        vineyard_endpoint=vineyard_endpoint,
    )

    # Check the equality of the input file and the written output file.
    expected_path = "%s/p2p-31.e" % test_dataset
    actual_path = "%s/p2p-31.out_0" % test_dataset_tmp
    assert filecmp.cmp(expected_path, actual_path)
def execute(cls, ctx, op):
    """Publish the input chunks as a global ``vineyard::GlobalTensor``.

    The JSON-encoded ``op.shape`` and ``op.chunk_shape`` are recorded on
    the metadata.  Every input chunk's entry in ``ctx`` is unpacked as a
    pair whose second element is attached as a ``partitions_-<idx>``
    member.  The ``repr`` of the value returned by ``create_metadata`` is
    written to ``ctx`` under the key of the first output.

    Raises:
        RuntimeError: when the module-level ``vineyard`` name is ``None``.
    """
    if vineyard is None:
        raise RuntimeError('vineyard is not available')

    vy_client = vineyard.connect(op.vineyard_socket)

    tensor_meta = vineyard.ObjectMeta()
    tensor_meta.set_global(True)
    tensor_meta['typename'] = 'vineyard::GlobalTensor'
    tensor_meta['shape_'] = json.dumps(op.shape)
    tensor_meta['partition_shape_'] = json.dumps(op.chunk_shape)

    for position, chunk in enumerate(op.inputs):
        # Only the second element of the stored pair is used here.
        _unused, obj = ctx[chunk.key]
        member_name = 'partitions_-%d' % position
        tensor_meta.add_member(member_name, vineyard.ObjectID(obj))
    tensor_meta['partitions_-size'] = len(op.inputs)

    global_tensor = vy_client.create_metadata(tensor_meta)
    vy_client.persist(global_tensor)

    # Hand the resulting object id back through the execution context.
    ctx[op.outputs[0].key] = repr(global_tensor)
def test_metadata(vineyard_client):
    """Persist a global ``vineyard::Pair`` and walk its metadata repeatedly.

    Two values are put and attached as ``first_`` and ``second_``.  The
    metadata fetched back by id is then traversed recursively four times
    in a row, printing every non-metadata entry.
    """
    first_id = vineyard_client.put(1.2345)
    second_id = vineyard_client.put(2.3456)

    pair_meta = vineyard.ObjectMeta()
    pair_meta['typename'] = 'vineyard::Pair'
    pair_meta.add_member('first_', first_id)
    pair_meta.add_member('second_', second_id)
    pair_meta.set_global(True)

    created = vineyard_client.create_metadata(pair_meta)
    vineyard_client.persist(created)

    def walk(node):
        # Descend into nested metadata, print everything else.
        for key, value in node.items():
            if not isinstance(value, vineyard.ObjectMeta):
                print('k-v in meta: ', key, value)
            else:
                walk(value)

    fetched = vineyard_client.get_meta(created.id)
    # The same metadata object is traversed four times in a row.
    for _ in range(4):
        walk(fetched)