def merge_global_object(vineyard_endpoint, results: List[List[ObjectID]]) -> ObjectID:
    """Merge global objects scattered over a vineyard cluster into one.

    The nested ``results`` lists are flattened into a single sequence of
    chunks.  When every chunk is itself a global object, the members of all
    chunks are collected into one new global object whose non-member,
    non-identity metadata entries are copied from the first chunk.

    Parameters
    ----------
    vineyard_endpoint:
        RPC endpoint of the vineyard cluster to connect to.
    results:
        A list of per-worker result lists; elements are :class:`ObjectID`
        values (or values convertible to :class:`ObjectID`).

    Returns
    -------
    ObjectID
        The id of the merged (and persisted) global object.  When only a
        single chunk is present it is returned directly without connecting
        to the cluster.

    Raises
    ------
    ValueError
        When there is nothing to merge, when global and non-global objects
        are mixed, or when more than one non-global object is given.
    """
    if not results:
        raise ValueError("No available sub objects to merge")

    chunks = []
    for subresults in results:
        chunks.extend(subresults)
    if len(chunks) == 0:
        raise ValueError("No available sub objects to merge")
    if len(chunks) == 1:
        # fastpath: no need to merge
        if not isinstance(chunks[0], ObjectID):
            return ObjectID(chunks[0])
        else:
            return chunks[0]

    vineyard_rpc_client = vineyard.connect(vineyard_endpoint)
    metadatas = [
        vineyard_rpc_client.get_meta(
            chunk if isinstance(chunk, ObjectID) else ObjectID(chunk))
        for chunk in chunks
    ]

    # BUGFIX: the original check was order-dependent -- a non-global chunk
    # was only rejected when it appeared *after* a global one, so the mixed
    # case (non-global followed by global) silently produced an incomplete
    # merge.  Decide global-ness over all chunks first.
    global_flags = [meta.isglobal for meta in metadatas]
    if not any(global_flags):
        raise ValueError(
            "Unable to merge more than one non-global objects: %s" % results)
    if not all(global_flags):
        raise ValueError('Not all sub objects are global objects: %s' % results)

    # collect every member of every global chunk: member object id -> name
    chunkmap = dict()
    for meta in metadatas:
        for k, v in meta.items():
            if isinstance(v, ObjectMeta):
                chunkmap[v.id] = k

    # copy plain metadata entries from the first chunk, skipping members
    # and per-object identity fields that must be regenerated
    base_meta = ObjectMeta()
    base_meta.set_global(True)
    for k, v in metadatas[0].items():
        if isinstance(v, ObjectMeta):
            continue
        if k in ['id', 'signature', 'instance_id']:
            continue
        base_meta[k] = v
    for member_id, member_name in chunkmap.items():
        base_meta.add_member(member_name, member_id)

    meta = vineyard_rpc_client.create_metadata(base_meta)
    vineyard_rpc_client.persist(meta.id)
    return meta.id
def make_global_dataframe(client, blocks, extra_meta=None) -> ObjectMeta:
    """Assemble a ``vineyard::GlobalDataFrame`` from partition blocks.

    Builds a global metadata object that records ``blocks`` as its
    ``partitions_`` members, creates it through ``client`` and persists it.

    Parameters
    ----------
    client:
        A connected vineyard client used to create and persist the metadata.
    blocks:
        The partition chunks; each may be an :class:`ObjectMeta`,
        :class:`ObjectID`, :class:`Object`, or any value convertible to
        :class:`ObjectID`.
    extra_meta:
        Optional mapping of additional metadata entries to attach.

    Returns
    -------
    ObjectMeta
        The created (and persisted) global dataframe metadata.
    """
    gdf_meta = ObjectMeta()
    gdf_meta['typename'] = 'vineyard::GlobalDataFrame'
    gdf_meta.set_global(True)
    gdf_meta['partitions_-size'] = len(blocks)

    if extra_meta:
        for key, value in extra_meta.items():
            gdf_meta[key] = value

    for index, partition in enumerate(blocks):
        if isinstance(partition, (ObjectMeta, ObjectID, Object)):
            member = partition
        else:
            member = ObjectID(partition)
        gdf_meta.add_member('partitions_-%d' % index, member)

    created = client.create_metadata(gdf_meta)
    client.persist(created)
    return created
def traverse_to_rebuild(client, stream_id: ObjectID, blobs: Dict[ObjectID, Blob]):
    """Recursively rebuild the object behind ``stream_id`` from received blobs.

    For a :class:`StreamCollection` the function rebuilds each child stream,
    persists the children when the collection is global, and recreates the
    collection's metadata (restoring the original ``typename`` that was
    stashed under ``__typename``).  For a leaf stream the previously received
    blob is looked up in ``blobs``.

    Returns a ``(member_path, meta_or_blob)`` pair, where ``member_path`` is
    the last component of the collection's recorded path.

    NOTE(review): values of ``blobs`` unpack as two-element tuples here, which
    looks like ``(path, blob)`` rather than the annotated ``Blob`` — confirm
    against the caller that fills ``blobs``.
    """
    target = client.get(stream_id)
    if not isinstance(target, StreamCollection):
        # leaf stream: its payload was already materialized into `blobs`
        leaf_path, leaf_blob = blobs[target.id]
        return leaf_path, leaf_blob

    full_path = target.meta[StreamCollection.KEY_OF_PATH]
    member_name = full_path.split(os.path.sep)[-1] if full_path else ''
    logger.info('rebuilding object %s as %s', full_path, member_name)

    # identifiers and bookkeeping keys that must not be copied verbatim
    erased = (
        'typename',
        'id',
        'signature',
        'instance_id',
        'transient',
        StreamCollection.KEY_OF_GLOBAL,
        StreamCollection.KEY_OF_PATH,
        StreamCollection.KEY_OF_STREAMS,
    )
    rebuilt = ObjectMeta()
    for key, value in target.meta.items():
        if key in erased:
            continue
        if key == '__typename':
            # the original typename was stashed away; restore it
            rebuilt['typename'] = value
        else:
            rebuilt[key] = value

    is_global = target.meta[StreamCollection.KEY_OF_GLOBAL]
    if is_global:
        rebuilt.set_global(True)
    for child in target.streams:
        child_name, child_member = traverse_to_rebuild(client, child, blobs)
        if is_global:
            # members of a global object must be persisted before attachment
            client.persist(child_member.id)
        rebuilt.add_member(child_name, child_member)
    rebuilt = client.create_metadata(rebuilt)
    return member_name, rebuilt