def generate_returns_placeholders():
    """Serialize the enclosing scope's return and placeholder ops.

    Returns:
        A pair of lists ``(pb_returns, pb_placeholders)`` holding the
        protobuf form of each op in ``returns`` and ``placeholders``.
    """
    serialized_returns = [op_to_protobuf(ret_op) for ret_op in returns]
    serialized_placeholders = [op_to_protobuf(ph_op) for ph_op in placeholders]
    return serialized_returns, serialized_placeholders
def generate_messages():
    """Yield one computation request per (ops, edges) chunk of ``pb_graph``.

    Only the first yielded message carries the serialized returns and
    placeholders; every later message sends empty lists for both.
    """
    serialized_returns = [op_to_protobuf(op) for op in returns]
    serialized_placeholders = [op_to_protobuf(op) for op in placeholders]
    for chunk_ops, chunk_edges in pb_graph:
        yield make_computation_request(
            chunk_ops, chunk_edges, serialized_returns, serialized_placeholders)
        # After the first chunk the returns/placeholders are already delivered.
        serialized_returns, serialized_placeholders = [], []
def test_op_to_protobuf():
    """Round-trip a placeholder op through protobuf and check equality."""
    axis = ng.make_axis(name='C', length=2)
    axes = ng.make_axes([axis])
    orig_op = ng.placeholder(axes)

    # Attach attributes covering every value type the serializer must handle.
    attr_values = {
        'test0': 'stringval_attr',
        'test1': [-1.0, 4],
        'test2': dict(foo=2, you='bar'),
        'test3': dict(),
        'test4': slice(1, 3, 5),
        'test5': slice(1, 3),
        'test6': slice(1, None, 3),
        'test7': axis,
        'test8': axes,
    }
    for attr_name, attr_value in attr_values.items():
        setattr(orig_op, attr_name, attr_value)

    # Attach the same variety of value types as metadata entries.
    orig_op.metadata.update({
        'test0': 'stringval',
        'test1': [1, 4.0],
        'test2': dict(hey=1, you=4.0),
        'test4': dict(),
        'test5': slice(1, 3, 5),
        'test6': slice(1, 3),
        'test7': slice(1, None, 5),
        'test8': axis,
        'test9': axes,
    })

    pb_op = ser.op_to_protobuf(orig_op)
    py_op = ser.protobuf_to_op(pb_op)
    assert_object_equality(py_op, orig_op)
def computation(self, returns, placeholders):
    """Create a computation on the remote child transformer via RPC.

    Arguments:
        returns: ops whose values the remote computation should produce.
        placeholders: ops fed as inputs when the computation is called.

    Returns:
        An ``RPCComputationClient`` wrapping the remote computation id.

    Raises:
        RuntimeError: if the transformer was never initialized, or the
            remote ComputationRequest fails (negative ``comp_id``).
    """
    if not self.initialized:
        raise RuntimeError("RPC build_transformer request failed!")
    update_comm_deps(returns)
    # Serialize the full subgraph once, then the return/placeholder ops
    # individually so the server can locate them within the subgraph.
    pb_subgraph = _serialize_graph(returns + list(placeholders))
    pb_returns = [op_to_protobuf(op) for op in returns]
    pb_placeholders = [op_to_protobuf(op) for op in placeholders]
    response = self.RPC.Computation(
        hetr_pb2.ComputationRequest(
            subgraph=pb_subgraph,
            returns=pb_returns,
            placeholders=pb_placeholders),
        _TIMEOUT_SECONDS)
    # A negative comp_id signals failure on the server side.
    if response.comp_id < 0:
        raise RuntimeError("RPC computation request failed!")
    return RPCComputationClient(response.comp_id, self.RPC)
def generate_messages():
    """Stream the serialized op graph as a sequence of chunked requests.

    Ops are serialized in reference order and flushed every
    ``_OPS_PER_MSG`` ops (and on the final op). Returns/placeholders are
    attached only to the first message.
    """
    pb_returns, pb_placeholders = generate_returns_placeholders()
    ops = Op.all_op_references(returns + list(placeholders))
    last_index = len(ops) - 1
    chunk_ops, chunk_edges = [], []
    for index, op in enumerate(ops):
        chunk_ops.append(op_to_protobuf(op))
        add_edges(chunk_edges, chunk_ops, op)
        at_chunk_boundary = index != 0 and index % _OPS_PER_MSG == 0
        if at_chunk_boundary or index == last_index:
            yield make_computation_request(
                chunk_ops, chunk_edges, pb_returns, pb_placeholders)
            chunk_ops, chunk_edges = [], []
            # Subsequent messages carry empty returns/placeholders.
            pb_returns, pb_placeholders = [], []
def __init__(self, hetr, computation_op):
    """Build per-child remote computations for a HeTr computation op.

    Wraps distributed results in ``ResultOp``, runs the transformer's
    graph passes, launches the child worker processes, serializes the
    whole graph once into chunked protobuf messages, and asynchronously
    creates a computation on every child transformer.

    Arguments:
        hetr: parent transformer owning send_nodes, graph passes, and
            the child transformers.
        computation_op: op describing the requested returns/parameters.
    """
    self.child_computations = dict()
    self.transformer = hetr
    # clear send_nodes for multiple computations
    if hetr.send_nodes:
        hetr.send_nodes.clear()
    self.send_nodes = hetr.send_nodes
    self.computation_op = computation_op

    # self.returns could be replaced by comp_op.returns if it were expressed as a set
    self.returns = OrderedSet()
    if isinstance(computation_op.returns, collections.Container):
        self.returns.update(list(computation_op.returns))
    elif isinstance(computation_op.returns, Op):
        self.returns.update(list([computation_op.returns]))

    # if one of the requested results is marked as distributed across devices,
    # wrap it in a ResultOp to facilitate DistributedPass inserting a gather operation
    new_returns = OrderedSet()
    for op in self.returns:
        if 'device_id' in op.metadata and \
                isinstance(op.metadata['device_id'], (list, tuple)):
            op.metadata['is_split_op'] = True
            new_result = ResultOp(device_id=0, args=tuple([op]))
            # link wrapper and original both ways so results map back later
            op.metadata['hetr_replaced_by'] = new_result
            new_result.metadata['replaces_op'] = op
            new_returns.add(new_result)
        else:
            new_returns.add(op)

    # Do Hetr passes
    # NOTE(review): the trailing comma after logger.info(...) makes this a
    # harmless 1-tuple expression; same on the other logger.info lines below.
    logger.info('Running graph passes'),
    pass_ops = new_returns | OrderedSet(self.computation_op.parameters)
    for graph_pass in self.transformer.graph_passes:
        # send_nodes may change as passes run, so re-merge them each pass
        pass_ops = pass_ops | OrderedSet(hetr.send_nodes)
        graph_pass.do_pass(ops=pass_ops)

    # hack around new TensorValueOp that wraps AssignableTensorOp
    # autogenerated by creating a ComputationOp:
    for p in self.computation_op.parameters:
        if isinstance(p, TensorValueOp):
            p.metadata.update(p.states_read[0].metadata)

    logger.info('Launching child processes'),
    # assume all children are the same type
    # and all GPUs are in one chassis
    num_process = len(self.transformer.child_transformers)
    ppn = 1 if self.transformer.default_device == 'cpu' else num_process
    self.transformer.mpilauncher.launch(num_process, ppn)
    self.transformer.setup_child_transformers(num_process)

    def is_my_op(op, name):
        # an op belongs to child `name` when the op's transformer metadata
        # equals it exactly or contains it (metadata may be multi-valued)
        op_trans = op.metadata['transformer']
        return name == op_trans or name in op_trans

    logger.info('Serializaing computation graph'),
    # build whole_graph once to avoid slow serialization once per worker
    # split whole pb message into list of smaller chunks
    # gRPC prefers sending smaller messages
    placeholders = [p for p in self.computation_op.parameters]
    all_returns = [o for o in self.send_nodes | new_returns]
    # unwrap ResultOp wrappers before walking op references
    transform_returns = [
        o.args[0] if isinstance(o, ResultOp) else o
        for o in all_returns
    ]
    whole_graph = Op.all_op_references(transform_returns + placeholders)

    pb_whole_graph = []
    pb_ops, pb_edges = [], []
    for i, o in enumerate(whole_graph):
        pb_ops.append(op_to_protobuf(o))
        add_edges(pb_edges, pb_ops, o)
        # flush a chunk every _OPS_PER_MSG ops, and always on the final op
        if (i != 0 and i % _OPS_PER_MSG == 0) or (i == len(whole_graph) - 1):
            pb_whole_graph.append((pb_ops, pb_edges))
            pb_ops, pb_edges = [], []

    # partition placeholders and returns by the owning child transformer
    t_placeholders, t_returns = {}, {}
    for t_name in self.transformer.child_transformers.keys():
        t_placeholders[t_name] = [
            p for p in placeholders if is_my_op(p, t_name)
        ]
        t_returns[t_name] = [r for r in all_returns if is_my_op(r, t_name)]

    # create_computation is an async call using gPRC future
    # allowing child transformers to create computation simultaneously
    # get_computation waits the corresponding request to finish
    logger.info('Creating remote computations'),
    for t_name, trans in iteritems(self.transformer.child_transformers):
        logger.debug('child transformer: {}'.format(t_name))
        trans.build_transformer()
        transform_ops = [
            r.args[0] if isinstance(r, ResultOp) else r
            for r in t_returns[t_name]
        ]
        trans.create_computation(pb_whole_graph, transform_ops,
                                 t_placeholders[t_name])
    for t_name, trans in iteritems(self.transformer.child_transformers):
        comp = trans.get_computation()
        # positions of this child's params within the global parameter list
        comp.param_idx = [
            g_pos
            for g_pos, p in enumerate(self.computation_op.parameters)
            if is_my_op(p, t_name)
        ]

        # when there is a ResultOp, hack around it
        comp.returns = dict()
        for i, op in enumerate(t_returns[t_name]):
            if op in self.returns and 'hetr_replaced_by' not in op.metadata:
                comp.returns[op] = i
            elif 'replaces_op' in op.metadata and op.metadata[
                    'replaces_op'] in self.returns:
                comp.returns[op.metadata['replaces_op']] = i
        self.child_computations[t_name] = comp