def eval_single_image_imagenet(opt_model, loaded_model, image_path, image_dims):
    img = Image.open(image_path)
    if image_path.endswith("png"):
        # flatten PNG alpha onto a white background (img doubles as its own mask)
        temp = Image.new("RGB", img.size, (255, 255, 255))
        temp.paste(img, img)
        img = temp
    resized = img.resize((image_dims[2], image_dims[1]), Image.ANTIALIAS)
    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]
    hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2))
    if "VGG" in opt_model:
        arguments = {loaded_model.arguments[0]: [hwc_format]}
        output = loaded_model.eval(arguments)
        sm = cntk.softmax(output[0])
        return sm.eval()
    elif "InceptionV3" in opt_model:
        z = cntk.as_composite(loaded_model[0].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})
    else:
        z = cntk.as_composite(loaded_model[3].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})
    predictions = np.squeeze(output)
    return predictions
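# A hedged usage sketch for eval_single_image_imagenet above. The model file
# name, image path, and image_dims are hypothetical placeholders, not values
# from the original source; image_dims is (channels, height, width).
import cntk
import numpy as np

loaded_model = cntk.load_model("VGG16_ImageNet.model")  # assumed checkpoint
probs = eval_single_image_imagenet("VGG16", loaded_model, "cat.jpg", (3, 224, 224))
print(np.argsort(probs)[-5:][::-1])  # top-5 predicted class indices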
def test_factor_dense():
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50

    input = C.input_variable(input_dim)
    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)
    newz = nc.factor_dense(z, projection_function=_get_rank_same_size,
                           filter_function=_filter)
    newblocks = C.logging.graph.depth_first_search(
        newz, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
    assert(newblocks[1].op_name == "DenseFactored")
    block_root = C.as_composite(newblocks[1].block_root)
    # no reduction, same size but factored.
    assert(block_root.W1.value.shape == (50, 50))

    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size,
                           filter_function=_filter)
    newblocks = C.logging.graph.depth_first_search(
        newz, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
    assert(newblocks[1].op_name == "DenseFactored")
    block_root = C.as_composite(newblocks[1].block_root)
    # the reduction has taken place now.
    assert(block_root.W1.value.shape == (50, 40))
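# The test above relies on helpers defined elsewhere in the test module. A
# minimal sketch of what they might look like, inferred only from the asserted
# shapes ((50, 50) kept vs. (50, 40) reduced); the exact signatures expected
# by nc.factor_dense may differ.
def _get_rank_same_size(W):
    # keep full rank: a (50, 50) Dense weight stays (50, 50) after factoring
    return int(W.shape[0])

def _get_rank_reduced_size(W):
    # reduce rank to 80%: a (50, 50) weight factors into (50, 40) x (40, 50)
    return int(W.shape[0] * 0.8)

def _filter(model):
    # only factor Dense functions with a sufficiently large weight matrix
    return model.op_name == 'Dense' and model.W.value.shape[0] >= 50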
def test_as_composite():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,))
    b = C.parameter((proj_dim))
    w = C.parameter((input_dim, proj_dim))
    func_name = 't_plus_b'
    t_plus_b = C.plus(C.times(x, w), b, name=func_name)
    assert(t_plus_b.root_function.name == func_name)
    composite = C.as_composite(t_plus_b.root_function)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(composite)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(t_plus_b)
    assert(composite.root_function.name == func_name)
def test(test_data, model_path, model_file, config_file):
    polymath = PolyMath(config_file)
    model = C.load_model(os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 32  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    tsv_reader = create_tsv_reader(loss, test_data, polymath, batch_size, 1, is_test=True, misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w', encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(data, outputs=[begin_logits, end_logits, loss], as_numpy=False)
            # the gradient of the best-span score w.r.t. the predictions yields
            # one-hot indicators at the chosen begin/end positions
            g = best_span_score.grad({begin_prediction: out[begin_logits], end_prediction: out[end_logits]},
                                     wrt=[begin_prediction, end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
            span = predicted_span.eval(other_input_map)
            for seq, (raw_text, ctokens, answer, uid) in enumerate(
                    zip(misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid'])):
                seq_where = np.argwhere(span[seq])[:, 0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin, span_end)
                results['query_id'] = int(uid)
                results['answers'] = [predict_answer]
                json.dump(results, json_output)
                json_output.write("\n")
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
def create_transfer_learning_model(input, num_classes, model_file, freeze=False):
    base_model = load_model(model_file)
    base_model = C.as_composite(base_model[3].owner)

    # Load the pretrained classification net and find nodes
    feature_node = C.logging.find_by_name(base_model, feature_node_name)
    last_node = C.logging.find_by_name(base_model, last_hidden_node_name)
    base_model = C.combine([last_node.owner]).clone(
        C.CloneMethod.freeze if freeze else C.CloneMethod.clone,
        {feature_node: C.placeholder(name='features')})
    base_model = base_model(C.input_variable((num_channels, image_height, image_width)))

    r1 = C.logging.find_by_name(base_model, "z.x.x.r")
    r2_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.r")
    r3_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.r")
    r4_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.x.x.r")

    up_r1 = OneByOneConvAndUpSample(r1, 3, num_classes)
    up_r2_2 = OneByOneConvAndUpSample(r2_2, 2, num_classes)
    up_r3_2 = OneByOneConvAndUpSample(r3_2, 1, num_classes)
    up_r4_2 = OneByOneConvAndUpSample(r4_2, 0, num_classes)

    merged = C.splice(up_r1, up_r3_2, up_r2_2, axis=0)

    resnet_fcn_out = Convolution((1, 1), num_classes, init=he_normal(),
                                 activation=sigmoid, pad=True)(merged)

    z = UpSampling2DPower(resnet_fcn_out, 2)

    return z
def inference(model, data):
    p = Model()
    model = C.load_model(model)
    prob = model.outputs[0]
    loss = C.as_composite(model.outputs[1].owner)
    mb_test, map_test = deserialize(loss, data, p, randomize=False, repeat=False, is_test=True)
    token = argument_by_name(loss, 'token')
    results = []
    total_samples = 411972
    with tqdm(total=total_samples, ncols=79) as progress_bar:
        while True:
            data = mb_test.next_minibatch(4, input_map=map_test)
            if not data:
                break
            out = model.eval(data, outputs=[prob])
            results.extend(out)
            progress_bar.update(len(data))
    assert(len(results) == total_samples)
    return results
def verify_model(cntk_model, node_name, tmpdir, model_name, image=None,
                 skip_round_trip_test=True,
                 use_external_files_to_store_parameters=False):
    if node_name is not None:
        cntk_node = cntk_model.find_by_name(node_name)
        if not cntk_node:
            cntk_node = C.logging.depth_first_search(
                cntk_model, lambda x: x.uid == node_name, depth=10)[0]
        cntk_node_model = C.as_composite(cntk_node)
    else:
        node_name = "full"
        cntk_node_model = cntk_model
    sanitized_node_name = model_name + node_name.replace("/", ".")

    if image is None:
        image = np.random.rand(*np.shape(cntk_model.arguments[0])).astype(np.float32)

    test_model_path = os.path.join(str(tmpdir), R'test_' + sanitized_node_name)
    print(test_model_path)
    if os.path.exists(test_model_path):
        shutil.rmtree(test_model_path, ignore_errors=True)
    verify_sequence_model(cntk_node_model, image, tmpdir, sanitized_node_name,
                          resave=not skip_round_trip_test,
                          use_external_files_to_store_parameters=use_external_files_to_store_parameters)
def inference(model, test):
    p = Model()
    model = C.load_model(model)
    cos = model.outputs[0]
    loss = C.as_composite(model.outputs[1].owner)
    mb_test, map_test = deserialize(loss, test, p, randomize=False, repeat=False, is_test=True)
    c1 = argument_by_name(loss, 'c1')
    c2 = argument_by_name(loss, 'c2')
    results = []
    if 'test' in test:
        total_samples = 3000
    else:
        total_samples = num_validation
    with tqdm(total=total_samples) as progress_bar:
        while True:
            data = mb_test.next_minibatch(minibatch_size, input_map=map_test)
            progress_bar.update(len(data))
            if not data:
                break
            out = model.eval(data, outputs=[cos])
            results.extend(out)
    assert(len(results) == total_samples)
    return results
def _main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--network', type=_text_type, help='Model Type',
                        required=True, choices=MODEL_URL.keys())
    parser.add_argument('-i', '--image', default=None, type=_text_type,
                        help='Test Image Path')
    parser.add_argument('-o', '--output_dir', default='./', type=_text_type,
                        help='Caffe Checkpoint file name')
    args = parser.parse_args()

    fn = download_file(MODEL_URL[args.network], directory=args.output_dir)
    if not fn:
        return -1

    model = C.Function.load(fn)
    if len(model.outputs) > 1:
        for idx, output in enumerate(model.outputs):
            if len(output.shape) > 0:
                eval_node = idx
                break
        model = C.as_composite(model[eval_node].owner)
        model.save(fn)
    print("Model {} is saved as {}.".format(args.network, fn))

    if args.image:
        import numpy as np
        from mmdnn.conversion.examples.imagenet_test import TestKit
        func = TestKit.preprocess_func['cntk'][args.network]
        img = func(args.image)
        img = np.transpose(img, (2, 0, 1))
        predict = model.eval({model.arguments[0]: [img]})
        predict = np.squeeze(predict)
        top_indices = predict.argsort()[-5:][::-1]
        result = [(i, predict[i]) for i in top_indices]
        print(result)
        print(np.sum(result))

    return 0
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes
    all Functions obtained by applying 'filter', with a new Function obtained by calling the specified 'converter'

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
        block = blocks1[i]  # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth=0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
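# A hedged usage sketch of convert() above: replace every ReLU primitive in a
# small model with leaky_relu. The model 'z' is constructed here purely for
# illustration and is not part of the original source.
import cntk as C

x = C.input_variable(10)
z = C.layers.Sequential([C.layers.Dense(20, activation=C.relu),
                         C.layers.Dense(5)])(x)

relu_filter = lambda f: type(f) == C.Function and f.op_name == 'ReLU'
leaky_converter = lambda f: C.leaky_relu(f.inputs[0])

z_leaky = convert(z, relu_filter, leaky_converter)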
def func(dh, dc, input):
    LSTM_func = LSTM_cell(dh, dc, input)
    if use_scan:
        LSTM_func_root = C.as_composite(LSTM_func.outputs[0].owner.block_root)
        args = LSTM_func_root.arguments
        LSTM_func = LSTM_func_root.clone(C.CloneMethod.share,
                                         {args[0]: input, args[1]: dh, args[2]: dc})
    return LSTM_func
def download(cls, architecture, path="./"):
    if cls.sanity_check(architecture):
        architecture_file = download_file(cls.architecture_map[architecture], directory=path)
        model = C.Function.load(architecture_file)
        if len(model.outputs) > 1:
            for idx, output in enumerate(model.outputs):
                if len(output.shape) > 0:
                    eval_node = idx
                    break
            model = C.as_composite(model[eval_node].owner)
            model.save(architecture_file)
        print("Cntk Model {} saved as [{}].".format(architecture, architecture_file))
        return architecture_file
    else:
        return None
def convert_model_and_gen_data(input, output, end_node, seq_len, batch_size):
    cntk_model = C.load_model(input)
    if end_node:
        nodes = C.logging.depth_first_search(cntk_model, lambda x: x.name == end_node, depth=-1)
        assert len(nodes) == 1
        cntk_model = C.as_composite(nodes[0])
    cntk_model.save(output, C.ModelFormat.ONNX)

    if seq_len == 0:
        return

    pair_desc = PairDescription()
    pair_string = onnx.load(output).graph.doc_string
    pair_desc.parse_from_string(pair_string)

    cntk_feeds = {}
    for var in cntk_model.arguments:
        data_shape = []
        for ax in var.dynamic_axes:
            if ax.name == 'defaultBatchAxis':
                data_shape = data_shape + [batch_size]
            else:
                data_shape = data_shape + [seq_len]  # TODO: handle models with multiple sequence axes
        data_shape = data_shape + list(var.shape)
        cntk_feeds[var] = np.random.rand(*data_shape).astype(var.dtype)

    # run inference with CNTK
    cntk_output = cntk_model.eval(cntk_feeds)
    if type(cntk_output) != dict:
        assert len(cntk_model.outputs) == 1
        cntk_output = {cntk_model.output: cntk_output}

    test_data_dir = os.path.join(os.path.split(output)[0], 'test_data_set_0')
    os.makedirs(test_data_dir, exist_ok=True)
    save_data(test_data_dir, cntk_feeds)
    save_data(test_data_dir, cntk_output,
              pair_desc.get_pairs(PairDescription.PairType.uid_2_onnx_node_name))
def convo_block_converter(block):
    convo_filter = (lambda x: type(x) == C.Function
                    and not x.is_block  # replace the inner function only.
                    and x.op_name == 'Convolution')

    def convolution_converter(x):
        assert(not x.is_block)  # we replace only the function.
        attributes = x.attributes
        # the parameter W of the convolution has the shape
        # [num filters, depth, (filter shape)]
        num_filters = x.W.shape[0]
        depth = x.W.shape[1]
        filter_shape = (x.W.shape[-2], x.W.shape[-1])
        strides = attributes["strides"][-1]
        # check for square strides for now.
        assert(strides == attributes["strides"][-2])
        padding = attributes["autoPadding"]
        pad = padding[-1]
        # Checking for the last two elements in the padding vector.
        assert(pad == padding[-2])
        return binary_convolution(filter_shape,
                                  num_filters=num_filters,
                                  channels=depth,
                                  strides=strides,
                                  pad=pad,
                                  name='BinaryConvolution')(block.inputs[-1])

    return C.misc.convert(C.as_composite(block.block_root), convo_filter, convolution_converter)
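# Hypothetical top-level application of convo_block_converter above: binarize
# every Convolution block in a model 'z' (assumed to be defined elsewhere),
# reusing the same C.misc.convert entry point the converter itself calls.
convo_block_filter = lambda x: (type(x) == C.Function and x.is_block
                                and x.op_name == 'Convolution')
binarized_z = C.misc.convert(z, convo_block_filter, convo_block_converter)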
def validate_model(i2w, test_data, model, polymath):
    print('validating')
    RL = rouge.Rouge()
    testout = model.outputs[1]  # according to model.shape
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    loss = model.outputs[5]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)

    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')
    onehot = argument_by_name(root, 'aw')

    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    one2num = C.argmax(onehot, 0)

    minibatch_size = 128
    num_sequences = 0
    stat = np.array([0, 0, 0, 0, 0, 0], dtype=np.dtype('float64'))
    loss_sum = 0
    cnt = 0
    # while True:
    while cnt < 1000:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (onehot in data) or data[onehot].num_sequences == 0:
            break
        out = model.eval(data, outputs=[testout, start_logits, end_logits, context, loss], as_numpy=True)
        true = one2num.eval({onehot: data[onehot]})
        g = best_span_score.grad({begin_prediction: out[start_logits], end_prediction: out[end_logits]},
                                 wrt=[begin_prediction, end_prediction], as_numpy=False)
        # print(g[begin_prediction], g[end_prediction])
        other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
        span = predicted_span.eval(other_input_map)
        # print(span)
        span_out = np.asarray(span).reshape(-1).tolist()
        context_o = np.asarray(out[context]).reshape(-1).tolist()
        predict_answer = []
        for i in range(len(span_out)):
            if span_out[i] == 1:
                predict_answer.append(context_o[i])
        # pred_out = np.asarray(out[context]).reshape(-1).tolist()
        # predict_answer = pred_out[span_begin:span_end+1]
        if cnt < 10:
            # print(predict_answer)
            print(format_true_sequences(predict_answer, i2w, polymath))
            print('\n')
        cnt += 1
        true_text = format_true_sequences(np.asarray(true).reshape(-1).tolist(), i2w, polymath)
        predout_text = format_predict_sequences(np.asarray(out[testout]).reshape(-1), predict_answer, i2w, polymath)
        testloss = out[loss]
        stat += RL.calc_score(predout_text, true_text)
        loss_sum += np.sum(np.asarray(testloss))
        num_sequences += data[onehot].num_sequences

    loss_avg = loss_sum / num_sequences
    stat_avg = stat / float(num_sequences)
    print("Validated {} sequences, loss {:.4f}, RouL {:.4f}, LCS {:.4f}, LengCan {:.4f}, LenRef {:.4f}, prec {:.4f}, rec {:.4f}".format(
        num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2], stat_avg[3], stat_avg[4], stat_avg[5]))

    return loss_avg
def _convert_optimized_rnnstack(root_func, map_param_to_func):
    '''
    Internal implementation that converts root_func that contains cudnn optimized_rnnstack
    to use non-cudnn functions, so it can be used in non-CUDA environment

    Args:
        root_func: a root function of a graph that contains optimized_rnnstacks
        map_param_to_func: a mapping of converted rnn functions for parameter sharing
    Returns:
        converted root_func on GEMM based implementation of rnn that can be used on CPU
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
        block = blocks1[i]  # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = _convert_optimized_rnnstack(block_root, map_param_to_func)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all optimized_rnnstack instances in root_func
    cudnn_rnns = C.logging.graph.depth_first_search(
        root_func, lambda x: type(x) == C.Function and x.root_function.op_name == 'OptimizedRNNStack', depth=0)
    for cudnn_rnn in cudnn_rnns:
        param = cudnn_rnn.parameters[0]
        if map_param_to_func[param]:
            # shared parameter, clone
            converted = map_param_to_func[param][0].clone(
                C.CloneMethod.share,
                {map_param_to_func[param][1]: cudnn_rnn.inputs[0],
                 map_param_to_func[param][2]: C.placeholder()})
        else:
            # unique or first parameter, convert
            converted = _from_optimized_rnnstack(cudnn_rnn)
            map_param_to_func[param] = (converted, cudnn_rnn.inputs[0], cudnn_rnn.output,)

        if not cudnn_rnn.output in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {cudnn_rnn.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == cudnn_rnn.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
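# A sketch of how _convert_optimized_rnnstack might be driven at the top
# level; the wrapper name and the defaultdict seeding are assumptions, not
# part of the original source.
from collections import defaultdict

def convert_optimized_rnnstack(model):
    # a single shared map lets RNNs with tied parameters be cloned rather
    # than re-converted
    return _convert_optimized_rnnstack(model, defaultdict(lambda: None))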
def validate_model(test_data, model, polymath):
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')

    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2 * common_len / (predicted_len + true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len / predicted_len
    recall = common_len / true_len
    overlap = C.greater(common_len, 0)
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall), s(overlap), s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 2048
    num_sequences = 0
    stat_sum = 0
    loss_sum = 0

    with tqdm(ncols=32) as progress_bar:
        while True:
            data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
            if not data or not (begin_label in data) or data[begin_label].num_sequences == 0:
                break
            out = model.eval(data, outputs=[begin_logits, end_logits, loss], as_numpy=False)
            testloss = out[loss]
            g = best_span_score.grad({begin_prediction: out[begin_logits], end_prediction: out[end_logits]},
                                     wrt=[begin_prediction, end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction],
                               begin_label: data[begin_label], end_label: data[end_label]}
            stat_sum += stats.eval(other_input_map)
            loss_sum += np.sum(testloss.asarray())
            num_sequences += data[begin_label].num_sequences
            progress_bar.update(data[begin_label].num_sequences)

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print("\nValidated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}".format(
        num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2], stat_avg[3], stat_avg[4], stat_avg[5], stat_avg[6]))

    return loss_avg
def streaming_inference(model_path, model_file, config_file, port="8889", is_test=1):
    polymath = PolyMath(config_file)
    model = C.load_model(os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}

    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://*:" + port)  # use the requested port instead of the hardcoded 8889

    while True:
        message = socket.recv()
        question_str, context_str = pickle.loads(message)
        line = "1102432\tDESCRIPTION\t" + context_str + "\t" + question_str
        data = streaming_create_tsv_reader(loss, line, polymath, batch_size, 1, is_test=True, misc=misc)
        out = model.eval(data, outputs=[begin_logits, end_logits, loss], as_numpy=False)
        g = best_span_score.grad({begin_prediction: out[begin_logits], end_prediction: out[end_logits]},
                                 wrt=[begin_prediction, end_prediction], as_numpy=False)
        other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
        span = predicted_span.eval(other_input_map)
        # print("just before for {}".format(misc['ctoken']))
        seq, raw_text, ctokens, answer, uid = 0, misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid']
        # print("just AFTER for {}".format(ctokens))
        seq_where = np.argwhere(span[seq])[:, 0]
        span_begin = np.min(seq_where)
        span_end = np.max(seq_where)
        # print("before predict")
        predict_answer = get_answer(raw_text[0], ctokens[0], span_begin, span_end)
        # results['query_id'] = int(uid)
        result = (question_str, predict_answer)
        socket.send(pickle.dumps(result))
def _get(f, attr=None):
    # evaluates f against 'data' captured from the enclosing scope and picks
    # out the output that corresponds to f
    return C.as_composite(f.owner).eval(data)[f]
def streaming_inference(line, model_path, model_file, config_file, port="8889", is_test=1):
    polymath = PolyMath(config_file)
    model = C.load_model(os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    Flag = True
    while Flag:
        # try:
        if True:
            data = streaming_create_tsv_reader(loss, line, polymath, batch_size, 1, is_test=True, misc=misc)
            out = model.eval(data, outputs=[begin_logits, end_logits, loss], as_numpy=False)
            g = best_span_score.grad({begin_prediction: out[begin_logits], end_prediction: out[end_logits]},
                                     wrt=[begin_prediction, end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
            span = predicted_span.eval(other_input_map)
            print("just before for {}".format(misc['ctoken']))
            seq, raw_text, ctokens, answer, uid = 0, misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid']
            print("just AFTER for {}".format(ctokens))
            seq_where = np.argwhere(span[seq])[:, 0]
            span_begin = np.min(seq_where)
            span_end = np.max(seq_where)
            print("before predict")
            predict_answer = get_answer(raw_text[0], ctokens[0], span_begin, span_end)
            # results['query_id'] = int(uid)
            result = predict_answer
            print(result)
        # except:
        #     import pdb
        #     pdb.set_trace()
        Flag = False
def test(i2w, test_data, model_path, model_file, config_file):
    # C.try_set_default_device(C.cpu())
    polymath = PolyMath(config_file)
    print(test_data, model_path, model_file, model_name)
    print(os.path.join(model_path, model_file))
    model = C.Function.load(os.path.join(model_path, model_file if model_file else model_name))
    print(model)
    output = model.outputs[1]
    # loss = model.outputs[5]
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    # loss = model.outputs[5]
    root = C.as_composite(output.owner)

    begin_prediction = C.sequence.input_variable(1, sequence_axis=start_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    tsv_reader = create_tsv_reader(root, test_data, polymath, batch_size, 1, is_test=True, misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w', encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(data, outputs=[output, start_logits, end_logits, context], as_numpy=False)
            g = best_span_score.grad({begin_prediction: out[start_logits], end_prediction: out[end_logits]},
                                     wrt=[begin_prediction, end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
            span = predicted_span.eval(other_input_map)
            for seq, (raw_text, ctokens, answer, uid) in enumerate(
                    zip(misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid'])):
                # g = best_span_score.grad({begin_prediction: out[start_logits], end_prediction: out[end_logits]}, wrt=[begin_prediction, end_prediction], as_numpy=False)
                # other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
                # span = predicted_span.eval(other_input_map)
                seq_where = np.argwhere(span[seq])[:, 0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin, span_end)
                # span_out = np.asarray(span).reshape(-1).tolist()
                # context_o = np.asarray(out[context]).reshape(-1).tolist()
                # predict_answer = []
                # for i in range(len(span_out)):
                #     if span_out[i] == 1:
                #         predict_answer.append(context_o[i])
                print(predict_answer)
                final_answer = format_output_sequences(np.asarray(out[output].as_sequences()).reshape(-1),
                                                       predict_answer, i2w, polymath)
                results['query_id'] = int(uid)
                results['answers'] = [final_answer]
                print(results)
                json.dump(results, json_output)
                json_output.write("\n")
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
def main(base_folder, output_dir, training_mode='majority', learning_rate=0.05,
         momentum_rate=0.9, l2_reg_weight=0.0, model_name='VGG13', max_epochs=100):
    # create the model
    num_classes = len(emotion_table)
    model = build_model(num_classes, model_name)

    # set the input variables.
    input_var = C.input_variable((1, model.input_height, model.input_width), np.float32)
    label_var = C.input_variable((num_classes), np.float32)

    # read FER+ dataset.
    train_params = FERPlusParameters(num_classes, model.input_height, model.input_width, training_mode, False)
    test_and_val_params = FERPlusParameters(num_classes, model.input_height, model.input_width, "majority", True)

    train_data_reader = FERPlusReader.create(base_folder, train_folders, "label.csv", train_params)
    val_data_reader = FERPlusReader.create(base_folder, valid_folders, "label.csv", test_and_val_params)
    test_data_reader = FERPlusReader.create(base_folder, test_folders, "label.csv", test_and_val_params)

    # print summary of the data.
    display_summary(train_data_reader, val_data_reader, test_data_reader)

    # get the probabilistic output of the model.
    z = model.model(input_var)
    pred = C.softmax(z)

    epoch_size = train_data_reader.size()
    minibatch_size = 32

    # Training config
    lr_per_minibatch = [learning_rate] * 20 + [learning_rate / 2.0] * 20 + [learning_rate / 10.0]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, unit=C.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = C.momentum_schedule(momentum_rate, minibatch_size=minibatch_size)

    # loss and error cost
    train_loss = cost_func(training_mode, pred, label_var)
    pe = C.classification_error(z, label_var)

    # construct the trainer
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)

    # Construct the distributed learner
    distributed_learner = C.train.distributed.data_parallel_distributed_learner(learner)

    num_partitions = C.train.distributed.Communicator.num_workers()
    partition = C.train.distributed.Communicator.rank()

    progress_printer = C.logging.ProgressPrinter(freq=50, tag='Training', rank=partition, num_epochs=max_epochs)
    trainer = C.Trainer(z, (train_loss, pe), distributed_learner, progress_printer)

    # Get minibatches of images to train with and perform model training
    max_val_accuracy = 0.0
    final_test_accuracy = 0.0
    best_test_accuracy = 0.0

    epoch = 0
    best_epoch = 0
    while epoch < max_epochs:
        train_data_reader.reset()
        val_data_reader.reset()
        test_data_reader.reset()

        # Training
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(
                minibatch_size, num_data_partitions=num_partitions, partition_index=partition)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var: images, label_var: labels})

            # keep track of statistics.
            training_loss += trainer.previous_minibatch_loss_average * current_batch_size
            training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size

        training_accuracy /= train_data_reader.size()
        training_accuracy = 1.0 - training_accuracy
        trainer.summarize_training_progress()

        # Validation
        val_accuracy = 0
        while val_data_reader.has_more():
            images, labels, current_batch_size = val_data_reader.next_minibatch(
                minibatch_size, num_data_partitions=num_partitions, partition_index=partition)
            val_accuracy += trainer.test_minibatch({input_var: images, label_var: labels}) * current_batch_size

        val_accuracy /= val_data_reader.size()
        val_accuracy = 1.0 - val_accuracy
        trainer.summarize_test_progress()

        # if validation accuracy goes higher, we compute test accuracy
        test_run = False
        if val_accuracy > max_val_accuracy:
            best_epoch = epoch
            max_val_accuracy = val_accuracy
            trainer.save_checkpoint(os.path.join(output_dir, "model_{}".format(best_epoch)))
            test_run = True

            test_accuracy = 0
            while test_data_reader.has_more():
                images, labels, current_batch_size = test_data_reader.next_minibatch(
                    minibatch_size, num_data_partitions=num_partitions, partition_index=partition)
                test_accuracy += trainer.test_minibatch({input_var: images, label_var: labels}) * current_batch_size
            trainer.summarize_test_progress()

            test_accuracy /= test_data_reader.size()
            test_accuracy = 1.0 - test_accuracy
            final_test_accuracy = test_accuracy
            if final_test_accuracy > best_test_accuracy:
                best_test_accuracy = final_test_accuracy

        epoch += 1

    # Output the best checkpointed model to ONNX format, only save on master process
    if C.train.distributed.Communicator.is_main():
        best_model = C.Function.load(os.path.join(output_dir, "model_{}".format(best_epoch)))
        inference_model = C.as_composite(best_model.outputs[0].owner)
        # or possibly:
        # inference_model = C.as_composite(best_model[0].owner)
        print(inference_model)
        inference_model.save(os.path.join(output_dir, "model.onnx"), format=C.ModelFormat.ONNX)
def validate_model(test_data, model, polymath):
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')

    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2 * common_len / (predicted_len + true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len / predicted_len
    recall = common_len / true_len
    overlap = C.greater(common_len, 0)
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall), s(overlap), s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 20000
    num_sequences = 0
    stat_sum = 0
    loss_sum = 0

    while True:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (begin_label in data) or data[begin_label].num_sequences == 0:
            break
        out = model.eval(data, outputs=[begin_logits, end_logits, loss], as_numpy=False)
        testloss = out[loss]
        g = best_span_score.grad({begin_prediction: out[begin_logits], end_prediction: out[end_logits]},
                                 wrt=[begin_prediction, end_prediction], as_numpy=False)
        other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction],
                           begin_label: data[begin_label], end_label: data[end_label]}
        stat_sum += stats.eval(other_input_map)
        loss_sum += np.sum(testloss.asarray())
        num_sequences += data[begin_label].num_sequences

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print("Validated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}".format(
        num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2], stat_avg[3], stat_avg[4], stat_avg[5], stat_avg[6]))

    return loss_avg
def user_matmul(left, right, shape=None, stop_gradients=False, name=''):
    return ct.as_composite(matmul(left, right, shape, stop_gradients), name=name)
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes
    all Functions obtained by applying 'filter', with a new Function obtained by calling the specified 'converter'

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0)
        block = blocks1[i]  # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth=0)
    for i in range(len(functions_to_convert)):
        # The graph could be modified already by this function, so we need to rescan to the new set.
        functions_to_convert1 = C.logging.graph.depth_first_search(root_func, filter, depth=0)
        # We are using a filter passed in by the caller. So once a function is converted, we may not
        # get the same number of functions again, so we need to use correct index depending on the new size.
        index = 0
        if len(functions_to_convert) > len(functions_to_convert1):
            assert(len(functions_to_convert) - len(functions_to_convert1) == i)  # Only one conversion at a time.
            # index = 0 will work for this case, we are picking the first function from the new list.
        elif len(functions_to_convert) == len(functions_to_convert1):
            index = i  # here we pick the current index of the for loop.
        else:
            raise RuntimeError("The conversion adds another possible conversion(s). Stopping infinite conversions.")

        function_to_convert = functions_to_convert1[index]
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func