Example #1
def eval_single_image_imagenet(opt_model, loaded_model, image_path, image_dims):
    img = Image.open(image_path)

    if image_path.endswith("png"):
        temp = Image.new("RGB", img.size, (255, 255, 255))
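        # paste(img, img) is PIL's abbreviated paste(im, mask) form: the PNG's own
        # alpha channel serves as the mask, flattening transparency onto the white canvas.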
        temp.paste(img, img)
        img = temp
    resized = img.resize((image_dims[2], image_dims[1]), Image.ANTIALIAS)
    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]
    hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    if "VGG" in opt_model:
        arguments = {loaded_model.arguments[0]: [hwc_format]}
        output = loaded_model.eval(arguments)
        sm = cntk.softmax(output[0])
        return sm.eval()

    elif "InceptionV3" in opt_model:
        z = cntk.as_composite(loaded_model[0].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})

    else:
        z = cntk.as_composite(loaded_model[3].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})

    predictions = np.squeeze(output)
    return predictions
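
A minimal usage sketch (not part of the original example): the model file, image path, and ImageNet dimensions below are placeholders, and the "VGG" branch above is the one exercised.

import numpy as np
import cntk

loaded = cntk.load_model("VGG16_ImageNet_Caffe.model")  # hypothetical model file
probs = eval_single_image_imagenet("VGG16", loaded, "test.png", (3, 224, 224))
top5 = np.argsort(probs)[-5:][::-1]  # indices of the five highest-scoring classes
print([(int(i), float(probs[i])) for i in top5])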
Example #2
def test_factor_dense():

    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50

    input = C.input_variable(input_dim)
    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)
    
    newz = nc.factor_dense(z, projection_function=_get_rank_same_size, filter_function = _filter)
    newblocks = C.logging.graph.depth_first_search(
                    newz, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    
    assert(newblocks[1].op_name == "DenseFactored")    
    block_root = C.as_composite(newblocks[1].block_root)
    # no reduction, same size but factored.
    assert(block_root.W1.value.shape == (50, 50))
    
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size, filter_function = _filter)
    newblocks = C.logging.graph.depth_first_search(
                    newz, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    assert(newblocks[1].op_name == "DenseFactored")    
    block_root = C.as_composite(newblocks[1].block_root)
    # the reduction has taken place now.
    assert(block_root.W1.value.shape == (50, 40))
Example #3
def test_factor_dense():

    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50

    input = C.input_variable(input_dim)
    z = _create_model_dense(input, input_dim, hidden_layer_dim,
                            num_output_classes)

    newz = nc.factor_dense(z,
                           projection_function=_get_rank_same_size,
                           filter_function=_filter)
    newblocks = C.logging.graph.depth_first_search(
        newz,
        lambda x: type(x) == C.Function and x.root_function.is_block,
        depth=0)

    assert (newblocks[1].op_name == "DenseFactored")
    block_root = C.as_composite(newblocks[1].block_root)
    # no reduction, same size but factored.
    assert (block_root.W1.value.shape == (50, 50))

    newz = nc.factor_dense(z,
                           projection_function=_get_rank_reduced_size,
                           filter_function=_filter)
    newblocks = C.logging.graph.depth_first_search(
        newz,
        lambda x: type(x) == C.Function and x.root_function.is_block,
        depth=0)
    assert (newblocks[1].op_name == "DenseFactored")
    block_root = C.as_composite(newblocks[1].block_root)
    # the reduction has taken place now.
    assert (block_root.W1.value.shape == (50, 40))
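
The helper callbacks passed to nc.factor_dense above are not shown. A plausible sketch, consistent with the asserted shapes (50, 50) and (50, 40) but hypothetical in its details:

def _get_rank_same_size(W):
    # keep the full rank: a 50x50 Dense weight stays 50x50 after factoring
    return int(len(W) * 1.0)

def _get_rank_reduced_size(W):
    # reduce the rank to 80%: a 50x50 Dense weight is factored through rank 40
    return int(len(W) * 0.8)

def _filter(model):
    # only factor Dense layers whose weight matrix has 50 rows (the hidden layers)
    return model.W.value.shape[0] == 50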
Example #4
def test_as_composite():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim, ))
    b = C.parameter((proj_dim))
    w = C.parameter((input_dim, proj_dim))
    func_name = 't_plus_b'
    t_plus_b = C.plus(C.times(x, w), b, name=func_name)
    assert (t_plus_b.root_function.name == func_name)
    composite = C.as_composite(t_plus_b.root_function)
    assert (composite.root_function.name == func_name)
    composite = C.as_composite(composite)
    assert (composite.root_function.name == func_name)
    composite = C.as_composite(t_plus_b)
    assert (composite.root_function.name == func_name)
Example #5
def test_as_composite():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,))
    b = C.parameter((proj_dim))
    w = C.parameter((input_dim, proj_dim))
    func_name = 't_plus_b'
    t_plus_b = C.plus(C.times(x, w), b, name=func_name)
    assert(t_plus_b.root_function.name == func_name)
    composite = C.as_composite(t_plus_b.root_function)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(composite)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(t_plus_b)
    assert(composite.root_function.name == func_name)
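
A short sketch (not part of the original test) showing that the composite built above evaluates like any other CNTK Function; the input value is purely illustrative:

import numpy as np
val = composite.eval({x: np.asarray([[0.5]], dtype=np.float32)})
# val has shape (1, 2): one sample projected from input_dim=1 to proj_dim=2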
Example #6
def test(test_data, model_path, model_file, config_file):
    polymath = PolyMath(config_file)
    model = C.load_model(os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits   = model.outputs[1]
    loss         = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 32 # in sequences
    misc = {'rawctx':[], 'ctoken':[], 'answer':[], 'uid':[]}
    tsv_reader = create_tsv_reader(loss, test_data, polymath, batch_size, 1, is_test=True, misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w', encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(data, outputs=[begin_logits,end_logits,loss], as_numpy=False)
            g = best_span_score.grad({begin_prediction:out[begin_logits], end_prediction:out[end_logits]}, wrt=[begin_prediction,end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
            span = predicted_span.eval((other_input_map))
            for seq, (raw_text, ctokens, answer, uid) in enumerate(zip(misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid'])):
                seq_where = np.argwhere(span[seq])[:,0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin, span_end)
                results['query_id'] = int(uid)
                results['answers'] = [predict_answer]
                json.dump(results, json_output)
                json_output.write("\n")
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
Example #7
def test(test_data, model_path, model_file, config_file):
    polymath = PolyMath(config_file)
    model = C.load_model(os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits   = model.outputs[1]
    loss         = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 32 # in sequences
    misc = {'rawctx':[], 'ctoken':[], 'answer':[], 'uid':[]}
    tsv_reader = create_tsv_reader(loss, test_data, polymath, batch_size, 1, is_test=True, misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w', encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(data, outputs=[begin_logits,end_logits,loss], as_numpy=False)
            g = best_span_score.grad({begin_prediction:out[begin_logits], end_prediction:out[end_logits]}, wrt=[begin_prediction,end_prediction], as_numpy=False)
            other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
            span = predicted_span.eval((other_input_map))
            for seq, (raw_text, ctokens, answer, uid) in enumerate(zip(misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid'])):
                seq_where = np.argwhere(span[seq])[:,0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin, span_end)
                results['query_id'] = int(uid)
                results['answers'] = [predict_answer]
                json.dump(results, json_output)
                json_output.write("\n")
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
Example #8
def create_transfer_learning_model(input, num_classes, model_file, freeze=False):

    base_model = load_model(model_file)
    base_model = C.as_composite(base_model[3].owner)

    # Load the pretrained classification net and find nodes
    feature_node = C.logging.find_by_name(base_model, feature_node_name)
    last_node = C.logging.find_by_name(base_model, last_hidden_node_name)
    
    base_model = C.combine([last_node.owner]).clone(C.CloneMethod.freeze if freeze else C.CloneMethod.clone, {feature_node: C.placeholder(name='features')})
    base_model = base_model(C.input_variable((num_channels, image_height, image_width)))

    r1 = C.logging.find_by_name(base_model, "z.x.x.r")
    r2_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.r")
    r3_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.r")
    r4_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.x.x.r")

    up_r1 = OneByOneConvAndUpSample(r1, 3, num_classes)
    up_r2_2 = OneByOneConvAndUpSample(r2_2, 2, num_classes)
    up_r3_2 = OneByOneConvAndUpSample(r3_2, 1, num_classes)
    up_r4_2 = OneByOneConvAndUpSample(r4_2, 0, num_classes)
    
    merged = C.splice(up_r1, up_r3_2, up_r2_2, axis=0)

    resnet_fcn_out = Convolution((1, 1), num_classes, init=he_normal(), activation=sigmoid, pad=True)(merged)

    z = UpSampling2DPower(resnet_fcn_out,2)
    
    return z
Example #9
def inference(model, data):
    p = Model()
    model = C.load_model(model)

    prob = model.outputs[0]
    loss = C.as_composite(model.outputs[1].owner)

    mb_test, map_test = deserialize(loss,
                                    data,
                                    p,
                                    randomize=False,
                                    repeat=False,
                                    is_test=True)
    token = argument_by_name(loss, 'token')

    results = []
    total_samples = 411972

    with tqdm(total=total_samples, ncols=79) as progress_bar:
        while True:
            data = mb_test.next_minibatch(4, input_map=map_test)
            if not data:
                break
            out = model.eval(data, outputs=[prob])
            results.extend(out)
            progress_bar.update(len(data))
    assert (len(results) == total_samples)
    return results
Example #10
def verify_model(cntk_model,
                 node_name,
                 tmpdir,
                 model_name,
                 image=None,
                 skip_round_trip_test=True,
                 use_external_files_to_store_parameters=False):
    if (node_name is not None):
        cntk_node = cntk_model.find_by_name(node_name)
        if not cntk_node:
            cntk_node = C.logging.depth_first_search(
                cntk_model, lambda x: x.uid == node_name, depth=10)[0]
        cntk_node_model = C.as_composite(cntk_node)
    else:
        node_name = "full"
        cntk_node_model = cntk_model
    sanitized_node_name = model_name + node_name.replace("/", ".")
    if (image is None):
        image = np.random.rand(*np.shape(cntk_model.arguments[0])).astype(
            np.float32)

    test_model_path = os.path.join(str(tmpdir), R'test_' + sanitized_node_name)
    print(test_model_path)

    if os.path.exists(test_model_path):
        shutil.rmtree(test_model_path, ignore_errors=True)

    verify_sequence_model(cntk_node_model,
                          image,
                          tmpdir,
                          sanitized_node_name,
                          resave=not skip_round_trip_test,
                          use_external_files_to_store_parameters=
                          use_external_files_to_store_parameters)
Example #11
def inference(model, test):
    p = Model()
    model = C.load_model(model)

    cos = model.outputs[0]
    loss = C.as_composite(model.outputs[1].owner)

    mb_test, map_test = deserialize(loss,
                                    test,
                                    p,
                                    randomize=False,
                                    repeat=False,
                                    is_test=True)
    c1 = argument_by_name(loss, 'c1')
    c2 = argument_by_name(loss, 'c2')

    results = []
    if 'test' in test:
        total_samples = 3000
    else:
        total_samples = num_validation

    with tqdm(total=total_samples) as progress_bar:
        while True:
            data = mb_test.next_minibatch(minibatch_size, input_map=map_test)
            progress_bar.update(len(data))
            if not data:
                break
            out = model.eval(data, outputs=[cos])
            results.extend(out)
    assert (len(results) == total_samples)
    return results
Example #12
def _main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-n',
                        '--network',
                        type=_text_type,
                        help='Model Type',
                        required=True,
                        choices=MODEL_URL.keys())

    parser.add_argument('-i',
                        '--image',
                        default=None,
                        type=_text_type,
                        help='Test Image Path')

    parser.add_argument('-o',
                        '--output_dir',
                        default='./',
                        type=_text_type,
                        help='Output directory for the downloaded model')

    args = parser.parse_args()

    fn = download_file(MODEL_URL[args.network], directory=args.output_dir)
    if not fn:
        return -1

    model = C.Function.load(fn)

    if len(model.outputs) > 1:
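        # pick the first output whose shape is non-empty; scalar outputs (such as an
        # attached loss) are skipped so that eval_node points at the prediction tensor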
        for idx, output in enumerate(model.outputs):
            if len(output.shape) > 0:
                eval_node = idx
                break

        model = C.as_composite(model[eval_node].owner)
        model.save(fn)

        print("Model {} is saved as {}.".format(args.network, fn))

    if args.image:
        import numpy as np
        from mmdnn.conversion.examples.imagenet_test import TestKit
        func = TestKit.preprocess_func['cntk'][args.network]
        img = func(args.image)
        img = np.transpose(img, (2, 0, 1))
        predict = model.eval({model.arguments[0]: [img]})
        predict = np.squeeze(predict)
        top_indices = predict.argsort()[-5:][::-1]
        result = [(i, predict[i]) for i in top_indices]
        print(result)
        print(np.sum(result))

    return 0
Example #13
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and, in the clone, substitutes every
    Function selected by 'filter' with a new Function obtained by calling the specified 'converter'.

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda that selects the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:            
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
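
A minimal usage sketch of convert (not from the original module): 'model' is a hypothetical CNTK Function in which every ReLU primitive is to be swapped for a leaky ReLU.

relu_filter = lambda f: type(f) == C.Function and f.op_name == 'ReLU'
leaky_converter = lambda f: C.leaky_relu(f.inputs[0])

converted_model = convert(model, relu_filter, leaky_converter)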
Example #14
 def func(dh, dc, input):
     LSTM_func = LSTM_cell(dh, dc, input)
     if use_scan:
         LSTM_func_root = C.as_composite(
             LSTM_func.outputs[0].owner.block_root)
         args = LSTM_func_root.arguments
         LSTM_func = LSTM_func_root.clone(C.CloneMethod.share, {
             args[0]: input,
             args[1]: dh,
             args[2]: dc
         })
     return LSTM_func
Example #15
    def download(cls, architecture, path="./"):
        if cls.sanity_check(architecture):
            architecture_file = download_file(cls.architecture_map[architecture], directory=path)
            model = C.Function.load(architecture_file)
            if len(model.outputs) > 1:
                for idx, output in enumerate(model.outputs):
                    if len(output.shape) > 0:
                        eval_node = idx
                        break

                model = C.as_composite(model[eval_node].owner)
                model.save(architecture_file)
                print("Cntk Model {} saved as [{}].".format(architecture, architecture_file))
            return architecture_file

        else:
            return None
Example #16
def _main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True,
                        choices=MODEL_URL.keys())

    parser.add_argument('-i', '--image', default=None,
                        type=_text_type, help='Test Image Path')

    parser.add_argument('-o', '--output_dir', default='./',
                        type=_text_type, help='Output directory for the downloaded model')

    args = parser.parse_args()

    fn = download_file(MODEL_URL[args.network], directory=args.output_dir)
    if not fn:
        return -1

    model = C.Function.load(fn)

    if len(model.outputs) > 1:
        for idx, output in enumerate(model.outputs):
            if len(output.shape) > 0:
                eval_node = idx
                break

        model = C.as_composite(model[eval_node].owner)
        model.save(fn)

        print("Model {} is saved as {}.".format(args.network, fn))

    if args.image:
        import numpy as np
        from mmdnn.conversion.examples.imagenet_test import TestKit
        func = TestKit.preprocess_func['cntk'][args.network]
        img = func(args.image)
        img = np.transpose(img, (2, 0, 1))
        predict = model.eval({model.arguments[0]:[img]})
        predict = np.squeeze(predict)
        top_indices = predict.argsort()[-5:][::-1]
        result = [(i, predict[i]) for i in top_indices]
        print(result)
        print(np.sum(result))

    return 0
Example #17
    def download(cls, architecture, path="./"):
        if cls.sanity_check(architecture):
            architecture_file = download_file(
                cls.architecture_map[architecture], directory=path)
            model = C.Function.load(architecture_file)
            if len(model.outputs) > 1:
                for idx, output in enumerate(model.outputs):
                    if len(output.shape) > 0:
                        eval_node = idx
                        break

                model = C.as_composite(model[eval_node].owner)
                model.save(architecture_file)
                print("Cntk Model {} saved as [{}].".format(
                    architecture, architecture_file))
            return architecture_file

        else:
            return None
Example #18
def convert_model_and_gen_data(input, output, end_node, seq_len, batch_size):
    cntk_model = C.load_model(input)
    if end_node:
        nodes = C.logging.depth_first_search(cntk_model,
                                             lambda x: x.name == end_node,
                                             depth=-1)
        assert len(nodes) == 1
        cntk_model = C.as_composite(nodes[0])
    cntk_model.save(output, C.ModelFormat.ONNX)

    if seq_len == 0:
        return

    pair_desc = PairDescription()
    pair_string = onnx.load(output).graph.doc_string
    pair_desc.parse_from_string(pair_string)

    cntk_feeds = {}
    for var in cntk_model.arguments:
        data_shape = []
        for ax in var.dynamic_axes:
            if ax.name == 'defaultBatchAxis':
                data_shape = data_shape + [batch_size]
            else:
                # TODO: handle models with multiple sequence axes
                data_shape = data_shape + [seq_len]
        data_shape = data_shape + list(var.shape)
        cntk_feeds[var] = np.random.rand(*data_shape).astype(var.dtype)

    # run inference with CNTK
    cntk_output = cntk_model.eval(cntk_feeds)
    if type(cntk_output) != dict:
        assert len(cntk_model.outputs) == 1
        cntk_output = {cntk_model.output: cntk_output}

    test_data_dir = os.path.join(os.path.split(output)[0], 'test_data_set_0')
    os.makedirs(test_data_dir, exist_ok=True)
    save_data(test_data_dir, cntk_feeds)
    save_data(
        test_data_dir, cntk_output,
        pair_desc.get_pairs(PairDescription.PairType.uid_2_onnx_node_name))
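
One way to sanity-check the exported file (not part of the original script), assuming onnxruntime is installed; 'output' and 'cntk_feeds' refer to the variables used inside the function above.

import onnxruntime as ort

sess = ort.InferenceSession(output)         # the .onnx path the model was saved to
print([i.name for i in sess.get_inputs()])  # input names assigned by the CNTK exporter
# build a {input_name: numpy array} feed mirroring cntk_feeds, then compare
# sess.run(None, feed) against cntk_output with numpy.testing.assert_allclose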
Example #19
    def convo_block_converter(block):

        convo_filter = (
            lambda x: type(x) == C.Function
            and not x.is_block  # replace the inner function only.
            and x.op_name == 'Convolution')

        def convolution_converter(x):

            assert (not x.is_block)  # we replace only the function.
            attributes = x.attributes
            # the parameter W of the convolution has the shape
            # [num filters, depth, (filter shape)]
            num_filters = x.W.shape[0]
            depth = x.W.shape[1]
            filter_shape = (x.W.shape[-2], x.W.shape[-1])

            strides = attributes["strides"][-1]
            # check for square strides for now.
            assert (strides == attributes["strides"][-2])

            padding = attributes["autoPadding"]
            pad = padding[-1]
            # check that the last two entries of the padding vector match.
            assert (pad == padding[-2])

            return binary_convolution(filter_shape,
                                      num_filters=num_filters,
                                      channels=depth,
                                      strides=strides,
                                      pad=pad,
                                      name='BinaryConvolution')(
                                          block.inputs[-1])

        return C.misc.convert(C.as_composite(block.block_root), convo_filter,
                              convolution_converter)
Example #20
    def convo_block_converter(block):
        
        convo_filter = (lambda x: type(x) == C.Function 
                and not x.is_block # replace the inner function only.
                and x.op_name == 'Convolution')               

        def convolution_converter(x): 

            assert(not x.is_block) # we replace only the function.
            attributes = x.attributes
            # the parameter W of the convolution has the shape 
            # [num filters, depth, (filter shape)]
            num_filters = x.W.shape[0]
            depth = x.W.shape[1]
            filter_shape = (x.W.shape[-2], x.W.shape[-1])
              
            strides = attributes["strides"][-1]
            # check for square strides for now.
            assert(strides == attributes["strides"][-2])

            padding = attributes["autoPadding"]
            pad = padding[-1]
            # check that the last two entries of the padding vector match.
            assert(pad == padding[-2])
          
            return  binary_convolution(
                    filter_shape, 
                    num_filters = num_filters, 
                    channels = depth, 
                    strides=strides, 
                    pad = pad,          
                    name='BinaryConvolution')(block.inputs[-1])

        return C.misc.convert(C.as_composite(block.block_root), 
                              convo_filter, 
                              convolution_converter)
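
A sketch of how such a block converter might be applied to a whole network (hypothetical; 'model' and the binary_convolution factory are assumed to exist elsewhere). The outer filter selects Convolution blocks, and convo_block_converter then rewrites the primitive Convolution inside each one:

convolution_block_filter = (lambda x: type(x) == C.Function
                            and x.root_function.is_block
                            and x.op_name == 'Convolution')

binarized_model = C.misc.convert(model, convolution_block_filter, convo_block_converter)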
Example #21
def validate_model(i2w, test_data, model, polymath):
    print('validating')
    RL = rouge.Rouge()
    testout = model.outputs[1]  # according to model.shape
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    loss = model.outputs[5]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root,
                                             test_data,
                                             polymath,
                                             randomize=False,
                                             repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')
    onehot = argument_by_name(root, 'aw')

    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label -
                                            C.sequence.past_value(end_label))

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)

    one2num = C.argmax(onehot, 0)

    minibatch_size = 128
    num_sequences = 0

    stat = np.array([0, 0, 0, 0, 0, 0], dtype=np.dtype('float64'))
    loss_sum = 0
    cnt = 0
    #while True:
    while cnt < 1000:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (onehot in data) or data[onehot].num_sequences == 0:
            break

        out = model.eval(
            data,
            outputs=[testout, start_logits, end_logits, context, loss],
            as_numpy=True)
        true = one2num.eval({onehot: data[onehot]})

        g = best_span_score.grad(
            {
                begin_prediction: out[start_logits],
                end_prediction: out[end_logits]
            },
            wrt=[begin_prediction, end_prediction],
            as_numpy=False)
        #        print(g[begin_prediction], g[end_prediction])
        other_input_map = {
            begin_prediction: g[begin_prediction],
            end_prediction: g[end_prediction]
        }
        span = predicted_span.eval((other_input_map))
        #        print(span)

        span_out = np.asarray(span).reshape(-1).tolist()
        context_o = np.asarray(out[context]).reshape(-1).tolist()
        predict_answer = []
        for i in range(len(span_out)):
            if (span_out[i] == 1):
                predict_answer.append(context_o[i])

#       pred_out = np.asarray(out[context]).reshape(-1).tolist()
#       predict_answer = pred_out[span_begin:span_end+1]
        if cnt < 10:

            #print(predict_answer)
            print(format_true_sequences(predict_answer, i2w, polymath))
            print('\n')
        cnt += 1
        true_text = format_true_sequences(
            np.asarray(true).reshape(-1).tolist(), i2w, polymath)
        predout_text = format_predict_sequences(
            np.asarray(out[testout]).reshape(-1), predict_answer, i2w,
            polymath)
        testloss = out[loss]
        stat += RL.calc_score(predout_text, true_text)

        loss_sum += np.sum(np.asarray(testloss))
        num_sequences += data[onehot].num_sequences

    loss_avg = loss_sum / num_sequences
    stat_avg = stat / float(num_sequences)
    print(
        "Validated {} sequences, loss {:.4f}, RouL {:.4f}, LCS {:.4f}, LengCan {:.4f}, LenRef {:.4f}, prec {:.4f}, rec {:.4f}"
        .format(num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2],
                stat_avg[3], stat_avg[4], stat_avg[5]))

    return loss_avg
Example #22
def _convert_optimized_rnnstack(root_func, map_param_to_func):
    '''
    Internal implementation that converts a root_func containing cuDNN optimized_rnnstacks to use non-cuDNN functions, so the graph can be used in a non-CUDA environment.

    Args:
        root_func: a root function of a graph that contains optimized_rnnstacks
        map_param_to_func: a mapping of converted rnn functions for parameter sharing
    Returns:
        the converted root_func, using a GEMM-based RNN implementation that can run on CPU
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func,
        lambda x: type(x) == C.Function and x.root_function.is_block,
        depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func,
            lambda x: type(x) == C.Function and x.root_function.is_block,
            depth=0)
        # assuming depth_first_search order to be stable, so use the old index on new search results
        block = blocks1[i]
        block_root = C.as_composite(block.block_root)
        new_block_root = _convert_optimized_rnnstack(block_root,
                                                     map_param_to_func)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments,
                                    new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg,
                                                 block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping,
                                   block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all(
                [x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [
                    new_block.outputs[block.outputs.index(x)]
                    if x in block.outputs else None for x in root_func.outputs
                ]
                root_func_nonreplaced = C.combine(
                    [x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all optimized_rnnstack instances in root_func
    cudnn_rnns = C.logging.graph.depth_first_search(
        root_func,
        lambda x: type(x) == C.Function and x.root_function.op_name ==
        'OptimizedRNNStack',
        depth=0)
    for cudnn_rnn in cudnn_rnns:
        param = cudnn_rnn.parameters[0]
        if map_param_to_func[param]:
            #shared parameter, clone
            converted = map_param_to_func[param][0].clone(
                C.CloneMethod.share, {
                    map_param_to_func[param][1]: cudnn_rnn.inputs[0],
                    map_param_to_func[param][2]: C.placeholder()
                })
        else:
            #unique or first parameter, convert
            converted = _from_optimized_rnnstack(cudnn_rnn)
            map_param_to_func[param] = (
                converted,
                cudnn_rnn.inputs[0],
                cudnn_rnn.output,
            )

        if not cudnn_rnn.output in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share,
                                        {cudnn_rnn.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([
                    converted if x == cudnn_rnn.output else x
                    for x in root_func.outputs
                ])
            else:
                root_func = converted

    return root_func
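
A plausible entry point for the internal helper above (a sketch, not necessarily the module's actual public API): map_param_to_func should act like a dictionary that yields a falsy value for unseen parameters, so converted RNN functions can be shared across optimized_rnnstack nodes tied to the same parameter.

from collections import defaultdict

def convert_optimized_rnnstack(model):
    # one shared mapping for the whole graph so that parameter-tied RNNs are cloned, not rebuilt
    return _convert_optimized_rnnstack(model, defaultdict(lambda: None))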
Example #23
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and, in the clone, substitutes every
    Function selected by 'filter' with a new Function obtained by calling the specified 'converter'.

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda that selects the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func,
        lambda x: type(x) == C.Function and x.root_function.is_block,
        depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func,
            lambda x: type(x) == C.Function and x.root_function.is_block,
            depth=0)
        # assuming depth_first_search order to be stable, so use the old index on new search results
        block = blocks1[i]
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments,
                                    new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg,
                                                 block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping,
                                   block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all(
                [x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [
                    new_block.outputs[block.outputs.index(x)]
                    if x in block.outputs else None for x in root_func.outputs
                ]
                root_func_nonreplaced = C.combine(
                    [x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func,
                                                              filter,
                                                              depth=0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:
            root_func = root_func.clone(
                C.CloneMethod.share,
                {function_to_convert.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([
                    converted if x == function_to_convert.output else x
                    for x in root_func.outputs
                ])
            else:
                root_func = converted

    return root_func
Example #24
def validate_model(test_data, model, polymath):
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root,
                                             test_data,
                                             polymath,
                                             randomize=False,
                                             repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')

    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label -
                                            C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(
        C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2 * common_len / (predicted_len + true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len / predicted_len
    recall = common_len / true_len
    overlap = C.greater(common_len, 0)
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall),
                     s(overlap), s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 2048
    num_sequences = 0

    stat_sum = 0
    loss_sum = 0

    with tqdm(ncols=32) as progress_bar:
        while True:
            data = mb_source.next_minibatch(minibatch_size,
                                            input_map=input_map)
            if not data or begin_label not in data or data[begin_label].num_sequences == 0:
                break
            out = model.eval(data,
                             outputs=[begin_logits, end_logits, loss],
                             as_numpy=False)
            testloss = out[loss]
            g = best_span_score.grad(
                {
                    begin_prediction: out[begin_logits],
                    end_prediction: out[end_logits]
                },
                wrt=[begin_prediction, end_prediction],
                as_numpy=False)
            other_input_map = {
                begin_prediction: g[begin_prediction],
                end_prediction: g[end_prediction],
                begin_label: data[begin_label],
                end_label: data[end_label]
            }
            stat_sum += stats.eval((other_input_map))
            loss_sum += np.sum(testloss.asarray())
            num_sequences += data[begin_label].num_sequences
            progress_bar.update(data[begin_label].num_sequences)

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print(
        "\nValidated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}"
        .format(num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2],
                stat_avg[3], stat_avg[4], stat_avg[5], stat_avg[6]))

    return loss_avg
Example #25
def streaming_inference(model_path,
                        model_file,
                        config_file,
                        port="8889",
                        is_test=1):
    polymath = PolyMath(config_file)
    model = C.load_model(
        os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    Flag = True

    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://*:8889")

    while True:
        message = socket.recv()
        question_str, context_str = pickle.loads(message)

        line = "1102432\tDESCRIPTION\t" + context_str + "\t" + question_str
        data = streaming_create_tsv_reader(loss,
                                           line,
                                           polymath,
                                           batch_size,
                                           1,
                                           is_test=True,
                                           misc=misc)
        out = model.eval(data,
                         outputs=[begin_logits, end_logits, loss],
                         as_numpy=False)
        g = best_span_score.grad(
            {
                begin_prediction: out[begin_logits],
                end_prediction: out[end_logits]
            },
            wrt=[begin_prediction, end_prediction],
            as_numpy=False)
        other_input_map = {
            begin_prediction: g[begin_prediction],
            end_prediction: g[end_prediction]
        }
        span = predicted_span.eval((other_input_map))
        #print("just before for {}".format(misc['ctoken']))
        seq = 0
        raw_text, ctokens, answer, uid = misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid']
        #print("just AFTER for {}".format(ctokens))
        seq_where = np.argwhere(span[seq])[:, 0]
        span_begin = np.min(seq_where)
        span_end = np.max(seq_where)
        #print("before predict")
        predict_answer = get_answer(raw_text[0], ctokens[0], span_begin,
                                    span_end)
        # results['query_id'] = int(uid)
        result = (question_str, predict_answer)
        socket.send(pickle.dumps(result))
Example #26
 def _get(f, attr=None):
     return C.as_composite(f.owner).eval(data)[f]
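
A short sketch of how such a helper might be used (hypothetical; 'model' is the composed Function and 'data' the argument-to-minibatch map captured by the closure):

hidden = C.logging.find_by_name(model, 'hidden')  # 'hidden' is a placeholder node name
value = _get(hidden.output)                       # as_composite(...).eval(data) on that node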
Example #27
def streaming_inference(line,
                        model_path,
                        model_file,
                        config_file,
                        port="8889",
                        is_test=1):
    polymath = PolyMath(config_file)
    model = C.load_model(
        os.path.join(model_path, model_file if model_file else model_name))
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    Flag = True
    while Flag:
        # try:
        if True:
            data = streaming_create_tsv_reader(loss,
                                               line,
                                               polymath,
                                               batch_size,
                                               1,
                                               is_test=True,
                                               misc=misc)
            out = model.eval(data,
                             outputs=[begin_logits, end_logits, loss],
                             as_numpy=False)
            g = best_span_score.grad(
                {
                    begin_prediction: out[begin_logits],
                    end_prediction: out[end_logits]
                },
                wrt=[begin_prediction, end_prediction],
                as_numpy=False)
            other_input_map = {
                begin_prediction: g[begin_prediction],
                end_prediction: g[end_prediction]
            }
            span = predicted_span.eval((other_input_map))
            print("just before for {}".format(misc['ctoken']))
            seq = 0
            raw_text, ctokens, answer, uid = misc['rawctx'], misc['ctoken'], misc['answer'], misc['uid']
            print("just AFTER for {}".format(ctokens))
            seq_where = np.argwhere(span[seq])[:, 0]
            span_begin = np.min(seq_where)
            span_end = np.max(seq_where)
            print("before predict")
            predict_answer = get_answer(raw_text[0], ctokens[0], span_begin,
                                        span_end)
            # results['query_id'] = int(uid)
            result = predict_answer
            print(result)
        # except:
        #     import pdb
        #     pdb.set_trace()

        Flag = False
Example #28
def test(i2w, test_data, model_path, model_file, config_file):
    #C.try_set_default_device(C.cpu())
    polymath = PolyMath(config_file)
    print(test_data, model_path, model_file, model_name)
    print(os.path.join(model_path, model_file))
    model = C.Function.load(
        os.path.join(model_path, model_file if model_file else model_name))
    print(model)
    output = model.outputs[1]
    #    loss         = model.outputs[5]
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    #  loss = model.outputs[5]
    root = C.as_composite(output.owner)

    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=start_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)

    batch_size = 1  # in sequences
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    tsv_reader = create_tsv_reader(root,
                                   test_data,
                                   polymath,
                                   batch_size,
                                   1,
                                   is_test=True,
                                   misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w',
              encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(
                data,
                outputs=[output, start_logits, end_logits, context],
                as_numpy=False)
            g = best_span_score.grad(
                {
                    begin_prediction: out[start_logits],
                    end_prediction: out[end_logits]
                },
                wrt=[begin_prediction, end_prediction],
                as_numpy=False)
            other_input_map = {
                begin_prediction: g[begin_prediction],
                end_prediction: g[end_prediction]
            }
            span = predicted_span.eval((other_input_map))
            for seq, (raw_text, ctokens, answer, uid) in enumerate(
                    zip(misc['rawctx'], misc['ctoken'], misc['answer'],
                        misc['uid'])):
                #           g = best_span_score.grad({begin_prediction:out[start_logits], end_prediction:out[end_logits]}, wrt=[begin_prediction,end_prediction], as_numpy=False)

                #          other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction]}
                #         span = predicted_span.eval((other_input_map))
                seq_where = np.argwhere(span[seq])[:, 0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin,
                                            span_end)
                #       span_out = np.asarray(span).reshape(-1).tolist()
                #       context_o = np.asarray(out[context]).reshape(-1).tolist()
                #       predict_answer = []
                #       for i in range(len(span_out)):
                #           if(span_out[i]==1):
                #               predict_answer.append(context_o[i])
                print(predict_answer)
                final_answer = format_output_sequences(
                    np.asarray(out[output].as_sequences()).reshape(-1),
                    predict_answer, i2w, polymath)
                results['query_id'] = int(uid)
                results['answers'] = [final_answer]
                print(results)
                json.dump(results, json_output)
                json_output.write("\n")
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
Example #29
def main(base_folder,
         output_dir,
         training_mode='majority',
         learning_rate=0.05,
         momentum_rate=0.9,
         l2_reg_weight=0.0,
         model_name='VGG13',
         max_epochs=100):

    # create the model
    num_classes = len(emotion_table)
    model = build_model(num_classes, model_name)

    # set the input variables.
    input_var = C.input_variable((1, model.input_height, model.input_width),
                                 np.float32)
    label_var = C.input_variable((num_classes), np.float32)

    # read FER+ dataset.
    train_params = FERPlusParameters(num_classes, model.input_height,
                                     model.input_width, training_mode, False)
    test_and_val_params = FERPlusParameters(num_classes, model.input_height,
                                            model.input_width, "majority",
                                            True)

    train_data_reader = FERPlusReader.create(base_folder, train_folders,
                                             "label.csv", train_params)
    val_data_reader = FERPlusReader.create(base_folder, valid_folders,
                                           "label.csv", test_and_val_params)
    test_data_reader = FERPlusReader.create(base_folder, test_folders,
                                            "label.csv", test_and_val_params)

    # print summary of the data.
    display_summary(train_data_reader, val_data_reader, test_data_reader)

    # get the probabilistic output of the model.
    z = model.model(input_var)
    pred = C.softmax(z)

    epoch_size = train_data_reader.size()
    minibatch_size = 32

    # Training config
    lr_per_minibatch = [learning_rate] * 20 + [learning_rate / 2.0] * 20 + [
        learning_rate / 10.0
    ]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           unit=C.UnitType.minibatch,
                                           epoch_size=epoch_size)
    mm_schedule = C.momentum_schedule(momentum_rate,
                                      minibatch_size=minibatch_size)

    # loss and error cost
    train_loss = cost_func(training_mode, pred, label_var)
    pe = C.classification_error(z, label_var)

    # construct the trainer
    learner = C.momentum_sgd(z.parameters,
                             lr_schedule,
                             mm_schedule,
                             l2_regularization_weight=l2_reg_weight)

    # Construct the distributed learner
    distributed_learner = C.train.distributed.data_parallel_distributed_learner(
        learner)

    num_partitions = C.train.distributed.Communicator.num_workers()
    partition = C.train.distributed.Communicator.rank()

    progress_printer = C.logging.ProgressPrinter(freq=50,
                                                 tag='Training',
                                                 rank=partition,
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (train_loss, pe), distributed_learner,
                        progress_printer)

    # Get minibatches of images to train with and perform model training
    max_val_accuracy = 0.0
    final_test_accuracy = 0.0
    best_test_accuracy = 0.0

    epoch = 0
    best_epoch = 0
    while epoch < max_epochs:
        train_data_reader.reset()
        val_data_reader.reset()
        test_data_reader.reset()

        # Training
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(
                minibatch_size,
                num_data_partitions=num_partitions,
                partition_index=partition)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var: images, label_var: labels})

            # keep track of statistics.
            training_loss += trainer.previous_minibatch_loss_average * current_batch_size
            training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size

        training_accuracy /= train_data_reader.size()
        training_accuracy = 1.0 - training_accuracy

        trainer.summarize_training_progress()

        # Validation
        val_accuracy = 0
        while val_data_reader.has_more():
            images, labels, current_batch_size = val_data_reader.next_minibatch(
                minibatch_size,
                num_data_partitions=num_partitions,
                partition_index=partition)
            val_accuracy += trainer.test_minibatch({
                input_var: images,
                label_var: labels
            }) * current_batch_size

        val_accuracy /= val_data_reader.size()
        val_accuracy = 1.0 - val_accuracy

        trainer.summarize_test_progress()

        # if validation accuracy improves, checkpoint the model and compute test accuracy
        test_run = False
        if val_accuracy > max_val_accuracy:
            best_epoch = epoch
            max_val_accuracy = val_accuracy

            trainer.save_checkpoint(
                os.path.join(output_dir, "model_{}".format(best_epoch)))

            test_run = True
            test_accuracy = 0
            while test_data_reader.has_more():
                images, labels, current_batch_size = test_data_reader.next_minibatch(
                    minibatch_size,
                    num_data_partitions=num_partitions,
                    partition_index=partition)
                test_accuracy += trainer.test_minibatch({
                    input_var: images,
                    label_var: labels
                }) * current_batch_size

            trainer.summarize_test_progress()

            test_accuracy /= test_data_reader.size()
            test_accuracy = 1.0 - test_accuracy
            final_test_accuracy = test_accuracy
            if final_test_accuracy > best_test_accuracy:
                best_test_accuracy = final_test_accuracy

        epoch += 1

    # Export the best checkpointed model to ONNX format; only save on the master process
    if C.train.distributed.Communicator.is_main():
        best_model = C.Function.load(
            os.path.join(output_dir, "model_{}".format(best_epoch)))
        inference_model = C.as_composite(best_model.outputs[0].owner)
        # or possibly:
        # inference_model = C.as_composite(best_model[0].owner)
        print(inference_model)
        inference_model.save(os.path.join(output_dir, "model.onnx"),
                             format=C.ModelFormat.ONNX)
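A quick way to sanity-check the export (not part of the original script) is to reload the ONNX file with CNTK's ONNX loader and push a dummy image through it. The sketch below continues the names of the script above; the single-channel 64x64 input shape and the assumption that the exported graph ends in raw scores are guesses, not facts from the source.

import os
import numpy as np
import cntk as C

# Hedged sketch: reload the exported model and evaluate one all-zero image.
# The (1, 64, 64) grayscale input shape is an assumption about the FER+ model.
reloaded = C.Function.load(os.path.join(output_dir, "model.onnx"),
                           format=C.ModelFormat.ONNX)
dummy = np.zeros((1, 64, 64), dtype=np.float32)
# apply softmax to the logits (the exported graph is assumed to end in raw scores)
probs = C.softmax(reloaded).eval({reloaded.arguments[0]: [dummy]})
print(probs)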
Example #30
def validate_model(test_data, model, polymath):
    begin_logits = model.outputs[0]
    end_logits   = model.outputs[1]
    loss         = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label   = argument_by_name(root, 'ae')

    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2*common_len/(predicted_len+true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len/predicted_len
    recall = common_len/true_len
    overlap = C.greater(common_len, 0)
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall), s(overlap), s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 20000
    num_sequences = 0

    stat_sum = 0
    loss_sum = 0

    while True:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (begin_label in data) or data[begin_label].num_sequences == 0:
            break
        out = model.eval(data, outputs=[begin_logits,end_logits,loss], as_numpy=False)
        testloss = out[loss]
        g = best_span_score.grad({begin_prediction:out[begin_logits], end_prediction:out[end_logits]}, wrt=[begin_prediction,end_prediction], as_numpy=False)
        other_input_map = {begin_prediction: g[begin_prediction], end_prediction: g[end_prediction], begin_label: data[begin_label], end_label: data[end_label]}
        stat_sum += stats.eval(other_input_map)
        loss_sum += np.sum(testloss.asarray())
        num_sequences += data[begin_label].num_sequences

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print("Validated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}".format(
            num_sequences,
            loss_avg,
            stat_avg[0],
            stat_avg[1],
            stat_avg[2],
            stat_avg[3],
            stat_avg[4],
            stat_avg[5],
            stat_avg[6]))

    return loss_avg
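The span statistics above reduce to plain overlap arithmetic: Recurrence(plus) over begin - past_value(end) turns one-hot begin/end markers into a 0/1 span mask, and F1 is 2*|overlap| / (|predicted| + |true|). A purely illustrative numpy sketch of the same computation on a single 6-token sequence (the values below are made up, not from the original code):

import numpy as np

pred_begin = np.array([0, 1, 0, 0, 0, 0], dtype=np.float32)
pred_end   = np.array([0, 0, 0, 1, 0, 0], dtype=np.float32)
true_begin = np.array([0, 0, 1, 0, 0, 0], dtype=np.float32)
true_end   = np.array([0, 0, 0, 0, 1, 0], dtype=np.float32)

def span_mask(begin, end):
    # cumulative sum of (begin - end shifted right by one) is 1 inside the span
    # and 0 outside -- the numpy analogue of Recurrence(plus)(begin - past_value(end))
    return np.cumsum(begin - np.concatenate(([0.0], end[:-1])))

pred_span = span_mask(pred_begin, pred_end)            # covers tokens 1..3
true_span = span_mask(true_begin, true_end)            # covers tokens 2..4
common = np.minimum(pred_span, true_span).sum()        # 2 overlapping tokens
f1 = 2 * common / (pred_span.sum() + true_span.sum())  # 2*2 / (3+3) = 0.667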
Example #31
def user_matmul(left, right, shape=None, stop_gradients=False, name=''):
    return ct.as_composite(matmul(left, right, shape, stop_gradients),
                           name=name)
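A hedged usage sketch, not from the original source: `ct` is assumed to be the module's cntk alias, and since the module's own `matmul` helper is not shown here, a plain `times` op stands in for it. The point of the wrapper is simply that the product shows up in the graph as one nameable composite.

import cntk as ct

a = ct.input_variable((2, 3))
b = ct.input_variable((3, 4))
# stand-in for the module's own matmul helper, assumed to behave like a times op
prod = ct.as_composite(ct.times(a, b), name='my_matmul')
print(prod.name)            # expected: 'my_matmul'
print(prod.output.shape)    # expected: (2, 4)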
Example #32
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and, in the clone, substitutes every
    Function selected by 'filter' with a new Function obtained by calling the
    specified 'converter'.

    Args:
        root_func: the root Function of the graph to be cloned and converted
        filter: a callable that selects the Functions to be converted
        converter: a callable that returns the substitute for each Function to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(
        root_func,
        lambda x: type(x) == C.Function and x.root_function.is_block,
        depth=0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(
            root_func,
            lambda x: type(x) == C.Function and x.root_function.is_block,
            depth=0)
        block = blocks1[i]  # assuming depth_first_search order is stable, so use the old index on the new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments,
                                    new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg,
                                                 block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping,
                                   block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all(
                [x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [
                    new_block.outputs[block.outputs.index(x)]
                    if x in block.outputs else None for x in root_func.outputs
                ]
                root_func_nonreplaced = C.combine(
                    [x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(
                    C.CloneMethod.share,
                    dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func,
                                                              filter,
                                                              depth=0)
    for i in range(len(functions_to_convert)):
        # The graph could be modified already by this function, so we need to rescan to the new set.
        functions_to_convert1 = C.logging.graph.depth_first_search(root_func,
                                                                   filter,
                                                                   depth=0)
        # We are using a filter passed in by the caller, so once a function is converted we may not
        # get the same number of functions again; pick the correct index based on the size of the new list.
        index = 0
        if len(functions_to_convert) > len(functions_to_convert1):
            assert (len(functions_to_convert) - len(functions_to_convert1) == i
                    )  # Only one conversion at a time.
            # index = 0 will work for this case, we are picking the first function from the new list.
        elif len(functions_to_convert) == len(functions_to_convert1):
            index = i  # here we pick the current index of the for loop.
        else:
            raise RuntimeError(
                "The conversion adds another possible conversion(s). Stopping infinite conversions."
            )

        function_to_convert = functions_to_convert1[index]
        converted = converter(function_to_convert)

        if function_to_convert.output not in root_func.outputs:
            root_func = root_func.clone(
                C.CloneMethod.share,
                {function_to_convert.output: converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([
                    converted if x == function_to_convert.output else x
                    for x in root_func.outputs
                ])
            else:
                root_func = converted

    return root_func
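As a hedged usage sketch (the toy model, the Tanh filter, and the ReLU converter below are assumptions, not taken from the original source), convert can swap out individual ops while sharing all parameters with the original graph:

import cntk as C

x = C.input_variable(4)
model = C.tanh(C.layers.Dense(8)(x))

# clone the graph, substituting ReLU for every Tanh node
converted = convert(
    model,
    filter=lambda f: isinstance(f, C.Function) and f.op_name == 'Tanh',
    converter=lambda f: C.relu(f.inputs[0]))
print(converted)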
Example #33
def _get(f, attr=None):
    # `f` is an output variable and `data` comes from the enclosing scope;
    # as_composite wraps the node that produced `f` so it can be evaluated on its own.
    return C.as_composite(f.owner).eval(data)[f]
Example #34
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and, in the clone, substitutes every
    Function selected by 'filter' with a new Function obtained by calling the
    specified 'converter'.

    Args:
        root_func: the root Function of the graph to be cloned and converted
        filter: a callable that selects the Functions to be converted
        converter: a callable that returns the substitute for each Function to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order is stable, so use the old index on the new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for i in range(len(functions_to_convert)):
        # The graph could be modified already by this function, so we need to rescan to the new set.
        functions_to_convert1 = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
        # We are using a filter passed in by the caller, so once a function is converted we may not
        # get the same number of functions again; pick the correct index based on the size of the new list.
        index = 0
        if len(functions_to_convert) > len(functions_to_convert1):
            assert(len(functions_to_convert) - len(functions_to_convert1) == i) # Only one conversion at a time.
            # index = 0 will work for this case, we are picking the first function from the new list.
        elif len(functions_to_convert) == len(functions_to_convert1):
            index = i # here we pick the current index of the for loop.
        else:
            raise RuntimeError("The conversion adds another possible conversion(s). Stopping infinite conversions.")

        function_to_convert = functions_to_convert1[index]
        converted = converter(function_to_convert)

        if function_to_convert.output not in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func