Exemplo n.º 1
def test_model_not_criterion_subset():
    """Run one training step whose criterion covers more than the trainer's model.

    Two heads share a single ``Embedding``. The trainer receives the first head
    as its model, while the loss mixes the cross-entropies of both heads, so
    the model is not the whole graph the criterion depends on.
    """
    input_dim, proj_dim = 2, 11
    model1_dim, model2_dim = 3, 4
    x = input_variable((input_dim,))

    shared_core = Embedding(proj_dim)

    # Head 1 works on sequence.last(...) and its label uses the batch axis only.
    head1 = Dense(model1_dim)(sequence.last(shared_core(x)))
    head1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()])
    head1_ce = cross_entropy_with_softmax(head1, head1_label)
    head1_pe = classification_error(head1, head1_label)

    # Head 2 works on the full output of the shared embedding.
    head2 = Dense(model2_dim)(shared_core(x))
    head2_label = input_variable((model2_dim,))
    head2_ce = cross_entropy_with_softmax(head2, head2_label)
    head2_pe = classification_error(head2, head2_label)  # built, but not handed to the trainer

    # Equal-weight mix; the loss of head 2 is summed over the sequence first.
    combined_ce = 0.5 * sequence.reduce_sum(head2_ce) + 0.5 * head1_ce

    schedule = learning_rate_schedule(0.003, UnitType.sample)
    learner = sgd(combined_ce.parameters, lr=schedule)
    trainer_multitask = Trainer(head1, (combined_ce, head1_pe), learner)

    # Each array is wrapped in a list, i.e. a minibatch with a single entry.
    features = np.asarray([[2., 1.], [1., 2.]], np.float32)
    head1_targets = np.asarray([1., 0., 0.], np.float32)
    head2_targets = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({
        x: [features],
        head1_label: [head1_targets],
        head2_label: [head2_targets],
    })
Exemplo n.º 2
def test_model_not_criterion_subset():
    """Run one training step whose criterion covers more than the trainer's model.

    Two heads share a single ``Embedding``. The trainer receives ``model1`` as
    its model, while the loss ``ce`` mixes the cross-entropies of both heads,
    so the model is not the whole graph the criterion depends on.
    """
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input((input_dim, ))

    core = Embedding(proj_dim)

    # Head 1: works on the last step of the embedded sequence.
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input((model1_dim, ))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    # Head 2: works on the whole embedded sequence.
    model2 = Dense(model2_dim)(core(x))
    model2_label = sequence.input((model2_dim, ))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)  # built, but not handed to the trainer

    # Equal-weight mix; the loss of head 2 is summed over the sequence first.
    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1),
                                sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]],
                                   np.float32)
    # Each array is wrapped in a list, i.e. a minibatch with a single entry.
    trainer_multitask.train_minibatch({
        x: [x_data],
        model1_label: [model1_label_data],
        model2_label: [model2_label_data]
    })
Exemplo n.º 3
def test_trainer(tmpdir, no_eval_function):
    """Train one minibatch and check the types exposed by the ``Trainer``.

    Args:
        tmpdir: Temporary directory object supporting ``/``; the checkpoint
            file is written inside it.
        no_eval_function: When true, the trainer is built without an
            evaluation function (``None``); otherwise ``classification_error``
            is used.
    """
    in1 = input(shape=(1, ))
    labels = input(shape=(1, ))
    p = parameter(shape=(2, ), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    # The evaluation function is optional; without the else branch the None
    # case would be overwritten unconditionally.
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs), [
        momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)
    ])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    # Round-trip the trainer state through a checkpoint file so that the
    # checkpoint path is actually exercised.
    checkpoint_path = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(checkpoint_path)
    trainer.restore_from_checkpoint(checkpoint_path)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
def test_trainer(tmpdir, no_eval_function):
    """Train one minibatch, round-trip a checkpoint and check exposed types.

    Args:
        tmpdir: Temporary directory object supporting ``/``; the checkpoint
            file is written inside it.
        no_eval_function: When true, the trainer is built without an
            evaluation function (``None``); otherwise ``classification_error``
            is used.
    """
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    # The evaluation function is optional; without the else branch the None
    # case would be overwritten unconditionally.
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    # External state saved with the checkpoint must come back unchanged.
    checkpoint_path = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state": math.pi, "nested dict": {"a": "b"}, "list": [1, 2, 3]}
    trainer.save_checkpoint(checkpoint_path, external_state)
    restored_state = trainer.restore_from_checkpoint(checkpoint_path)

    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Exemplo n.º 5
def test_trainer(tmpdir, no_eval_function):
    """Train one minibatch and check the types exposed by the ``Trainer``.

    Args:
        tmpdir: Temporary directory object supporting ``/``; the checkpoint
            file is written inside it.
        no_eval_function: When true, the trainer is built without an
            evaluation function (``None``); otherwise ``classification_error``
            is used.
    """
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    # The evaluation function is optional; without the else branch the None
    # case would be overwritten unconditionally.
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
            [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    # Round-trip the trainer state through a checkpoint file so that the
    # checkpoint path is actually exercised.
    checkpoint_path = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(checkpoint_path)
    trainer.restore_from_checkpoint(checkpoint_path)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
Exemplo n.º 6
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg):
    """Return the sum of the normalized classification and bbox regression losses.

    Each loss is built on placeholders, summed, scaled by
    ``1.0 / cfg.NUM_ROI_PROPOSALS`` and wrapped with ``cntk.as_block`` that
    binds the placeholders to the real inputs. ``rois`` is accepted but not
    used by this function.
    """
    # --- classification branch ---
    score_slot = placeholder()
    label_slot = placeholder()
    raw_cls_loss = cross_entropy_with_softmax(score_slot, label_slot, axis=1)
    cls_scale = 1.0 / cfg.NUM_ROI_PROPOSALS
    scaled_cls_loss = reduce_sum(raw_cls_loss) * cls_scale

    cls_bindings = [(score_slot, cls_score), (label_slot, label_targets)]
    cls_block = cntk.as_block(scaled_cls_loss, cls_bindings,
                              'CrossEntropyWithSoftmax', 'norm_cls_loss')

    # --- regression branch ---
    pred_slot = placeholder()
    target_slot = placeholder()
    weight_slot = placeholder()
    raw_bbox_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, pred_slot, target_slot, weight_slot, 1.0)
    bbox_scale = 1.0 / cfg.NUM_ROI_PROPOSALS
    scaled_bbox_loss = reduce_sum(raw_bbox_loss) * bbox_scale

    bbox_bindings = [
        (pred_slot, bbox_pred),
        (target_slot, bbox_targets),
        (weight_slot, bbox_inside_weights),
    ]
    bbox_block = cntk.as_block(scaled_bbox_loss, bbox_bindings,
                               'SmoothL1Loss', 'norm_bbox_loss')

    return plus(cls_block, bbox_block, name="detection_losses")
Exemplo n.º 7
def test_trainer_with_some_params_not_learned():
    """Check that only parameters handed to the learner are updated by training.

    The model is ``times(x, W) + B`` but the learner is created for ``[W]``
    only, so after every training step ``B`` must be unchanged and ``W`` must
    differ from its previous value.
    """
    input_dim = 2
    proj_dim = 2
    x = input_variable(shape=(input_dim,))
    W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform())
    B = parameter(shape=(proj_dim,), init=glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = input_variable(shape=(proj_dim,))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample))

    x_value = [[1, 1], [2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for _ in range(num_iters):
        # Without a training step the "W changed" assertion below cannot hold.
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        # Compare against the previous step's value on the next iteration.
        W_orig_value = W.value

Exemplo n.º 8
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg):
    """Build the combined detection loss (classification + bbox regression).

    Both partial losses are defined on placeholders, reduced with
    ``reduce_sum``, multiplied by ``1.0 / cfg.NUM_ROI_PROPOSALS`` and wrapped
    in a ``cntk.as_block`` that maps the placeholders to the actual inputs.
    The ``rois`` argument is not used here.
    """
    # Classification part.
    ph_score = placeholder()
    ph_labels = placeholder()
    ce_loss = cross_entropy_with_softmax(ph_score, ph_labels, axis=1)
    ce_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    ce_normalized = reduce_sum(ce_loss) * ce_factor

    cls_block = cntk.as_block(
        ce_normalized,
        [(ph_score, cls_score), (ph_labels, label_targets)],
        'CrossEntropyWithSoftmax', 'norm_cls_loss')

    # Regression part.
    ph_pred = placeholder()
    ph_targets = placeholder()
    ph_weights = placeholder()
    l1_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, ph_pred, ph_targets, ph_weights, 1.0)
    l1_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    l1_normalized = reduce_sum(l1_loss) * l1_factor

    bbox_block = cntk.as_block(
        l1_normalized,
        [(ph_pred, bbox_pred), (ph_targets, bbox_targets), (ph_weights, bbox_inside_weights)],
        'SmoothL1Loss', 'norm_bbox_loss')

    total = plus(cls_block, bbox_block, name="detection_losses")
    return total
Exemplo n.º 9
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    """Return the sum of the normalized classification and bbox regression losses.

    The classification loss is divided by a term count derived from ``rois``;
    the bbox loss is divided by ``cfg["TRAIN"].BATCH_SIZE``. ``cfg`` is read
    from the enclosing module scope, not passed in.
    """
    # --- classification branch ---
    raw_cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    cls_loss_slot = placeholder()
    rois_slot = placeholder()
    # Count the terms of the classification loss from the per-roi sums.
    # NOTE(review): the intent stated upstream is to skip no-op (all-zero)
    # rois, yet greater_equal(..., 0.0) is also satisfied by a sum of exactly
    # 0 — confirm this is what is wanted.
    roi_sums = reduce_sum(rois_slot, axis=1)
    term_count = reduce_sum(cntk.greater_equal(roi_sums, 0.0))
    cls_scale = 1.0 / term_count
    scaled_cls_loss = reduce_sum(cls_loss_slot) * cls_scale

    cls_bindings = [(cls_loss_slot, raw_cls_loss), (rois_slot, rois)]
    cls_block = cntk.as_block(scaled_cls_loss, cls_bindings,
                              'Normalize', 'norm_cls_loss')

    # --- regression branch ---
    pred_slot = placeholder()
    target_slot = placeholder()
    weight_slot = placeholder()
    raw_bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, pred_slot, target_slot, weight_slot, 1.0)
    # Normalized by the configured batch size.
    bbox_scale = 1.0 / cfg["TRAIN"].BATCH_SIZE
    scaled_bbox_loss = reduce_sum(raw_bbox_loss) * bbox_scale

    bbox_bindings = [
        (pred_slot, bbox_pred),
        (target_slot, bbox_targets),
        (weight_slot, bbox_inside_weights),
    ]
    bbox_block = cntk.as_block(scaled_bbox_loss, bbox_bindings,
                               'SmoothL1Loss', 'norm_bbox_loss')

    return plus(cls_block, bbox_block, name="detection_losses")
Exemplo n.º 10
def create_bn_inception():
    """Build the BN-Inception network together with its loss and metrics.

    Returns:
        A dict with the input variables (``'feature'``, ``'label'``), the
        cross-entropy loss (``'ce'``), the classification error (``'pe'``),
        the ``topN=5`` classification error (``'pe5'``) and the network
        output (``'output'``).
    """
    # Input variables denoting the features and label data
    feature_var = input_variable((NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH))
    label_var = input_variable((NUM_CLASSES))

    bn_time_const = 4096
    z = bn_inception_cifar_model(feature_var, NUM_CLASSES, bn_time_const)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z,
    }
Exemplo n.º 11
def test_trainer_with_some_params_not_learned():
    """Check that only parameters handed to the learner are updated by training.

    The model is ``times(x, W) + B`` but the learner is created for ``[W]``
    only, so after every training step ``B`` must be unchanged and ``W`` must
    differ from its previous value.
    """
    input_dim = 2
    proj_dim = 2
    x = input(shape=(input_dim, ))
    W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform())
    B = parameter(shape=(proj_dim, ), init=glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = input(shape=(proj_dim, ))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample))

    x_value = [[1, 1], [2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for _ in range(num_iters):
        # Without a training step the "W changed" assertion below cannot hold.
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        # Compare against the previous step's value on the next iteration.
        W_orig_value = W.value

Exemplo n.º 12
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    """Build the combined detection loss (classification + bbox regression).

    The classification loss is scaled by the inverse of a term count computed
    from ``rois``; the bbox loss is scaled by the inverse of
    ``cfg["TRAIN"].BATCH_SIZE``. ``cfg`` comes from the enclosing module scope.
    """
    # Classification part.
    ce_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    ph_ce_loss = placeholder()
    ph_rois = placeholder()
    # Per-roi sums drive the number of terms used for normalization.
    # NOTE(review): upstream intent is to ignore no-op (all-zero) rois, but
    # greater_equal(..., 0.0) also holds for a sum of exactly 0 — confirm.
    per_roi_sum = reduce_sum(ph_rois, axis=1)
    n_terms = reduce_sum(cntk.greater_equal(per_roi_sum, 0.0))
    ce_factor = 1.0 / n_terms
    ce_normalized = reduce_sum(ph_ce_loss) * ce_factor

    cls_block = cntk.as_block(
        ce_normalized,
        [(ph_ce_loss, ce_loss), (ph_rois, rois)],
        'Normalize', 'norm_cls_loss')

    # Regression part.
    ph_pred = placeholder()
    ph_targets = placeholder()
    ph_weights = placeholder()
    l1_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, ph_pred, ph_targets, ph_weights, 1.0)
    # Normalized by the configured batch size.
    l1_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
    l1_normalized = reduce_sum(l1_loss) * l1_factor

    bbox_block = cntk.as_block(
        l1_normalized,
        [(ph_pred, bbox_pred), (ph_targets, bbox_targets), (ph_weights, bbox_inside_weights)],
        'SmoothL1Loss', 'norm_bbox_loss')

    total = plus(cls_block, bbox_block, name="detection_losses")
    return total
    def _setup_test_model(self, *args, **kwargs):
        """Create a one-unit dense model with a cross-entropy loss for tests.

        Positional and keyword arguments are accepted but ignored.

        Returns:
            A dict with the keys ``'inputs'`` (placeholder), ``'outputs'``
            (float32 input variable), ``'f'`` (the dense function) and
            ``'loss'``.
        """
        model_in = placeholder(shape=(1, ))
        target = input_variable(shape=(1, ), dtype=np.float32)

        # Linear layer: no activation is applied.
        dense_out = Dense(1, activation=None)(model_in)
        criterion = cross_entropy_with_softmax(dense_out, target)

        return dict(inputs=model_in, outputs=target, f=dense_out, loss=criterion)
Exemplo n.º 14
def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones):
    """Train the network on the given samples and return its average test error.

    Args:
        checkpoint: Path of the trainer checkpoint file; training progress is
            logged to ``checkpoint + ".log"``.
        entrRuedas: Training features (converted to a float32 array).
        entrOperaciones: Training labels (converted to a float32 array).
        input_dim: Shape passed to the feature input variable and to
            ``crearRed``.
        num_output_classes: Shape passed to the label input variable and to
            ``crearRed``.
        testRuedas: Test features.
        testOperaciones: Test labels.

    Returns:
        The mean of ``trainer.test_minibatch`` over all complete test
        minibatches (0 when there is not a single complete one).

    Note:
        The number of epochs is read from the module-level name ``epocs``.
        Only complete minibatches of 100 samples are used, for both training
        and testing; a trailing partial minibatch is dropped.
    """
    minibatch_size = 100
    train_minibatches = len(entrOperaciones) // minibatch_size

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.25, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(log_to_file=checkpoint+".log", num_epochs=epocs)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    if os.path.isfile(checkpoint):
        # NOTE(review): the body of this branch was missing from the source
        # (a syntax error). Resuming from the existing checkpoint is the
        # presumed intent — confirm against the original file.
        trainer.restore_from_checkpoint(checkpoint)

    train_features = np.array(entrRuedas).astype(np.float32)
    train_labels = np.array(entrOperaciones).astype(np.float32)

    # One pass over the data per epoch.
    for _ in range(epocs):
        # Shuffle features and labels with the same permutation.
        order = np.random.permutation(len(entrRuedas))
        train_labels = train_labels[order]
        train_features = train_features[order]

        # Split the data into minibatches and train on each of them.
        for j in range(train_minibatches):
            batch = slice(j * minibatch_size, (j + 1) * minibatch_size)
            trainer.train_minibatch({feature: train_features[batch], label: train_labels[batch]})

    test_minibatches = len(testOperaciones) // minibatch_size
    avg_error = 0
    for j in range(test_minibatches):
        batch = slice(j * minibatch_size, (j + 1) * minibatch_size)
        test_features = np.array(testRuedas[batch]).astype(np.float32)
        test_labels = np.array(testOperaciones[batch]).astype(np.float32)
        # Accumulate the running mean of the per-minibatch test error.
        avg_error = avg_error + (trainer.test_minibatch(
            {feature: test_features, label: test_labels}) / test_minibatches)

    return avg_error
Exemplo n.º 15
 def create_trainer(use_sparse, device):
     """Build a ``times(input, w)`` model with a cross-entropy trainer.

     ``input_shape``, ``label_shape``, ``w_init`` and ``dev`` are not
     parameters; they are presumably provided by the enclosing scope. The
     ``device`` argument itself is not read.

     Returns:
         The tuple ``(input variable, label variable, weight, trainer)``.
     """
     features = C.sequence.input(shape=input_shape, is_sparse=use_sparse, name='input')
     weight = C.parameter(init=w_init, device=dev)
     model = times(features, weight)
     targets = C.sequence.input(shape=label_shape, is_sparse=use_sparse, name='label')
     criterion = cross_entropy_with_softmax(model, targets, axis=-1)
     # No evaluation function: the second element of the criterion is None.
     schedule = C.learning_rate_schedule(0.7, C.UnitType.sample)
     learner = C.sgd(model.parameters, lr=schedule)
     trainer = C.Trainer(model, (criterion, None), learner)
     return (features, targets, weight, trainer)
Exemplo n.º 16
 def create_trainer(use_sparse, device):
     """Build a ``times(input, w)`` model with a cross-entropy trainer.

     ``input_shape``, ``label_shape``, ``w_init`` and ``dev`` are not
     parameters; they are presumably provided by the enclosing scope. The
     ``device`` argument itself is not read.

     Returns:
         The tuple ``(input variable, label variable, weight, trainer)``.
     """
     features = C.sequence.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
     weight = C.parameter(init=w_init, device=dev)
     model = times(features, weight)
     targets = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
     criterion = cross_entropy_with_softmax(model, targets, axis=-1)
     # No evaluation function: the second element of the criterion is None.
     schedule = C.learning_rate_schedule(0.7, C.UnitType.sample)
     learner = C.sgd(model.parameters, lr=schedule)
     trainer = C.Trainer(model, (criterion, None), learner)
     return (features, targets, weight, trainer)
Exemplo n.º 17
def train_lm(training_file, epochs, max_num_minibatches):
    """Train the language model on ``training_file`` and save it after every epoch.

    Args:
        training_file: Path handed to ``load_data_and_vocab``.
        epochs: Number of epochs to run.
        max_num_minibatches: Upper bound on the total number of minibatches;
            it is split evenly across the epochs.

    Note:
        ``minibatch_size`` is read from the module scope. Models are written
        to ``models/shakespeare_epoch<N>.dnn``; the ``models`` directory is
        not created here.
    """

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)
    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    # The two clipping settings defined above are passed to the learner.
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence : features, label_sequence : labels}, mask)
            mask = [False]
            # Perform the actual parameter update for this minibatch.
            trainer.train_minibatch(arguments)

            # Periodically print a sample generated by the current model.
            global_minibatch = e*minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e+1)
        # Persist the model so the message below is true.
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
Exemplo n.º 18
def train_sequence_classifier(debug_output=False):
    """Build an LSTM sequence classifier and return its last evaluation and loss averages.

    NOTE(review): several statements in this function are visibly truncated
    (see the inline notes); as it stands the function does not parse. The
    missing text must be restored from the original file before use.

    Args:
        debug_output: When true, the progress output frequency is divided by 3.

    Returns:
        The tuple ``(evaluation_average, loss_average)``.
    """
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input(shape=input_dim, is_sparse=True)
    label = input(num_output_classes)

    # Instantiate the sequence classification model
    # NOTE(review): this call is cut off — its remaining arguments and the
    # closing parenthesis are missing. The signature of
    # LSTM_sequence_classifer_net is not visible here, so the argument order
    # cannot be confirmed.
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    embedding_dim, hidden_dim,

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    # Training data path, resolved relative to this file's directory.
    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    # Map the model inputs to the reader streams.
    # NOTE(review): the closing brace of this dict literal is missing.
    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        # NOTE(review): "/" yields a float on Python 3 — confirm that
        # print_training_progress accepts a non-integer frequency.
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        # NOTE(review): `mb` is fetched but never handed to the trainer in the
        # visible code; a training call appears to be missing here.
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    # NOTE(review): the argument of this copy.copy( call is missing.
    evaluation_average = copy.copy(
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train_model(debug_output=False):
    """Set up and run training of the image classifier built by ``modify_model``.

    NOTE(review): several statements in this function are visibly truncated
    (see the inline notes); as it stands the function does not parse. The
    missing text must be restored from the original file before use.

    ``map_file``, ``num_channels``, ``image_height``, ``image_width``,
    ``num_classes``, ``max_epochs``, ``epoch_size`` and ``mb_size`` are read
    from outside this function. ``debug_output`` is not read in the visible
    code.
    """
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    # NOTE(review): the closing brace of this dict literal is missing.
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels

    # Instantiate the Fast R-CNN prediction model and loss function
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    # NOTE(review): this call is cut off — the remaining arguments and the
    # closing parenthesis are missing. lr_schedule, mm_schedule and
    # l2_reg_weight are defined above but not used in the visible code;
    # presumably they belong here.
    learner = momentum_sgd(model.parameters,
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            # Never request more samples than remain in the current epoch.
            # NOTE(review): this call is cut off after the size argument, and
            # no training call on `data` is visible before the sample count
            # is read from the trainer.
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count),
            sample_count += trainer.previous_minibatch_sample_count

        # NOTE(review): the line below is the orphaned tail of a statement
        # whose beginning is missing — presumably a per-epoch model save.
                         'withcrops_{}.dnn'.format(epoch + 1)))

Exemplo n.º 20
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    """Create a small sequence model with an SGD trainer.

    ``input_dim`` is read from the module scope.

    Args:
        device: Device on which the parameter is created.
        writer: Optional progress writer(s) forwarded to the ``Trainer``.
        lr_per_sample: Learning-rate schedule handed to the SGD learner.

    Returns:
        The tuple ``(trainer, input variable, label variable)``.
    """
    shape = (input_dim,)
    features = sequence.input_variable(shape=shape)
    targets = sequence.input_variable(shape=shape)
    offset = parameter(shape=shape, init=10, device=device)
    # The model adds the summed parameter to the input.
    model = plus(features, reduce_sum(offset), name='z')
    loss = cross_entropy_with_softmax(model, targets)
    metric = classification_error(model, targets)

    sgd_learner = C.sgd(model.parameters, lr_per_sample)
    return (C.Trainer(model, (loss, metric), [sgd_learner], writer), features, targets)
Exemplo n.º 21
    def _setup_test_model(self, *args, **kwargs):
        """Create a one-unit dense model with a cross-entropy loss for tests.

        Positional and keyword arguments are accepted but ignored.

        Returns:
            A dict with the keys ``'inputs'`` (placeholder), ``'outputs'``
            (float32 input variable), ``'f'`` (the dense function) and
            ``'loss'``.
        """
        inputs = placeholder(shape=(1,))
        outputs = input_variable(shape=(1,), dtype=np.float32)

        # Linear layer: no activation is applied.
        q = Dense(1, activation=None)(inputs)
        loss = cross_entropy_with_softmax(q, outputs)

        return {
            'inputs': inputs,
            'outputs': outputs,
            'f': q,
            'loss': loss,
        }
Exemplo n.º 22
def create_sample_model(device, writer=None):
    """Create a small sequence model with an SGD trainer.

    ``input_dim`` is read from the module scope.

    Args:
        device: Device on which the parameter is created.
        writer: Optional progress writer(s) forwarded to the ``Trainer``.

    Returns:
        The tuple ``(trainer, input variable, label variable)``.
    """
    in1 = sequence.input(shape=(input_dim, ))
    labels = sequence.input(shape=(input_dim, ))
    p = parameter(shape=(input_dim, ), init=10, device=device)
    # The model adds the summed parameter to the input.
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    # NOTE(review): the unit argument was cut off in the source; per-sample
    # is taken from the variable name — confirm.
    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
def test_model_one_output_of_multi_output_function():
    """Create a trainer on one output of a block function that exposes two outputs."""
    input_dim, proj_dim = 2, 11
    x = C.input_variable((input_dim,))

    x_placeholder = C.placeholder()
    weights = parameter((input_dim, proj_dim))
    bias = parameter((proj_dim,))
    projection = times(x_placeholder, weights)
    biased_projection = projection + bias
    # Both projections are combined into a single block with two outputs.
    two_output_block = as_block(C.combine([projection, biased_projection]),
                                [(x_placeholder, x)], 'dense_op')

    labels = C.input_variable((proj_dim,))
    schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    # Loss, metric and trainer model all use the block's first output only.
    loss = cross_entropy_with_softmax(two_output_block.outputs[0], labels)
    metric = classification_error(two_output_block.outputs[0], labels)
    learner = C.sgd(loss.parameters, lr=schedule)
    trainer_multitask = C.Trainer(two_output_block.outputs[0], (loss, metric), learner)
Exemplo n.º 24
def test_model_one_output_of_multi_output_function():
    """Create a trainer on one output of a block function that exposes two outputs."""
    input_dim, proj_dim = 2, 11
    x = input((input_dim,))

    x_placeholder = placeholder()
    weights = parameter((input_dim, proj_dim))
    bias = parameter((proj_dim,))
    projection = times(x_placeholder, weights)
    biased_projection = projection + bias
    # Both projections are combined into a single block with two outputs.
    two_output_block = as_block(combine([projection, biased_projection]),
                                [(x_placeholder, x)], 'dense_op')

    labels = input((proj_dim,))
    schedule = learning_rate_schedule(0.003, UnitType.sample)
    # Loss, metric and trainer model all use the block's first output only.
    loss = cross_entropy_with_softmax(two_output_block.outputs[0], labels)
    metric = classification_error(two_output_block.outputs[0], labels)
    learner = sgd(loss.parameters, lr=schedule)
    trainer_multitask = Trainer(two_output_block.outputs[0], (loss, metric), learner)
def test_output_to_retain():
    """Check that an output requested from ``train_minibatch`` is returned with the expected values."""
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    offset = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(offset), name='z')
    loss = cross_entropy_with_softmax(z, labels)
    metric = classification_error(z, labels)
    momentum = C.momentum_as_time_constant_schedule(1100)
    lr = C.learning_parameter_schedule(0.007, minibatch_size=1)
    learner = C.momentum_sgd(z.parameters, lr, momentum, True)
    trainer = C.Trainer(z, (loss, metric), [learner])

    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    feed = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(feed, outputs=[z_output])
    # Two parameter entries initialised to 10 sum to 20, which is added to the input.
    expected = np.asarray(in1_value) + 20
    assert np.allclose(var_map[z_output], expected)
Exemplo n.º 26
def test_model_one_output_of_multi_output_function():
    """Create a trainer on one output of a block function that exposes two outputs."""
    input_dim, proj_dim = 2, 11
    x = input_variable((input_dim,))

    x_placeholder = placeholder_variable()
    weights = parameter((input_dim, proj_dim))
    bias = parameter((proj_dim,))
    projection = times(x_placeholder, weights)
    biased_projection = projection + bias
    # Both projections are combined into a single block with two outputs.
    two_output_block = as_block(combine([projection, biased_projection]),
                                [(x_placeholder, x)], 'dense_op')

    labels = input_variable((proj_dim,))
    schedule = learning_rate_schedule(0.003, UnitType.sample)
    # Loss, metric and trainer model all use the block's first output only.
    loss = cross_entropy_with_softmax(two_output_block.outputs[0], labels)
    metric = classification_error(two_output_block.outputs[0], labels)
    learner = sgd(loss.parameters, lr=schedule)
    trainer_multitask = Trainer(two_output_block.outputs[0], (loss, metric), learner)
Exemplo n.º 27
    def create_trainer(use_sparse, device):
        """Build a recurrent model ``times(a, w_i) + times(past_value, w_h)`` with a trainer.

        ``input_shape``, ``label_shape``, ``w_init_i``, ``w_init_h`` and
        ``dev`` are not parameters; they are presumably provided by the
        enclosing scope. The ``device`` argument itself is not read.

        Returns:
            The tuple ``(input variable, label variable, input weight,
            recurrent weight, trainer)``.
        """
        features = C.sequence.input(shape=input_shape, is_sparse=use_sparse, name='input')
        input_weight = C.parameter(init=w_init_i, device=dev)
        input_projection = times(features, input_weight)

        # The recurrent branch reads the past value of a placeholder.
        loop_slot = C.placeholder()
        previous = C.sequence.past_value(loop_slot)
        recurrent_weight = C.parameter(init=w_init_h, device=dev)
        recurrent_projection = times(previous, recurrent_weight)
        model = input_projection + recurrent_projection
        # Close the loop by substituting the model for its own placeholder.
        model = model.replace_placeholder(model)
        model = reshape(model, label_shape)

        targets = C.sequence.input(shape=label_shape, is_sparse=use_sparse, name='label')
        criterion = cross_entropy_with_softmax(model, targets, axis=-1)
        # No evaluation function: the second element of the criterion is None.
        schedule = C.learning_rate_schedule(0.7, C.UnitType.sample)
        learner = C.sgd(model.parameters, lr=schedule)
        trainer = C.Trainer(model, (criterion, None), learner)
        return (features, targets, input_weight, recurrent_weight, trainer)
Exemplo n.º 28
def test_output_to_retain():
    """Check that an output requested from ``train_minibatch`` is returned with the expected values."""
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    offset = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(offset), name='z')
    loss = cross_entropy_with_softmax(z, labels)
    metric = classification_error(z, labels)
    momentum = momentum_as_time_constant_schedule(1100)
    lr = learning_rate_schedule(0.007, UnitType.sample)
    learner = momentum_sgd(z.parameters, lr, momentum, True)
    trainer = Trainer(z, (loss, metric), [learner])

    in1_value = [[[1]], [[2]]]
    label_value = [[0], [1]]
    feed = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(feed, outputs=[z_output])
    # Two parameter entries initialised to 10 sum to 20, which is added to the input.
    expected = np.asarray(in1_value) + 20
    assert np.allclose(var_map[z_output], expected)
Exemplo n.º 29
    def create_trainer(use_sparse, device):
        """Build a recurrent model ``times(a, w_i) + times(past_value, w_h)`` with a trainer.

        ``input_shape``, ``label_shape``, ``w_init_i``, ``w_init_h`` and
        ``dev`` are not parameters; they are presumably provided by the
        enclosing scope. The ``device`` argument itself is not read.

        Returns:
            The tuple ``(input variable, label variable, input weight,
            recurrent weight, trainer)``.
        """
        features = C.sequence.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
        input_weight = C.parameter(init=w_init_i, device=dev)
        input_projection = times(features, input_weight)

        # The recurrent branch reads the past value of a placeholder.
        loop_slot = C.placeholder()
        previous = C.sequence.past_value(loop_slot)
        recurrent_weight = C.parameter(init=w_init_h, device=dev)
        recurrent_projection = times(previous, recurrent_weight)
        model = input_projection + recurrent_projection
        # Close the loop by substituting the model for its own placeholder.
        model = model.replace_placeholder(model)
        model = reshape(model, label_shape)

        targets = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
        criterion = cross_entropy_with_softmax(model, targets, axis=-1)
        # No evaluation function: the second element of the criterion is None.
        schedule = C.learning_rate_schedule(0.7, C.UnitType.sample)
        learner = C.sgd(model.parameters, lr=schedule)
        trainer = C.Trainer(model, (criterion, None), learner)
        return (features, targets, input_weight, recurrent_weight, trainer)
Exemplo n.º 30
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model on the images listed in a map file.

    Relies on module-level settings that are not passed in:
    ``features_stream_name``, ``label_stream_name``, ``lr_per_mb``,
    ``momentum_per_mb``, ``l2_reg_weight`` and ``mb_size``.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name: forwarded
            unchanged to ``create_model``.
        image_width, image_height, num_channels: image dimensions, used for
            the minibatch source and for the image input variable.
        num_classes: size of the label input variable.
        train_map_file: text file whose line count defines the epoch size.
        num_epochs: number of passes of ``epoch_size`` samples.
        max_images: when positive, upper bound on the epoch size.
        freeze: forwarded unchanged to ``create_model``.

    Returns:
        The model built by ``create_model``, after training.
    """
    # One line per sample; the context manager closes the file once counted.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    for _ in range(num_epochs):           # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than remain in this epoch.
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

    return tl_model
Exemplo n.º 31
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model on the images listed in a map file.

    Relies on module-level settings that are not passed in:
    ``features_stream_name``, ``label_stream_name``, ``lr_per_mb``,
    ``momentum_per_mb``, ``l2_reg_weight`` and ``mb_size``.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name: forwarded
            unchanged to ``create_model``.
        image_width, image_height, num_channels: image dimensions, used for
            the minibatch source and for the image input variable.
        num_classes: size of the label input variable.
        train_map_file: text file whose line count defines the epoch size.
        num_epochs: number of passes of ``epoch_size`` samples.
        max_images: when positive, upper bound on the epoch size.
        freeze: forwarded unchanged to ``create_model``.

    Returns:
        The model built by ``create_model``, after training.
    """
    # Count the samples, closing the map file as soon as it has been read.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    for _ in range(num_epochs):           # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than remain in this epoch.
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

    return tl_model
Exemplo n.º 32
def ffnet():
    """Train a small feed-forward classifier on generated data.

    Builds ``num_hidden_layers`` sigmoid layers followed by a linear output
    layer, trains it with SGD for 1024 minibatches drawn from
    ``generate_random_data``, and evaluates it on one further minibatch.

    Returns:
        The value of ``trainer.test_minibatch`` on the final test minibatch.
    """
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Hidden stack plus output layer, applied to the feature input.
    netout = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes),
    ])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for _ in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    test_features, test_labels = generate_random_data(minibatch_size,
                                                      input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({
        feature: test_features,
        label: test_labels,
    })
    return avg_error
Exemplo n.º 33
def ffnet():
    """Train a feed-forward classifier on generated data and test it once.

    The network comes from ``fully_connected_classifier_net``; it is trained
    with SGD for 1024 minibatches drawn from ``generate_random_data`` and then
    evaluated on one further minibatch.

    Returns:
        The value of ``trainer.test_minibatch`` on the final test minibatch.
    """
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    # NOTE(review): hidden_layers_dim is assigned but not passed to the model
    # builder below - confirm against fully_connected_classifier_net's signature.
    hidden_layers_dim = 50

    # Input variables denoting the features and label data.
    # Named `feature` rather than `input` so the builtin is not shadowed.
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(feature, num_output_classes,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for _ in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    test_features, test_labels = generate_random_data(minibatch_size,
                                                      input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({
        feature: test_features,
        label: test_labels,
    })
    return avg_error
Exemplo n.º 34
def run_distributed_training(tmpdir, create_func):
    """Run one training minibatch through a distributed learner and check the trainer.

    Args:
        tmpdir: directory object supporting ``/``; used to build a checkpoint path.
        create_func: callable that wraps a momentum-SGD learner and returns the
            distributed learner under test.
    """
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True))

    # The current worker must appear in the communicator's worker list.
    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = any(current_worker.global_rank == wk.global_rank
                     for wk in workers)

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    # NOTE(review): the checkpoint path is built but nothing visible here
    # writes to or reads from it - confirm whether save/restore calls belong here.
    p = str(tmpdir / 'checkpoint.dat')

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Exemplo n.º 35
def create_network(num_convolution_layers):
    # NOTE(review): this listing is truncated and is not valid Python as shown.
    # The docstring opened on the next line is never closed, the first
    # `cntk.input_variable(` call has no arguments or closing parenthesis, the
    # `layers.Sequential([` list is left open, and the returned dict has no
    # closing brace. Restore the missing lines from the original source.
    #
    # From what remains visible: 128 is subtracted from the input and the
    # result is multiplied by 0.00390625; a `layers.Sequential` model is built
    # under default options (relu activation, pad=True); cross-entropy and
    # classification-error criteria are attached; and a dict with the keys
    # 'name', 'feature', 'label', 'ce', 'pe' and 'output' is returned.
    """ Create network

    # Input variables denoting the features and label data
    input_var = cntk.input_variable(
    label_var = cntk.input_variable((_NUM_CLASSES))

    # create model, and configure learning parameters
    # Instantiate the feedforward classification model
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)

    print('Creating NN model')
    with layers.default_options(activation=relu, pad=True):
        model = layers.Sequential([
                range(num_convolution_layers), lambda: [
                    layers.Convolution2D((3, 3), 64),
                    layers.Convolution2D((3, 3), 64),
                    layers.MaxPooling((3, 3), (2, 2))
                lambda i: [layers.Dense([256, 128][i]),
            layers.Dense(_NUM_CLASSES, activation=None)

    # loss and metric
    ce = cross_entropy_with_softmax(model, label_var)
    pe = classification_error(model, label_var)

    return {
        'name': 'convnet',
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': model
Exemplo n.º 36
def ffnet(data, labels):
    """Train a feed-forward classifier on one fixed batch of stock data.

    Args:
        data: not read by this function.
        labels: not read either; the name is rebound to the labels returned
            by ``generate_stock_data`` before it is first used.

    Returns:
        The value of ``trainer.test_minibatch`` on one minibatch drawn from
        ``generate_random_data``.
    """
    input_dim = 800
    num_output_classes = 3
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Hidden stack plus output layer, applied to the feature input.
    netout = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes),
    ])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    # A single batch is generated once and reused for every training step.
    features, labels = generate_stock_data(minibatch_size)

    for _ in range(1024):
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch(
        {feature: test_features, label: test_labels})
    return avg_error
Exemplo n.º 37
def run_distributed_training(tmpdir, create_func):
    """Train one minibatch with a distributed learner and sanity-check the trainer.

    ``create_func`` receives a momentum-SGD learner and returns the distributed
    learner to test. ``tmpdir`` is only used to build a checkpoint path string.
    """
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    weights = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(weights), name='z')
    criterion = (cross_entropy_with_softmax(z, labels),
                 classification_error(z, labels))

    momentum = C.momentum_as_time_constant_schedule(1100)
    learning_rate = C.learning_rate_schedule(0.007, C.UnitType.sample)
    base_learner = C.momentum_sgd(z.parameters, learning_rate, momentum, True)
    dist_learner = create_func(base_learner)

    # The current worker's rank has to show up among the communicator's workers.
    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    same_rank = [wk for wk in workers
                 if current_worker.global_rank == wk.global_rank]
    assert same_rank

    trainer = C.Trainer(z, criterion, [dist_learner])
    minibatch = {in1: [[1], [2]], labels: [[0], [1]]}
    retained = z.output
    updated, var_map = trainer.train_minibatch(minibatch, outputs=[retained])
    checkpoint_path = str(tmpdir / 'checkpoint.dat')

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Exemplo n.º 38
def cargarRedDesdeArchivo(archivo):
    """Build the 800-input, 3-class network plus its trainer and return the network.

    ``archivo`` is not read by this function. A trainer is constructed around
    the network, but only the network itself is returned.
    """
    n_inputs = 800
    n_classes = 3

    feature = input((n_inputs), np.float32)
    label = input((n_classes), np.float32)

    netout = crearRed(n_inputs, 3, feature)
    criterion = (cross_entropy_with_softmax(netout, label),
                 classification_error(netout, label))

    # Instantiate the trainer object to drive the model training
    schedule = learning_rate_schedule(0.5, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=schedule)
    printer = ProgressPrinter(1)
    Trainer(netout, criterion, learner, printer)

    return netout
Exemplo n.º 39
def simple_mnist():
    """Build a feed-forward classifier for 784-dimensional inputs and 10 classes, then test it.

    Returns the sum of the per-minibatch test errors divided by
    ``num_minibatches_to_test``.

    NOTE(review): this listing is truncated in several places (marked below)
    and is not valid Python as shown.
    """
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Instantiate the feedforward classification model
    # (the input is first multiplied by the constant 0.00390625)
    scaled_input = element_times(constant(0.00390625), feature)

    # z = Sequential([
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(num_output_classes)])(scaled_input)

    # Hidden layers take relu and glorot_uniform from the default options;
    # the output layer overrides the activation with None.
    with default_options(activation=relu, init=C.glorot_uniform()):
        z = Sequential([For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim)),
            Dense(num_output_classes, activation=None)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # setup the data
    # NOTE(review): "\T" is not a recognised escape, so the backslash is kept
    # literally (Python warns about this); the path assumes a backslash separator.
    path = abs_path + "\Train-28x28_cntk_text.txt"

    reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    # NOTE(review): the closing brace of this dict is missing from the listing.
    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    # NOTE(review): the ProgressPrinter arguments and closing brackets are missing.
    progress_writers = [ProgressPrinter(

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_schedule(1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers)

    # NOTE(review): the line below is an orphaned keyword argument; the call it
    # belonged to (presumably the one that runs training) is missing - TODO restore.
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,

    # Load test data
    path = abs_path + "\Test-28x28_cntk_text.txt"

    reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    # NOTE(review): closing brace missing here as well.
    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    # NOTE(review): 10000 / 1024 is 9.765625; the loop below runs int(...) = 9
    # times, yet the final division uses the fractional value.
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Exemplo n.º 40
def simple_mnist(tensorboard_logdir=None):
    """Build a feed-forward classifier for 784-dimensional inputs and 10 classes, then test it.

    Returns the summed per-minibatch test error times 100, divided by
    ``num_minibatches_to_test``.

    NOTE(review): this listing is heavily truncated (see the notes below) and
    is not valid Python as shown.
    """
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # (the input is first multiplied by the constant 0.00390625)
    scaled_input = element_times(constant(0.00390625), feature)

    # NOTE(review): the list is left open and the lambda line has an unmatched
    # ")"; the wrapper around the lambda and the rest of the list are missing.
    z = Sequential([
            lambda i: Dense(hidden_layers_dim, activation=relu)),

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # The training file is looked up next to this source file.
    data_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(data_dir, 'Train-28x28_cntk_text.txt')

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    # NOTE(review): the closing brace of this dict is missing from the listing.
    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    # training_progress_output_freq = 100
    # NOTE(review): the list contents and closing bracket are missing.
    progress_writers = [
            # freq=training_progress_output_freq,

    # NOTE(review): the body of this `if` is missing from the listing.
    if tensorboard_logdir is not None:

    # Instantiate the trainer object to drive the model training
    lr = 0.001
    trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr), progress_writers)

    # NOTE(review): orphaned fragment of a call whose start and end are
    # missing (presumably the call that runs training) - TODO restore.
                     max_samples=num_samples_per_sweep *

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    # NOTE(review): closing brace missing here as well.
    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels

    # Test data for trained model

    test_minibatch_size = 1024
    num_samples = 10000
    # NOTE(review): 10000 / 1024 is 9.765625; the loop below runs int(...) = 9
    # times, yet the final division uses the fractional value.
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        # NOTE(review): this call is cut off after its first argument.
        mb = reader_test.next_minibatch(test_minibatch_size,
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error


    # Average of evaluation errors of all test minibatches
    return test_result * 100 / num_minibatches_to_test
Exemplo n.º 41
def simple_mnist(tensorboard_logdir=None):
    """Build a feed-forward classifier for 784-dimensional inputs and 10 classes, then test it.

    When ``tensorboard_logdir`` is not None a ``TensorBoardProgressWriter`` is
    appended to the progress writers. Returns the sum of the per-minibatch
    test errors divided by ``num_minibatches_to_test``.

    NOTE(review): this listing is truncated in several places (marked below)
    and is not valid Python as shown.
    """
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # (the input is first multiplied by the constant 0.00390625)
    scaled_input = element_times(constant(0.00390625), feature)

    # NOTE(review): the list is left open; whatever followed the hidden stack
    # and the application to an input are missing from the listing.
    z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # Data is looked up three directories above abs_path, under DataSets/MNIST.
    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    # NOTE(review): the closing brace of this dict is missing from the listing.
    input_map = {
        feature  : reader_train.streams.features,
        label  : reader_train.streams.labels

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    #training_progress_output_freq = 100
    # NOTE(review): the ProgressPrinter arguments and closing brackets are missing.
    progress_writers = [ProgressPrinter(

    if tensorboard_logdir is not None:
        progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(1)
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)

    # NOTE(review): the four lines below are keyword arguments of a call whose
    # opening and closing lines are missing (presumably the call that runs
    # training) - TODO restore.
        mb_source = reader_train,
        mb_size = minibatch_size,
        model_inputs_to_streams = input_map,
        max_samples = num_samples_per_sweep * num_sweeps_to_train_with,

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    # NOTE(review): closing brace missing here as well.
    input_map = {
        feature  : reader_test.streams.features,
        label  : reader_test.streams.labels

    # Test data for trained model
    #C.cntk_py.disable_cpueval_optimization() # uncomment this to check CPU eval perf without optimization

    test_minibatch_size = 1024
    num_samples = 10000
    # NOTE(review): 10000 / 1024 is 9.765625; the loop below runs int(...) = 9
    # times, yet the final division uses the fractional value.
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error


    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Exemplo n.º 42
def simple_mnist(tensorboard_logdir=None):
    """Build a feed-forward classifier for 19-dimensional inputs and 2 classes.

    Reads ``train.ctf`` from the current directory, periodically computes
    error rates on ``train_subset.ctf`` and ``validation.ctf``, and returns the
    error rate that ``get_error_rate`` reports for ``test.ctf``.

    NOTE(review): this listing is truncated in several places (marked below)
    and is not valid Python as shown.
    """
    input_dim = 19
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 1024

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # scaled_input = element_times(constant(0.00390625), feature)

    # NOTE(review): the list is left open; whatever followed the hidden stack
    # and the application to an input are missing from the listing.
    z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = r"."

    path = os.path.normpath(os.path.join(data_dir, "train.ctf"))

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    # NOTE(review): the closing brace of this dict is missing from the listing.
    input_map = {
        feature  : reader_train.streams.features,
        label  : reader_train.streams.labels

    # Training config
    minibatch_size = 512
    num_samples_per_sweep = 1825000
    num_sweeps_to_train_with = 100

    # Instantiate progress writers.
    # NOTE(review): the ProgressPrinter arguments and closing brackets are missing.
    progress_writers = [ProgressPrinter(

    # NOTE(review): tensorboard_writer is bound only when a log directory is
    # given, but the training loop below calls it unconditionally.
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)

    # Instantiate the trainer object to drive the model training
    # NOTE(review): `lr` is not used below; the learner comes from create_learner.
    lr = learning_parameter_schedule_per_sample(0.001)
    learner = create_learner(model=z)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    num_minibatches_to_train = int(num_samples_per_sweep / minibatch_size * num_sweeps_to_train_with)

    model_dir = "model"
    for i in range(num_minibatches_to_train):
        # NOTE(review): the minibatch is fetched, but no call that trains on
        # it is visible in this listing.
        mb = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Every `freq` minibatches (one sweep of samples), evaluate and log.
        freq = int(num_samples_per_sweep / minibatch_size)
        if i > 0 and i % freq == 0:
            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
            # NOTE(review): the name embeds `freq`, which is the same on every
            # pass; the path is not used by any visible line.
            current_trainer_cp = os.path.join(model_dir, timestamp + "_epoch_" + str(freq) + ".trainer")

            train_error = get_error_rate(os.path.join(data_dir, "train_subset.ctf"), input_map, input_dim,
                                         num_output_classes, trainer)
            # NOTE(review): this call is cut off before its last argument.
            valid_error = get_error_rate(os.path.join(data_dir, "validation.ctf"), input_map, input_dim, num_output_classes,

            if train_error > 0:
                tensorboard_writer.write_value("train_error", train_error, i)
            if valid_error > 0:
                tensorboard_writer.write_value("valid_error", valid_error, i)

    feat_path = os.path.normpath(os.path.join(data_dir, "test.ctf"))
    return get_error_rate(feat_path, input_map, input_dim, num_output_classes, trainer)
Exemplo n.º 43
def create_rpn(conv_out,
    # NOTE(review): this listing is truncated and is not valid Python as shown.
    # The signature is cut off after its first parameter, the descriptive text
    # below has lost its docstring delimiters and section headers, and many
    # calls (Convolution, reshape, user_function, cross_entropy_with_softmax,
    # as_block, SmoothL1Loss, plus) are missing arguments or closing brackets.
    # Restore the missing lines from the original source before use.
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3),
    rpn_cls_score = Convolution(
        (1, 1),
        init_bias=0.0)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1),
        init_bias=0.0)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(
        (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
    # The softmax is built on a placeholder and then wrapped as a block that
    # binds the placeholder to the reshaped scores.
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm,
                                 [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob,

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred,
                                     im_info, cfg)

    rpn_losses = None
    if (add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
            format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        # keeps is the mask of labels >= 0; it is multiplied into both the
        # foreground labels and the per-term loss.
        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp,
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(
            rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(
            [(p_rpn_labels, rpn_labels),
             (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'CE_with_ignore',

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        # NOTE(review): cfg is read by attribute here but by key elsewhere in
        # this function - confirm which access is intended.
        rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred,
                                     p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(
            rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(
            [(p_rpn_bbox_pred, rpn_bbox_pred),
             (p_rpn_bbox_targets, rpn_bbox_targets),
             (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
            'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls,

    return rpn_rois, rpn_losses
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    """
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Reads the module-level ``cfg`` for the channel count, the SmoothL1 sigma
    and the RPN batch size.

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.
        conv_bias_init:  Initial bias value used for all three convolutions.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    """

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init=normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp")
    # The softmax is built on a placeholder and wrapped as a named block that
    # binds the placeholder to the reshaped scores.
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if add_loss_functions:
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        # keeps is the mask of labels >= 0; it is multiplied into both the
        # foreground labels and the per-term loss.
        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                             [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                             'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                              [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                               (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                              'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
# Set up a CNTK Trainer for a language model and run the epoch/minibatch loop.
#
# Parameters:
#   training_file:       not referenced in the visible lines (presumably the
#                        argument of the truncated load_data_and_vocab call --
#                        TODO confirm).
#   epochs:              number of iterations of the outer training loop.
#   max_num_minibatches: upper bound on the total number of minibatches; it is
#                        divided evenly across the epochs.
#
# NOTE(review): several calls in this excerpt are cut off right after their
# opening parenthesis (load_data_and_vocab, momentum_sgd, get_data), so the
# function does not parse as shown. No train_minibatch call and no model-save
# call are visible either; restore the missing lines from the original file.
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    # NOTE(review): argument list and closing parenthesis are missing here.
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_parameter_schedule_per_sample(0.001)
    # 0.9990913221888589 is numerically exp(-1/1100); presumably this encodes a
    # momentum time constant of 1100 samples -- TODO confirm.
    momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    # NOTE(review): the learner's arguments are missing in this excerpt; the
    # schedules and clipping settings above are presumably passed here.
    learner = momentum_sgd(
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    # Emit a text sample every `sample_freq` minibatches (counted globally).
    sample_freq = 1000
    # `minibatch_size` is not defined in this function; it must come from an
    # enclosing (module-level) scope.
    minibatches_per_epoch = min(data_size // minibatch_size,
                                max_num_minibatches // epochs)

    # print out some useful training information
    print("Running %d epochs with %d minibatches per epoch" %
          (epochs, minibatches_per_epoch))

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            # NOTE(review): the remaining get_data arguments are missing here.
            features, labels = get_data(b, minibatch_size, data, char_to_ix,
            arguments = ({
                input_sequence: features,
                label_sequence: labels
            }, mask)
            # Only the first minibatch of an epoch is flagged as a new sequence.
            mask = [False]

            # Minibatch index counted across all epochs.
            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        # NOTE(review): the message below reports a save, but no save call is
        # visible in this excerpt.
        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        print("Saved model to '%s'" % model_filename)
Exemplo n.º 46
    Dense(500, activation=relu),
    Dense(300, activation=relu),
    Dense(K, activation=None),

# NOTE(review): this script excerpt is incomplete. `model`, `D`, `K` and
# `Xtrain` are defined outside the visible lines, and the statements from
# "define the inputs and labels" down to the Trainer construction appear twice
# verbatim; the second copy simply rebinds every name.

# define the inputs and labels
inputs = C.input_variable(D, np.float32, name='inputs')
labels = C.input_variable(K, np.float32, name='labels')

# get the output
logits = model(inputs)

# define loss / metrics
# like Tensorflow the softmax is done
# internally (if needed), so all we need are the logits
ce = cross_entropy_with_softmax(logits, labels)
pe = classification_error(logits, labels)

# training config
batch_size = 32
epochs = 15
# Integer division: a trailing partial batch is not counted.
n_batches = len(Xtrain) // batch_size

# do the training
# NOTE(review): no training loop follows in this excerpt.

# specify the training algorithm
trainer = Trainer(logits, (ce, pe),
                  adam(logits.parameters, lr=1e-2, momentum=0.9))

# helper function
# NOTE(review): the helper's definition is not present in this excerpt.

# define the inputs and labels
# (repeat of the definitions above; creates fresh input variables)
inputs = C.input_variable(D, np.float32, name='inputs')
labels = C.input_variable(K, np.float32, name='labels')

# get the output
logits = model(inputs)

# define loss / metrics
# like Tensorflow the softmax is done
# internally (if needed), so all we need are the logits
ce = cross_entropy_with_softmax(logits, labels)
pe = classification_error(logits, labels)

# training config
batch_size = 32
epochs = 15
# Integer division: a trailing partial batch is not counted.
n_batches = len(Xtrain) // batch_size

# do the training
# NOTE(review): no training loop follows in this excerpt.

# specify the training algorithm
trainer = Trainer(logits, (ce, pe), adam(logits.parameters, lr=1e-2, momentum=0.9))
Exemplo n.º 48
 def criterion(query, labels):
     """Build the (loss, metric) pair for ``model`` applied to ``query``.

     The loss is softmax cross-entropy and the metric is classification
     error, both computed from the same model output against ``labels``.
     """
     logits = model(query)
     loss = cross_entropy_with_softmax(logits, labels)
     metric = classification_error(logits, labels)
     return loss, metric
Exemplo n.º 49
    # Train `model_definition` with CNTK on the image map files found in
    # `path_to_folder`, evaluate it on the test map, and save it as ONNX.
    #
    # Parameters:
    #   path_to_folder:    directory holding train_map.txt and test_map.txt
    #                      (a trailing '/' is stripped).
    #   model_definition:  callable applied to the input variable to build the
    #                      network.
    #   epochs:            number of passes of the training loop.
    #   output_model_path: destination passed to model.save().
    #   classes:           sequence of class labels; its length is the number
    #                      of classes and .index() is used to one-hot encode.
    #   minibatch_size:    maximum number of samples per minibatch.
    #
    # NOTE(review): this excerpt has lost lines -- several calls and literals
    # are never closed, the `except` below has no visible `try:`, and some
    # loop bodies are empty -- so the method does not parse as shown. `np`,
    # `math`, `csv` and `Image` are not imported here and must come from module
    # scope.
    def __train_cntk(self, path_to_folder: str, model_definition, epochs: int,
                     output_model_path: str, classes, minibatch_size: int):
        import cntk
        from cntk.learners import learning_parameter_schedule
        from cntk.ops import input_variable
        from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer
        import cntk.io.transforms as xforms
        from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization
        from cntk.ops.functions import CloneMethod
        from cntk.logging import ProgressPrinter
        from cntk.losses import cross_entropy_with_softmax
        from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd
        import pandas as pd

        path_to_folder = path_to_folder.rstrip('/')

        map_file_train = path_to_folder + "/train_map.txt"
        map_file_test = path_to_folder + "/test_map.txt"
        classes_set = set()
        num_train = 0
        num_test = 0
        num_channels = 3

        # Custom deserializer that serves images listed in a map file in
        # chunks of `chunksize` rows.
        class TrackDataset(UserDeserializer):
            def __init__(self, map_file, streams, chunksize=100):
                super(TrackDataset, self).__init__()
                self._batch_size = chunksize
                # NOTE(review): read_csv uses its default separator here, while
                # the counting loop further down reads the train map with a
                # tab delimiter -- confirm the map file format.
                self.dataframes = pd.read_csv(map_file,
                                              names=["features", "labels"])
                # One dense float32 stream description per entry in `streams`.
                # NOTE(review): the closing bracket of this list is missing.
                self._streams = [
                    cntk.io.StreamInformation(s['name'], i, 'dense',
                                              np.float32, s['shape'])
                    for i, s in enumerate(streams)

                # Number of chunks, rounding up so a partial last chunk counts.
                self._num_chunks = int(
                    math.ceil(len(self.dataframes) / chunksize))

            # Resize `image` to (width, height) with linear interpolation.
            # NOTE(review): as shown, the raise after the return is
            # unreachable; the surrounding try/except lines appear to be
            # missing.
            def _scale_image(self, image, width=224, height=168):
                    return image.resize((width, height), Image.LINEAR)
                    raise Exception('scale_image error')

            def stream_infos(self):
                return self._streams

            def num_chunks(self):
                return self._num_chunks

            # Load and preprocess the rows belonging to chunk `chunk_id` and
            # return them keyed by stream name.
            def get_chunk(self, chunk_id):
                images = []
                labels = []
                # Clamp the end of the chunk to the number of rows.
                maximum = (chunk_id + 1) * self._batch_size
                if (maximum > len(self.dataframes)):
                    maximum = len(self.dataframes)
                for i in range(chunk_id * self._batch_size, maximum):
                    img_name = self.dataframes.iloc[i, 0]
                    image = Image.open(img_name)
                    cl = self.dataframes.iloc[i, 1:].values[0]
                    image = self._scale_image(image)
                    # Move the last axis (presumably channels -- TODO confirm)
                    # to the front.
                    image = np.moveaxis((np.array(image).astype('float32')),
                                        -1, 0)
                    # Normalise each image to zero mean / unit std; the 1e-6
                    # guards against division by zero.
                    image -= np.mean(image, keepdims=True)
                    image /= (np.std(image, keepdims=True) + 1e-6)
                    # One-hot encode the class label.
                    yv = np.zeros(num_classes)
                    yv[classes.index(cl)] = 1
                    # NOTE(review): nothing is appended to `images`/`labels`
                    # in the visible lines; the appends appear to be missing.

                result = {}
                features = np.array(images)
                lab = np.array(labels).astype('float32')
                result[self._streams[0].m_name] = features
                result[self._streams[1].m_name] = lab
                return result

            # Count the rows of the training map file.
            # NOTE(review): the matching `try:` for the `except` below is not
            # visible, and this block sits at class-body indentation.
            with open(map_file_train) as f:
                csv_reader = csv.reader(f, delimiter='\t')
                for row in csv_reader:
                    cmd = row[1]
                    num_train = num_train + 1
        except Exception as e:
            raise Exception(
                "No train_map.txt file found in path " + path_to_folder +
                ". Did you create a dataset using create_balanced_dataset()?")

        num_classes = len(classes)

        # NOTE(review): the body of this loop is missing; presumably it only
        # counts the lines of the test map -- TODO confirm.
        with open(map_file_test) as f:
            for num_test, l in enumerate(f):

        # transforms = [
        #     xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'),
        #     xforms.mean(mean_file)
        # ]

        # NOTE(review): the `streams=` argument of both TrackDataset calls is
        # truncated; only the trailing `shape=` of its last entry is visible.
        dataset_train = TrackDataset(map_file=map_file_train,
                                              shape=(num_classes, ))
        reader_train = MinibatchSource([dataset_train], randomize=True)

        # a = dataset_train.num_chunks()

        dataset_test = TrackDataset(map_file=map_file_test,
                                             shape=(num_classes, ))
        reader_test = MinibatchSource([dataset_test], randomize=True)

        # ImageDeserializer loads images in the BGR format, not RGB
        # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # mb = reader_train.next_minibatch(10)

        # Network input is (channels, height, width); labels are one-hot.
        input_var = input_variable(
            (num_channels, self.__image_height, self.__image_width))
        label_var = input_variable((num_classes))

        model = model_definition(input_var)

        # Loss and evaluation metric.
        ce = cross_entropy_with_softmax(model, label_var)
        pe = classification_error(model, label_var)

        epoch_size = num_train

        # Stepped learning rate: 0.01 for 10 entries, 0.003 for 10, then 0.001.
        # NOTE(review): the remaining arguments of this call are missing.
        lr_per_minibatch = learning_parameter_schedule([0.01] * 10 +
                                                       [0.003] * 10 + [0.001],
        momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
        l2_reg_weight = 0.001

        # NOTE(review): the learner and trainer calls below are truncated.
        learner = momentum_sgd(model.parameters,
        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
        trainer = cntk.train.Trainer(model, (ce, pe), [learner],

        # Map network inputs to the reader's streams.
        # NOTE(review): the closing brace of this dict is missing.
        input_map = {
            input_var: reader_train.streams.features,
            label_var: reader_train.streams.labels

        print("Training started")
        batch_index = 0
        plot_data = {'batchindex': [], 'loss': [], 'error': []}
        for epoch in range(epochs):
            sample_count = 0
            while sample_count < epoch_size:
                # Never request more samples than remain in the epoch.
                # NOTE(review): the call is truncated and no train_minibatch
                # call is visible in this loop.
                data: MinibatchSource = reader_train.next_minibatch(
                    min(minibatch_size, epoch_size - sample_count),

                sample_count += data[label_var].num_samples

                batch_index += 1


        # Evaluation: accumulate a sample-weighted error over the test set.
        metric_numer = 0
        metric_denom = 0
        sample_count = 0
        minibatch_index = 0
        epoch_size = num_test

        while sample_count < epoch_size:
            current_minibatch = min(minibatch_size, epoch_size - sample_count)

            # NOTE(review): this call is truncated.
            data = reader_test.next_minibatch(current_minibatch,

            # Weight each minibatch's metric by the requested minibatch size.
            metric_numer += trainer.test_minibatch(data) * current_minibatch
            metric_denom += current_minibatch

            sample_count += data[label_var].num_samples
            minibatch_index += 1

        # NOTE(review): the final format argument and closing parentheses of
        # this print call are missing.
        print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
            minibatch_index + 1, (metric_numer * 100.0) / metric_denom,

        # Export the trained model in ONNX format.
        model.save(output_model_path, format=ModelFormat.ONNX)
Exemplo n.º 50
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
from cntk.ops import input_variable
from cntk.layers import Dense
from cntk.ops import relu
from cntk.metrics import classification_error
from cntk.losses import cross_entropy_with_softmax

# Data file read by the CTF deserializer below.
path = "dataset.txt"
# Dimensions of the feature and label vectors in the data file.
featuresShapeValue = 6
labelsShapeValue = 1

# Network input variables for features and labels.
featuresShape = input_variable(featuresShapeValue)
labelsShape = input_variable(labelsShapeValue)

# CTFDeserializer takes the data file name first and the stream definitions
# second. The file name was previously omitted, which put StreamDefs in the
# `filename` position and left `streams` unset.
ctfdResult = CTFDeserializer(
    path,
    StreamDefs(features=StreamDef(field='features', shape=featuresShapeValue),
               labels=StreamDef(field='labels', shape=labelsShapeValue)))

reader = MinibatchSource(ctfdResult)

# Two dense layers: one hidden layer followed by the output layer.
hiddenLayerDimension = 4
hiddenLayerOne = Dense(hiddenLayerDimension, activation=relu)(featuresShape)
outputLayer = Dense(labelsShapeValue, activation=relu)(hiddenLayerOne)

# Training loss and evaluation metric on the output layer.
crossEntropy = cross_entropy_with_softmax(outputLayer, labelsShape)
classificationError = classification_error(outputLayer, labelsShape)
Exemplo n.º 51
# Build a feed-forward classifier for 784-dimensional inputs and 10 classes,
# set up its Trainer, and return the average evaluation error measured on the
# test file.
#
# Parameters:
#   tensorboard_logdir: optional; when not None an extra branch runs
#                       (presumably adding a TensorBoard progress writer --
#                       the branch body is missing, TODO confirm).
#
# NOTE(review): this excerpt has lost lines -- several calls and dict/list
# literals are never closed and the training call itself is reduced to a
# single dangling argument line -- so the function does not parse as shown.
# `abs_path` and `create_reader` come from outside this function.
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = input(input_dim, np.float32)
    label = input(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # 0.00390625 == 1/256: scales the raw feature values down.
    scaled_input = element_times(constant(0.00390625), feature)
    # NOTE(review): the last argument(s) and closing parenthesis are missing.
    z = fully_connected_classifier_net(scaled_input, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    # NOTE(review): the training file name and closing parentheses are missing.
    path = os.path.normpath(os.path.join(data_dir,

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    # Map network inputs to the training reader's streams.
    # NOTE(review): the closing brace of this dict is missing.
    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    #training_progress_output_freq = 100
    # NOTE(review): the list contents and the body of the `if` below are
    # missing.
    progress_writers = [

    if tensorboard_logdir is not None:

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters), progress_writers)

    # NOTE(review): dangling argument of a truncated training call.
                     max_samples=num_samples_per_sweep *

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    # Re-point the input map at the test reader's streams.
    # NOTE(review): the closing brace of this dict is missing.
    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    # True division: 10000 / 1024 == 9.765625 (a float).
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    # The loop runs int(9.765625) == 9 full minibatches.
    for i in range(0, int(num_minibatches_to_test)):
        # NOTE(review): the remaining arguments of this call are missing.
        mb = reader_test.next_minibatch(test_minibatch_size,
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    # NOTE(review): the sum covers 9 minibatches but is divided by 9.765625,
    # so the returned value is slightly lower than the true mean -- confirm
    # whether this is intended.
    return test_result / num_minibatches_to_test
Exemplo n.º 52
# Script fragment: builds a recurrent sequence model over a vocabulary of
# `vocab_size` symbols and sets up its Trainer.
# NOTE(review): `vocab_size` is defined outside the visible lines, and the
# Sequential literal and the momentum_sgd call below are truncated, so this
# fragment does not parse as shown.

# Sequence inputs for the features and the targets, both of size vocab_size.
input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

# NOTE(review): the wrapper around `range(2), lambda: ...` and any trailing
# layers are missing; presumably two stacked LSTM(256) recurrences -- TODO
# confirm.
model = Sequential([
        range(2), lambda: Sequential(
             Recurrence(LSTM(256), go_backwards=False)])),

z = model(input_sequence)
# Softmax over the model output; the criteria below use `z` directly, not z_sm.
z_sm = cntk.softmax(z)

# Loss and evaluation metric.
ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
# NOTE(review): the learner's arguments are missing in this excerpt; the
# schedules and clipping settings above are presumably passed here.
learner = momentum_sgd(
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)
Exemplo n.º 53
# NOTE(review): this excerpt has lost lines. The signature is cut off after
# `add_loss_functions=True,` (the docstring text below mentions a further
# `proposal_layer_param_string` parameter), the docstring's opening/closing
# quotes and section headers are gone, and the indented loss section after
# `rpn_losses = None` has no visible guard (presumably
# `if add_loss_functions:` -- TODO confirm). The function does not parse as
# shown.
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         (image_widht, image_height, image_scale) as CNTK variable or constant
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    # A shared 3x3 convolution feeds two 1x1 heads: objectness scores and
    # bounding-box regression outputs.
    rpn_conv_3x3 = Convolution((3, 3), 256, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=0.1)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    # Half of all score elements: one prediction per (bg, fg) pair.
    num_predictions = int(np.prod(rpn_cls_score.shape) / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions))
    rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax")
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape)

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    # Default when no losses are built.
    rpn_losses = None
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # For loss functions: ignore label predictions for the 'ignore label',
        # i.e. set target and prediction to 0 --> needs to be softmaxed before
        rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions))
        ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1))
        rpn_cls_prob_ignore = ignore.outputs[0]
        fg_targets = ignore.outputs[1]
        # Background targets are the complement of the foreground targets.
        bg_targets = 1 - fg_targets
        # Stack as (bg, fg) along axis 0 to line up with the two score rows.
        rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0)

        # RPN losses
        # NOTE(review): rpn_cls_prob_ignore is derived from the softmax output
        # above, so cross_entropy_with_softmax applies a softmax on top of
        # already-softmaxed values -- confirm this is intended.
        rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0)
        rpn_loss_bbox = user_function(SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights))
        # Total RPN loss: summed classification loss plus summed bbox loss.
        rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses")

    return rpn_rois, rpn_losses