Example #1
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query       = Input(input_dim,  is_sparse=False)
    slot_labels = Input(num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error      (z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))  # TODO: Change to round number. This is 664.39. 700?

    lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)

    # trainer object
    lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(z.parameters,
                       lr=lr_per_sample, momentum=momentum_time_constant,
                       unit_gain=True,
                       low_memory=True,
                       gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

    trainer = Trainer(z, (ce, pe), [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query       : reader.streams.query,
        slot_labels : reader.streams.slot_labels
    }

    # process minibatches and perform model training
    log_number_of_parameters(z) ; print()
    # more detailed logging
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', tensorboard_log_dir='atis_log', model=z)
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:               # loop over minibatches in the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
            trainer.train_minibatch(data)                                   # update model with it
            t += trainer.previous_minibatch_sample_count                    # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
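A minimal driver for the function above, as a sketch only: the CTF file name, stream field tags, dimensions, and the layer stack are illustrative assumptions, not part of the example (input_dim and num_labels are assumed to be module-level globals, as the example itself expects).

from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
from cntk.layers import Sequential, Embedding, Recurrence, LSTM, Dense

input_dim, num_labels = 943, 129   # hypothetical vocabulary and label counts
reader = MinibatchSource(CTFDeserializer('atis.train.ctf', StreamDefs(   # hypothetical file name
    query       = StreamDef(field='S0', shape=input_dim,  is_sparse=True),
    slot_labels = StreamDef(field='S2', shape=num_labels, is_sparse=True))))
model = Sequential([Embedding(150), Recurrence(LSTM(300)), Dense(num_labels)])  # illustrative model
loss, error = train(reader, model, max_epochs=8)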
Example #2
def create_adam_learner(learn_params,
                        learning_rate=0.0005,
                        gradient_clipping_threshold_per_sample=0.001):
    """
  Create adam learner
  """
    lr_schedule = learner.learning_rate_schedule(learning_rate,
                                                 learner.UnitType.sample)
    momentum = learner.momentum_schedule(0.90)
    gradient_clipping_threshold_per_sample = gradient_clipping_threshold_per_sample
    gradient_clipping_with_truncation = True
    momentum_var = learner.momentum_schedule(0.999)
    lr = learner.adam_sgd(
        learn_params,
        lr_schedule,
        momentum,
        True,           # unit_gain
        momentum_var,   # variance momentum (second-moment schedule)
        low_memory=False,
        gradient_clipping_threshold_per_sample=
        gradient_clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    learner_desc = 'Alg: Adam, learning rate: {0}, momentum: {1}, gradient clip: {2}'.format(
        learning_rate, momentum[0], gradient_clipping_threshold_per_sample)
    logger.log("Create learner. {0}".format(learner_desc))
    return lr
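A hypothetical caller for the helper above; z, ce, and pe stand for a model output, loss, and metric that the caller is assumed to have built already:

adam = create_adam_learner(z.parameters, learning_rate=0.0005)
trainer = Trainer(z, (ce, pe), [adam])   # feed the returned learner into a Trainer as usual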
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query       = Input(input_dim,  is_sparse=False)
    slot_labels = Input(num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error      (z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))  # TODO: Change to round number. This is 664.39. 700?

    lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)

    # trainer object
    lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(z.parameters,
                       lr=lr_per_sample, momentum=momentum_time_constant,
                       low_memory=True,
                       gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

    trainer = Trainer(z, (ce, pe), [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query       : reader.streams.query,
        slot_labels : reader.streams.slot_labels
    }

    # process minibatches and perform model training
    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:               # loop over minibatches in the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
            trainer.train_minibatch(data)                                   # update model with it
            t += trainer.previous_minibatch_sample_count                    # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
def train(reader, model, max_epochs):

    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = adam_sgd(criterion.parameters,
                       lr         = learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], UnitType.sample, epoch_size),
                       momentum   = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
                       low_memory = True,
                       gradient_clipping_threshold_per_sample = 15,
                       gradient_clipping_with_truncation = True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        peek(model, epoch)                  # log some interesting info
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches in the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t))     # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({criterion.arguments[0]: data[reader.streams.query], criterion.arguments[1]: data[labels]})  # update model with it
            t += data[labels].num_samples                                      # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)    # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric # return values from last epoch
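create_criterion_function is referenced but not shown here. A plausible definition in the same style, sketched under the assumption that the Python-3 @Function annotation pattern and the module-level vocab_size/num_labels globals are available (an illustration, not the example's actual code):

from cntk import Function
from cntk.ops import cross_entropy_with_softmax, classification_error
from cntk.layers.typing import Sequence, SparseTensor

def create_criterion_function(model):
    # maps (query, slot_labels) -> (loss, metric), which is what Trainer(None, criterion, learner) expects
    @Function
    def criterion(query: Sequence[SparseTensor[vocab_size]],
                  labels: Sequence[SparseTensor[num_labels]]):
        z = model(query)
        ce   = cross_entropy_with_softmax(z, labels)
        errs = classification_error(z, labels)
        return (ce, errs)
    return criterion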
Example #5
def create_trainer():
    # Will take the model and the batch generator to create a Trainer
    # Will return the input variables, trainer variable, model and the embedding layer
    ##################################################
    ################### Inputs #######################
    ##################################################
    word_one_hot = C.input_variable((G.embedding_vocab_size),
                                    np.float32,
                                    is_sparse=True,
                                    name='word_input')
    context_one_hots = [
        C.input_variable((G.embedding_vocab_size),
                         np.float32,
                         is_sparse=True,
                         name='context_input{}'.format(i))
        for i in range(context_size)
    ]
    negative_one_hots = [
        C.input_variable((G.embedding_vocab_size),
                         np.float32,
                         is_sparse=True,
                         name='negative_input{}'.format(i))
        for i in range(G.negative)
    ]

    # The target labels have 1 in the first (positive) slot and 0 in the rest
    target = C.input_variable((G.negative + 1), np.float32)

    word_negative_context_product, embedding_layer = create_word2vec_cbow_model(
        word_one_hot, context_one_hots, negative_one_hots)
    loss = C.binary_cross_entropy(word_negative_context_product, target)
    eval_loss = C.binary_cross_entropy(word_negative_context_product, target)

    lr_schedule = learning_rate_schedule(G.learning_rate, UnitType.minibatch)

    learner = adam_sgd(word_negative_context_product.parameters,
                       lr=lr_schedule,
                       momentum=momentum_as_time_constant_schedule(700))

    trainer = Trainer(word_negative_context_product, (loss, eval_loss),
                      learner)

    return word_one_hot, context_one_hots, negative_one_hots, target, trainer, word_negative_context_product, embedding_layer
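A hypothetical outer loop for the trainer returned above; batch_generator is assumed (it is not defined in the example) to yield dictionaries mapping each returned input variable to minibatch data:

word_in, context_ins, negative_ins, target_var, trainer, product, embedding = create_trainer()
for batch in batch_generator():   # assumed helper yielding {input_variable: data} dicts
    trainer.train_minibatch(batch)
    print(trainer.previous_minibatch_loss_average)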
Example #6
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = adam_sgd(model_train.parameters,
                       lr       = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
                       momentum = momentum_as_time_constant_schedule(1100),
                       gradient_clipping_threshold_per_sample=2.3,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train) ; print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features], criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
Example #7
def train(reader, model, max_epochs):

    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = adam_sgd(
        criterion.parameters,
        lr=learning_rate_schedule([0.003] * 2 + [0.0015] * 12 + [0.0003],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(minibatch_size /
                                                    -math.log(0.9)),
        low_memory=True,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(freq=100, first=10,
                                       tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        peek(model, epoch)  # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches in the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size,
                                             epoch_end - t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({
                criterion.arguments[0]:
                data[reader.streams.query],
                criterion.arguments[1]:
                data[labels]
            })  # update model with it
            t += data[labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)

    return loss, metric  # return values from last epoch
Example #8
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = adam_sgd(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb_train[train_reader.streams.features],
                criterion.arguments[1]:
                mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(
                    format_sequences(
                        sparse_to_dense(
                            mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)