Example #1
def train_mlprnn(weight_path=sys.argv[1],
                 file_name1=sys.argv[2],
                 L1_reg=0.0,
                 L2_reg=0.0000,
                 path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP_RNN(rng=rng,
                         input1=x1,
                         input2=x2,
                         input3=x3,
                         initial_hidden=ht1,
                         n_in=vocab_size,
                         fea_dim=int(sys.argv[3]),
                         context_size=2,
                         n_hidden=int(sys.argv[4]),
                         n_out=vocab_size)

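    # shared buffer that carries the RNN hidden-layer output from one word step to the next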
    hidden_state = theano.shared(
        numpy.empty((int(sys.argv[4]), ), dtype='float32'))

    cost = classifier.cost(y)

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = 0.05, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           x3: test_set_x3,
                                           ht1: hidden_state,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               x3: valid_set_x3,
                                               ht1: hidden_state,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.RNNhiddenlayer.output], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           x3: train_set_x3,
                                           ht1: hidden_state,
                                           y: train_set_y})
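    # initialise the MLP layers from the weights (and biases) stored in the given HDF5 file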
    f = h5py.File(weight_path + file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP' + str(2) + '/layer' + str(i / 2)
        if i == 4:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
        else:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
            classifier.MLPparams[i + 1].set_value(numpy.asarray(
                f[path_modified + "/b"].value, dtype='float32'),
                                                  borrow=True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
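            # build one-hot context-word vectors and present them one example at a time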
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = train_model(
                    numpy.array(learnrate_schedular.get_rate(),
                                dtype='float32'))
                hidden_state.set_value(numpy.asarray(out[1], dtype='float32'),
                                       borrow=True)

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_x3.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # this also marks the end of training time for the epoch
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = validate_model()
                #error_rate = out[0]
                likelihoods = out[0]
                #valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        #this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # this also marks the end of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features3 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x3.set_value(numpy.asarray(temp_features3,
                                                dtype='float32'),
                                  borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'),
                                 borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    print numpy.sum(log_likelihood)
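
The LearningRateNewBob scheduler used throughout these examples is not shown. As a rough sketch only (the real class may differ), a NewBob-style schedule consistent with how it is called above — get_rate(), get_next_rate(error), an epoch counter, a writable rate attribute, and the constructor arguments start_rate, scale_by, max_epochs, min_derror_ramp_start, min_derror_stop and init_error — could look like this:

class NewBobSchedule(object):
    """Hypothetical NewBob-style schedule: keep the rate until the validation error
    stops improving by min_derror_ramp_start, then halve it each epoch, and set it
    to zero once the improvement drops below min_derror_stop (or max_epochs is hit)."""

    def __init__(self, start_rate=0.05, scale_by=0.5, max_epochs=9999,
                 min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.):
        self.rate = start_rate
        self.scale_by = scale_by
        self.max_epochs = max_epochs
        self.min_derror_ramp_start = min_derror_ramp_start
        self.min_derror_stop = min_derror_stop
        self.prev_error = init_error
        self.epoch = 1
        self.ramping = False

    def get_rate(self):
        return self.rate

    def get_next_rate(self, current_error):
        improvement = self.prev_error - current_error
        self.prev_error = current_error
        self.epoch += 1
        if self.epoch > self.max_epochs:
            self.rate = 0.0
        elif self.ramping:
            # once ramping, stop when the gain is too small, otherwise keep halving
            self.rate = 0.0 if improvement < self.min_derror_stop else self.rate * self.scale_by
        elif improvement < self.min_derror_ramp_start:
            # improvement has levelled off: start halving the rate
            self.ramping = True
            self.rate *= self.scale_by
        return self.rate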
Example #2
def train_mlpclasses(
    path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/',
    n_hidden=int(sys.argv[1]),
    n_classes=int(sys.argv[2])):

    voc_list = Vocabulary(path_name + 'train', n_classes)
    voc_list.vocab_create()
    voc_list.class_label()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    classes = voc_list.classes

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size,
                                      classes)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size,
                                      classes)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size,
                                     classes)

    print '..building the model'
    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y_class = T.ivector('y_class')
    y_word = T.ivector('y_word')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_y_class = theano.shared(numpy.empty((1), dtype='int32'),
                                      allow_downcast=True)
    train_set_y_word = theano.shared(numpy.empty((1), dtype='int32'),
                                     allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_y_class = theano.shared(numpy.empty((1), dtype='int32'),
                                      allow_downcast=True)
    valid_set_y_word = theano.shared(numpy.empty((1), dtype='int32'),
                                     allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_y_class = theano.shared(numpy.empty((1), dtype='int32'),
                                     allow_downcast=True)
    test_set_y_word = theano.shared(numpy.empty((1), dtype='int32'),
                                    allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLPClasses(rng=rng,
                            input1=x1,
                            input2=x2,
                            n_in=vocab_size,
                            fea_dim=50,
                            context_size=2,
                            n_hidden=n_hidden,
                            classes=classes)

    classcost = classifier.Classcost(y_class)
    wordcost = classifier.Wordcost(y_word)

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[3]), scale_by = .5, max_epochs = 9999,\
                                    min_derror_ramp_start = .01, min_derror_stop = .01, init_error = 100.)

    class_log_likelihood = classifier.Classsum(y_class)
    word_log_likelihood = classifier.Wordsum(y_word)

    #test_model
    test_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           y_class: test_set_y_class,
                                           y_word: test_set_y_word})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               y_class: valid_set_y_class,
                                               y_word: valid_set_y_word})

    gradient_wordparam = []
    gradient_classparam = []
    gradient_param = []
    #calculates the gradient of cost with respect to parameters

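    # gradients of the class cost and the word cost are collected separately; the trailing
    # entries are summed element-wise below so the corresponding parameters in classifier.params
    # are updated with contributions from both objectives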
    for param, i in zip(classifier.Classparams,
                        xrange(len(classifier.Classparams))):
        if i <= 1:
            gradient_param.append(T.grad(classcost, param))
        else:
            gradient_classparam.append(T.grad(classcost, param))

    for param, i in zip(classifier.Wordparams,
                        xrange(len(classifier.Wordparams))):
        if i <= 1:
            gradient_param.append(T.grad(wordcost, param))
        else:
            gradient_wordparam.append(T.grad(wordcost, param))

    for i in xrange(len(gradient_wordparam)):
        gradient_param.append(gradient_classparam[i] + gradient_wordparam[i])

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [classcost, wordcost, classifier.WordoutputLayer.W, classifier.WordoutputLayer.b, class_log_likelihood, word_log_likelihood],\
                                 updates = updates,
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           y_class: train_set_y_class,
                                           y_word: train_set_y_word})
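    # class-factorised output layer: one (W, b) pair per word class is kept in host dictionaries
    # and swapped into the shared WordoutputLayer parameters for the class of the current target
    # word, then written back from the training function's outputs after each update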
    w_dict, b_dict = {}, {}

    for i in xrange(n_classes):
        W_values = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (n_hidden + len(classes[i]))),
            high=numpy.sqrt(6. / (n_hidden + len(classes[i]))),
            size=(n_hidden, len(classes[i]))),
                                 dtype='float32')
        w_dict[i] = W_values

        b_values = numpy.zeros((len(classes[i]), ), dtype='float32')
        b_dict[i] = b_values

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        train_loglikelihood = []
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_y_class.set_value(numpy.asarray([labels[i][1]],
                                                          dtype='int32'),
                                            borrow=True)
                train_set_y_word.set_value(numpy.asarray([labels[i][0]],
                                                         dtype='int32'),
                                           borrow=True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(
                    w_dict[labels[i][1]], dtype='float32'),
                                                       borrow=True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(
                    b_dict[labels[i][1]], dtype='float32'),
                                                       borrow=True)
                out = train_model(
                    numpy.asarray(learnrate_schedular.get_rate(),
                                  dtype='float32'))
                w_dict[labels[i][1]], b_dict[labels[i][1]] = out[2], out[3]
                train_loglikelihood.append(out[4] + out[5])

                #print out[4] + out[5]
            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1, ), dtype='float32'))
            train_set_y_class.set_value(numpy.empty((1), dtype='int32'))
            train_set_y_word.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        print numpy.sum(train_loglikelihood)

        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # this also marks the end of training time for the epoch
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_y_class.set_value(numpy.asarray([labels[i][1]],
                                                          dtype='int32'),
                                            borrow=True)
                valid_set_y_word.set_value(numpy.asarray([labels[i][0]],
                                                         dtype='int32'),
                                           borrow=True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(
                    w_dict[labels[i][1]], dtype='float32'),
                                                       borrow=True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(
                    b_dict[labels[i][1]], dtype='float32'),
                                                       borrow=True)
                out = validate_model()
                log_likelihood.append(sum(out))
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y_class.set_value(numpy.empty((1), 'int32'))
            valid_set_y_word.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # this also marks the end of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2,
                                                dtype='float32'),
                                  borrow=True)
            test_set_y_class.set_value(numpy.asarray([labels[i][1]],
                                                     dtype='int32'),
                                       borrow=True)
            test_set_y_word.set_value(numpy.asarray([labels[i][0]],
                                                    dtype='int32'),
                                      borrow=True)
            classifier.WordoutputLayer.W.set_value(numpy.asarray(
                w_dict[labels[i][1]], dtype='float32'),
                                                   borrow=True)
            classifier.WordoutputLayer.b.set_value(numpy.asarray(
                b_dict[labels[i][1]], dtype='float32'),
                                                   borrow=True)
            out = test_model()
            log_likelihood.append(sum(out))
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood) / test_frames_showed)
    print 'entropy:', likelihood_sum
Example #3
def train_mlp(feature_dimension, context, hidden_size, weight_path, file_name1, file_name2, file_name3, L1_reg = 0.0, L2_reg = 0.0000, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):
    

    #voc_list = Vocabulary(path_name + 'train_modified1')
    #voc_list.vocab_create()
    #vocab = voc_list.vocab
    #vocab_size = voc_list.vocab_size
    #short_list = voc_list.short_list
    #short_list_size = voc_list.short_list_size
    #path = '/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp'
    voc_list = Vocabularyhash('/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp')
    voc_list.hash_create()
    vocab = voc_list.voc_hash
    vocab_size = voc_list.vocab_size
    
    #dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, short_list )
    #dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, short_list )
    #dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size , short_list)
    
    dataprovider_train = DataProvider(path_name + 'train_modified1_20m', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid_modified1', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test_modified1', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    rng = numpy.random.RandomState() 
   
    classifier = MLP(rng = rng, input1 = x1, input2 = x2,  n_in = vocab_size, fea_dim = int(feature_dimension), context_size = int(context), n_hidden =int(hidden_size), n_out = vocab_size)
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    frame_error = classifier.errors(y)
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [frame_error, log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))
    
    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           y: train_set_y})


    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()		
        print 'epoch_number:', learnrate_schedular.epoch        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        
        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype = 'float32'))
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_x2.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        save_mlp(classifier, weight_path+file_name1 , classifier_name)
        save_learningrate(learnrate_schedular.get_rate(), weight_path+file_name3, classifier_name)
    
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # this also marks the end of training time for the epoch
        dataprovider_valid.reset()
        
        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = validate_model()
                error_rate = out[0]
                likelihoods = out[1] 
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
        this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print this_validation_loss, entropy, numpy.sum(log_likelihood)
        
        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)
    
    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # this also marks the end of training time
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood)/test_frames_showed)
    print 'entropy:', likelihood_sum
Example #4
def train_mlp(L1_reg = 0.0, L2_reg = 0.0000, num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'):
    

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    
    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    count = voc_list_valid.count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    no_test_tokens = voc_list_test.count
    print 'The number of sentences in the test set:', no_test_tokens
 
    #print 'number of words in valid data:', count 
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size )
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )

    #learn_list = [0.1, 0.1, 0.1, 0.75, 0.5, 0.25, 0.125, 0.0625, 0]
    exp_name = 'fine_tuning.hdf5'
    posterior_path = 'log_likelihoods'
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fmatrix('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    rng = numpy.random.RandomState(1234) 
   
    classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden1 = 30, n_hidden2 = 60 , n_out = vocab_size)
    #classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden = 60, n_out = vocab_size)

    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.001, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.1, min_derror_stop=.1, init_error=100.)

    #learnrate_schedular = LearningRateList(learn_list)

    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)

    #test model
    test_model = theano.function(inputs = [index], outputs = likelihood,  \
                                 givens = {x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                           y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    #validation_model
    validate_model = theano.function(inputs = [index], outputs = [frame_error, likelihood], \
                                     givens = {x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                               y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))
    
    #training_model
    train_model = theano.function(inputs = [index, theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
                                 givens = {x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                           y: train_set_y[index * batch_size:(index + 1) * batch_size]})
   

    #theano.printing.pydotprint(train_model, outfile = "pics/train.png", var_with_name_simple = True) 
    #path_save = '/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/mlp/saved_weights/' 
    print '.....training'    
    best_valid_loss = numpy.inf    
    epoch = 1
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
	
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        
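        # a background cache is started that streams (features, labels) bunches through a queue;
        # the loop below drains it until a TNetsCacheLastElem sentinel arrives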
        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, shuffle_frames = True, offset=0, \
                                 batch_size = batch_size, num_batches_per_bunch = num_batches_per_bunch) 
        cache.data_provider = dataprovider_train
        cache.start()
        
        train_cost = []
        while True:

            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break
                
            features, labels = feats_lab_tuple                  
            train_set_x.set_value(features, borrow=True)
            train_set_y.set_value(numpy.asarray(labels.flatten(), dtype = 'int32'), borrow=True)
            
            frames_showed += features.shape[0]
            train_batches = features.shape[0]/batch_size
            #print train_batches
                #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
            if(features.shape[0] % batch_size!=0 or features.shape[0] < batch_size): 
                train_batches += 1
            
            for i in xrange(train_batches):
                #train_cost.append(train_model(i, learnrate_schedular.get_rate()))
                train_model(i, learnrate_schedular.get_rate())               
            progress += 1
            if progress%10==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        train_set_x.set_value(numpy.empty((1,1), dtype = 'float32'))
        train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        classifier_name = 'MLP' + str(learnrate_schedular.epoch)

        save_mlp(classifier, GlobalCfg.get_working_dir()+exp_name, classifier_name)
                  
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # this also marks the end of training time for the epoch
        #for feat_lab_tuple, path in HDFDatasetDataProviderUtt(devel_files_list, valid_dataset, randomize=False, max_utt=-10):  
        #    features, labels = feat_lab_tuple 
            
        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, offset = 0, num_batches_per_bunch = 16)

        #cache.deamon = True
        cache.data_provider = dataprovider_valid
        cache.start()
        
        #ex_num = 0
        
        while True:
                
            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break
                    
            features, labels = feats_lab_tuple

            valid_frames_showed += features.shape[0]                
            valid_set_x.set_value(features, borrow=True)
            valid_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'), borrow=True)
            
            valid_batches = features.shape[0] / batch_size
            #print valid_batches
            #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
            if(features.shape[0] % batch_size!=0 or features.shape[0] < batch_size): 
                valid_batches += 1
          
            for i in xrange(valid_batches):
                #ex_num = ex_num + 1
                out = validate_model(i)
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
                #save_posteriors(likelihoods, GlobalCfg.get_working_dir() + posterior_path, str(ex_num), str(learnrate_schedular.epoch))
                
	    
            progress += 1
            if progress%10==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
        valid_set_x.set_value(numpy.empty((1,1), 'float32'))
        valid_set_y.set_value(numpy.empty((1), 'int32'))
            
            
        end_epoch_time = time.time()
        print 'time taken for this epoch in seconds: %f' %(end_epoch_time - start_epoch_time)
            
        this_validation_loss = numpy.mean(valid_losses)
        loglikelihood_sum = numpy.sum(log_likelihood)
        #ppl = math.exp(- loglikelihood_sum /count)
        #print 'ppl:', ppl
        print 'error_rate:', this_validation_loss
        print 'valid log likelihood:', loglikelihood_sum
        #print 'mean log_probability', this_validation_loss
        #learnrate_schedular.get_next_rate(this_validation_loss * 100.)
        #learnrate_schedular.get_next_rate()
        #print 'epoch_number:', learnrate_schedular.epoch
                
            # logger.info('Epoch %i (lr: %f) took %f min (SPEED [presentations/second] training %f, cv %f), cv error %f %%' % \
            #         (self.cfg.finetune_scheduler.epoch-1, self.cfg.finetune_scheduler.get_rate(), \
            #          ((end_epoch_time-start_epoch_time)/60.0), (frames_showed/(start_valid_time-start_epoch_time)), \
            #          (valid_frames_showed/(stop_valid_time-start_valid_time)), this_validation_loss*100.))

            #self.cfg.finetune_scheduler.get_next_rate(this_validation_loss*100.)
        if this_validation_loss < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(this_validation_loss * 100.)
            best_valid_loss = this_validation_loss
            #best_epoch = learnrate_schedular.epoch-1
        else:
            #learnrate_schedular.epoch = learnrate_schedular.epoch + 1
            learnrate_schedular.rate = 0.0
    
    end_time = time.time()
        
    #print 'Optimization complete with best validation score of %f %%' %  best_valid_loss * 100.
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood_test = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # this also marks the end of training time
        #for feat_lab_tuple, path in HDFDatasetDataProviderUtt(devel_files_list, valid_dataset, randomize=False, max_utt=-10):  
        #    features, labels = feat_lab_tuple 
            
    tqueue = TNetsCacheSimple.make_queue()
    cache = TNetsCacheSimple(tqueue, offset = 0, num_batches_per_bunch = 16)

    #cache.deamon = True
    cache.data_provider = dataprovider_test
    cache.start()
        
        #ex_num = 0
        
    while True:
                
        feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
        if isinstance(feats_lab_tuple, TNetsCacheLastElem):
            break
                    
        features, labels = feats_lab_tuple

        test_frames_showed += features.shape[0]                
        test_set_x.set_value(features, borrow=True)
        test_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'), borrow=True)
            
        test_batches = features.shape[0] / batch_size
            #print valid_batches
            #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
        if(features.shape[0] % batch_size!=0 or features.shape[0] < batch_size): 
           test_batches += 1
          
        for i in xrange(test_batches): 
            log_likelihood_test.append(test_model(i))
	    
        progress += 1
        if progress%10==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
        
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    test_set_x.set_value(numpy.empty((1,1), 'float32'))
    test_set_y.set_value(numpy.empty((1), 'int32'))
 
    likelihood_sum = numpy.sum(log_likelihood_test)
    print 'likelihood_sum', likelihood_sum
Example #5
def train_mlp(
        L1_reg=0.0,
        L2_reg=0.0000,
        num_batches_per_bunch=512,
        batch_size=1,
        num_bunches_queue=5,
        offset=0,
        path_name='/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'
):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    valid_words_count = voc_list_valid.count
    #print valid_words_count
    valid_lines_count = voc_list_valid.line_count
    #print valid_lines_count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    test_words_count = voc_list_test.count
    #print test_words_count
    test_lines_count = voc_list_test.line_count
    #print test_lines_count

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    #exp_name = 'fine_tuning.hdf5'

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                               allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=vocab_size,
                     fea_dim=30,
                     context_size=2,
                     n_hidden=60,
                     n_out=vocab_size)

    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    #learnrate_schedular = LearningRateList(learn_list)

    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = likelihood,  \
                                 givens = {x: test_set_x,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [frame_error, likelihood], \
                                     givens = {x: valid_set_x,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
                                 givens = {x: train_set_x,
                                           y: train_set_y})

    training(dataprovider_train, dataprovider_valid, learnrate_schedular,
             classifier, train_model, validate_model, train_set_x, train_set_y,
             valid_set_x, valid_set_y, batch_size, num_batches_per_bunch,
             valid_words_count, valid_lines_count)
    testing(dataprovider_test, classifier, test_model, test_set_x, test_set_y,
            test_words_count, test_lines_count)
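The example above delegates the epoch loop to the training() and testing() helpers, so LearningRateNewBob is only constructed here and its implementation is not shown in this listing. The following is a rough, hypothetical reconstruction of the usual "new-bob" behaviour those helpers appear to rely on: halve the rate once the validation improvement drops below a ramp threshold, and stop once it drops below a stop threshold.

class NewBobSketch(object):
    # rough stand-in for LearningRateNewBob; not the real class
    def __init__(self, start_rate=0.005, scale_by=0.5,
                 min_derror_ramp_start=0.01, min_derror_stop=0.01,
                 init_error=100.):
        self.rate = start_rate
        self.scale_by = scale_by
        self.ramp_start = min_derror_ramp_start
        self.stop = min_derror_stop
        self.prev_error = init_error
        self.ramping = False
        self.epoch = 1

    def get_rate(self):
        return self.rate

    def get_next_rate(self, current_error):
        improvement = self.prev_error - current_error
        if self.ramping:
            # once ramping has started, keep halving; give up when the
            # improvement falls below the stop threshold
            if improvement < self.stop:
                self.rate = 0.0
            else:
                self.rate *= self.scale_by
        elif improvement < self.ramp_start:
            # improvement stalled for the first time: start halving
            self.rate *= self.scale_by
            self.ramping = True
        self.prev_error = current_error
        self.epoch += 1
        return self.rate

The training loops in the other examples drive such a scheduler until get_rate() returns 0.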
Example #6
0
def train_mlp(
        L1_reg=0.0,
        L2_reg=0.0000,
        num_batches_per_bunch=512,
        batch_size=1,
        num_bunches_queue=5,
        offset=0,
        path_name='/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'
):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    count = voc_list_valid.count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    no_test_tokens = voc_list_test.count
    print 'The number of tokens in the test set:', no_test_tokens

    #print 'number of words in valid data:', count
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    #learn_list = [0.1, 0.1, 0.1, 0.75, 0.5, 0.25, 0.125, 0.0625, 0]
    exp_name = 'fine_tuning.hdf5'
    posterior_path = 'log_likelihoods'
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fmatrix('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'),
                                allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'),
                                allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'),
                               allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=vocab_size,
                     n_hidden1=30,
                     n_hidden2=60,
                     n_out=vocab_size)
    #classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden = 60, n_out = vocab_size)

    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.001, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.1, min_derror_stop=.1, init_error=100.)

    #learnrate_schedular = LearningRateList(learn_list)

    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)

    #test model
    test_model = theano.function(inputs = [index], outputs = likelihood,  \
                                 givens = {x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                           y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    #validation_model
    validate_model = theano.function(inputs = [index], outputs = [frame_error, likelihood], \
                                     givens = {x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                               y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []

    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [index, theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
                                 givens = {x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                           y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    #theano.printing.pydotprint(train_model, outfile = "pics/train.png", var_with_name_simple = True)
    #path_save = '/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/mlp/saved_weights/'
    print '.....training'
    best_valid_loss = numpy.inf
    epoch = 1
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()

        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, shuffle_frames = True, offset=0, \
                                 batch_size = batch_size, num_batches_per_bunch = num_batches_per_bunch)
        cache.data_provider = dataprovider_train
        cache.start()

        train_cost = []
        while True:

            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break

            features, labels = feats_lab_tuple
            train_set_x.set_value(features, borrow=True)
            train_set_y.set_value(numpy.asarray(labels.flatten(),
                                                dtype='int32'),
                                  borrow=True)

            frames_showed += features.shape[0]
            train_batches = features.shape[0] / batch_size
            #print train_batches
            #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
            if (features.shape[0] % batch_size != 0
                    or features.shape[0] < batch_size):
                train_batches += 1

            for i in xrange(train_batches):
                #train_cost.append(train_model(i, learnrate_schedular.get_rate()))
                train_model(i, learnrate_schedular.get_rate())
            progress += 1
            if progress % 10 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                                 %(progress, frames_showed,(end_time_progress-start_epoch_time))
        train_set_x.set_value(numpy.empty((1, 1), dtype='float32'))
        train_set_y.set_value(numpy.empty((1), dtype='int32'))
        classifier_name = 'MLP' + str(learnrate_schedular.epoch)

        save_mlp(classifier,
                 GlobalCfg.get_working_dir() + exp_name, classifier_name)

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time
        #for feat_lab_tuple, path in HDFDatasetDataProviderUtt(devel_files_list, valid_dataset, randomize=False, max_utt=-10):
        #    features, labels = feat_lab_tuple

        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, offset=0, num_batches_per_bunch=16)

        #cache.deamon = True
        cache.data_provider = dataprovider_valid
        cache.start()

        #ex_num = 0

        while True:

            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break

            features, labels = feats_lab_tuple

            valid_frames_showed += features.shape[0]
            valid_set_x.set_value(features, borrow=True)
            valid_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'),
                                  borrow=True)

            valid_batches = features.shape[0] / batch_size
            #print valid_batches
            #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
            if (features.shape[0] % batch_size != 0
                    or features.shape[0] < batch_size):
                valid_batches += 1

            for i in xrange(valid_batches):
                #ex_num = ex_num + 1
                out = validate_model(i)
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
                #save_posteriors(likelihoods, GlobalCfg.get_working_dir() + posterior_path, str(ex_num), str(learnrate_schedular.epoch))

            progress += 1
            if progress % 10 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        valid_set_x.set_value(numpy.empty((1, 1), 'float32'))
        valid_set_y.set_value(numpy.empty((1), 'int32'))

        end_epoch_time = time.time()
        print 'time taken for this epoch in seconds: %f' % (end_epoch_time -
                                                            start_epoch_time)

        this_validation_loss = numpy.mean(valid_losses)
        loglikelihood_sum = numpy.sum(log_likelihood)
        #ppl = math.exp(- loglikelihood_sum /count)
        #print 'ppl:', ppl
        print 'error_rate:', this_validation_loss
        print 'valid log likelihood:', loglikelihood_sum
        #print 'mean log_probability', this_validation_loss
        #learnrate_schedular.get_next_rate(this_validation_loss * 100.)
        #learnrate_schedular.get_next_rate()
        #print 'epoch_number:', learnrate_schedular.epoch

        # logger.info('Epoch %i (lr: %f) took %f min (SPEED [presentations/second] training %f, cv %f), cv error %f %%' % \
        #         (self.cfg.finetune_scheduler.epoch-1, self.cfg.finetune_scheduler.get_rate(), \
        #          ((end_epoch_time-start_epoch_time)/60.0), (frames_showed/(start_valid_time-start_epoch_time)), \
        #          (valid_frames_showed/(stop_valid_time-start_valid_time)), this_validation_loss*100.))

        #self.cfg.finetune_scheduler.get_next_rate(this_validation_loss*100.)
        if this_validation_loss < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(
                this_validation_loss * 100.)
            best_valid_loss = this_validation_loss
            #best_epoch = learnrate_schedular.epoch-1
        else:
            #learnrate_schedular.epoch = learnrate_schedular.epoch + 1
            learnrate_schedular.rate = 0.0

    end_time = time.time()

    #print 'Optimization complete with best validation score of %f %%' %  best_valid_loss * 100.
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood_test = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time
    #for feat_lab_tuple, path in HDFDatasetDataProviderUtt(devel_files_list, valid_dataset, randomize=False, max_utt=-10):
    #    features, labels = feat_lab_tuple

    tqueue = TNetsCacheSimple.make_queue()
    cache = TNetsCacheSimple(tqueue, offset=0, num_batches_per_bunch=16)

    #cache.deamon = True
    cache.data_provider = dataprovider_test
    cache.start()

    #ex_num = 0

    while True:

        feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
        if isinstance(feats_lab_tuple, TNetsCacheLastElem):
            break

        features, labels = feats_lab_tuple

        test_frames_showed += features.shape[0]
        test_set_x.set_value(features, borrow=True)
        test_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'),
                             borrow=True)

        test_batches = features.shape[0] / batch_size
        #print test_batches
        #if there is any part left in utterance (smaller than a batch_size), take it into account at the end
        if (features.shape[0] % batch_size != 0
                or features.shape[0] < batch_size):
            test_batches += 1

        for i in xrange(test_batches):
            log_likelihood_test.append(test_model(i))

        progress += 1
        if progress % 10 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    test_set_x.set_value(numpy.empty((1, 1), 'float32'))
    test_set_y.set_value(numpy.empty((1), 'int32'))

    likelihood_sum = numpy.sum(log_likelihood_test)
    print 'likelihood_sum', likelihood_sum
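A small detail worth isolating from the bunch loops above: the number of minibatches per bunch is the integer division of the frame count by batch_size, plus one extra batch for any leftover frames. A standalone sketch of that arithmetic follows; num_batches is a hypothetical helper, not in the original code.

def num_batches(num_frames, batch_size):
    # full batches from integer division
    batches = num_frames // batch_size
    # a leftover partial batch (or an utterance shorter than one batch)
    # gets counted as one extra batch, mirroring the loops above
    if num_frames % batch_size != 0 or num_frames < batch_size:
        batches += 1
    return batches

For example, num_batches(10, 3) returns 4: three full batches of 3 frames and one final batch with the remaining frame.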
Example #7
0
def train_mlpclasses(path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/', n_hidden = int(sys.argv[1]), n_classes = int(sys.argv[2])): 

    voc_list = Vocabulary(path_name + 'train', n_classes)
    voc_list.vocab_create()
    voc_list.class_label()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    classes = voc_list.classes
    
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, classes)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, classes)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size, classes)
    
    print '..building the model'
    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y_class = T.ivector('y_class')
    y_word = T.ivector('y_word')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    train_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    valid_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    test_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    rng = numpy.random.RandomState()
 
    classifier = MLPClasses(rng = rng, input1 = x1, input2 = x2, n_in = vocab_size, fea_dim = 50, context_size = 2, n_hidden = n_hidden, classes = classes)
    
    classcost = classifier.Classcost(y_class)
    wordcost = classifier.Wordcost(y_word)
        
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[3]), scale_by = .5, max_epochs = 9999,\
                                    min_derror_ramp_start = .01, min_derror_stop = .01, init_error = 100.)

    class_log_likelihood = classifier.Classsum(y_class)
    word_log_likelihood = classifier.Wordsum(y_word)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           y_class: test_set_y_class,
                                           y_word: test_set_y_word})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               y_class: valid_set_y_class,
                                               y_word: valid_set_y_word})

    gradient_wordparam = []
    gradient_classparam = []
    gradient_param = []
    #calculates the gradient of cost with respect to parameters 

    for param, i in zip(classifier.Classparams, xrange(len(classifier.Classparams))):
        if i <= 1:
            gradient_param.append(T.grad(classcost, param))
        else:
            gradient_classparam.append(T.grad(classcost, param))

    for param, i in zip(classifier.Wordparams, xrange(len(classifier.Wordparams))):
        if i <= 1:
            gradient_param.append(T.grad(wordcost, param))
        else:
            gradient_wordparam.append(T.grad(wordcost, param))
                
    for  i in xrange(len(gradient_wordparam)):
        gradient_param.append(gradient_classparam[i] + gradient_wordparam[i])
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [classcost, wordcost, classifier.WordoutputLayer.W, classifier.WordoutputLayer.b, class_log_likelihood, word_log_likelihood],\
                                 updates = updates,
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           y_class: train_set_y_class,
                                           y_word: train_set_y_word})
    w_dict, b_dict = {}, {}
    
    for i in xrange(n_classes):
        W_values = numpy.asarray( rng.uniform(
                low  = - numpy.sqrt(6./(n_hidden + len(classes[i]))),
                high = numpy.sqrt(6./(n_hidden + len(classes[i]))),
                size = (n_hidden, len(classes[i]))), dtype = 'float32')
        w_dict[i] = W_values
        
        b_values = numpy.zeros((len(classes[i]), ), dtype = 'float32')
        b_dict[i]= b_values
     
    print '.....training'
    best_valid_loss = numpy.inf    
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch    
        train_loglikelihood = []    
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
 
        for feats_lab_tuple in dataprovider_train:
    
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')                
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                train_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
                train_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))
                w_dict[labels[i][1]],  b_dict[labels[i][1]] = out[2], out[3]
                train_loglikelihood.append(out[4]+out[5])
		#print out[4] + out[5]
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_x2.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y_class.set_value(numpy.empty((1), dtype = 'int32'))
            train_set_y_word.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        print numpy.sum(train_loglikelihood)

        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()
        
        for feats_lab_tuple in dataprovider_valid:            
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                valid_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
                valid_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                out = validate_model()
                log_likelihood.append(sum(out))
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y_class.set_value(numpy.empty((1), 'int32'))
            valid_set_y_word.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)
        
        if entropy < best_valid_loss:
           learning_rate = learnrate_schedular.get_next_rate(entropy)
           best_valid_loss = entropy
        else:
           learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)
    
    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
            test_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
            test_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
            classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
            classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
            out = test_model()
            log_likelihood.append(sum(out))
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood)/test_frames_showed)
    print 'entropy:', likelihood_sum
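The example above factors the output distribution by word class: the shared class layer predicts a class, and a per-class weight matrix (swapped in from w_dict/b_dict before every update) predicts the word within that class, so each per-class matrix only covers the words of one class rather than the full vocabulary. Below is a hedged numpy sketch of the factorisation p(word | history) = p(class | history) * p(word | class, history); all names are hypothetical and the shapes are assumptions.

import numpy

def softmax(z):
    # numerically stable softmax over a 1-d array
    z = z - numpy.max(z)
    e = numpy.exp(z)
    return e / numpy.sum(e)

def word_probability(hidden, class_W, class_b, word_W, word_b,
                     class_id, word_id_within_class):
    # p(class | history): softmax over all classes
    p_class = softmax(numpy.dot(hidden, class_W) + class_b)[class_id]
    # p(word | class, history): softmax over only the words of that class,
    # using the per-class output weights (the w_dict/b_dict entries above)
    p_word = softmax(numpy.dot(hidden, word_W) + word_b)[word_id_within_class]
    return p_class * p_word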
Example #8
0
def train_mlprnn(weight_path = sys.argv[1], file_name1 = sys.argv[2], L1_reg = 0.0, L2_reg = 0.0000, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'): 

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )
    
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    rng = numpy.random.RandomState() 
   
    classifier = MLP_RNN(rng = rng, input1 = x1, input2 = x2,  input3 = x3, initial_hidden = ht1, n_in = vocab_size, fea_dim = int(sys.argv[3]), context_size = 2, n_hidden = int(sys.argv[4]) , n_out = vocab_size)
    
    hidden_state = theano.shared(numpy.empty((int(sys.argv[4]), ), dtype = 'float32')) 
    
    cost = classifier.cost(y)
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = 0.05, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           x3: test_set_x3,
                                           ht1: hidden_state,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               x3: valid_set_x3,
                                               ht1: hidden_state,        
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))
    
    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.RNNhiddenlayer.output], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           x3: train_set_x3,
                                           ht1: hidden_state,
                                           y: train_set_y})
    f = h5py.File(weight_path+file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP'+ str(2) + '/layer' + str(i/2)
        if i == 4:
           classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified + "/W"].value, dtype = 'float32'), borrow = True)
        else:
           classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified + "/W"].value, dtype = 'float32'), borrow = True)
           classifier.MLPparams[i + 1].set_value(numpy.asarray(f[path_modified + "/b"].value, dtype = 'float32'), borrow = True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        
        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                train_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype = 'float32'))
                hidden_state.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
                
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_x2.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_x3.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
                          
                          
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:            
            features, labels = feats_lab_tuple            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                valid_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = validate_model()
                #error_rate = out[0]
                likelihoods = out[0] 
                #valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
        #this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)
        
        
        if entropy < best_valid_loss:
           learning_rate = learnrate_schedular.get_next_rate(entropy)
           best_valid_loss = entropy
        else:
           learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
            test_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    print numpy.sum(log_likelihood)
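The MLP_RNN example keeps its recurrence outside the Theano graph: after every word it copies classifier.RNNhiddenlayer.output into the shared hidden_state variable, which is fed back in as ht1 on the next call. A minimal numpy sketch of the same idea follows, with hypothetical weight names and a sigmoid Elman-style step; the actual activation used by RNNhiddenlayer is not visible in this listing, so the sigmoid is only an assumption.

import numpy

def sigmoid(z):
    return 1.0 / (1.0 + numpy.exp(-z))

def rnn_step(x_t, h_prev, W_in, W_rec, b):
    # one recurrent step; the caller keeps h and passes it back in,
    # just as the loops above push the previous output into hidden_state
    return sigmoid(numpy.dot(x_t, W_in) + numpy.dot(h_prev, W_rec) + b)

# usage sketch:
#   h = numpy.zeros(n_hidden, dtype='float32')
#   for x_t in one_hot_words:
#       h = rnn_step(x_t, h, W_in, W_rec, b)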
Example #9
0
def train_mlp(feature_dimension,
              context,
              hidden_size,
              weight_path,
              file_name1,
              file_name2,
              file_name3,
              L1_reg=0.0,
              L2_reg=0.0000,
              path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    #voc_list = Vocabulary(path_name + 'train_modified1')
    #voc_list.vocab_create()
    #vocab = voc_list.vocab
    #vocab_size = voc_list.vocab_size
    #short_list = voc_list.short_list
    #short_list_size = voc_list.short_list_size
    #path = '/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp'
    voc_list = Vocabularyhash(
        '/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp'
    )
    voc_list.hash_create()
    vocab = voc_list.voc_hash
    vocab_size = voc_list.vocab_size

    #dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, short_list )
    #dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, short_list )
    #dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size , short_list)

    dataprovider_train = DataProvider(path_name + 'train_modified1_20m', vocab,
                                      vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid_modified1', vocab,
                                      vocab_size)
    dataprovider_test = DataProvider(path_name + 'test_modified1', vocab,
                                     vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP(rng=rng,
                     input1=x1,
                     input2=x2,
                     n_in=vocab_size,
                     fea_dim=int(feature_dimension),
                     context_size=int(context),
                     n_hidden=int(hidden_size),
                     n_out=vocab_size)
    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    frame_error = classifier.errors(y)
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [frame_error, log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           y: train_set_y})

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = train_model(
                    numpy.array(learnrate_schedular.get_rate(),
                                dtype='float32'))
            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        save_mlp(classifier, weight_path + file_name1, classifier_name)
        save_learningrate(learnrate_schedular.get_rate(),
                          weight_path + file_name3, classifier_name)

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = validate_model()
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print this_validation_loss, entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2,
                                                dtype='float32'),
                                  borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'),
                                 borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    save_posteriors(log_likelihood, likelihoods, weight_path + file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood) / test_frames_showed)
    print 'entropy:', likelihood_sum
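All of these examples expand word indices into one-hot vectors (temp_features1, temp_features2, ...) before loading them into the shared variables. Here is a compact numpy sketch of that encoding; one_hot_context is a hypothetical helper, not part of the original code.

import numpy

def one_hot_context(word_indices, vocab_size):
    # one one-hot row per context position, matching the temp_featuresN
    # vectors built inside the training/validation/test loops above
    features = numpy.zeros((len(word_indices), vocab_size), dtype='float32')
    for position, word_index in enumerate(word_indices):
        features[position, word_index] = 1.0
    return features

# usage sketch: x1, x2 = one_hot_context([temp[0], temp[1]], vocab_size)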
Example #10
0
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):
    

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
     
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )
    
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    h0 = T.fvector('h0')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    
    rng = numpy.random.RandomState()
   
    classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size, n_hidden = int(sys.argv[1]), n_out = vocab_size)
    
    cost = classifier.negative_log_likelihood(y)

    ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32')
    
    ht1 = theano.shared(value = ht1_values, name = 'hidden_state')
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x: test_set_x1,
                                           y: test_set_y,
                                           h0: ht1})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x: valid_set_x1,
                                               y: valid_set_y,
                                               h0: ht1})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32')))
    
    #hidden_output = classifier.inputlayer.output
    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output], updates = updates, \
                                 givens = {x: train_set_x1,
                                           y: train_set_y,
                                           h0:ht1})

    print '.....training'
    best_valid_loss = numpy.inf    
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
 
        for feats_lab_tuple in dataprovider_train:
    
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
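                # one-hot encode the current word, run one SGD step, then store the
                # returned hidden activation in ht1 for the next step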
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))       
                ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
	
        #classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        #save_mlp(classifier, path+exp_name1 , classifier_name)
    
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # also marks the end of this epoch's training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:            
            features, labels = feats_lab_tuple            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
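                # same one-hot encoding as in training, but only the per-word
                # log-likelihood is accumulated (no parameter updates)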
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                log_likelihood.append(validate_model())
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
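        # per-frame average negative log-likelihood, used as the validation error
        # for the NewBob learning-rate schedule below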
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print 'validation entropy:', entropy, 'log-likelihood sum:', numpy.sum(log_likelihood)
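        # NewBob-style control: while validation entropy keeps improving, ask the
        # scheduler for the next rate; otherwise force the rate to zero, which
        # terminates the outer training loop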

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # also marks the end of the training phase
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
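            # accumulate per-word log-likelihoods and likelihoods on the test set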
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print 'test log-likelihood sum:', numpy.sum(log_likelihood)
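    # Note (assumptions, not in the original): this example reads sys.argv[1] as the
    # hidden-layer size and sys.argv[2] as the initial learning rate, e.g. a call
    # such as `python train_rnn.py 200 0.05` if the script were named train_rnn.py.
    # A per-frame test entropy, analogous to the previous example, could be computed
    # with: entropy = -numpy.sum(log_likelihood) / test_frames_showed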