Example #1
# assumes DatasetABIterator, BATCH_SIZE, DIM_EMBEDDING and the x1_*/x2_*/y_*
# arrays are provided by the surrounding script
import numpy as np

train_it = DatasetABIterator(x1_train,
                             x2_train,
                             y_train,
                             batch_size=BATCH_SIZE)
dev_it = DatasetABIterator(x1_dev, x2_dev, y_dev, batch_size=BATCH_SIZE)
test_it = DatasetABIterator(x1_test, x2_test, y_test, batch_size=BATCH_SIZE)
numpy_rng = np.random.RandomState(123)
from nnet_archs import ABNeuralNet, DropoutABNeuralNet
from layers import ReLU, SigmoidLayer, Linear
nnet = DropoutABNeuralNet(
    numpy_rng=numpy_rng,
    n_ins=x1_train.shape[1],
    #layers_types=[SigmoidLayer],
    #layers_sizes=[],
    #layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer],
    #layers_sizes=[500, 500],
    layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer],
    layers_sizes=[1000, 1000, 1000],
    dropout_rates=[0.2, 0.5, 0.5, 0.5],
    #layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer],
    #layers_sizes=[500, 500, 500, 500],
    n_outs=DIM_EMBEDDING,
    loss='cos_cos2',
    rho=0.90,
    eps=1.E-5,
    max_norm=4.,
    debugprint=1)
print nnet

train_fn = nnet.get_adadelta_trainer(debug=True)
#train_fn = nnet.get_SGD_trainer(debug=True)
train_scoref = nnet.score_classif_same_diff_separated(train_it)
valid_scoref = nnet.score_classif_same_diff_separated(dev_it)
test_scoref = nnet.score_classif_same_diff_separated(test_it)
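
A minimal epoch loop to go with the trainer and scorers above; a sketch assuming the call conventions visible in Examples #2 and #5 (the adadelta trainer takes no learning-rate argument, train_fn returns the cost first when debug=True, and each scorer yields per-batch (same, diff) similarity pairs); MAX_EPOCHS is a hypothetical budget:

MAX_EPOCHS = 100
for epoch in xrange(MAX_EPOCHS):
    costs = []
    for x, y in train_it:
        costs.append(train_fn(x[0], x[1], y)[0])
    sims_same, sims_diff = zip(*valid_scoref())
    print "epoch", epoch, "avg cost", np.mean(costs), \
        "valid same", np.mean(sims_same), "diff", np.mean(sims_diff)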
Example #2
# assumed context: DEFAULT_DATASET, DIM_EMBEDDING, the layer and network
# classes (ReLU, LogisticRegression, DropoutABNeuralNet, ABNeuralNet2Outputs),
# DatasetDTWWrdSpkrIterator and the print/plot debug helpers come from the
# surrounding script; the imports below are what the function itself uses
import os
import sys
import time
import cPickle

import joblib
import numpy

from random import shuffle

def run(dataset_path=DEFAULT_DATASET,
        dataset_name='timit',
        batch_size=100,
        nframes=13,
        features="fbank",
        init_lr=0.01,
        max_epochs=500,
        network_type="AB",
        trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs the neural net on the given dataset.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION FOR DATASET LOADING CRAP
    if dataset_path[-7:] != '.joblib':
        print >> sys.stderr, "prepare your dataset with align_words.py or lucid.py or buckeye.py"
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    has_dev_set = True
    test_dataset_path = dataset_path[:-7].replace("train", "") + 'test.joblib'
    dev_split_at = int(0.9 * len(data_same))
    test_split_at = len(data_same)
    if not os.path.exists(test_dataset_path):
        has_dev_set = False
        test_split_at = int(0.95 * test_split_at)

    print data_same[0]
    print data_same[0][3].shape
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True
    min_max_scale = False
    marginf = (nframes - 1) / 2  # TODO
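    # one-sided frame margin used when stacking nframes consecutive frames:
    # with the default nframes=13 this is (13 - 1) / 2 = 6 frames per side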

    ### TRAIN SET
    train_set_iterator = DatasetDTWWrdSpkrIterator(data_same[:dev_split_at],
                                                   normalize=normalize,
                                                   min_max_scale=min_max_scale,
                                                   scale_f1=None,
                                                   scale_f2=None,
                                                   nframes=nframes,
                                                   batch_size=batch_size,
                                                   marginf=marginf)
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
    valid_set_iterator = DatasetDTWWrdSpkrIterator(
        data_same[dev_split_at:test_split_at],
        normalize=normalize,
        min_max_scale=min_max_scale,
        scale_f1=f1,
        scale_f2=f2,
        nframes=nframes,
        batch_size=batch_size,
        marginf=marginf)

    ### TEST SET
    if has_dev_set:
        data_same = joblib.load(test_dataset_path)
        test_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same,
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)
    else:
        test_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[test_split_at:],
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    nnet = None
    fast_dropout = False
    if "dropout" in network_type:
        print >> sys.stderr, "Dropout is not implemented for ABnets with 2 Outputs"
        nnet = DropoutABNeuralNet(
            numpy_rng=numpy_rng,  # TODO with 2 Outputs
            n_ins=n_ins,
            layers_types=layers_types,
            layers_sizes=layers_sizes,
            n_outs=n_outs,
            loss='cos_cos2',
            rho=0.95,
            eps=1.E-6,
            max_norm=4.,
            fast_drop=fast_dropout,
            debugprint=debug_print)
    else:
        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng,
                                   n_ins=n_ins,
                                   layers_types=layers_types,
                                   layers_sizes=layers_sizes,
                                   n_outs=n_outs,
                                   loss='cos_cos2',
                                   rho=0.90,
                                   eps=1.E-6,
                                   max_norm=0.,
                                   debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref_w = nnet.score_classif_same_diff_word_separated(
        train_set_iterator)
    valid_scoref_w = nnet.score_classif_same_diff_word_separated(
        valid_set_iterator)
    test_scoref_w = nnet.score_classif_same_diff_word_separated(
        test_set_iterator)
    train_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        train_set_iterator)
    valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        valid_set_iterator)
    test_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        test_set_iterator)
    data_iterator = train_set_iterator

    print '... training the model'
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f, protocol=-1)

    while (epoch < max_epochs):
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            #print "x[0][0]", x[0][0]
            #print "x[1][0]", x[1][0]
            #print "y[0][0]", y[0][0]
            #print "y[1][0]", y[1][0]
            avg_cost = 0.
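            # adadelta keeps per-parameter step sizes internally, so only the
            # SGD/adagrad trainers take an explicit learning rate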
            if "delta" in trainer_type:  # TODO remove need for this if
                avg_cost = train_fn(x[0], x[1], y[0], y[1])
            else:
                avg_cost = train_fn(x[0], x[1], y[0], y[1], lr)
            if debug_print >= 3:
                print "cost:", avg_cost[0]
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray,
                                                       avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                        iteration, avg_params_gradients_updates,
                        map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref_w())
        print('  epoch %i, training sim same words %f, diff words %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        tmp_train = zip(*train_scoref_s())
        print('  epoch %i, training sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.))  ### TODO
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        # we check the validation loss on every epoch
        validation_losses_w = zip(*valid_scoref_w())
        validation_losses_s = zip(*valid_scoref_s())
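        # composite validation loss: average of four error terms, namely
        # (1 - same-word similarity), diff-word similarity,
        # (1 - same-speaker similarity) and diff-speaker similarity,
        # so lower is better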
        this_validation_loss = 0.25*(1.-numpy.mean(validation_losses_w[0])) +\
                0.25*numpy.mean(validation_losses_w[1]) +\
                0.25*(1.-numpy.mean(validation_losses_s[0])) +\
                0.25*numpy.mean(validation_losses_s[1])

        print('  epoch %i, valid sim same words %f, diff words %f' % \
              (epoch, numpy.mean(validation_losses_w[0]), numpy.mean(validation_losses_w[1])))
        print('  epoch %i, valid sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(validation_losses_s[0]), numpy.mean(validation_losses_s[1])))
        # if we got the best validation score so far
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses_w = zip(*test_scoref_w())
            test_losses_s = zip(*test_scoref_s())
            print('  epoch %i, test sim same words %f, diff words %f' % \
                  (epoch, numpy.mean(test_losses_w[0]), numpy.mean(test_losses_w[1])))
            print('  epoch %i, test sim same spkrs %f, diff spkrs %f' % \
                  (epoch, numpy.mean(test_losses_s[0]), numpy.mean(test_losses_s[1])))

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') % (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
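
A typical invocation of the run() above, sketched with a hypothetical dataset path (it must end in '.joblib', as the function checks; the remaining defaults come from the signature):

if __name__ == "__main__":
    run(dataset_path="timit_train.joblib",  # hypothetical path
        dataset_name="timit",
        nframes=13,
        network_type="AB",
        trainer_type="adadelta")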
Example #3
import numpy as np  # otherwise the same assumed context as Example #1
train_it = DatasetABIterator(x1_train, x2_train, y_train, batch_size=BATCH_SIZE)
dev_it = DatasetABIterator(x1_dev, x2_dev, y_dev, batch_size=BATCH_SIZE)
test_it = DatasetABIterator(x1_test, x2_test, y_test, batch_size=BATCH_SIZE)
numpy_rng = np.random.RandomState(123)
from nnet_archs import ABNeuralNet, DropoutABNeuralNet
from layers import ReLU, SigmoidLayer, Linear
nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, 
        n_ins=x1_train.shape[1],
        #layers_types=[SigmoidLayer],
        #layers_sizes=[],
        #layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer],
        #layers_sizes=[500, 500],
        layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer],
        layers_sizes=[1000, 1000, 1000],
        dropout_rates=[0.2, 0.5, 0.5, 0.5],
        #layers_types=[SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer, SigmoidLayer],
        #layers_sizes=[500, 500, 500, 500],
        n_outs=DIM_EMBEDDING,
        loss='cos_cos2',
        rho=0.90,
        eps=1.E-5,
        max_norm=4.,
        debugprint=1)
print nnet

train_fn = nnet.get_adadelta_trainer(debug=True)
#train_fn = nnet.get_SGD_trainer(debug=True)
train_scoref = nnet.score_classif_same_diff_separated(train_it)
valid_scoref = nnet.score_classif_same_diff_separated(dev_it)
test_scoref = nnet.score_classif_same_diff_separated(test_it)
Example #4
# same assumed context and imports as Example #2, plus the REDTW flag
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        iterator_type=DatasetDTWIterator, batch_size=100,
        nframes=13, features="fbank",
        init_lr=0.01, max_epochs=500, 
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs the neural net on the given dataset.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION
    if dataset_path[-7:] != '.joblib':
        print >> sys.stderr, "prepare your dataset with align_words.py or lucid.py or buckeye.py"
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    has_dev_and_test_set = True
    has_test_set_only = False
    dev_dataset_path = dataset_path[:-7].replace("train", "") + 'dev.joblib'
    test_dataset_path = dataset_path[:-7].replace("train", "") + 'test.joblib'
    dev_split_at = len(data_same)
    test_split_at = len(data_same)
    if not os.path.exists(dev_dataset_path) or not os.path.exists(test_dataset_path):
        has_dev_and_test_set = False
        if os.path.exists(test_dataset_path):
            print >> sys.stderr, "DOESN'T HAVE A SEPARATED DEV SET, WE'LL SPLIT OUT OWN"
            has_test_set_only = True
            dev_split_at = int(0.9 * dev_split_at)
        else:
            print >> sys.stderr, "DOESN'T HAVE A SEPARATED DEV AND TEST SET, WE'LL SPLIT OUT OWNS"
            dev_split_at = int(0.8 * dev_split_at)
            test_split_at = int(0.9 * test_split_at)

    print data_same[0]
    print data_same[0][3].shape
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True
    min_max_scale = False
    marginf = (nframes-1)/2  # TODO

    ### TRAIN SET
    if has_dev_and_test_set:
        train_set_iterator = DatasetDTWWrdSpkrIterator(data_same,
                normalize=normalize, min_max_scale=min_max_scale,
                scale_f1=None, scale_f2=None, nframes=nframes,
                batch_size=batch_size, marginf=marginf)
    else:
        train_set_iterator = DatasetDTWWrdSpkrIterator(
                data_same[:dev_split_at], normalize=normalize,
                min_max_scale=min_max_scale, scale_f1=None, scale_f2=None,
                nframes=nframes, batch_size=batch_size, marginf=marginf)
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
    if has_dev_and_test_set:
        data_same = joblib.load(dev_dataset_path)
        valid_set_iterator = DatasetDTWWrdSpkrIterator(data_same,
                normalize=normalize, min_max_scale=min_max_scale,
                scale_f1=f1, scale_f2=f2,
                nframes=nframes, batch_size=batch_size, marginf=marginf)
    else:
        valid_set_iterator = DatasetDTWWrdSpkrIterator(
                data_same[dev_split_at:test_split_at], normalize=normalize,
                min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2,
                nframes=nframes, batch_size=batch_size, marginf=marginf)

    ### TEST SET
    if has_dev_and_test_set or has_test_set_only:
        data_same = joblib.load(test_dataset_path)
        test_set_iterator = DatasetDTWWrdSpkrIterator(data_same,
                normalize=normalize, min_max_scale=min_max_scale,
                scale_f1=f1, scale_f2=f2, nframes=nframes,
                batch_size=batch_size, marginf=marginf)
    else:
        test_set_iterator = DatasetDTWWrdSpkrIterator(
                data_same[test_split_at:], normalize=normalize,
                min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2,
                nframes=nframes, batch_size=batch_size, marginf=marginf)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "dropout" in network_type:
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,  # TODO with 2 Outputs
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                loss='cos_cos2',
                rho=0.95,
                eps=1.E-6,
                max_norm=4.,
                fast_drop=fast_dropout,
                debugprint=debug_print)
    else:
        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng, 
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                loss='cos_cos2',
                #loss='dot_prod',
                rho=0.90,
                eps=1.E-6,
                max_norm=0.,
                debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref_w = nnet.score_classif_same_diff_word_separated(train_set_iterator)
    valid_scoref_w = nnet.score_classif_same_diff_word_separated(valid_set_iterator)
    test_scoref_w = nnet.score_classif_same_diff_word_separated(test_set_iterator)
    train_scoref_s = nnet.score_classif_same_diff_spkr_separated(train_set_iterator)
    valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(valid_set_iterator)
    test_scoref_s = nnet.score_classif_same_diff_spkr_separated(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        print >> sys.stderr, "NOT IMPLEMENTED"
        sys.exit(-1)
        data_iterator = test_set_iterator
        train_scoref_w = test_scoref_w
        train_scoref_s = test_scoref_s

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f, protocol=-1)

    while (epoch < max_epochs) and (not done_looping):
        if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0:
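            # every 20 epochs, realign the word pairs with DTW using the
            # net's current transform of its first input (transform_x1)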
            print "recomputing DTW:"
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            #print "x[0][0]", x[0][0]
            #print "x[1][0]", x[1][0]
            #print "y[0][0]", y[0][0]
            #print "y[1][0]", y[1][0]
            avg_cost = 0.
            if "delta" in trainer_type:  # TODO remove need for this if
                avg_cost = train_fn(x[0], x[1], y[0], y[1])
            else:
                avg_cost = train_fn(x[0], x[1], y[0], y[1], lr)
            if debug_print >= 3:
                print "cost:", avg_cost[0]
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                            iteration, avg_params_gradients_updates,
                            map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref_w())
        print('  epoch %i, training sim same words %f, diff words %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        tmp_train = zip(*train_scoref_s())
        print('  epoch %i, training sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.)) ### TODO
        #lr = numpy.float32(init_lr / (iteration + 1.)) ### TODO
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses_w = zip(*valid_scoref_w())
        validation_losses_s = zip(*valid_scoref_s())
        this_validation_loss = 0.25*(1.-numpy.mean(validation_losses_w[0])) +\
                0.25*numpy.mean(validation_losses_w[1]) +\
                0.25*(1.-numpy.mean(validation_losses_s[0])) +\
                0.25*numpy.mean(validation_losses_s[1])

        print('  epoch %i, valid sim same words %f, diff words %f' % \
              (epoch, numpy.mean(validation_losses_w[0]), numpy.mean(validation_losses_w[1])))
        print('  epoch %i, valid sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(validation_losses_s[0]), numpy.mean(validation_losses_s[1])))
        # if we got the best validation score so far
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses_w = zip(*test_scoref_w())
            test_losses_s = zip(*test_scoref_s())
            print('  epoch %i, test sim same words %f, diff words %f' % \
                  (epoch, numpy.mean(test_losses_w[0]), numpy.mean(test_losses_w[1])))
            print('  epoch %i, test sim same spkrs %f, diff spkrs %f' % \
                  (epoch, numpy.mean(test_losses_s[0]), numpy.mean(test_losses_s[1])))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
                 (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
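
The best-model pickles written above can be reloaded later for embedding extraction; a sketch assuming, as the recompute_DTW call in Example #5 suggests, that transform_x1() returns a callable mapping stacked input frames to embeddings (the file name is hypothetical, following the output_file_name pattern above):

import cPickle
with open("timit_fbank13_AB_adadelta_emb_100.pickle", 'rb') as f:
    nnet = cPickle.load(f)
embed = nnet.transform_x1()
# embeddings = embed(stacked_frames), with stacked_frames of shape (n, n_ins)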
Example #5
# same assumed context and imports as Example #2, plus the REDTW flag,
# import random, and the load_data helper and Dataset iterator classes
# from the surrounding project
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        iterator_type=DatasetDTWIterator, batch_size=100,
        nframes=13, features="fbank",
        init_lr=0.001, max_epochs=500, 
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs the neural net on the given dataset.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION
    if dataset_path[-7:] == '.joblib':
        if REDTW:
            data_same = joblib.load(dataset_path)
            shuffle(data_same)
            ten_percent = int(0.1 * len(data_same))

            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            mean = numpy.mean(x_arr_same, 0)
            std = numpy.std(x_arr_same, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)
            print x_arr_same.shape
            print "mean:", mean
            print "std:", std
            marginf = 0  # (nframes-1)/2  # TODO
            train_set_iterator = iterator_type(data_same[:-ten_percent],
                    mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf)
            valid_set_iterator = iterator_type(data_same[-ten_percent:],
                    mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf)

            #test_dataset_path = dataset_path[:-7].replace("train", "test") + '.joblib'
            test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib'
            data_same = joblib.load(test_dataset_path)
            test_set_iterator = iterator_type(data_same, mean, std,
                    nframes=nframes, batch_size=batch_size, marginf=marginf, only_same=True)
            n_ins = mean.shape[0] * nframes
            n_outs = DIM_EMBEDDING

        else:
            data_same = joblib.load(dataset_path)
            #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)]
            print "number of word paired:", len(data_same)
            if debug_print:
                # some stats on the DTW
                dtw_costs = zip(*data_same)[5]
                words_frames = numpy.asarray([fb.shape[0] for fb in zip(*data_same)[3]])
                print "mean DTW cost", numpy.mean(dtw_costs), "std dev", numpy.std(dtw_costs)
                print "mean word length in frames", numpy.mean(words_frames), "std dev", numpy.std(words_frames)
                print "mean DTW cost per frame", numpy.mean(dtw_costs/words_frames), "std dev", numpy.std(dtw_costs/words_frames)

            # generate data_diff:
#            spkr_words = {}
            same_spkr = 0
            for i, tup in enumerate(data_same):
#                spkr_words[tup[1]].append((i, 0))
#                spkr_words[tup[2]].append((i, 1))
                if tup[1] == tup[2]:
                    same_spkr += 1
#            to_del = []
#            for spkr, words in spkr_words.iteritems():
#                if len(words) < 2:
#                    to_del.append(spkr)
#            print "to del len:", len(to_del)
#            for td in to_del:
#                del spkr_words[td]
            ratio = same_spkr * 1. / len(data_same)
            print "ratio same spkr / all for same:", ratio
            data_diff = []
#            keys = spkr_words.keys()
#            lkeys = len(keys) - 1
            ldata_same = len(data_same)-1
            same_spkr_diff = 0
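            # build one "different" pair per "same" pair: draw two words of
            # different types at random and truncate both halves to the
            # length of the shorter one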
            for i in xrange(len(data_same)):
                word_1 = random.randint(0, ldata_same)
                word_1_type = data_same[word_1][0]
                word_2 = random.randint(0, ldata_same)
                while data_same[word_2][0] == word_1_type:
                    word_2 = random.randint(0, ldata_same)

                wt1 = random.randint(0, 1)
                wt2 = random.randint(0, 1)
                if data_same[word_1][1+wt1] == data_same[word_2][1+wt2]:
                    same_spkr_diff += 1
                p1 = data_same[word_1][3+wt1]
                p2 = data_same[word_2][3+wt2]
                r1 = p1[:min(len(p1), len(p2))]
                r2 = p2[:min(len(p1), len(p2))]
                data_diff.append((r1, r2))

            ratio = same_spkr_diff * 1. / len(data_diff)
            print "ratio same spkr / all for diff:", ratio

            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            print x_arr_same.shape
            x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]),
                    numpy.concatenate([e[1] for e in data_diff])]
            print x_arr_diff.shape

            x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff])
            mean = numpy.mean(x_arr_all, 0)
            std = numpy.std(x_arr_all, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)

            x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std)
                    for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)]
            x_diff = [((e[0] - mean) / std, (e[1] - mean) / std)
                    for e in data_diff]
            #shuffle(x_diff)
            y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)]
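            # interleave same and diff pairs so the two classes alternate
            # through the training stream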
            y = [j for i in zip(y_same, y_diff) for j in i]
            x = [j for i in zip(x_same, x_diff) for j in i]

            x1, x2 = zip(*x)
            assert x1[0].shape[0] == x2[0].shape[0]
            assert x1[0].shape[1] == x2[0].shape[1]
            assert len(x1) == len(x2)
            assert len(x1) == len(y)
            ten_percent = int(0.1 * len(x1))

            n_ins = x1[0].shape[1] * nframes
            n_outs = DIM_EMBEDDING

            print "nframes:", nframes

            marginf = (nframes-1)/2  # TODO

            train_set_iterator = iterator_type(x1[:-ten_percent], 
                    x2[:-ten_percent], y[:-ten_percent], # TODO
                    nframes=nframes, batch_size=batch_size, marginf=marginf)
            valid_set_iterator = iterator_type(x1[-ten_percent:], 
                    x2[-ten_percent:], y[-ten_percent:],  # TODO
                    nframes=nframes, batch_size=batch_size, marginf=marginf)

            ### TEST SET
            test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib'
            data_same = joblib.load(test_dataset_path)
            # DO ONLY SAME
            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            print x_arr_same.shape
            x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std)
                    for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)]
            x = x_same
            y = y_same

            x1, x2 = zip(*x)
            test_set_iterator = iterator_type(x1, x2, y,
                nframes=nframes, batch_size=batch_size, marginf=marginf)

    else:
        data = load_data(dataset_path, nframes=1, features=features, scaling='normalize', cv_frac='fixed', speakers=False, numpy_array_only=True) 

        train_set_x, train_set_y = data[0]
        valid_set_x, valid_set_y = data[1]
        test_set_x, test_set_y = data[2]
        assert train_set_x.shape[1] == valid_set_x.shape[1]
        assert test_set_x.shape[1] == valid_set_x.shape[1]

        print "dataset loaded!"
        print "train set size", train_set_x.shape[0]
        print "validation set size", valid_set_x.shape[0]
        print "test set size", test_set_x.shape[0]
        print "phones in train", len(set(train_set_y))
        print "phones in valid", len(set(valid_set_y))
        print "phones in test", len(set(test_set_y))
        n_outs = len(set(train_set_y))

        to_int = {}
        with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f:
            to_int, _ = cPickle.load(f)

        print "nframes:", nframes
        train_set_iterator = iterator_type(train_set_x, train_set_y,
                to_int, nframes=nframes, batch_size=batch_size)
        valid_set_iterator = iterator_type(valid_set_x, valid_set_y,
                to_int, nframes=nframes, batch_size=batch_size)
        test_set_iterator = iterator_type(test_set_x, test_set_y,
                to_int, nframes=nframes, batch_size=batch_size)
        n_ins = test_set_x.shape[1]*nframes

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "fast_dropout" in network_type:
        fast_dropout = True
    if "ab_net" in network_type or "abnet" in network_type:
        if "dropout" in network_type:
            print "dropout ab net"
            nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, 
                    n_ins=n_ins,
                    layers_types=layers_types,
                    layers_sizes=layers_sizes,
                    n_outs=n_outs,
                    loss='cos_cos2',
                    rho=0.95,
                    eps=1.E-6,
                    max_norm=4.,
                    fast_drop=fast_dropout,
                    debugprint=debug_print)
        else:
            print "ab net"
            nnet = ABNeuralNet(numpy_rng=numpy_rng, 
                    n_ins=n_ins,
                    layers_types=layers_types,
                    layers_sizes=layers_sizes,
                    n_outs=n_outs,
                    loss='dot_prod',
                    rho=0.95,
                    eps=1.E-6,
                    max_norm=0.,
                    debugprint=debug_print)
    else:
        if "dropout" in network_type:
            nnet = DropoutNet(numpy_rng=numpy_rng, 
                    n_ins=n_ins,
                    layers_types=layers_types,
                    layers_sizes=layers_sizes,
                    dropout_rates=dropout_rates,
                    n_outs=n_outs,
                    rho=0.95,
                    eps=1.E-6,
                    max_norm=0.,
                    fast_drop=fast_dropout,
                    debugprint=debug_print)
        else:
            nnet = NeuralNet(numpy_rng=numpy_rng, 
                    n_ins=n_ins,
                    layers_types=layers_types,
                    layers_sizes=layers_sizes,
                    n_outs=n_outs,
                    rho=0.92,
                    eps=1.E-6,
                    max_norm=0.,
                    debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f)

    while (epoch < max_epochs) and (not done_looping):
        if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0:
            print "recomputing DTW:"
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            avg_cost = 0.
            if "ab_net" in network_type or "abnet" in network_type:  # remove need for this if
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x[0], x[1], y)
                else:
                    avg_cost = train_fn(x[0], x[1], y, lr)
                if debug_print >= 3:
                    print "cost:", avg_cost[0]
                if debug_plot >= 2:
                    plot_costs(avg_cost[0])
                    if not len(avg_params_gradients_updates):
                        avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:])
                    else:
                        avg_params_gradients_updates = rolling_avg_pgu(
                                iteration, avg_params_gradients_updates,
                                map(numpy.asarray, avg_cost[1:]))
                if debug_plot >= 3:
                    plot_params_gradients_updates(iteration, avg_cost[1:])
            else:
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x, y)
                else:
                    avg_cost = train_fn(x, y, lr)
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref())
        print('  epoch %i, training error same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        #this_validation_loss = -numpy.mean(validation_losses[0])  # TODO this is a mean of means (with different lengths)
        this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\
                0.5*numpy.mean(validation_losses[1])
        print('  epoch %i, valid error same %f, diff %f' % \
              (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1])))
        # if we got the best validation score so far
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_same = numpy.mean(test_losses[0])  # TODO this is a mean of means (with different lengths)
            test_score_diff = numpy.mean(test_losses[1])  # TODO this is a mean of means (with different lengths)
            print(('  epoch %i, test error of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
                 (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
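
The normalization statistics saved with numpy.savez above can be reloaded at test time so new features get the same scaling as the training data:

import numpy
stats = numpy.load("mean_std_3.npz")  # written by the savez call above
mean, std = stats["mean"], stats["std"]
# scaled = (raw_features - mean) / std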
Example #6
# same assumed context and imports as Example #5
def run(dataset_path=DEFAULT_DATASET,
        dataset_name='timit',
        iterator_type=DatasetDTWIterator,
        batch_size=100,
        nframes=13,
        features="fbank",
        init_lr=0.001,
        max_epochs=500,
        network_type="dropout_net",
        trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs the neural net on the given dataset.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION
    if dataset_path[-7:] == '.joblib':
        if REDTW:
            data_same = joblib.load(dataset_path)
            shuffle(data_same)
            ten_percent = int(0.1 * len(data_same))

            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                                  numpy.concatenate([e[4] for e in data_same])]
            mean = numpy.mean(x_arr_same, 0)
            std = numpy.std(x_arr_same, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)
            print x_arr_same.shape
            print "mean:", mean
            print "std:", std
            marginf = 0  # (nframes-1)/2  # TODO
            train_set_iterator = iterator_type(data_same[:-ten_percent],
                                               mean,
                                               std,
                                               nframes=nframes,
                                               batch_size=batch_size,
                                               marginf=marginf)
            valid_set_iterator = iterator_type(data_same[-ten_percent:],
                                               mean,
                                               std,
                                               nframes=nframes,
                                               batch_size=batch_size,
                                               marginf=marginf)

            #test_dataset_path = dataset_path[:-7].replace("train", "test") + '.joblib'
            test_dataset_path = dataset_path[:-7].replace("train",
                                                          "dev") + '.joblib'
            data_same = joblib.load(test_dataset_path)
            test_set_iterator = iterator_type(data_same,
                                              mean,
                                              std,
                                              nframes=nframes,
                                              batch_size=batch_size,
                                              marginf=marginf,
                                              only_same=True)
            n_ins = mean.shape[0] * nframes
            n_outs = DIM_EMBEDDING

        else:
            data_same = joblib.load(dataset_path)
            #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)]
            print "number of word paired:", len(data_same)
            if debug_print:
                # some stats on the DTW
                dtw_costs = zip(*data_same)[5]
                words_frames = numpy.asarray(
                    [fb.shape[0] for fb in zip(*data_same)[3]])
                print "mean DTW cost", numpy.mean(
                    dtw_costs), "std dev", numpy.std(dtw_costs)
                print "mean word length in frames", numpy.mean(
                    words_frames), "std dev", numpy.std(words_frames)
                print "mean DTW cost per frame", numpy.mean(
                    dtw_costs / words_frames), "std dev", numpy.std(
                        dtw_costs / words_frames)

            # generate data_diff:
#            spkr_words = {}
            same_spkr = 0
            for i, tup in enumerate(data_same):
                #                spkr_words[tup[1]].append((i, 0))
                #                spkr_words[tup[2]].append((i, 1))
                if tup[1] == tup[2]:
                    same_spkr += 1
#            to_del = []
#            for spkr, words in spkr_words.iteritems():
#                if len(words) < 2:
#                    to_del.append(spkr)
#            print "to del len:", len(to_del)
#            for td in to_del:
#                del spkr_words[td]
            ratio = same_spkr * 1. / len(data_same)
            print "ratio same spkr / all for same:", ratio
            data_diff = []
            #            keys = spkr_words.keys()
            #            lkeys = len(keys) - 1
            ldata_same = len(data_same) - 1
            same_spkr_diff = 0
            for i in xrange(len(data_same)):
                word_1 = random.randint(0, ldata_same)
                word_1_type = data_same[word_1][0]
                word_2 = random.randint(0, ldata_same)
                while data_same[word_2][0] == word_1_type:
                    word_2 = random.randint(0, ldata_same)

                wt1 = random.randint(0, 1)
                wt2 = random.randint(0, 1)
                if data_same[word_1][1 + wt1] == data_same[word_2][1 + wt2]:
                    same_spkr_diff += 1
                p1 = data_same[word_1][3 + wt1]
                p2 = data_same[word_2][3 + wt2]
                r1 = p1[:min(len(p1), len(p2))]
                r2 = p2[:min(len(p1), len(p2))]
                data_diff.append((r1, r2))

            ratio = same_spkr_diff * 1. / len(data_diff)
            print "ratio same spkr / all for diff:", ratio

            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                                  numpy.concatenate([e[4] for e in data_same])]
            print x_arr_same.shape
            x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]),
                                  numpy.concatenate([e[1] for e in data_diff])]
            print x_arr_diff.shape

            x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff])
            mean = numpy.mean(x_arr_all, 0)
            std = numpy.std(x_arr_all, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)

            x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std)
                      for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1 for _ in xrange(len(e[0]))]
                      for i, e in enumerate(x_same)]
            x_diff = [((e[0] - mean) / std, (e[1] - mean) / std)
                      for e in data_diff]
            #shuffle(x_diff)
            y_diff = [[0 for _ in xrange(len(e[0]))]
                      for i, e in enumerate(x_diff)]
            y = [j for i in zip(y_same, y_diff) for j in i]
            x = [j for i in zip(x_same, x_diff) for j in i]

            x1, x2 = zip(*x)
            assert x1[0].shape[0] == x2[0].shape[0]
            assert x1[0].shape[1] == x2[0].shape[1]
            assert len(x1) == len(x2)
            assert len(x1) == len(y)
            ten_percent = int(0.1 * len(x1))

            n_ins = x1[0].shape[1] * nframes
            n_outs = DIM_EMBEDDING

            print "nframes:", nframes

            marginf = (nframes - 1) / 2  # TODO

            train_set_iterator = iterator_type(
                x1[:-ten_percent],
                x2[:-ten_percent],
                y[:-ten_percent],  # TODO
                nframes=nframes,
                batch_size=batch_size,
                marginf=marginf)
            valid_set_iterator = iterator_type(
                x1[-ten_percent:],
                x2[-ten_percent:],
                y[-ten_percent:],  # TODO
                nframes=nframes,
                batch_size=batch_size,
                marginf=marginf)

            ### TEST SET
            test_dataset_path = dataset_path[:-7].replace("train",
                                                          "dev") + '.joblib'
            data_same = joblib.load(test_dataset_path)
            # DO ONLY SAME
            x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]),
                                  numpy.concatenate([e[4] for e in data_same])]
            print x_arr_same.shape
            x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std)
                      for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1 for _ in xrange(len(e[0]))]
                      for i, e in enumerate(x_same)]
            x = x_same
            y = y_same

            x1, x2 = zip(*x)
            test_set_iterator = iterator_type(x1,
                                              x2,
                                              y,
                                              nframes=nframes,
                                              batch_size=batch_size,
                                              marginf=marginf)

    else:
        data = load_data(dataset_path,
                         nframes=1,
                         features=features,
                         scaling='normalize',
                         cv_frac='fixed',
                         speakers=False,
                         numpy_array_only=True)

        train_set_x, train_set_y = data[0]
        valid_set_x, valid_set_y = data[1]
        test_set_x, test_set_y = data[2]
        assert train_set_x.shape[1] == valid_set_x.shape[1]
        assert test_set_x.shape[1] == valid_set_x.shape[1]

        print "dataset loaded!"
        print "train set size", train_set_x.shape[0]
        print "validation set size", valid_set_x.shape[0]
        print "test set size", test_set_x.shape[0]
        print "phones in train", len(set(train_set_y))
        print "phones in valid", len(set(valid_set_y))
        print "phones in test", len(set(test_set_y))
        n_outs = len(set(train_set_y))

        to_int = {}
        with open(dataset_name +
                  '_to_int_and_to_state_dicts_tuple.pickle') as f:
            to_int, _ = cPickle.load(f)

        print "nframes:", nframes
        train_set_iterator = iterator_type(train_set_x,
                                           train_set_y,
                                           to_int,
                                           nframes=nframes,
                                           batch_size=batch_size)
        valid_set_iterator = iterator_type(valid_set_x,
                                           valid_set_y,
                                           to_int,
                                           nframes=nframes,
                                           batch_size=batch_size)
        test_set_iterator = iterator_type(test_set_x,
                                          test_set_y,
                                          to_int,
                                          nframes=nframes,
                                          batch_size=batch_size)
        n_ins = test_set_x.shape[1] * nframes

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "fast_dropout" in network_type:
        fast_dropout = True
    if "ab_net" in network_type or "abnet" in network_type:
        if "dropout" in network_type:
            print "dropout ab net"
            nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,
                                      n_ins=n_ins,
                                      layers_types=layers_types,
                                      layers_sizes=layers_sizes,
                                      n_outs=n_outs,
                                      loss='cos_cos2',
                                      rho=0.95,
                                      eps=1.E-6,
                                      max_norm=4.,
                                      fast_drop=fast_dropout,
                                      debugprint=debug_print)
        else:
            print "ab net"
            nnet = ABNeuralNet(numpy_rng=numpy_rng,
                               n_ins=n_ins,
                               layers_types=layers_types,
                               layers_sizes=layers_sizes,
                               n_outs=n_outs,
                               loss='dot_prod',
                               rho=0.95,
                               eps=1.E-6,
                               max_norm=0.,
                               debugprint=debug_print)
    else:
        if "dropout" in network_type:
            nnet = DropoutNet(numpy_rng=numpy_rng,
                              n_ins=n_ins,
                              layers_types=layers_types,
                              layers_sizes=layers_sizes,
                              dropout_rates=dropout_rates,
                              n_outs=n_outs,
                              rho=0.95,
                              eps=1.E-6,
                              max_norm=0.,
                              fast_drop=fast_dropout,
                              debugprint=debug_print)
        else:
            nnet = NeuralNet(numpy_rng=numpy_rng,
                             n_ins=n_ins,
                             layers_types=layers_types,
                             layers_sizes=layers_sizes,
                             n_outs=n_outs,
                             rho=0.92,
                             eps=1.E-6,
                             max_norm=0.,
                             debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f)

    while (epoch < max_epochs) and (not done_looping):
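        # REDTW (defined elsewhere): every 20 epochs, re-run the DTW
        # alignment of the word pairs in the current embedding space so the
        # frame pairing tracks the learned representation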
        if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0:
            print "recomputing DTW:"
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            avg_cost = 0.
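            # ABnet trainers take the two sides of the pair (x[0], x[1])
            # separately; the adadelta variant needs no explicit learning rate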
            if "ab_net" in network_type or "abnet" in network_type:  # remove need for this if
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x[0], x[1], y)
                else:
                    avg_cost = train_fn(x[0], x[1], y, lr)
                if debug_print >= 3:
                    print "cost:", avg_cost[0]
                if debug_plot >= 2:
                    plot_costs(avg_cost[0])
                    if not len(avg_params_gradients_updates):
                        avg_params_gradients_updates = map(
                            numpy.asarray, avg_cost[1:])
                    else:
                        avg_params_gradients_updates = rolling_avg_pgu(
                            iteration, avg_params_gradients_updates,
                            map(numpy.asarray, avg_cost[1:]))
                if debug_plot >= 3:
                    plot_params_gradients_updates(iteration, avg_cost[1:])
            else:
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x, y)
                else:
                    avg_cost = train_fn(x, y, lr)
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref())
        print('  epoch %i, training error same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        #this_validation_loss = -numpy.mean(validation_losses[0])  # TODO this is a mean of means (with different lengths)
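        # the score functions return per-batch similarities: 'same' pairs
        # should score high and 'diff' pairs low, so the combined loss
        # averages (1 - mean same score) and the mean diff score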
        this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\
                0.5*numpy.mean(validation_losses[1])
        print('  epoch %i, valid error same %f, diff %f' % \
              (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1])))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss <
                    best_validation_loss * improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_same = numpy.mean(
                test_losses[0]
            )  # TODO this is a mean of means (with different lengths)
            test_score_diff = numpy.mean(
                test_losses[1]
            )  # TODO this is a mean of means (with different lengths)
            # track a combined summary so the final report's test
            # performance is not stuck at its initial 0.
            test_score = 0.5 * (test_score_same + test_score_diff)
            print(('  epoch %i, test error of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') % (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
Example #7
def run(dataset_path=DEFAULT_DATASET,
        dataset_name='mnist',
        iterator_type=DatasetABIterator,
        batch_size=100,
        init_lr=0.001,
        max_epochs=500,
        network_type="dropout_net",
        trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs an AB neural net on paired (same/diff) data.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    if dataset_path[-7:] == '.joblib':
        test_dataset_path = dataset_path.replace('train', 'test')
        print "loading dataset from", dataset_path, "and", test_dataset_path
        x1_train, x2_train, y_train = joblib.load(dataset_path)
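        # pixel pairs may come as uint8 in [0, 255]; rescale them to [0, 1]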
        if numpy.max(x1_train) > 1:
            x1_train = numpy.asarray(x1_train, dtype='float32') / 255
        if numpy.max(x2_train) > 1:
            x2_train = numpy.asarray(x2_train, dtype='float32') / 255
        x1_test, x2_test, y_test = joblib.load(test_dataset_path)
        if numpy.max(x1_test) > 1:
            x1_test = numpy.asarray(x1_test, dtype='float32') / 255
        if numpy.max(x2_test) > 1:
            x2_test = numpy.asarray(x2_test, dtype='float32') / 255
        ten_percent = int(0.1 * x1_train.shape[0])
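        # hold out the last 10% of the training pairs as the validation split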
        train_set_iterator = iterator_type(x1_train[:-ten_percent],
                                           x2_train[:-ten_percent],
                                           y_train[:-ten_percent],
                                           batch_size=batch_size)
        valid_set_iterator = iterator_type(x1_train[-ten_percent:],
                                           x2_train[-ten_percent:],
                                           y_train[-ten_percent:],
                                           batch_size=batch_size)
        test_set_iterator = iterator_type(x1_test,
                                          x2_test,
                                          y_test,
                                          batch_size=batch_size)
        n_ins = x1_train.shape[1]
        n_outs = DIM_EMBEDDING
    else:
        SCALE = True
        N_SAMPLES = 10
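        # fetch_mldata downloads MNIST from mldata.org; newer scikit-learn
        # removed it in favor of fetch_openml('mnist_784')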
        from sklearn.datasets import fetch_mldata
        mnist = fetch_mldata('MNIST original')
        X = numpy.asarray(mnist.data, dtype='uint8')
        if SCALE:
            X = numpy.asarray(X, dtype='float32')
            X /= 255.
        y = numpy.asarray(mnist.target, dtype='uint8')
        X_train = X[:60000]
        y_train = y[:60000]
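        # append the labels as a last column so a single in-place shuffle
        # keeps features and labels aligned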
        xy = numpy.ndarray((X_train.shape[0], X_train.shape[1] + 1),
                           dtype='float32')
        xy[:, :-1] = X_train
        xy[:, -1] = y_train
        numpy.random.shuffle(xy)
        ten_percent = int(0.1 * X_train.shape[0])
        X_train = xy[:-ten_percent, :-1]
        y_train = xy[:-ten_percent, -1]
        X_valid = xy[-ten_percent:, :-1]
        y_valid = xy[-ten_percent:, -1]
        X_test = X[60000:]
        y_test = y[60000:]
        xy = numpy.ndarray((X_test.shape[0], X_test.shape[1] + 1),
                           dtype='float32')
        xy[:, :-1] = X_test
        xy[:, -1] = y_test
        numpy.random.shuffle(xy)
        X_test = xy[:, :-1]
        y_test = xy[:, -1]
        print X_train.shape
        print X_valid.shape
        print X_test.shape
        train_set_iterator = DatasetABSamplingIteratorFromLabels(
            X_train, y_train, n_samples=N_SAMPLES, batch_size=batch_size)
        valid_set_iterator = DatasetABSamplingIteratorFromLabels(
            X_valid, y_valid, n_samples=N_SAMPLES, batch_size=batch_size)
        test_set_iterator = DatasetABSamplingIteratorFromLabels(
            X_test, y_test, n_samples=N_SAMPLES, batch_size=batch_size)
        n_ins = X_train.shape[1]
        n_outs = DIM_EMBEDDING

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "fast_dropout" in network_type:
        fast_dropout = True
    if "dropout" in network_type:
        nnet = DropoutABNeuralNet(
            numpy_rng=numpy_rng,
            n_ins=n_ins,
            layers_types=layers_types,
            layers_sizes=layers_sizes,
            n_outs=n_outs,
            #loss='cos_cos2',
            loss='hellinger',
            rho=0.95,
            eps=1.E-6,
            max_norm=4.,
            fast_drop=fast_dropout,
            debugprint=debug_print)
    else:
        nnet = ABNeuralNet(numpy_rng=numpy_rng,
                           n_ins=n_ins,
                           layers_types=layers_types,
                           layers_sizes=layers_sizes,
                           n_outs=n_outs,
                           loss='cos_cos2',
                           rho=0.9,
                           eps=1.E-6,
                           max_norm=4.,
                           debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f)

    while (epoch < max_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            avg_cost = 0.
            if "ab_net" in network_type:  # remove need for this if
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x[0], x[1], y)
                else:
                    avg_cost = train_fn(x[0], x[1], y, lr)
                if debug_print >= 3:
                    print "cost:", avg_cost[0]
                if debug_plot >= 2:
                    plot_costs(avg_cost[0])
                    if not len(avg_params_gradients_updates):
                        avg_params_gradients_updates = map(
                            numpy.asarray, avg_cost[1:])
                    else:
                        avg_params_gradients_updates = rolling_avg_pgu(
                            iteration, avg_params_gradients_updates,
                            map(numpy.asarray, avg_cost[1:]))
                if debug_plot >= 3:
                    plot_params_gradients_updates(iteration, avg_cost[1:])
            else:
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x, y)
                else:
                    avg_cost = train_fn(x, y, lr)
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        print('  epoch %i, avg costs %f' % \
              (epoch, numpy.mean(avg_costs)))
        tmp_train = zip(*train_scoref())
        print('  epoch %i, training error same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        #this_validation_loss = -numpy.mean(validation_losses[0])  # TODO this is a mean of means (with different lengths)
        this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\
                0.5*numpy.mean(validation_losses[1])
        print('  epoch %i, valid error same %f, diff %f' % \
              (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1])))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f)
            # improve patience if loss improvement is good enough
            if (this_validation_loss <
                    best_validation_loss * improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_same = numpy.mean(
                test_losses[0]
            )  # TODO this is a mean of means (with different lengths)
            test_score_diff = numpy.mean(
                test_losses[1]
            )  # TODO this is a mean of means (with different lengths)
            # track a combined summary so the final report's test
            # performance is not stuck at its initial 0.
            test_score = 0.5 * (test_score_same + test_score_diff)
            print(('  epoch %i, test error of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') % (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f)
Example #8
def run(dataset_path=DEFAULT_DATASET, dataset_name='mnist',
        iterator_type=DatasetABIterator, batch_size=100,
        init_lr=0.001, max_epochs=500, 
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs an AB neural net on paired (same/diff) data.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    if dataset_path[-7:] == '.joblib':
        test_dataset_path = dataset_path.replace('train', 'test')
        print "loading dataset from", dataset_path, "and", test_dataset_path
        x1_train, x2_train, y_train = joblib.load(dataset_path)
        if numpy.max(x1_train) > 1:
            x1_train = numpy.asarray(x1_train, dtype='float32') / 255
        if numpy.max(x2_train) > 1:
            x2_train = numpy.asarray(x2_train, dtype='float32') / 255
        x1_test, x2_test, y_test = joblib.load(test_dataset_path)
        if numpy.max(x1_test) > 1:
            x1_test = numpy.asarray(x1_test, dtype='float32') / 255
        if numpy.max(x2_test) > 1:
            x2_test = numpy.asarray(x2_test, dtype='float32') / 255
        ten_percent = int(0.1 * x1_train.shape[0])
        train_set_iterator = iterator_type(x1_train[:-ten_percent],
                x2_train[:-ten_percent], y_train[:-ten_percent],
                batch_size=batch_size)
        valid_set_iterator = iterator_type(x1_train[-ten_percent:],
                x2_train[-ten_percent:], y_train[-ten_percent:],
                batch_size=batch_size)
        test_set_iterator = iterator_type(x1_test, x2_test, y_test,
                batch_size=batch_size)
        n_ins = x1_train.shape[1]
        n_outs = DIM_EMBEDDING
    else:
        SCALE = True
        N_SAMPLES = 10
        from sklearn.datasets import fetch_mldata
        mnist = fetch_mldata('MNIST original')
        X = numpy.asarray(mnist.data, dtype='uint8')
        if SCALE:
            X = numpy.asarray(X, dtype='float32')
            X /= 255.
        y = numpy.asarray(mnist.target, dtype='uint8')
        X_train = X[:60000]
        y_train = y[:60000]
        xy = numpy.ndarray((X_train.shape[0], X_train.shape[1] + 1),
                dtype='float32')
        xy[:, :-1] = X_train
        xy[:, -1] = y_train
        numpy.random.shuffle(xy)
        ten_percent = int(0.1 * X_train.shape[0])
        X_train = xy[:-ten_percent, :-1]
        y_train = xy[:-ten_percent, -1]
        X_valid = xy[-ten_percent:, :-1]
        y_valid = xy[-ten_percent:, -1]
        X_test = X[60000:]
        y_test = y[60000:]
        xy = numpy.ndarray((X_test.shape[0], X_test.shape[1] + 1),
                dtype='float32')
        xy[:, :-1] = X_test
        xy[:, -1] = y_test
        numpy.random.shuffle(xy)
        X_test = xy[:, :-1]
        y_test = xy[:, -1]
        print X_train.shape
        print X_valid.shape
        print X_test.shape
        train_set_iterator = DatasetABSamplingIteratorFromLabels(X_train,
                y_train, n_samples=N_SAMPLES, batch_size=batch_size)
        valid_set_iterator = DatasetABSamplingIteratorFromLabels(X_valid,
                y_valid, n_samples=N_SAMPLES, batch_size=batch_size)
        test_set_iterator = DatasetABSamplingIteratorFromLabels(X_test,
                y_test, n_samples=N_SAMPLES, batch_size=batch_size)
        n_ins = X_train.shape[1]
        n_outs = DIM_EMBEDDING

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "fast_dropout" in network_type:
        fast_dropout = True
    if "dropout" in network_type:
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, 
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                #loss='cos_cos2',
                loss='hellinger',
                rho=0.95,
                eps=1.E-6,
                max_norm=4.,
                fast_drop=fast_dropout,
                debugprint=debug_print)
    else:
        nnet = ABNeuralNet(numpy_rng=numpy_rng, 
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                loss='cos_cos2',
                rho=0.9,
                eps=1.E-6,
                max_norm=4.,
                debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f)

    while (epoch < max_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            avg_cost = 0.
            if "ab_net" in network_type:  # remove need for this if
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x[0], x[1], y)
                else:
                    avg_cost = train_fn(x[0], x[1], y, lr)
                if debug_print >= 3:
                    print "cost:", avg_cost[0]
                if debug_plot >= 2:
                    plot_costs(avg_cost[0])
                    if not len(avg_params_gradients_updates):
                        avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:])
                    else:
                        avg_params_gradients_updates = rolling_avg_pgu(
                                iteration, avg_params_gradients_updates,
                                map(numpy.asarray, avg_cost[1:]))
                if debug_plot >= 3:
                    plot_params_gradients_updates(iteration, avg_cost[1:])
            else:
                if "delta" in trainer_type:  # TODO remove need for this if
                    avg_cost = train_fn(x, y)
                else:
                    avg_cost = train_fn(x, y, lr)
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        print('  epoch %i, avg costs %f' % \
              (epoch, numpy.mean(avg_costs)))
        tmp_train = zip(*train_scoref())
        print('  epoch %i, training error same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        #this_validation_loss = -numpy.mean(validation_losses[0])  # TODO this is a mean of means (with different lengths)
        this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\
                0.5*numpy.mean(validation_losses[1])
        print('  epoch %i, valid error same %f, diff %f' % \
              (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1])))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_same = numpy.mean(test_losses[0])  # TODO this is a mean of means (with different lengths)
            test_score_diff = numpy.mean(test_losses[1])  # TODO this is a mean of means (with different lengths)
            # track a combined summary so the final report's test
            # performance is not stuck at its initial 0.
            test_score = 0.5 * (test_score_same + test_score_diff)
            print(('  epoch %i, test error of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
                 (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f)
Example #9
def run(dataset_path="from_aren.joblib", dataset_name='timit',
        batch_size=100,
        nframes=13, features="fbank",
        init_lr=0.01, max_epochs=500, 
        network_type="AB", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        loss='cos_cos2',
        recurrent_connections=[],
        prefix_fname='',
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs the neural net on the given dataset.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION FOR DATASET LOADING CRAP
    if dataset_path[-7:] != '.joblib':
        print >> sys.stderr, "prepare your dataset with align_words.py or lucid.py or buckeye.py"
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    dev_split_at = int(0.9 * len(data_same))

    print data_same[0]
    print data_same[0][3].shape
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True
    min_max_scale = False
    marginf = (nframes-1)/2  # TODO

    ### TRAIN SET
    train_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[:dev_split_at], normalize=normalize,
            min_max_scale=min_max_scale, scale_f1=None, scale_f2=None,
            nframes=nframes, batch_size=batch_size, marginf=marginf)
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
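    # reuse the train-set scaling (f1, f2) so dev features are normalized
    # identically to the train features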
    valid_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[dev_split_at:], normalize=normalize,
            min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2,
            nframes=nframes, batch_size=batch_size, marginf=marginf)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    nnet = None
    fast_dropout = False
    if "dropout" in network_type:
        print >> sys.stderr, "Dropout is not implemented for ABnets with 2 Outputs"
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,  # TODO with 2 Outputs
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                loss=loss,
                rho=0.95,
                eps=1.E-6,
                max_norm=4.,
                fast_drop=fast_dropout,
                debugprint=debug_print)
    else:
        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng, 
                n_ins=n_ins,
                layers_types=layers_types,
                layers_sizes=layers_sizes,
                n_outs=n_outs,
                loss=loss,
                rho=0.90,
                eps=1.E-6,
                max_norm=0.,
                debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref_w = nnet.score_classif_same_diff_word_separated(train_set_iterator)
    valid_scoref_w = nnet.score_classif_same_diff_word_separated(valid_set_iterator)
    train_scoref_s = nnet.score_classif_same_diff_spkr_separated(train_set_iterator)
    valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(valid_set_iterator)
    data_iterator = train_set_iterator

    print '... training the model'
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f, protocol=-1)

    while (epoch < max_epochs):
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            #print "x[0][0]", x[0][0]
            #print "x[1][0]", x[1][0]
            #print "y[0][0]", y[0][0]
            #print "y[1][0]", y[1][0]
            avg_cost = 0.
            if "delta" in trainer_type:  # TODO remove need for this if
                avg_cost = train_fn(x[0], x[1], y[0], y[1])
            else:
                avg_cost = train_fn(x[0], x[1], y[0], y[1], lr)
            if debug_print >= 3:
                print "cost:", avg_cost[0]
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                            iteration, avg_params_gradients_updates,
                            map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref_w())
        print('  epoch %i, training sim same words %f, diff words %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        tmp_train = zip(*train_scoref_s())
        print('  epoch %i, training sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
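        # 1/sqrt(t) decay; `iteration` here is the last minibatch index of
        # the epoch (the for-loop variable stays bound after the loop)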
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.)) ### TODO
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        # we check the validation loss on every epoch
        validation_losses_w = zip(*valid_scoref_w())
        validation_losses_s = zip(*valid_scoref_s())
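        # weight the word and speaker criteria equally: we want high
        # similarity on 'same' pairs and low similarity on 'diff' pairs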
        this_validation_loss = 0.25*(1.-numpy.mean(validation_losses_w[0])) +\
                0.25*numpy.mean(validation_losses_w[1]) +\
                0.25*(1.-numpy.mean(validation_losses_s[0])) +\
                0.25*numpy.mean(validation_losses_s[1])

        print('  epoch %i, valid sim same words %f, diff words %f' % \
              (epoch, numpy.mean(validation_losses_w[0]), numpy.mean(validation_losses_w[1])))
        print('  epoch %i, valid sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(validation_losses_s[0]), numpy.mean(validation_losses_s[1])))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, ') %
                 (best_validation_loss))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
Example #10
def run(dataset_path=DEFAULT_DATASET,
        dataset_name='timit',
        iterator_type=DatasetDTWIterator,
        batch_size=100,
        nframes=13,
        features="fbank",
        init_lr=0.01,
        max_epochs=500,
        network_type="dropout_net",
        trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configures and runs an AB neural net on DTW-aligned word pairs.
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    # TODO DO A FUNCTION
    if dataset_path[-7:] != '.joblib':
        print >> sys.stderr, "prepare your dataset with align_words.py"
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    has_dev_and_test_set = True
    dev_dataset_path = dataset_path[:-7].replace("train", "") + 'dev.joblib'
    test_dataset_path = dataset_path[:-7].replace("train", "") + 'test.joblib'
    dev_split_at = len(data_same)
    test_split_at = len(data_same)
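    # if no separate dev/test files exist, carve dev and test out of the
    # train data with an 80/10/10 split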
    if not os.path.exists(dev_dataset_path) or not os.path.exists(
            test_dataset_path):
        has_dev_and_test_set = False
        dev_split_at = int(0.8 * dev_split_at)
        test_split_at = int(0.9 * test_split_at)

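    # input dimension = per-frame feature size x number of stacked frames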
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True  # TODO without
    min_max_scale = False
    marginf = (nframes - 1) / 2  # TODO

    ### TRAIN SET
    if has_dev_and_test_set:
        train_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same,
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=None,
            scale_f2=None,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)
    else:
        train_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[:dev_split_at],
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=None,
            scale_f2=None,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
    if has_dev_and_test_set:
        data_same = joblib.load(dev_dataset_path)
        valid_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same,
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)
    else:
        valid_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[dev_split_at:test_split_at],
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)

    ### TEST SET
    if has_dev_and_test_set:
        data_same = joblib.load(test_dataset_path)
        test_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same,
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)
    else:
        test_set_iterator = DatasetDTWWrdSpkrIterator(
            data_same[test_split_at:],
            normalize=normalize,
            min_max_scale=min_max_scale,
            scale_f1=f1,
            scale_f2=f2,
            nframes=nframes,
            batch_size=batch_size,
            marginf=marginf)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    fast_dropout = False
    if "fast_dropout" in network_type:
        fast_dropout = True
    if "dropout" in network_type:
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,
                                  n_ins=n_ins,
                                  layers_types=layers_types,
                                  layers_sizes=layers_sizes,
                                  n_outs=n_outs,
                                  loss='cos_cos2',
                                  rho=0.95,
                                  eps=1.E-6,
                                  max_norm=4.,
                                  fast_drop=fast_dropout,
                                  debugprint=debug_print)
    else:
        #        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng,
        #                n_ins=n_ins,
        #                layers_types=layers_types,
        #                layers_sizes=layers_sizes,
        #                n_outs=n_outs,
        #                loss='cos_cos2',
        #                #loss='euclidean',
        #                rho=0.90,
        #                eps=1.E-6,
        #                max_norm=0.,
        #                debugprint=debug_print)
        from nnet_archs import ABNeuralNet
        nnet = ABNeuralNet(numpy_rng=numpy_rng,
                           n_ins=n_ins,
                           layers_types=layers_types,
                           layers_sizes=layers_sizes,
                           n_outs=n_outs,
                           loss='cos_cos2',
                           rho=0.9,
                           eps=1.E-6,
                           max_norm=0.,
                           debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        print >> sys.stderr, "NOT IMPLEMENTED"
        sys.exit(-1)

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f, protocol=-1)

    while (epoch < max_epochs) and (not done_looping):
        if REDTW and ("ab_net" in network_type
                      or "abnet" in network_type) and ((epoch + 1) % 20) == 0:
            print "recomputing DTW:"
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            #print "x[0][0]", x[0][0]
            #print "x[1][0]", x[1][0]
            #print "y[0][0]", y[0][0]
            #print "y[1][0]", y[1][0]
            avg_cost = 0.
            if "delta" in trainer_type:  # TODO remove need for this if
                avg_cost = train_fn(x[0], x[1], y)
            else:
                avg_cost = train_fn(x[0], x[1], y, lr)
            if debug_print >= 3:
                print "cost:", avg_cost[0]
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray,
                                                       avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                        iteration, avg_params_gradients_updates,
                        map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print('  epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref())
        print('  epoch %i, training sim same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.))  ### TODO
        #lr = numpy.float32(init_lr / (iteration + 1.)) ### TODO
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f, protocol=-1)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\
                0.5*numpy.mean(validation_losses[1])

        print('  epoch %i, valid sim same %f, diff %f' % \
              (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1])))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss <
                    best_validation_loss * improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_same = numpy.mean(
                test_losses[0]
            )  # TODO this is a mean of means (with different lengths)
            test_score_diff = numpy.mean(
                test_losses[1]
            )  # TODO this is a mean of means (with different lengths)
            # track a combined summary so the final report's test
            # performance is not stuck at its initial 0.
            test_score = 0.5 * (test_score_same + test_score_diff)
            print(('  epoch %i, test sim of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') % (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)