Example #1
def train_DNN(train_xy_file_list, valid_xy_file_list, \
              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector = None, cmp_std_vector = None, init_dnn_model_file = None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        plotlogger.create_plot('training convergence',MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of the multi-stream outputs does not equal %d' %(n_outs))
        raise

    ####parameters#####
    finetune_lr     = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size      = int(hyper_params['batch_size'])
    l1_reg          = float(hyper_params['l1_reg'])
    l2_reg          = float(hyper_params['l2_reg'])
    warmup_epoch    = int(hyper_params['warmup_epoch'])
    momentum        = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])

    hidden_layer_size = hyper_params['hidden_layer_size']

    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])

    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']

    model_type = hyper_params['model_type']
    hidden_layer_type  = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining
    ## pretraining may not help much; in that case, we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']
    dropout_rate = hyper_params['dropout_rate']

    buffer_size = int(buffer_size / batch_size) * batch_size

    ###################
    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training   data provider')
    train_data_reader = ListDataProvider(x_file_list = train_x_file_list, y_file_list = train_y_file_list,
                            n_ins = n_ins, n_outs = n_outs, buffer_size = buffer_size, 
                            sequential = sequential_training, shuffle = True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list = valid_x_file_list, y_file_list = valid_y_file_list,
                            n_ins = n_ins, n_outs = n_outs, buffer_size = buffer_size, 
                            sequential = sequential_training, shuffle = False)

    if cfg.rnn_batch_training:
        train_data_reader.set_rnn_params(training_algo=cfg.training_algo, batch_size=cfg.batch_size, seq_length=cfg.seq_length, merge_size=cfg.merge_size, bucket_range=cfg.bucket_range)
        valid_data_reader.reshape_input_output()
    
    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() 
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()


    ## temporarily we use the training set as pretrain_set_x.
    ##we need to support any data for pretraining

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    logger.info('building the model')


    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion
    if model_type == 'DNN':
        dnn_model = DeepRecurrentNetwork(n_in= n_ins, hidden_layer_size = hidden_layer_size, n_out = n_outs,
                                         L1_reg = l1_reg, L2_reg = l2_reg, hidden_layer_type = hidden_layer_type, 
                                         dropout_rate = dropout_rate, optimizer = cfg.optimizer, rnn_batch_training = cfg.rnn_batch_training)
        train_fn, valid_fn = dnn_model.build_finetune_functions(
                    (train_set_x, train_set_y), (valid_set_x, valid_set_y))  #, batch_size=batch_size

    else:
        logger.critical('%s type NN model is not supported!' %(model_type))
        raise ValueError('unsupported model type: %s' % model_type)

    logger.info('fine-tuning the %s model' %(model_type))

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    lr_decay  = cfg.lr_decay
    if lr_decay>0:
        early_stop_epoch *= lr_decay

    early_stop = 0
    val_loss_counter = 0

    previous_finetune_lr = finetune_lr

    epoch = 0
    while (epoch < training_epochs):
        epoch = epoch + 1
        
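        # cfg.lr_decay selects the schedule:
        #   lr_decay == 0 : keep the learning rate fixed
        #   lr_decay <  0 : halve the rate every epoch once warm-up is over
        #   lr_decay >  0 : halve the rate after every `lr_decay` epochs without
        #                   validation improvement (val_loss_counter is reset below)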
        if lr_decay==0:
            # fixed learning rate 
            reduce_lr = False
        elif lr_decay<0:
            # exponential decay
            reduce_lr = False if epoch <= warmup_epoch else True
        elif val_loss_counter > 0:
            # linear decay
            reduce_lr = False
            if val_loss_counter%lr_decay==0:
                reduce_lr = True
                val_loss_counter = 0
        else:
            # no decay
            reduce_lr = False

        if reduce_lr:
            current_finetune_lr = previous_finetune_lr * 0.5
            current_momentum    = momentum
        else:
            current_finetune_lr = previous_finetune_lr
            current_momentum    = warmup_momentum
        
        previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        logger.debug("training params -- learning rate: %f, early_stop: %d/%d" % (current_finetune_lr, early_stop, early_stop_epoch))
        while (not train_data_reader.is_finish()):

            shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()

            # if sequential training, the batch size will be the number of frames in an utterance
            # batch_size for sequential training is considered only when rnn_batch_training is set to True
            if sequential_training == True:
                batch_size = temp_train_set_x.shape[0]

            n_train_batches = temp_train_set_x.shape[0] // batch_size
            for index in range(n_train_batches):
                ## send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function
                train_set_x.set_value(numpy.asarray(temp_train_set_x[index*batch_size:(index + 1)*batch_size], dtype=theano.config.floatX), borrow=True)
                train_set_y.set_value(numpy.asarray(temp_train_set_y[index*batch_size:(index + 1)*batch_size], dtype=theano.config.floatX), borrow=True)

                this_train_error = train_fn(current_finetune_lr, current_momentum)

                train_error.append(this_train_error)

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        validation_losses = []
        while (not valid_data_reader.is_finish()):
            shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition()
            valid_set_x.set_value(numpy.asarray(temp_valid_set_x, dtype=theano.config.floatX), borrow=True)
            valid_set_y.set_value(numpy.asarray(temp_valid_set_y, dtype=theano.config.floatX), borrow=True)

            this_valid_loss = valid_fn()

            validation_losses.append(this_valid_loss)
        valid_data_reader.reset()

        this_validation_loss = numpy.mean(validation_losses)

        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f  time spent %.2f' %(epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence','validation set',(epoch,this_validation_loss))
            plotlogger.add_plot_point('training convergence','training set',(epoch,this_train_valid_loss))
            plotlogger.save_plot('training convergence',title='Progress of training and validation error',xlabel='epochs',ylabel='error')

        if this_validation_loss < best_validation_loss:
            pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')
            val_loss_counter+=1
            early_stop+=1

        if epoch > 15 and early_stop > early_stop_epoch:
            logger.debug('stopping early')
            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()

    logger.info('overall  training time: %.2fm validation error %f' % ((end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence',title='Final training and validation error',xlabel='epochs',ylabel='error')

    return  best_validation_loss
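
For orientation, here is a minimal sketch of how this train_DNN might be invoked. The file lists, feature dimensions and hyper-parameter values are hypothetical placeholders; only the dictionary keys mirror what the function actually reads, and a module-level cfg (providing rnn_batch_training, optimizer, lr_decay, training_algo, ...) plus the project classes it references (ListDataProvider, DeepRecurrentNetwork, MultipleSeriesPlot) are assumed to be importable from the surrounding code.

# Hypothetical file lists and dimensions -- illustrative only.
train_x_files = ['train_0001.lab.bin', 'train_0002.lab.bin']
train_y_files = ['train_0001.cmp', 'train_0002.cmp']
valid_x_files = ['valid_0001.lab.bin']
valid_y_files = ['valid_0001.cmp']

# The keys below are exactly those read from hyper_params in the function body;
# the values are placeholders, not recommended settings.
hyper_params = {
    'learning_rate'      : 0.002,
    'training_epochs'    : 25,
    'batch_size'         : 256,
    'l1_reg'             : 0.0,
    'l2_reg'             : 1e-5,
    'warmup_epoch'       : 10,
    'momentum'           : 0.9,
    'warmup_momentum'    : 0.3,
    'hidden_layer_size'  : [1024, 1024, 1024, 1024],
    'early_stop_epochs'  : 5,
    'hidden_activation'  : 'tanh',
    'output_activation'  : 'linear',
    'model_type'         : 'DNN',
    'hidden_layer_type'  : ['TANH', 'TANH', 'TANH', 'TANH'],
    'do_pretraining'     : False,
    'pretraining_epochs' : 10,
    'pretraining_lr'     : 0.0001,
    'sequential_training': False,
    'dropout_rate'       : 0.0,
}

best_loss = train_DNN(
    (train_x_files, train_y_files),
    (valid_x_files, valid_y_files),
    nnets_file_name='nnets_model.pkl',   # where the best model is pickled
    n_ins=425, n_outs=187,               # illustrative input/output dimensions
    ms_outs=[187],                       # must sum to n_outs (asserted at the top)
    hyper_params=hyper_params,
    buffer_size=200000,
    plot=False)
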
Example #2
def train_DNN(train_xy_file_list, valid_xy_file_list, \
              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector = None, cmp_std_vector = None, init_dnn_model_file = None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        plotlogger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical(
            'the sum of the multi-stream outputs does not equal %d' %
            (n_outs))
        raise

    ####parameters#####
    finetune_lr = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])

    hidden_layer_size = hyper_params['hidden_layer_size']

    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])

    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']

    model_type = hyper_params['model_type']
    hidden_layer_type = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining
    ## pretraining may not help much; in that case, we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']

    #    sequential_training = True

    buffer_size = int(buffer_size / batch_size) * batch_size

    ###################
    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training   data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list,
                                         y_file_list=train_y_file_list,
                                         n_ins=n_ins,
                                         n_outs=n_outs,
                                         buffer_size=buffer_size,
                                         sequential=sequential_training,
                                         shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list,
                                         y_file_list=valid_y_file_list,
                                         n_ins=n_ins,
                                         n_outs=n_outs,
                                         buffer_size=buffer_size,
                                         sequential=sequential_training,
                                         shuffle=False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()  # validation data is still read block by block
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x.
    ##we need to support any data for pretraining
    #    pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion
    if model_type == 'DNN':
        #        dnn_model = DeepRecurrentNetwork(n_in= n_ins, hidden_layer_size = hidden_layer_size, n_out = n_outs, L1_reg = l1_reg, L2_reg = l2_reg, hidden_layer_type = hidden_layer_type)

        #        dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs = n_outs,
        #                        l1_reg = l1_reg, l2_reg = l2_reg,
        #                         hidden_layer_sizes = hidden_layer_size)
        dnn_model = DNN(numpy_rng=numpy_rng,
                        n_ins=n_ins,
                        n_outs=n_outs,
                        l1_reg=l1_reg,
                        l2_reg=l2_reg,
                        hidden_layer_sizes=hidden_layer_size)

    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise ValueError('unsupported model type: %s' % (model_type))

    logger.info('fine-tuning the %s model' % (model_type))

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0
    all_batches = 0   # running count of minibatches processed

    #    finetune_lr = 0.000125
    previous_finetune_lr = finetune_lr

    print(finetune_lr)

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5

        previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()

            n_train_batches = train_set_x.shape[0] / batch_size

            logger.debug(
                'this partition: %d frames (divided into %d batches of size %d)'
                % (train_set_x.shape[0], n_train_batches, batch_size))

            all_batches = all_batches + n_train_batches

            for minibatch_index in xrange(n_train_batches):
                this_train_error = dnn_model.finetune((train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :], \
                                                       train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :]), batch_size, current_finetune_lr, current_momentum)
                train_error.extend(this_train_error)

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        predicted_parameter = dnn_model.parameter_prediction(
            valid_set_x)  #, valid_set_y
        validation_losses = numpy.sum((predicted_parameter - valid_set_y)**2,
                                      axis=1)
        this_validation_loss = numpy.mean(validation_losses)

        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info(
            'epoch %i, validation error %f, train error %f  time spent %.2f' %
            (epoch, this_validation_loss, this_train_valid_loss,
             (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set',
                                      (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set',
                                      (epoch, this_train_valid_loss))
            plotlogger.save_plot(
                'training convergence',
                title='Progress of training and validation error',
                xlabel='epochs',
                ylabel='error')

        if this_validation_loss < best_validation_loss:
            if epoch > 10:
                cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
#            logger.debug('validation loss decreased, so saving model')

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')

            #            dbn = best_dnn_model
            early_stop += 1


#        if early_stop > early_stop_epoch:
#            logger.debug('stopping early')
#            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()
    #    cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info('overall  training time: %.2fm validation error %f' %
                ((end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence',
                             title='Final training and validation error',
                             xlabel='epochs',
                             ylabel='error')

    return best_validation_loss
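
Unlike Example #1, this variant has no lr_decay switch: the learning rate is held at its base value (with warmup_momentum) for the first warmup_epoch epochs and is then halved every subsequent epoch, at which point the momentum switches to its full value. A standalone sketch of that schedule follows; the numeric values are illustrative, not taken from the source.

finetune_lr, warmup_epoch, training_epochs = 0.002, 10, 25
momentum, warmup_momentum = 0.9, 0.3

previous_finetune_lr = finetune_lr
for epoch in range(1, training_epochs + 1):
    if epoch <= warmup_epoch:
        current_finetune_lr = finetune_lr                 # hold the base rate during warm-up
        current_momentum = warmup_momentum
    else:
        current_finetune_lr = previous_finetune_lr * 0.5  # halve the rate every epoch afterwards
        current_momentum = momentum
    previous_finetune_lr = current_finetune_lr
    print('epoch %2d: lr=%g, momentum=%.1f' % (epoch, current_finetune_lr, current_momentum))
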
Example #3
def train_DNN(train_xy_file_list, valid_xy_file_list, \
              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector = None, cmp_std_vector = None, init_dnn_model_file = None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        plotlogger.create_plot('training convergence',MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of the multi-stream outputs does not equal %d' %(n_outs))
        raise

    ####parameters#####
    finetune_lr     = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size      = int(hyper_params['batch_size'])
    l1_reg          = float(hyper_params['l1_reg'])
    l2_reg          = float(hyper_params['l2_reg'])
    warmup_epoch    = int(hyper_params['warmup_epoch'])
    momentum        = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])

    hidden_layer_size = hyper_params['hidden_layer_size']

    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])

    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']

    model_type = hyper_params['model_type']
    hidden_layer_type  = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining
    ## pretraining may not help much; in that case, we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']

#    sequential_training = True

    buffer_size = int(buffer_size / batch_size) * batch_size

    ###################
    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training   data provider')
    train_data_reader = ListDataProvider(x_file_list = train_x_file_list, y_file_list = train_y_file_list,
                            n_ins = n_ins, n_outs = n_outs, buffer_size = buffer_size, sequential = sequential_training, shuffle = True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list = valid_x_file_list, y_file_list = valid_y_file_list,
                            n_ins = n_ins, n_outs = n_outs, buffer_size = buffer_size, sequential = sequential_training, shuffle = False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()   #validation data is still read block by block
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()


    ## temporarily we use the training set as pretrain_set_x.
    ##we need to support any data for pretraining
#    pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    logger.info('building the model')


    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion
    if model_type == 'DNN':
#        dnn_model = DeepRecurrentNetwork(n_in= n_ins, hidden_layer_size = hidden_layer_size, n_out = n_outs, L1_reg = l1_reg, L2_reg = l2_reg, hidden_layer_type = hidden_layer_type)

        dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs = n_outs,
                        l1_reg = l1_reg, l2_reg = l2_reg,
                         hidden_layer_sizes = hidden_layer_size)

    else:
        logger.critical('%s type NN model is not supported!' %(model_type))
        raise ValueError('unsupported model type: %s' % model_type)

    logger.info('fine-tuning the %s model' %(model_type))

    init_dnn_model = pickle.load(open(init_dnn_model_file, 'rb'))

    dnn_model.set_parameters(init_dnn_model.W_params, init_dnn_model.b_params)


    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0

#    finetune_lr = 0.000125
    previous_finetune_lr = finetune_lr

    print(finetune_lr)

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5

        previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()

            n_train_batches = train_set_x.shape[0]
            current_frame_number = train_set_x.shape[0]

            mean_matrix = numpy.tile(cmp_mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))

            logger.debug('this partition: %d frames (divided into %d batches )' %(train_set_x.shape[0], n_train_batches) )

            this_train_error = dnn_model.finetune((train_set_x, train_set_y), current_frame_number, current_finetune_lr, current_momentum, mean_matrix, std_matrix)
            train_error.extend(this_train_error.tolist())

        train_data_reader.reset()


        logger.debug('calculating validation loss')
        validation_losses = []
        validation_losses2 = []
        while (not valid_data_reader.is_finish()):
            shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()

            current_frame_number = valid_set_x.shape[0]
            mean_matrix = numpy.tile(cmp_mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))

            this_valid_loss = dnn_model.parameter_prediction_trajectory(valid_set_x, valid_set_y, mean_matrix, std_matrix)
            validation_losses.extend(this_valid_loss.tolist())

            predicted_para = dnn_model.parameter_prediction(valid_set_x)
            temp_loss = numpy.sum(((predicted_para[:, 0:60] - valid_set_y[:, 0:60]) * std_matrix[:, 0:60]) ** 2, axis=1)
            temp_loss = temp_loss ** 0.5
            validation_losses2.extend(temp_loss.tolist())
        valid_data_reader.reset()

        this_validation_loss = numpy.mean(validation_losses)

        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f  time spent %.2f' %(epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence','validation set',(epoch,this_validation_loss))
            plotlogger.add_plot_point('training convergence','training set',(epoch,this_train_valid_loss))
            plotlogger.save_plot('training convergence',title='Progress of training and validation error',xlabel='epochs',ylabel='error')

        if this_validation_loss < best_validation_loss:
            if epoch > 10:
                pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
#            logger.debug('validation loss decreased, so saving model')

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')

#            dbn = best_dnn_model
            early_stop += 1

#        if early_stop > early_stop_epoch:
#            logger.debug('stopping early')
#            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()
#    cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info('overall  training time: %.2fm validation error %f' % ((end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence',title='Final training and validation error',xlabel='epochs',ylabel='error')

    return  best_validation_loss
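
Example #3 warm-starts from init_dnn_model_file and, besides the trajectory loss, computes a second, de-normalised distortion (validation_losses2): the first 60 output dimensions (presumably the spectral stream; the 60-of-187 split used here is an assumption) are rescaled by their standard deviations before taking a per-frame Euclidean distance. A small numpy sketch of that computation on random placeholder data:

import numpy

frames, dim = 100, 187
predicted_para = numpy.random.randn(frames, dim)      # stand-in for dnn_model.parameter_prediction(valid_set_x)
valid_set_y    = numpy.random.randn(frames, dim)      # stand-in for the normalised targets
cmp_std_vector = numpy.abs(numpy.random.randn(dim))   # per-dimension standard deviations

std_matrix = numpy.tile(cmp_std_vector, (frames, 1))
temp_loss = numpy.sum(((predicted_para[:, 0:60] - valid_set_y[:, 0:60])
                       * std_matrix[:, 0:60]) ** 2, axis=1) ** 0.5
print('mean per-frame distortion over the first 60 dimensions: %f' % temp_loss.mean())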