import logging
import math
import pickle
import sys
import time

import numpy
import theano

# Project-local dependencies are assumed to be imported elsewhere in the
# module: DNN, SequentialDNN, DeepRecurrentNetwork, ListDataProvider,
# MultipleSeriesPlot, and the configuration object cfg.


## Variant 1: minibatch training of a feed-forward DNN via Theano functions
## built by build_finetune_functions, with optional Rprop updates.
def train_DNN(train_xy_file_list, valid_xy_file_list,
              nnets_file_name, n_ins, n_outs, ms_outs,
              hyper_params, buffer_size, plot=False):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        logger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of multi-stream outputs does not equal %d' % (n_outs))
        raise

    #### parameters ####
    finetune_lr = numpy.asarray(hyper_params['learning_rate'], dtype='float32')
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    # private_l2_reg = float(hyper_params['private_l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])
    use_rprop = int(hyper_params['use_rprop'])
    hidden_layers_sizes = hyper_params['hidden_layer_size']
    # stream_weights = hyper_params['stream_weights']
    # private_hidden_sizes = hyper_params['private_hidden_sizes']
    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])
    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']
    # stream_lr_weights = hyper_params['stream_lr_weights']
    # use_private_hidden = hyper_params['use_private_hidden']
    model_type = hyper_params['model_type']

    ## use a switch to turn on pretraining;
    ## pretraining may not help much, and in that case we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    buffer_size = int(buffer_size / batch_size) * batch_size
    ####################

    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs,
                                         buffer_size=buffer_size, shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs,
                                         buffer_size=buffer_size, shuffle=False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition()
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x;
    ## we need to support arbitrary data for pretraining
    pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)

    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion

    if model_type == 'DNN':
        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
                        l1_reg=l1_reg, l2_reg=l2_reg,
                        hidden_layers_sizes=hidden_layers_sizes,
                        hidden_activation=hidden_activation,
                        output_activation=output_activation,
                        use_rprop=use_rprop, rprop_init_update=finetune_lr)
        train_fn, valid_fn = dnn_model.build_finetune_functions(
            (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size)
    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise

    logger.info('fine-tuning the %s model' % (model_type))

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0
    previous_finetune_lr = finetune_lr

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            # halve the learning rate after the warmup period
            current_finetune_lr = previous_finetune_lr * 0.5
            previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
            train_set_x.set_value(numpy.asarray(temp_train_set_x, dtype=theano.config.floatX), borrow=True)
            train_set_y.set_value(numpy.asarray(temp_train_set_y, dtype=theano.config.floatX), borrow=True)

            n_train_batches = train_set_x.get_value().shape[0] // batch_size

            logger.debug('this partition: %d frames (divided into %d batches of size %d)' % (
                train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size))

            for minibatch_index in range(n_train_batches):
                this_train_error = train_fn(minibatch_index, current_finetune_lr, current_momentum)
                train_error.append(this_train_error)

                if numpy.isnan(this_train_error):
                    logger.warning('training error over minibatch %d of %d was %s' % (
                        minibatch_index + 1, n_train_batches, this_train_error))

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        validation_losses = valid_fn()
        this_validation_loss = numpy.mean(validation_losses)

        # this has a possible bias if the minibatches were not all of identical size,
        # but it should not be significant if minibatches are small
        this_train_valid_loss = numpy.mean(train_error)

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set', (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set', (epoch, this_train_valid_loss))
            plotlogger.save_plot('training convergence', title='Progress of training and validation error',
                                 xlabel='epochs', ylabel='error')

        if this_validation_loss < best_validation_loss:
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            logger.debug('validation loss decreased, so saving model')
            early_stop = 0
        else:
            logger.debug('validation loss did not improve')
            dbn = best_dnn_model
            early_stop += 1

        if early_stop >= early_stop_epoch:
            # too many consecutive epochs without surpassing the best model
            logger.debug('stopping early')
            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()
    pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info('overall training time: %.2fm, validation error %f' % (
        (end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence', title='Final training and validation error',
                             xlabel='epochs', ylabel='error')

    return best_validation_loss
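# --- Usage sketch (illustrative; hypothetical values) -------------------------
# A minimal call to the train_DNN variant above. The hyper-parameter keys are
# exactly the ones the function reads; all values, dimensions, file lists and
# the output path are placeholder assumptions. Note that the variants in this
# file share the name train_DNN, so in one module only the last definition is
# live; bind the one you want before calling.
def _example_train_basic_dnn(train_x_files, train_y_files,
                             valid_x_files, valid_y_files):
    hyper_params = {
        'learning_rate': 0.002,
        'training_epochs': 25,
        'batch_size': 256,
        'l1_reg': 0.0,
        'l2_reg': 1e-5,
        'warmup_epoch': 10,              # full learning rate for this many epochs
        'momentum': 0.9,
        'warmup_momentum': 0.3,
        'use_rprop': 0,                  # assumed: 0 disables Rprop
        'hidden_layer_size': [512, 512, 512, 512],
        'early_stop_epochs': 5,
        'hidden_activation': 'tanh',
        'output_activation': 'linear',
        'model_type': 'DNN',
        'do_pretraining': False,
        'pretraining_epochs': 10,
        'pretraining_lr': 0.0001,
    }
    # ms_outs must sum to n_outs (asserted at the top of train_DNN)
    return train_DNN((train_x_files, train_y_files),
                     (valid_x_files, valid_y_files),
                     nnets_file_name='dnn_model.pickle',
                     n_ins=425, n_outs=187, ms_outs=[187],
                     hyper_params=hyper_params, buffer_size=200000)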
## Variant 2: sequential (utterance-sized partition) training of a
## feed-forward DNN, calling dnn_model.finetune directly on numpy minibatches.
def train_DNN(train_xy_file_list, valid_xy_file_list,
              nnets_file_name, n_ins, n_outs, ms_outs,
              hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        logger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of multi-stream outputs does not equal %d' % (n_outs))
        raise

    #### parameters ####
    finetune_lr = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])
    hidden_layer_size = hyper_params['hidden_layer_size']
    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])
    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']
    model_type = hyper_params['model_type']
    hidden_layer_type = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining;
    ## pretraining may not help much, and in that case we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']
    # sequential_training = True

    buffer_size = int(buffer_size / batch_size) * batch_size
    ####################

    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    # validation data is still read block by block
    shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x;
    ## we need to support arbitrary data for pretraining
    # pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)

    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion

    if model_type == 'DNN':
        # dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
        #                                  L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type)
        # dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
        #                           l1_reg=l1_reg, l2_reg=l2_reg,
        #                           hidden_layer_sizes=hidden_layer_size)
        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
                        l1_reg=l1_reg, l2_reg=l2_reg,
                        hidden_layer_sizes=hidden_layer_size)
    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise

    logger.info('fine-tuning the %s model' % (model_type))

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0
    all_batches = 0  # running count of minibatches seen across epochs

    # finetune_lr = 0.000125
    previous_finetune_lr = finetune_lr
    print(finetune_lr)

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5
            previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()

            n_train_batches = train_set_x.shape[0] // batch_size

            logger.debug('this partition: %d frames (divided into %d batches of size %d)' % (
                train_set_x.shape[0], n_train_batches, batch_size))

            all_batches = all_batches + n_train_batches

            for minibatch_index in range(n_train_batches):
                this_train_error = dnn_model.finetune(
                    (train_set_x[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :],
                     train_set_y[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :]),
                    batch_size, current_finetune_lr, current_momentum)
                train_error.extend(this_train_error)

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        predicted_parameter = dnn_model.parameter_prediction(valid_set_x)  # , valid_set_y
        validation_losses = numpy.sum((predicted_parameter - valid_set_y) ** 2, axis=1)
        this_validation_loss = numpy.mean(validation_losses)

        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set', (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set', (epoch, this_train_valid_loss))
            plotlogger.save_plot('training convergence', title='Progress of training and validation error',
                                 xlabel='epochs', ylabel='error')

        if this_validation_loss < best_validation_loss:
            if epoch > 10:
                pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            # logger.debug('validation loss decreased, so saving model')

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')
            # dbn = best_dnn_model
            early_stop += 1

        # if early_stop > early_stop_epoch:
        #     logger.debug('stopping early')
        #     break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()
    # pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info('overall training time: %.2fm, validation error %f' % (
        (end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence', title='Final training and validation error',
                             xlabel='epochs', ylabel='error')

    return best_validation_loss
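# --- Usage sketch (illustrative; hypothetical values) -------------------------
# A minimal call to the sequential-training variant above. Compared with the
# basic variant it reads two extra hyper-parameters, 'hidden_layer_type' and
# 'sequential_training'; all values below are placeholder assumptions.
def _example_train_sequential_dnn(train_x_files, train_y_files,
                                  valid_x_files, valid_y_files):
    hyper_params = {
        'learning_rate': 0.001,
        'training_epochs': 25,
        'batch_size': 256,
        'l1_reg': 0.0,
        'l2_reg': 1e-5,
        'warmup_epoch': 10,
        'momentum': 0.9,
        'warmup_momentum': 0.3,
        'hidden_layer_size': [512, 512, 512, 512],
        'early_stop_epochs': 5,
        'hidden_activation': 'tanh',
        'output_activation': 'linear',
        'model_type': 'DNN',
        'hidden_layer_type': ['TANH', 'TANH', 'TANH', 'TANH'],
        'do_pretraining': False,
        'pretraining_epochs': 10,
        'pretraining_lr': 0.0001,
        'sequential_training': True,  # read utterance-sized partitions
    }
    return train_DNN((train_x_files, train_y_files),
                     (valid_x_files, valid_y_files),
                     nnets_file_name='sequential_dnn.pickle',
                     n_ins=425, n_outs=187, ms_outs=[187],
                     hyper_params=hyper_params, buffer_size=200000)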
## Variant 3: trajectory training of a SequentialDNN, warm-started from a
## previously trained model and using the output mean/std vectors in the loss.
def train_DNN(train_xy_file_list, valid_xy_file_list,
              nnets_file_name, n_ins, n_outs, ms_outs,
              hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        logger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of multi-stream outputs does not equal %d' % (n_outs))
        raise

    #### parameters ####
    finetune_lr = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])
    hidden_layer_size = hyper_params['hidden_layer_size']
    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])
    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']
    model_type = hyper_params['model_type']
    hidden_layer_type = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining;
    ## pretraining may not help much, and in that case we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']
    # sequential_training = True

    buffer_size = int(buffer_size / batch_size) * batch_size
    ####################

    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    # validation data is still read block by block
    shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x;
    ## we need to support arbitrary data for pretraining
    # pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)

    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion

    if model_type == 'DNN':
        # dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
        #                                  L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type)
        dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
                                  l1_reg=l1_reg, l2_reg=l2_reg,
                                  hidden_layer_sizes=hidden_layer_size)
    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise

    logger.info('fine-tuning the %s model' % (model_type))

    init_dnn_model = pickle.load(open(init_dnn_model_file, 'rb'))
    dnn_model.set_parameters(init_dnn_model.W_params, init_dnn_model.b_params)

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0

    # finetune_lr = 0.000125
    previous_finetune_lr = finetune_lr
    print(finetune_lr)

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5
            previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()

            n_train_batches = train_set_x.shape[0]
            current_frame_number = train_set_x.shape[0]

            mean_matrix = numpy.tile(cmp_mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))

            logger.debug('this partition: %d frames (divided into %d batches)' % (
                train_set_x.shape[0], n_train_batches))

            this_train_error = dnn_model.finetune((train_set_x, train_set_y), current_frame_number,
                                                  current_finetune_lr, current_momentum,
                                                  mean_matrix, std_matrix)
            train_error.extend(this_train_error.tolist())

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        validation_losses = []
        validation_losses2 = []
        while (not valid_data_reader.is_finish()):
            shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
            current_frame_number = valid_set_x.shape[0]
            mean_matrix = numpy.tile(cmp_mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))

            this_valid_loss = dnn_model.parameter_prediction_trajectory(valid_set_x, valid_set_y,
                                                                        mean_matrix, std_matrix)
            validation_losses.extend(this_valid_loss.tolist())

            # per-frame Euclidean error over the first 60 output dimensions,
            # denormalised by the std vector
            predicted_para = dnn_model.parameter_prediction(valid_set_x)
            temp_loss = numpy.sum(((predicted_para[:, 0:60] - valid_set_y[:, 0:60]) * std_matrix[:, 0:60]) ** 2, axis=1)
            temp_loss = temp_loss ** 0.5
            validation_losses2.extend(temp_loss.tolist())

        valid_data_reader.reset()

        this_validation_loss = numpy.mean(validation_losses)
        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set', (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set', (epoch, this_train_valid_loss))
            plotlogger.save_plot('training convergence', title='Progress of training and validation error',
                                 xlabel='epochs', ylabel='error')

        if this_validation_loss < best_validation_loss:
            if epoch > 10:
                pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            # logger.debug('validation loss decreased, so saving model')

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')
            # dbn = best_dnn_model
            early_stop += 1

        # if early_stop > early_stop_epoch:
        #     logger.debug('stopping early')
        #     break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()
    # pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info('overall training time: %.2fm, validation error %f' % (
        (end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence', title='Final training and validation error',
                             xlabel='epochs', ylabel='error')

    return best_validation_loss
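# --- Usage sketch (illustrative; hypothetical values) -------------------------
# The trajectory-training variant above always warm-starts from a previously
# trained model (init_dnn_model_file is unpickled unconditionally) and needs
# the global mean/std vectors of the output features so it can denormalise
# inside the loss. Names, paths and dimensions below are placeholder
# assumptions; hyper_params follows the same layout as the sketches above.
def _example_train_trajectory_dnn(train_files, valid_files, hyper_params,
                                  cmp_mean_vector, cmp_std_vector):
    return train_DNN(train_files, valid_files,
                     nnets_file_name='trajectory_dnn.pickle',
                     n_ins=425, n_outs=187, ms_outs=[187],
                     hyper_params=hyper_params, buffer_size=200000,
                     cmp_mean_vector=cmp_mean_vector,  # shape: (n_outs,)
                     cmp_std_vector=cmp_std_vector,    # shape: (n_outs,)
                     init_dnn_model_file='dnn_model.pickle')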
## Variant 4: training a DeepRecurrentNetwork, with optional batched RNN
## training and learning-rate decay controlled by the global cfg object.
def train_DNN(train_xy_file_list, valid_xy_file_list,
              nnets_file_name, n_ins, n_outs, ms_outs,
              hyper_params, buffer_size, plot=False, var_dict=None,
              cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None):

    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        logger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the sum of multi-stream outputs does not equal %d' % (n_outs))
        raise

    #### parameters ####
    finetune_lr = float(hyper_params['learning_rate'])
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])
    hidden_layer_size = hyper_params['hidden_layer_size']
    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])
    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']
    model_type = hyper_params['model_type']
    hidden_layer_type = hyper_params['hidden_layer_type']

    ## use a switch to turn on pretraining;
    ## pretraining may not help much, and in that case we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    sequential_training = hyper_params['sequential_training']
    dropout_rate = hyper_params['dropout_rate']

    buffer_size = int(buffer_size / batch_size) * batch_size
    ####################

    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size,
                                         sequential=sequential_training, shuffle=False)

    if cfg.rnn_batch_training:
        train_data_reader.set_rnn_params(training_algo=cfg.training_algo, batch_size=cfg.batch_size,
                                         seq_length=cfg.seq_length, merge_size=cfg.merge_size,
                                         bucket_range=cfg.bucket_range)
        valid_data_reader.reshape_input_output()

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition()
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x;
    ## we need to support arbitrary data for pretraining

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)

    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion

    if model_type == 'DNN':
        dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
                                         L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type,
                                         dropout_rate=dropout_rate, optimizer=cfg.optimizer,
                                         rnn_batch_training=cfg.rnn_batch_training)
        train_fn, valid_fn = dnn_model.build_finetune_functions(
            (train_set_x, train_set_y), (valid_set_x, valid_set_y))  # , batch_size=batch_size
    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise

    logger.info('fine-tuning the %s model' % (model_type))

    start_time = time.time()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    lr_decay = cfg.lr_decay
    if lr_decay > 0:
        early_stop_epoch *= lr_decay

    early_stop = 0
    val_loss_counter = 0

    previous_finetune_lr = finetune_lr

    epoch = 0
    while (epoch < training_epochs):
        epoch = epoch + 1

        if lr_decay == 0:
            # fixed learning rate
            reduce_lr = False
        elif lr_decay < 0:
            # exponential decay
            reduce_lr = False if epoch <= warmup_epoch else True
        elif val_loss_counter > 0:
            # linear decay
            reduce_lr = False
            if val_loss_counter % lr_decay == 0:
                reduce_lr = True
                val_loss_counter = 0
        else:
            # no decay
            reduce_lr = False

        if reduce_lr:
            current_finetune_lr = previous_finetune_lr * 0.5
            current_momentum = momentum
        else:
            current_finetune_lr = previous_finetune_lr
            current_momentum = warmup_momentum

        previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.time()

        logger.debug("training params -- learning rate: %f, early_stop: %d/%d" % (
            current_finetune_lr, early_stop, early_stop_epoch))

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()

            # if sequential training, the batch size will be the number of frames in an utterance;
            # batch_size for sequential training is considered only when rnn_batch_training is set to True
            if sequential_training == True:
                batch_size = temp_train_set_x.shape[0]

            n_train_batches = temp_train_set_x.shape[0] // batch_size
            for index in range(n_train_batches):
                ## send a batch to the shared variable, rather than pass the batch size
                ## and batch index to the finetune function
                train_set_x.set_value(numpy.asarray(temp_train_set_x[index * batch_size:(index + 1) * batch_size],
                                                    dtype=theano.config.floatX), borrow=True)
                train_set_y.set_value(numpy.asarray(temp_train_set_y[index * batch_size:(index + 1) * batch_size],
                                                    dtype=theano.config.floatX), borrow=True)

                this_train_error = train_fn(current_finetune_lr, current_momentum)
                train_error.append(this_train_error)

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        validation_losses = []
        while (not valid_data_reader.is_finish()):
            shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition()
            valid_set_x.set_value(numpy.asarray(temp_valid_set_x, dtype=theano.config.floatX), borrow=True)
            valid_set_y.set_value(numpy.asarray(temp_valid_set_y, dtype=theano.config.floatX), borrow=True)

            this_valid_loss = valid_fn()
            validation_losses.append(this_valid_loss)
        valid_data_reader.reset()

        this_validation_loss = numpy.mean(validation_losses)
        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))

        sub_end_time = time.time()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set', (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set', (epoch, this_train_valid_loss))
            plotlogger.save_plot('training convergence', title='Progress of training and validation error',
                                 xlabel='epochs', ylabel='error')

        if this_validation_loss < best_validation_loss:
            pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss

        if this_validation_loss >= previous_loss:
            logger.debug('validation loss increased')
            val_loss_counter += 1
            early_stop += 1

        if epoch > 15 and early_stop > early_stop_epoch:
            logger.debug('stopping early')
            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.time()

    logger.info('overall training time: %.2fm, validation error %f' % (
        (end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence', title='Final training and validation error',
                             xlabel='epochs', ylabel='error')

    return best_validation_loss
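# --- Usage sketch (illustrative; hypothetical values) -------------------------
# The recurrent variant above additionally reads 'dropout_rate' and
# 'hidden_layer_type' from hyper_params, and consults the module-level
# configuration object `cfg` (optimizer, lr_decay, rnn_batch_training and the
# RNN batching parameters), which must be set up before calling. All values
# below are placeholder assumptions.
def _example_train_recurrent_dnn(train_files, valid_files):
    hyper_params = {
        'learning_rate': 0.002,
        'training_epochs': 25,
        'batch_size': 256,
        'l1_reg': 0.0,
        'l2_reg': 1e-5,
        'warmup_epoch': 10,
        'momentum': 0.9,
        'warmup_momentum': 0.3,
        'hidden_layer_size': [512, 512, 512, 512],
        'early_stop_epochs': 5,
        'hidden_activation': 'tanh',
        'output_activation': 'linear',
        'model_type': 'DNN',
        'hidden_layer_type': ['TANH', 'TANH', 'TANH', 'LSTM'],
        'do_pretraining': False,
        'pretraining_epochs': 10,
        'pretraining_lr': 0.0001,
        'sequential_training': True,  # one utterance per batch unless
                                      # cfg.rnn_batch_training is set
        'dropout_rate': 0.0,
    }
    return train_DNN(train_files, valid_files,
                     nnets_file_name='rnn_model.pickle',
                     n_ins=425, n_outs=187, ms_outs=[187],
                     hyper_params=hyper_params, buffer_size=200000)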