def saveModel(dnn, cfg):
    log("> Start saveModel")
    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, path=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the best PDNN model param so far is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the best PDNN model config so far is ' + cfg.cfg_output_file)
    # output the model into Kaldi-compatible format
    if cfg.kaldi_output_file != '':
        dnn.write_model_to_kaldi(cfg.kaldi_output_file)
        log('> ... the best Kaldi model so far is ' + cfg.kaldi_output_file)
    log("< End saveModel")
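# Hedged usage sketch of saveModel(): which outputs are written is driven by the
# output-path fields on cfg, and an empty string skips that output. The paths
# below are hypothetical and only illustrate the fields read by the function above.
cfg.param_output_file = 'exp/dnn.param'  # where the PDNN-format parameters go
cfg.cfg_output_file = 'exp/dnn.cfg'      # where the network configuration goes
cfg.kaldi_output_file = ''               # empty: skip the Kaldi-format export
saveModel(dnn, cfg)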
log('> ... building the model')
# setup model
dnn = PhaseATTEND_LSTM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

# get the training, validation and testing function for the model
log('> ... getting the finetuning functions')
train_fn, valid_fn = dnn.build_finetune_functions(
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    (cfg.extra_train_x), (cfg.extra_valid_x), batch_size=cfg.batch_size)

log('> ... finetuning the model')
min_verr = 100
while (cfg.lrate.get_rate() != 0):
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %0.4f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)))
    valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation error %0.4f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)))
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    if min_verr > 100 * numpy.mean(valid_error):
        min_verr = 100 * numpy.mean(valid_error)
        _nnet2file(dnn.layers, filename=cfg.param_output_file)
    _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
def write_model_to_raw(self, file_path):
    # output the model to tmp_path; this format is readable by PDNN
    _nnet2file(self.layers, filename=file_path,
               input_factor=self.input_dropout_factor, factor=self.dropout_factor)
log('> ... learning the adaptation network')
cfg = cfg_adapt
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    # train_error = train_sgd_verbose(train_fn, cfg_si.train_sets, cfg_si.train_xy,
    #                                 cfg.batch_size, cfg.lrate.get_rate(), cfg.momentum)
    # log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100*numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch_verbose(valid_fn, cfg_si.valid_sets, cfg_si.valid_xy, cfg.batch_size)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    cfg.lrate.rate = 0

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.dnn_adapt.layers, filename=cfg.param_output_file + '.adapt',
               input_factor=cfg_adapt.input_dropout_factor, factor=cfg_adapt.dropout_factor)
    _nnet2file(dnn.cnn_si.layers, filename=cfg.param_output_file + '.si',
               input_factor=cfg_si.input_dropout_factor, factor=cfg_si.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + ' (.si, .adapt)')
if cfg.cfg_output_file != '':
    _cfg2file(cfg_adapt, filename=cfg.cfg_output_file + '.adapt')
    _cfg2file(cfg_si, filename=cfg.cfg_output_file + '.si')
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file + ' (.si, .adapt)')
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.cnn_si.fc_dnn.write_model_to_kaldi(cfg.kaldi_output_file + '.si')
    dnn.dnn_adapt.write_model_to_kaldi(cfg.kaldi_output_file + '.adapt', with_softmax=False)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file + ' (.si, .adapt)')
    log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch_verbose(valid_fn, cfg_si.valid_sets, cfg_si.valid_xy, cfg.batch_size)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.dnn_adapt.layers, filename=cfg.param_output_file + '.adapt',
               input_factor=cfg_adapt.input_dropout_factor, factor=cfg_adapt.dropout_factor)
    _nnet2file(dnn.dnn_si.layers, filename=cfg.param_output_file + '.si',
               input_factor=cfg_si.input_dropout_factor, factor=cfg_si.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + ' (.si, .adapt)')
if cfg.cfg_output_file != '':
    _cfg2file(cfg_adapt, filename=cfg.cfg_output_file + '.adapt')
    _cfg2file(cfg_si, filename=cfg.cfg_output_file + '.si')
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file + ' (.si, .adapt)')
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
def write_model_to_raw(self, file_path):
    _nnet2file(self.layers, filename=file_path)
        while (not cfg.train_sets.is_finish()):
            cfg.train_sets.load_next_partition(cfg.train_xy)
            for batch_index in xrange(cfg.train_sets.cur_frame_num / cfg.batch_size):  # loop over mini-batches
                [reconstruction_cost, free_energy_cost] = pretraining_fns[i](index=batch_index,
                                                                             lr=pretrain_lr,
                                                                             momentum=momentum)
                r_c.append(reconstruction_cost)
                fe_c.append(free_energy_cost)
        cfg.train_sets.initialize_read()
        log('> pre-training layer %i, epoch %d, r_cost %f, fe_cost %f' % (i, epoch, numpy.mean(r_c), numpy.mean(fe_c)))
        # output nnet parameters and training state, for training resume
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        save_two_integers((i, epoch + 1), wdir + '/training_state.tmp')
    start_epoch_index = 0
    save_two_integers((i + 1, 0), wdir + '/training_state.tmp')

# save the pretrained nnet to file
# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
def write_model_to_raw(self, file_path):
    # output the model to tmp_path; this format is readable by PDNN
    _nnet2file(self.layers, filename=file_path)
        train_error.append(train_fn(index=batch_index, learning_rate=lrate.get_rate(), momentum=momentum))
    train_sets.initialize_read()
    log('> epoch %d, training error %f' % (lrate.epoch, numpy.mean(train_error)))

    valid_error = []
    while (not valid_sets.is_finish()):
        valid_sets.load_next_partition(valid_xy)
        for batch_index in xrange(valid_sets.cur_frame_num / batch_size):  # loop over mini-batches
            valid_error.append(valid_fn(index=batch_index))
    valid_sets.initialize_read()
    log('> epoch %d, lrate %f, validation error %f' % (lrate.epoch, lrate.get_rate(), numpy.mean(valid_error)))
    lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

# output both iVecNN and DNN
_nnet2file(dnn.ivec_layers, set_layer_num=len(ivec_nnet_layers) - 1,
           filename=wdir + '/ivec.finetune.tmp', withfinal=True)
_nnet2file(dnn.sigmoid_layers, filename=wdir + '/nnet.finetune.tmp')

# determine whether it's BNF based on layer sizes
set_layer_num = -1
withfinal = True
bnf_layer_index = 1
while bnf_layer_index < len(hidden_layers_sizes):
    if hidden_layers_sizes[bnf_layer_index] < hidden_layers_sizes[bnf_layer_index - 1]:
        break
    bnf_layer_index = bnf_layer_index + 1
if bnf_layer_index < len(hidden_layers_sizes):  # is bottleneck
    set_layer_num = bnf_layer_index + 1
    withfinal = False
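# Worked example of the bottleneck (BNF) check above. The layer sizes are
# illustrative assumptions, not values from the original script: the scan stops at
# the first hidden layer that is narrower than its predecessor, so the network is
# treated as a bottleneck extractor and the final softmax layer is not exported.
hidden_layers_sizes = [1024, 1024, 42, 1024]  # hypothetical sizes, 42 is the bottleneck
bnf_layer_index = 1
while bnf_layer_index < len(hidden_layers_sizes):
    if hidden_layers_sizes[bnf_layer_index] < hidden_layers_sizes[bnf_layer_index - 1]:
        break
    bnf_layer_index = bnf_layer_index + 1
print bnf_layer_index                             # 2
print bnf_layer_index < len(hidden_layers_sizes)  # True -> set_layer_num = 3, withfinal = False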
train_fn, valid_fn = dnn.build_finetune_functions(
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    batch_size=cfg.batch_size)

log('> ... finetuning the model')
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.dnn.layers, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    _nnet2file(dnn.dnn_tower1.layers, filename=cfg.param_output_file + '.tower1',
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    _nnet2file(dnn.dnn_tower2.layers, filename=cfg.param_output_file + '.tower2',
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + '(, .tower1, .tower2)')
if cfg.cfg_output_file != '':
    _cfg2file(cfg, filename=cfg.cfg_output_file)
    _cfg2file(cfg_tower1, filename=cfg.cfg_output_file + '.tower1')
    _cfg2file(cfg_tower2, filename=cfg.cfg_output_file + '.tower2')
valid_error = []
while (not valid_sets.is_finish()):
    valid_sets.load_next_partition(valid_xy)
    for batch_index in xrange(valid_sets.cur_frame_num / batch_size):  # loop over mini-batches
        valid_error.append(valid_fn(index=batch_index))
valid_sets.initialize_read()
log('> epoch %d, lrate %f, validation error %f' % (lrate.epoch, lrate.get_rate(), numpy.mean(valid_error)))
log('> epoch %d, lowest validation error %f' % (lrate.epoch, lrate.lowest_error))
lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

# output conv layer config
# for i in xrange(len(conv_layer_configs)):
#     conv_layer_configs[i]['activation'] = activation_to_txt(conv_activation)
#     with open(wdir + '/conv.config.' + str(i), 'wb') as fp:
#         json.dump(conv_layer_configs[i], fp, indent=2, sort_keys=True)
#         fp.flush()

# output the conv part
_cnn2file(cnn.conv_layers, filename=conv_output_file)
# output the full part
_nnet2file(cnn.full_layers, filename=full_output_file)
_nnet2file(cnn.ivec_layers, set_layer_num=len(ivec_layers_sizes) + 1,
           filename=ivec_output_file, withfinal=False)
# _nnet2kaldi(str(cnn.conv_output_dim) + ':' + full_nnet_spec,
#             filein=wdir + '/nnet.finetune.tmp', fileout=full_output_file)

end_time = time.clock()
print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.))
valid_error = []
while (not valid_sets.is_finish()):
    valid_sets.load_next_partition(valid_xy)
    for batch_index in xrange(valid_sets.cur_frame_num / batch_size):  # loop over mini-batches
        valid_error.append(valid_fn(index=batch_index))
valid_sets.initialize_read()
log('> epoch %d, lrate %f, validation error %f' % (lrate.epoch, lrate.get_rate(), numpy.mean(valid_error)))
lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

# output conv layer config
for i in xrange(len(conv_layer_configs)):
    conv_layer_configs[i]['activation'] = activation_to_txt(conv_activation)
    with open(wdir + '/conv.config.' + str(i), 'wb') as fp:
        json.dump(conv_layer_configs[i], fp, indent=2, sort_keys=True)
        fp.flush()

# output the conv part
_cnn2file(cnn.layers[0:len(conv_layer_configs)], filename=conv_output_file)
# output the full part
total_layer_number = len(cnn.layers)
_nnet2file(cnn.layers[len(conv_layer_configs):total_layer_number], filename=wdir + '/nnet.finetune.tmp')
_nnet2kaldi(str(cnn.conv_output_dim) + ':' + full_nnet_spec,
            filein=wdir + '/nnet.finetune.tmp', fileout=full_output_file)

end_time = time.clock()
print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.))
def dnn_run(arguments):
    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        if arguments.has_key(arg) == False:
            print "Error: the argument %s has to be specified" % (arg)
            exit(1)

    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']

    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training files and layer number (how many layers are set to the pre-training parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'):
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    resume_training = False
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        resume_training = True
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if not resuming training, initialized from the specified pre-training file
    # if resuming training, initialized from the tmp model file
    if (ptr_layer_number > 0) and (resume_training is False):
        _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)
    if resume_training:
        _file2nnet(dnn.layers, filename=wdir + '/nnet.tmp')

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
        batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while (cfg.lrate.get_rate() != 0):
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
        cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
            _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
def dnn_run(arguments):
    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        if arguments.has_key(arg) == False:
            print "Error: the argument %s has to be specified" % (arg)
            exit(1)

    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']

    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training files and layer number (how many layers are set to the pre-training parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'):
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    resume_training = False
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        resume_training = True
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if not resuming training, initialized from the specified pre-training file
    # if resuming training, initialized from the tmp model file
    if (ptr_layer_number > 0) and (resume_training is False):
        _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)
    if resume_training:
        _file2nnet(dnn.layers, filename=wdir + '/nnet.tmp')

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
        batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while (cfg.lrate.get_rate() != 0):
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
        cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
            _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
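# Hedged example of the `arguments` dict consumed by dnn_run(). Only the keys
# mirror the parsing code above; the file names, data specs and layer sizes are
# hypothetical placeholders.
arguments = {
    'train_data': 'train.pfile.gz,partition=600m,random=true',  # hypothetical data spec
    'valid_data': 'valid.pfile.gz,partition=600m,random=true',  # hypothetical data spec
    'nnet_spec': '250:1024:1024:1024:1920',                     # hypothetical layer sizes
    'wdir': './wdir',
    # optional: initialize the first 3 layers from a pre-trained model (hypothetical path)
    'ptr_file': './wdir/rbm.param',
    'ptr_layer_number': '3',
}
dnn_run(arguments)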
        train_error.append(train_fn(index=batch_index, learning_rate=lrate.get_rate(), momentum=momentum))
    train_sets.initialize_read()
    log('> epoch %d, training error %f' % (lrate.epoch, numpy.mean(train_error)))

    valid_error = []
    while (not valid_sets.is_finish()):
        valid_sets.load_next_partition(valid_xy)
        for batch_index in xrange(valid_sets.cur_frame_num / batch_size):  # loop over mini-batches
            valid_error.append(valid_fn(index=batch_index))
    valid_sets.initialize_read()
    log('> epoch %d, lrate %f, validation error %f' % (lrate.epoch, lrate.get_rate(), numpy.mean(valid_error)))
    lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

if do_dropout:
    _nnet2file(dnn.sigmoid_layers, filename=wdir + '/nnet.finetune.tmp',
               input_factor=input_dropout_factor, factor=dropout_factor)
else:
    _nnet2file(dnn.sigmoid_layers, filename=wdir + '/nnet.finetune.tmp')

# determine whether it's BNF based on layer sizes
set_layer_num = -1
withfinal = True
bnf_layer_index = 1
while bnf_layer_index < len(hidden_layers_sizes):
    if hidden_layers_sizes[bnf_layer_index] < hidden_layers_sizes[bnf_layer_index - 1]:
        break
    bnf_layer_index = bnf_layer_index + 1
if bnf_layer_index < len(hidden_layers_sizes):  # is bottleneck
    set_layer_num = bnf_layer_index + 1
    withfinal = False
if n == 0:
    log('> task %d, epoch %d, training error %f ' % (n, cfg.lrate.epoch, 100 * numpy.mean(train_error_array[n])) + '(%)')
    train_error_array[n] = []
    # perform validation, output valid error rate, and adjust learning rate based on the validation error
    valid_error = validate_by_minibatch(valid_fn_array[n], cfg)
    log('> task %d, epoch %d, lrate %f, validation error %f ' % (n, cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
else:
    log('> task %d, epoch %d, training error %f ' % (n, cfg.lrate.epoch, numpy.mean(train_error_array[n])) + '(%)')
    train_error_array[n] = []
    # perform validation, output valid error rate, and adjust learning rate based on the validation error
    valid_error = validate_by_minibatch(valid_fn_array[n], cfg)
    log('> task %d, epoch %d, lrate %f, validation error %f ' % (n, cfg.lrate.epoch, cfg.lrate.get_rate(), numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=numpy.mean(valid_error))

# output nnet parameters and lrate, for training resume
_nnet2file(dnn_array[n].layers, filename=wdir + '/nnet.tmp.task' + str(n))
_lrate2file(cfg.lrate, wdir + '/training_state.tmp.task' + str(n))

# if the lrate of a task decays to 0, training on this task terminates;
# it will be excluded from future training
if cfg.lrate.get_rate() == 0:
    active_tasks_new.remove(n)
    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn_array[n].layers, filename=cfg.param_output_file + '.task' + str(n),
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file + '.task' + str(n))
    if cfg.cfg_output_file != '':
        _cfg2file(dnn_array[n].cfg, filename=cfg.cfg_output_file + '.task' + str(n))
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file + '.task' + str(n))
    # output the model into Kaldi-compatible format
    if cfg.kaldi_output_file != '':
        dnn_array[n].write_model_to_kaldi(cfg.kaldi_output_file + '.task' + str(n))
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    train_acc, train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, lrate %f, training accuracy %f' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(train_acc)) + '(%)')
    # validation
    valid_acc, valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation accuracy %f' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_acc)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(cnn, filename=wdir + '/nnet.tmp_CNN')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp_CNN')

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(cnn, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(cnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the fully-connected part into Kaldi-compatible format
if cfg.kaldi_output_file != '':
def write_model_to_raw(self, file_path):
    # output the model to tmp_path; this format is readable by PDNN
    _nnet2file(self.layers, filename=file_path,
               input_factor=self.input_dropout_factor, factor=self.dropout_factor)
        pretrain_lr = gbrbm_learning_rate
    else:
        pretrain_lr = learning_rate
    # go through pretraining epochs
    momentum = initial_momentum
    for epoch in xrange(epochs):
        # go through the training set
        if (epoch == initial_momentum_epoch):
            momentum = final_momentum
        r_c, fe_c = [], []  # keep record of reconstruction and free-energy cost
        while (not train_sets.is_finish()):
            train_sets.load_next_partition(train_xy)
            for batch_index in xrange(train_sets.cur_frame_num / batch_size):  # loop over mini-batches
                [reconstruction_cost, free_energy_cost] = pretraining_fns[i](index=batch_index,
                                                                             lr=pretrain_lr,
                                                                             momentum=momentum)
                r_c.append(reconstruction_cost)
                fe_c.append(free_energy_cost)
        train_sets.initialize_read()
        log('> Pre-training layer %i, epoch %d, r_cost %f, fe_cost %f' % (i, epoch, numpy.mean(r_c), numpy.mean(fe_c)))

# save the pretrained nnet to file
_nnet2file(srbm.sigmoid_layers, filename=output_file, withfinal=True)

end_time = time.clock()
print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.))
cfg.init_data_reading(train_data_spec, valid_data_spec)

numpy_rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
log('> ... building the model')
# setup model
dnn = ATTEND_LSTM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

# get the training, validation and testing function for the model
log('> ... getting the finetuning functions')
train_fn, valid_fn = dnn.build_finetune_functions(
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    batch_size=cfg.batch_size)

log('> ... finetuning the model')
min_verr = 100
while (cfg.lrate.get_rate() != 0):
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %0.4f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)))
    valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation error %0.4f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)))
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    if min_verr > 100 * numpy.mean(valid_error):
        min_verr = 100 * numpy.mean(valid_error)
        _nnet2file(dnn.layers, filename=cfg.param_output_file)
    _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
train_fn, valid_fn = dnn.build_finetune_functions(
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    batch_size=cfg.batch_size)

log('> ... finetuning the model')
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)
        saveModel(dnn, cfg)
    else:
        if valid_percent < lowest_validation_error:
            msg += "(new low)"
            lowest_validation_error = valid_percent
            fail_count = 0
            saveModel(dnn, cfg)
        else:
            fail_count += 1
            msg += "(failed count for " + str(fail_count) + ")"
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), valid_percent) + '(%) ' + msg)
    if cfg.interrupt_epoch != None and cfg.lrate.epoch == cfg.interrupt_epoch:
        log("** GOING TO INTERRUPT as requested")
        sys.exit(0)
    cfg.lrate.get_next_rate(current_error=valid_percent)
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, path=wdir + '/dnn.tmp')
        _lrate2file(cfg.lrate, wdir + '/dnn_training_state.tmp')

# remove the tmp files (which have been generated from resuming training)
if os.path.exists(wdir + '/dnn.tmp'):
    shutil.rmtree(wdir + '/dnn.tmp')
if os.path.exists(wdir + '/dnn_training_state.tmp'):
    os.remove(wdir + '/dnn_training_state.tmp')
    if multi_label:
        format_results(train_error, pred, labels, multi_label, cfg)
    else:
        format_results(train_error, pred, cfg.train_sets.label_vec, multi_label, cfg)

    # validation
    valid_error, pred2, labels = validate_by_minibatch(valid_fn, cfg)
    n_data_valid = len(pred2)
    log("- Validation:\n")
    if multi_label:
        format_results(valid_error, pred2, labels, multi_label, cfg)
    else:
        format_results(valid_error, pred2, cfg.valid_sets.label_vec, multi_label, cfg)
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))

    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# save the model and network configuration
for i in range(len(cfg.hidden_layers_sizes)):
    if i == 0:
        n_params = (cfg.n_ins + 1) * cfg.hidden_layers_sizes[i]
    else:
        n_params += (cfg.hidden_layers_sizes[i - 1] + 1) * cfg.hidden_layers_sizes[i]
n_params += cfg.n_outs * (cfg.hidden_layers_sizes[i - 1] + 1)
ratio = float(n_params) / float(n_data_train)
log('-->> Ratio Parameters / Data : ' + str(ratio))

if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
def train_dnn(dnn, buf_train, buf_valid, shared_ds=None, save_dir=None,
              restore=False, pre_validate=False):
    """ Train DNN given a training and validation set.

    :type dnn: models.dnn.DNN
    :param dnn: The DNN to train

    :type buf_train: chaipy.data.temporal.BufferedTemporalData
    :param buf_train: The dataset to train on

    :type buf_valid: chaipy.data.temporal.BufferedTemporalData
    :param buf_valid: The dataset to validate on

    :type shared_ds: tuple (see BufferedTemporalData.make_shared)
    :param shared_ds: (optional) The shared dataset to use. If not set, will be
        set automatically using either buf_train or buf_valid, whichever has a
        bigger maximum partition size.

    :type save_dir: str
    :param save_dir: (optional) If not None, save the most recent intermediate
        model to this directory. We only keep the most recent model in this
        directory, except for the final model since we expect the caller of
        this function to save it manually.

    :type restore: bool
    :param restore: (optional) If True, restore parameters of the previous
        model if the new validation error is higher than the lowest error thus
        far. This strategy is suitable for less stable learning.

    :type pre_validate: bool
    :param pre_validate: (optional) If True, do one validation iteration
        before training the model and use this value to bootstrap lrate.

    :rtype: tuple
    :return: (training errors, validation errors)
    """
    if shared_ds is None:
        if buf_train.max_partition_size() > buf_valid.max_partition_size():
            shared_ds = buf_train.make_shared()
        else:
            shared_ds = buf_valid.make_shared()
    if save_dir is not None and not os.path.exists(save_dir):
        os.makedirs(save_dir, 0755)
    x, shared_x, y, shared_y = shared_ds[:4]
    # Compile finetuning function
    shared_xy = (shared_x, shared_y)
    io.log('... getting the finetuning functions')
    train_fn, valid_fn = \
        dnn.build_finetune_functions(shared_xy, shared_xy,
                                     batch_size=dnn.cfg.batch_size)
    io.log('Got them!')
    io.log('... finetuning the model')
    train_errs, valid_errs = [], []
    prev_params, prev_dparams = None, None
    # Do one preemptive validation iteration if necessary
    if pre_validate:
        train_errs.append(-1.0)
        io.log('** Pre-validate: training error {} (%)'.format(train_errs[-1]))
        valid_errs.append(100 * numpy.mean(
            _validate(valid_fn, buf_valid, dnn.cfg.batch_size, shared_ds)
        ))
        io.log('** Pre-validate: validation error {} (%)'.format(valid_errs[-1]))
        dnn.cfg.lrate.lowest_error = valid_errs[-1]
        if restore:
            prev_params = [p.get_value(borrow=True) for p in dnn.params]
            prev_dparams = [p.get_value(borrow=True) for p in dnn.delta_params]
    # Start training
    while dnn.cfg.lrate.get_rate() != 0:
        # One epoch of SGD training
        train_errs.append(100 * numpy.mean(
            _train_sgd(train_fn, buf_train, dnn.cfg, shared_ds)
        ))
        io.log('** Epoch {}, lrate {}, training error {} (%)'.format(
            dnn.cfg.lrate.epoch, dnn.cfg.lrate.get_rate(), train_errs[-1]
        ))
        valid_errs.append(100 * numpy.mean(
            _validate(valid_fn, buf_valid, dnn.cfg.batch_size, shared_ds)
        ))
        io.log('** Epoch {}, lrate {}, validation error {} (%)'.format(
            dnn.cfg.lrate.epoch, dnn.cfg.lrate.get_rate(), valid_errs[-1]
        ))
        prev_error = dnn.cfg.lrate.lowest_error
        dnn.cfg.lrate.get_next_rate(current_error=valid_errs[-1])
        io.log('**** Updated lrate: {}'.format(dnn.cfg.lrate.get_rate()))
        # Restore model parameters if necessary
        if restore:
            if valid_errs[-1] < prev_error:
                prev_params = [p.get_value(borrow=True) for p in dnn.params]
                prev_dparams = [p.get_value(borrow=True) for p in dnn.delta_params]
            elif prev_params is None:
                io.log('**WARN** error increased but no prev_params to restore!')
            elif dnn.cfg.lrate.epoch <= dnn.cfg.lrate.min_epoch_decay_start:
                io.log('** Only {} training epoch, need at least {} to restore **'.format(
                    dnn.cfg.lrate.epoch - 1, dnn.cfg.lrate.min_epoch_decay_start
                ))
            else:
                io.log('** Restoring params of previous best model **')
                for cp, pp in zip(dnn.params, prev_params):
                    cp.set_value(pp, borrow=True)
                for cdp, pdp in zip(dnn.delta_params, prev_dparams):
                    cdp.set_value(pdp, borrow=True)
                idx = numpy.argmin(valid_errs)
                io.log('** Restored: train err = {}, valid err = {} **'.format(
                    train_errs[idx], valid_errs[idx]
                ))
        # Save intermediate model
        if save_dir is not None:
            curr_epoch = dnn.cfg.lrate.epoch - 1
            prev_epoch = curr_epoch - 1
            curr_fname = os.path.join(save_dir, '{}.dnn'.format(curr_epoch))
            prev_fname = os.path.join(save_dir, '{}.dnn'.format(prev_epoch))
            if dnn.cfg.lrate.get_rate() != 0:
                _nnet2file(dnn.layers, filename=curr_fname)
            if os.path.exists(prev_fname):
                os.remove(prev_fname)
    # If restoring, make sure the final err is also the best err
    if restore and valid_errs[-1] != numpy.min(valid_errs):
        idx = numpy.argmin(valid_errs)
        train_errs.append(train_errs[idx])
        valid_errs.append(valid_errs[idx])
    return (train_errs, valid_errs)
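# Minimal usage sketch for train_dnn(), assuming `dnn`, `buf_train`, and `buf_valid`
# have already been constructed elsewhere; the checkpoint directory is hypothetical.
train_errs, valid_errs = train_dnn(dnn, buf_train, buf_valid,
                                   save_dir='exp/dnn_checkpoints',  # hypothetical path
                                   restore=True, pre_validate=True)
io.log('Lowest validation error: {} (%)'.format(min(valid_errs)))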
        # go through the training set
        if (epoch == cfg.initial_momentum_epoch):
            momentum = cfg.final_momentum
        r_c, fe_c = [], []  # keep record of reconstruction and free-energy cost
        while (not cfg.train_sets.is_finish()):
            cfg.train_sets.load_next_partition(cfg.train_xy)
            for batch_index in xrange(cfg.train_sets.cur_frame_num / cfg.batch_size):  # loop over mini-batches
                [reconstruction_cost, free_energy_cost] = pretraining_fns[i](index=batch_index,
                                                                             lr=pretrain_lr,
                                                                             momentum=momentum)
                r_c.append(reconstruction_cost)
                fe_c.append(free_energy_cost)
        cfg.train_sets.initialize_read()
        log('> pre-training layer %i, epoch %d, r_cost %f, fe_cost %f' % (i, epoch, numpy.mean(r_c), numpy.mean(fe_c)))
        # output nnet parameters and training state, for training resume
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        save_two_integers((i, epoch + 1), wdir + '/training_state.tmp')
    start_epoch_index = 0
    save_two_integers((i + 1, 0), wdir + '/training_state.tmp')

# save the pretrained nnet to file
# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
    batch_size=cfg.batch_size)

log('> ... finetuning the model')
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch(valid_fn, cfg)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(cnn.layers, path=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(cnn.layers, path=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(cnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the fully-connected part into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    cnn.fc_dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model (only FC layers) is ' + cfg.kaldi_output_file)