def main(paramFile="", num_epochs=10):
    """Evaluate the RCNN on the utterances under ``./test`` and print FER/PER.

    Builds the network with ``build_rcnn``, optionally restores parameter
    values from ``paramFile``, compiles a deterministic evaluation function
    and runs it over every batch yielded by ``loadArray``. Per-utterance and
    aggregate loss, frame error rate (FER) and phoneme error rate (PER) are
    printed; nothing is returned.

    Args:
        paramFile: Path to a pickled list of parameter values accepted by
            ``lasagne.layers.set_all_param_values``. An empty string keeps
            the freshly initialised network.
        num_epochs: Not used by this evaluation-only entry point; kept so
            existing callers keep working.

    Side effects:
        Changes the process working directory to ``<cwd>/test`` and leaves
        it there (same as before this rewrite).
    """
    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building model and compiling functions...")
    network = build_rcnn(input_var)

    if paramFile == "":
        print("Train a new network!")
    else:
        print("Load well trained parameters from "+paramFile)
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file -- only load parameter files from a trusted source.
        # `open` + `with` replaces the removed `file()` builtin and closes
        # the handle even when unpickling fails.
        with open(paramFile, 'rb') as f:
            params = cPickle.load(f)
        lasagne.layers.set_all_param_values(network, params)

    # Loss expression for testing. deterministic=True requests a
    # deterministic forward pass (e.g. dropout layers disabled).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    loss_expr = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var).mean()
    # Fraction of frames whose arg-max class differs from the target.
    fer_expr = T.mean(T.neq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    frames = T.argmax(test_prediction, axis=1)
    val_fn = theano.function([input_var, target_var],
                             [loss_expr, fer_expr, frames])

    print("Starting Testing Phoneme Error Rate...")
    dirpath = os.getcwd()
    print('dirpath = '+dirpath)
    test_dirpath = os.path.join(dirpath, 'test')
    os.chdir(test_dirpath)

    nan = float("nan")
    test_ferr = 0
    test_loss = 0
    total_phn_num = 0
    test_batches = 0
    test_per = 0
    for inputs, targets, batchNum in loadArray(test_dirpath):
        loss, ferr, out_frame = val_fn(inputs, targets)
        # Weight per-batch means by batchNum (printed below as the frame
        # length) so the final averages are per frame.
        test_ferr += ferr*batchNum
        test_loss += loss*batchNum
        test_batches += batchNum
        # presumably collapses frame-level labels into a phoneme sequence,
        # with 39 acting as the blank index -- confirm against PER module.
        out_phn = PER.phn2targetseq(out_frame, 39)
        label_phn = PER.phn2targetseq(targets, 39)
        phn_num = len(label_phn)
        perr = PER.per(out_phn, label_phn)
        total_phn_num += phn_num
        test_per += perr
        print("frame length = "+str(batchNum))
        print("output phoneme = "+str(out_phn))
        print("label phoneme = "+str(label_phn))
        print("label phoneme length = "+str(phn_num))
        print(" phoneme err num = "+str(perr))
        print(" FER = "+str(ferr))
        # An utterance with no label phonemes has an undefined PER.
        print(" PER = "+str(float(perr)/phn_num if phn_num else nan))

    # Report nan instead of raising ZeroDivisionError when the test
    # directory produced no frames / no phonemes.
    mean_loss = test_loss / test_batches if test_batches else nan
    mean_ferr = test_ferr / test_batches if test_batches else nan
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(mean_loss))
    print(" test Frame Error Rate:\t\t{:.2f} %".format(mean_ferr * 100))
    print("test_per = "+str(test_per))
    print("total_phn_num = "+str(total_phn_num))
    test_per = float(test_per)/total_phn_num if total_phn_num else nan
    print(" test Phoneme Error Rate:\t\t{:.2f} %".format(test_per * 100))
def _ratio(numerator, denominator):
    """Return numerator/denominator as a float, or nan when denominator is 0."""
    return float(numerator) / denominator if denominator else float("nan")


def _new_totals():
    """Return zeroed accumulators for one evaluation pass."""
    return {"ferr": 0, "loss": 0, "frames": 0, "phn": 0, "perr": 0}


def _score_batch(val_fn, batch, totals, blank_symbol_num):
    """Evaluate one batch with val_fn, print its stats and update totals."""
    inputs, targets, batchNum = batch
    loss, ferr, out_frame = val_fn(inputs, targets)
    # Weight per-batch means by batchNum (printed below as the frame
    # length) so the final averages are per frame.
    totals["ferr"] += ferr*batchNum
    totals["loss"] += loss*batchNum
    totals["frames"] += batchNum
    # presumably collapses frame-level labels into a phoneme sequence with
    # blank_symbol_num as the blank index -- confirm against PER module.
    out_phn = PER.phn2targetseq(out_frame, blank_symbol_num)
    label_phn = PER.phn2targetseq(targets, blank_symbol_num)
    phn_num = len(label_phn)
    perr = PER.per(out_phn, label_phn)
    totals["phn"] += phn_num
    totals["perr"] += perr
    print("frame length = "+str(batchNum))
    print("output phoneme = "+str(out_phn))
    print("label phoneme = "+str(label_phn))
    print("label phoneme length = "+str(phn_num))
    print(" phoneme err num = "+str(perr))
    print(" FER = "+str(ferr))
    print(" PER = "+str(_ratio(perr, phn_num)))


def _print_summary(header, tag, totals):
    """Print aggregate loss/FER/PER for one pass and return the PER ratio."""
    print(header)
    print((" " + tag + " loss:\t\t\t{:.6f}").format(
        _ratio(totals["loss"], totals["frames"])))
    print((" " + tag + " Frame Error Rate:\t\t{:.2f} %").format(
        _ratio(totals["ferr"], totals["frames"]) * 100))
    print(tag + "_per = "+str(totals["perr"]))
    print("total_phn_num = "+str(totals["phn"]))
    per = _ratio(totals["perr"], totals["phn"])
    print((" " + tag + " Phoneme Error Rate:\t\t{:.2f} %").format(per * 100))
    return per


def main(paramFile="", num_epochs=5):
    """Train the RCNN with a CTC cost, validate each epoch, then test.

    The batches yielded by ``loadArray`` for ``./train`` are split 90/10 by
    position: the first ``train_total_num`` batches update the network via
    ``CTC.cost`` + Nesterov momentum, the remainder are scored as a
    validation set. After every epoch the parameter values are pickled next
    to this script; after the last epoch ``./test`` is evaluated.

    Args:
        paramFile: Path to a pickled list of parameter values accepted by
            ``lasagne.layers.set_all_param_values``. An empty string starts
            from a freshly initialised network.
        num_epochs: Number of passes over the training directory.

    Side effects:
        Changes the working directory repeatedly (left at ``<cwd>/test`` on
        return), writes one ``<script>_<epoch>_per=<PER>.save`` file per
        epoch, and prints progress.
    """
    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    label = T.vector()
    blank_symbol = T.scalar()

    print("Building model and compiling functions...")
    network = build_rcnn(input_var)

    if paramFile == "":
        print("Train a new network!")
    else:
        print("Load well trained parameters from "+paramFile)
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file -- only load parameter files from a trusted source.
        # `open` + `with` replaces the removed `file()` builtin and closes
        # the handle even when unpickling fails.
        with open(paramFile, 'rb') as f:
            params = cPickle.load(f)
        lasagne.layers.set_all_param_values(network, params)

    # Training objective: CTC cost of the (non-deterministic) network output
    # against the phoneme label sequence.
    y = lasagne.layers.get_output(network)
    ctc_cost = CTC.cost(y, label, blank_symbol)
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        ctc_cost, params, learning_rate=1e-5, momentum=0.9)
    train_fn = theano.function([input_var, label, blank_symbol], ctc_cost,
                               updates=updates, allow_input_downcast=True)

    # Validation/testing expressions use a deterministic forward pass.
    # (The original built these two expressions twice; once is enough.)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var).mean()
    # Fraction of frames whose arg-max class differs from the target.
    test_fer = T.mean(T.neq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    test_frames = T.argmax(test_prediction, axis=1)
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_fer, test_frames])

    print("Starting training...")
    dirpath = os.getcwd()
    print('dirpath = '+dirpath)
    train_dirpath = dirpath + '/train'
    test_dirpath = dirpath + '/test'
    # Half the directory entries are counted as samples -- presumably one
    # input file plus one label file per utterance; TODO confirm.
    # `//` keeps this an integer on Python 3 as well as Python 2.
    total = len(os.listdir(train_dirpath)) // 2
    train_total_num = int(0.9 * total)
    validation_total_num = total - train_total_num
    print('Train num = ' + str(train_total_num))
    print('Validation num = '+str(validation_total_num))
    blank_symbol_num = 39

    for epoch in range(num_epochs):
        os.chdir(train_dirpath)
        counter = 0
        val_totals = _new_totals()
        for batch in loadArray(train_dirpath):
            inputs, targets, batchNum = batch
            print('spectro shape:')
            print(inputs.shape)
            print('label shape:')
            print(targets.shape)
            counter += 1
            # counter is 1-based here, so `<=` trains on exactly
            # train_total_num batches (the previous `<` trained on one
            # fewer and started validation progress at 0 %).
            if counter <= train_total_num:
                label_without_blank = PER.phn2targetseq(
                    targets, blank_symbol_num)
                train_fn(inputs, label_without_blank, blank_symbol_num)
                print(' Train set completed : '
                      + str(float(counter)/train_total_num*100))
            else:
                _score_batch(val_fn, batch, val_totals, blank_symbol_num)
                print(' Validation set completed : '
                      + str(_ratio(counter-train_total_num,
                                   validation_total_num)*100))

        # Results for this epoch. nan (rather than ZeroDivisionError) is
        # reported when the validation split is empty, so the parameters
        # trained in this epoch are still saved below.
        val_per = _print_summary("Validation results:", "val", val_totals)

        # Store parameters relative to the original working directory.
        os.chdir(dirpath)
        print(" should store epoch {}".format(epoch+1))
        pythonName, suffix = os.path.splitext(__file__)
        param2store = lasagne.layers.get_all_param_values(network)
        storename = (pythonName+"_"+str((epoch+1))
                     + "_per="+str(val_per * 100)+".save")
        with open(storename, 'wb') as f:
            cPickle.dump(param2store, f)

    # After training, compute and print the test error.
    # NOTE(review): the source's indentation was lost; this pass is placed
    # after the epoch loop per the "After training" comment -- confirm.
    os.chdir(test_dirpath)
    test_totals = _new_totals()
    for batch in loadArray(test_dirpath):
        _score_batch(val_fn, batch, test_totals, blank_symbol_num)
    _print_summary("Final results:", "test", test_totals)