def train_model(feature_size, hidden_size, init_window_size, generator_model, generator_gan_optimizer, generator_tf_optimizer, discriminator_feature_model, discriminator_output_model, discriminator_gan_optimizer, num_epochs, model_name): # generator updater print 'COMPILING GAN UPDATE FUNCTION ' gan_updater = set_gan_update_function(generator_model=generator_model, discriminator_feature_model=discriminator_feature_model, discriminator_output_model=discriminator_output_model, generator_optimizer=generator_gan_optimizer, discriminator_optimizer=discriminator_gan_optimizer, generator_grad_clipping=.0, discriminator_grad_clipping=.0) print 'COMPILING TF UPDATE FUNCTION ' tf_updater = set_tf_update_function(generator_model=generator_model, generator_optimizer=generator_tf_optimizer, generator_grad_clipping=.0) # evaluator print 'COMPILING EVALUATION FUNCTION ' evaluator = set_evaluation_function(generator_model=generator_model) # sample generator print 'COMPILING SAMPLING FUNCTION ' sample_generator = set_sample_function(generator_model=generator_model) print 'READ RAW WAV DATA' _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav') valid_raw_data = train_raw_data[160000000:] train_raw_data = train_raw_data[:160000000] train_raw_data = train_raw_data[2000:] train_raw_data = (train_raw_data/(1.15*2.**13)).astype(floatX) valid_raw_data = (valid_raw_data/(1.15*2.**13)).astype(floatX) num_train_total_steps = train_raw_data.shape[0] num_valid_total_steps = valid_raw_data.shape[0] batch_size = 64 num_valid_sequences = num_valid_total_steps/(feature_size*init_window_size)-1 valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*init_window_size)] valid_source_data = valid_source_data.reshape((num_valid_sequences, init_window_size, feature_size)) valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*init_window_size)] valid_target_data = valid_target_data.reshape((num_valid_sequences, 
init_window_size, feature_size)) valid_raw_data = None num_seeds = 10 valid_shuffle_idx = np_rng.permutation(num_valid_sequences) valid_source_data = valid_source_data[valid_shuffle_idx] valid_target_data = valid_target_data[valid_shuffle_idx] valid_seed_data = valid_source_data[:num_seeds][0][:] valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1) valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1) num_valid_batches = num_valid_sequences/batch_size print 'NUM OF VALID BATCHES : ', num_valid_sequences/batch_size best_valid = 10000. print 'START TRAINING' # for each epoch tf_mse_list = [] tf_generator_grad_list = [] gan_generator_grad_list = [] gan_generator_cost_list = [] gan_discriminator_grad_list = [] gan_discriminator_cost_list = [] gan_true_score_list = [] gan_false_score_list = [] gan_mse_list = [] valid_mse_list = [] train_batch_count = 0 for e in xrange(num_epochs): window_size = init_window_size + 5*e sequence_size = feature_size*window_size last_seq_idx = num_train_total_steps-(sequence_size+feature_size) train_seq_orders = np_rng.permutation(last_seq_idx) train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size] train_seq_orders = train_seq_orders.reshape((-1, batch_size)) print 'NUM OF TRAIN BATCHES : ', train_seq_orders.shape[0] # for each batch for batch_idx, batch_info in enumerate(train_seq_orders): # source data train_source_idx = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0) train_source_data = train_raw_data[train_source_idx] train_source_data = train_source_data.reshape((batch_size, window_size, feature_size)) train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1) # target data train_target_idx = train_source_idx + feature_size train_target_data = train_raw_data[train_target_idx] train_target_data = train_target_data.reshape((batch_size, window_size, feature_size)) train_target_data = 
numpy.swapaxes(train_target_data, axis1=0, axis2=1) # tf update tf_update_output = tf_updater(train_source_data, train_target_data) tf_square_error = tf_update_output[0].mean() tf_generator_grad_norm = tf_update_output[1] # gan update gan_update_output = gan_updater(train_source_data, train_target_data) generator_gan_cost = gan_update_output[0].mean() discriminator_gan_cost = gan_update_output[1].mean() discriminator_true_score = gan_update_output[2].mean() discriminator_false_score = gan_update_output[3].mean() gan_square_error = gan_update_output[4].mean() gan_generator_grad_norm = gan_update_output[5] gan_discriminator_grad_norm = gan_update_output[6] train_batch_count += 1 tf_generator_grad_list.append(tf_generator_grad_norm) tf_mse_list.append(tf_square_error) gan_generator_grad_list.append(gan_generator_grad_norm) gan_generator_cost_list.append(generator_gan_cost) gan_discriminator_grad_list.append(gan_discriminator_grad_norm) gan_discriminator_cost_list.append(discriminator_gan_cost) gan_true_score_list.append(discriminator_true_score) gan_false_score_list.append(discriminator_false_score) gan_mse_list.append(gan_square_error) if train_batch_count%10==0: print '============{}_LENGTH{}============'.format(model_name, window_size) print 'epoch {}, batch_cnt {} => TF generator mse cost {}'.format(e, train_batch_count, tf_mse_list[-1]) print 'epoch {}, batch_cnt {} => GAN generator mse cost {}'.format(e, train_batch_count, gan_mse_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN generator cost {}'.format(e, train_batch_count, gan_generator_cost_list[-1]) print 'epoch {}, batch_cnt {} => GAN discriminator cost {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN input score {}'.format(e, train_batch_count, gan_true_score_list[-1]) print 'epoch {}, batch_cnt {} => GAN sample score 
{}'.format(e, train_batch_count, gan_false_score_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN discrim. grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1]) print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => TF generator grad norm {}'.format(e, train_batch_count, tf_generator_grad_list[-1]) if train_batch_count%100==0: tf_valid_mse = 0.0 valid_batch_count = 0 for valid_idx in xrange(num_valid_batches): start_idx = batch_size*valid_idx end_idx = batch_size*(valid_idx+1) evaluation_outputs = evaluator(valid_source_data[:][start_idx:end_idx][:], valid_target_data[:][start_idx:end_idx][:]) tf_valid_mse += evaluation_outputs[0].mean() valid_batch_count += 1 if valid_idx==0: recon_data = evaluation_outputs[1] recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1) recon_data = recon_data[:10] recon_data = recon_data.reshape((10, -1)) recon_data = recon_data*(1.15*2.**13) recon_data = recon_data.astype(numpy.int16) save_wavfile(recon_data, model_name+'_recon') orig_data = valid_target_data[:][start_idx:end_idx][:] orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1) orig_data = orig_data[:10] orig_data = orig_data.reshape((10, -1)) orig_data = orig_data*(1.15*2.**13) orig_data = orig_data.astype(numpy.int16) save_wavfile(orig_data, model_name+'_orig') valid_mse_list.append(tf_valid_mse/valid_batch_count) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => TF valid mse cost {}'.format(e, train_batch_count, valid_mse_list[-1]) if best_valid>valid_mse_list[-1]: best_valid = valid_mse_list[-1] if train_batch_count%500==0: numpy.save(file=model_name+'tf_mse', arr=numpy.asarray(tf_mse_list)) numpy.save(file=model_name+'tf_gen_grad', 
arr=numpy.asarray(tf_generator_grad_list)) numpy.save(file=model_name+'gan_mse', arr=numpy.asarray(gan_mse_list)) numpy.save(file=model_name+'gan_gen_cost', arr=numpy.asarray(gan_generator_cost_list)) numpy.save(file=model_name+'gan_disc_cost', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_input_score', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_sample_score', arr=numpy.asarray(gan_false_score_list)) numpy.save(file=model_name+'gan_gen_grad', arr=numpy.asarray(gan_generator_grad_list)) numpy.save(file=model_name+'gan_disc_grad', arr=numpy.asarray(gan_discriminator_grad_list)) numpy.save(file=model_name+'valid_mse', arr=numpy.asarray(valid_mse_list)) num_sec = 100 sampling_length = num_sec*sampling_rate/feature_size seed_input_data = valid_seed_data [generated_sequence, ] = sample_generator(seed_input_data, sampling_length) sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1) sample_data = sample_data.reshape((num_seeds, -1)) sample_data = sample_data*(1.15*2.**13) sample_data = sample_data.astype(numpy.int16) save_wavfile(sample_data, model_name+'_sample') if best_valid==valid_mse_list[-1]: save_model_params(generator_model, model_name+'_gen_model.pkl') save_model_params(discriminator_feature_model, model_name+'_disc_feat_model.pkl') save_model_params(discriminator_output_model, model_name+'_disc_output_model.pkl')
def train_model(feature_size,
                hidden_size,
                init_window_size,
                generator_model,
                generator_optimizer,
                num_epochs,
                model_name):
    # Train a generator on raw audio with a single (regularized) update
    # function.  Every 10 batches running costs are printed, every 100 batches
    # the model is evaluated on a fixed validation split (with wav dumps of
    # reconstructions and originals), and every 500 batches cost curves are
    # saved as .npy files, audio samples are generated, and the best-so-far
    # model parameters are pickled.
    # NOTE(review): hidden_size is accepted but unused in this function.

    # compile the update function (timed)
    print 'COMPILING UPDATER FUNCTION '
    t = time()
    updater_function = set_updater_function(generator_model=generator_model,
                                            generator_optimizer=generator_optimizer,
                                            generator_grad_clipping=.0)
    print '%.2f SEC '%(time()-t)

    # compile the evaluator (timed)
    print 'COMPILING EVALUATION FUNCTION '
    t = time()
    evaluation_function = set_evaluation_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    # compile the sample generator (timed)
    print 'COMPILING SAMPLING FUNCTION '
    t = time()
    sampling_function = set_sampling_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    # load raw int16 audio; first 160M steps are train, the rest validation
    print 'READ RAW WAV DATA'
    _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav')
    valid_raw_data = train_raw_data[160000000:]
    train_raw_data = train_raw_data[:160000000]
    train_raw_data = train_raw_data[2000:]

    # scale int16 samples into roughly [-1, 1) (1.15 * 2**13 normalizer)
    train_raw_data = (train_raw_data/(1.15*2.**13)).astype(floatX)
    valid_raw_data = (valid_raw_data/(1.15*2.**13)).astype(floatX)

    num_train_total_steps = train_raw_data.shape[0]
    num_valid_total_steps = valid_raw_data.shape[0]
    batch_size = 64

    # cut validation audio into (window, feature) sequences; target is the
    # source shifted by one frame (Python 2 integer division throughout)
    num_valid_sequences = num_valid_total_steps/(feature_size*init_window_size)-1
    valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*init_window_size)]
    valid_source_data = valid_source_data.reshape((num_valid_sequences, init_window_size, feature_size))
    valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*init_window_size)]
    valid_target_data = valid_target_data.reshape((num_valid_sequences, init_window_size, feature_size))
    valid_raw_data = None  # release the big array

    num_seeds = 10
    valid_shuffle_idx = np_rng.permutation(num_valid_sequences)
    valid_source_data = valid_source_data[valid_shuffle_idx]
    valid_target_data = valid_target_data[valid_shuffle_idx]
    # NOTE(review): [:num_seeds][0] == [0] — this is one sequence of shape
    # (window, feature), not num_seeds sequences; confirm intended shape.
    valid_seed_data = valid_source_data[:num_seeds][0][:]

    # switch to time-major layout: (window, sequences, feature)
    valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1)
    valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1)

    num_valid_batches = num_valid_sequences/batch_size
    print 'NUM OF VALID BATCHES : ', num_valid_sequences/batch_size

    best_valid = 10000.  # best (lowest) validation cost seen so far

    print 'START TRAINING'
    # per-batch training statistics, and per-evaluation validation costs
    train_sample_cost_list = []
    train_regularizer_cost_list = []
    train_gradient_norm_list = []
    train_lambda_regularizer_list = []
    valid_sample_cost_list = []
    train_batch_count = 0

    # for each epoch: window grows by 5 frames per epoch (curriculum)
    for e in xrange(num_epochs):
        window_size = init_window_size + 5*e
        sequence_size = feature_size*window_size

        # every start offset that still fits a sequence plus one target frame
        last_seq_idx = num_train_total_steps-(sequence_size+feature_size)
        train_seq_orders = np_rng.permutation(last_seq_idx)
        train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size]
        train_seq_orders = train_seq_orders.reshape((-1, batch_size))
        print 'NUM OF TRAIN BATCHES : ', train_seq_orders.shape[0]

        # for each batch of start offsets
        for batch_idx, batch_info in enumerate(train_seq_orders):
            # source data: gather a run of sequence_size raw steps per row,
            # reshape to (batch, window, feature), then go time-major
            train_source_idx = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0)
            train_source_data = train_raw_data[train_source_idx]
            train_source_data = train_source_data.reshape((batch_size, window_size, feature_size))
            train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)

            # target data: source shifted by one frame
            train_target_idx = train_source_idx + feature_size
            train_target_data = train_raw_data[train_target_idx]
            train_target_data = train_target_data.reshape((batch_size, window_size, feature_size))
            train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)

            # update model (fixed regularizer weight)
            lambda_regularizer = 0.1
            updater_outputs = updater_function(train_source_data,
                                               train_target_data,
                                               lambda_regularizer)
            train_sample_cost = updater_outputs[0].mean()
            train_regularizer_cost = updater_outputs[1].mean()
            train_gradient_norm = updater_outputs[2]

            train_batch_count += 1
            train_sample_cost_list.append(train_sample_cost)
            train_regularizer_cost_list.append(train_regularizer_cost)
            train_gradient_norm_list.append(train_gradient_norm)
            train_lambda_regularizer_list.append(lambda_regularizer)

            # periodic console report
            if train_batch_count%10==0:
                print '============{}_LENGTH{}============'.format(model_name, window_size)
                print 'epoch {}, batch_cnt {} => train sample cost {}'.format(e, train_batch_count, train_sample_cost_list[-1])
                print 'epoch {}, batch_cnt {} => train regularizer cost {}'.format(e, train_batch_count, train_regularizer_cost_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => train gradient norm {}'.format(e, train_batch_count, train_gradient_norm_list[-1])
                print 'epoch {}, batch_cnt {} => train regularizer lambda {}'.format(e, train_batch_count, train_lambda_regularizer_list[-1])

            # periodic validation pass
            if train_batch_count%100==0:
                tf_valid_mse = 0.0
                valid_batch_count = 0
                for valid_idx in xrange(num_valid_batches):
                    start_idx = batch_size*valid_idx
                    end_idx = batch_size*(valid_idx+1)
                    # NOTE(review): data[:][start:end][:] slices axis 0 (time
                    # after the swapaxes above), not the sequence axis —
                    # probably intended as data[:, start:end, :]; confirm.
                    evaluation_outputs = evaluation_function(valid_source_data[:][start_idx:end_idx][:],
                                                             valid_target_data[:][start_idx:end_idx][:])
                    tf_valid_mse += evaluation_outputs[0].mean()
                    valid_batch_count += 1

                    # dump reconstructions and originals for the first batch only
                    if valid_idx==0:
                        recon_data = evaluation_outputs[1]
                        recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1)
                        recon_data = recon_data[:10]
                        recon_data = recon_data.reshape((10, -1))
                        recon_data = recon_data*(1.15*2.**13)  # undo normalization
                        recon_data = recon_data.astype(numpy.int16)
                        save_wavfile(recon_data, model_name+'_recon')

                        orig_data = valid_target_data[:][start_idx:end_idx][:]
                        orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1)
                        orig_data = orig_data[:10]
                        orig_data = orig_data.reshape((10, -1))
                        orig_data = orig_data*(1.15*2.**13)
                        orig_data = orig_data.astype(numpy.int16)
                        save_wavfile(orig_data, model_name+'_orig')

                valid_sample_cost_list.append(tf_valid_mse/valid_batch_count)
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => valid sample cost {}'.format(e, train_batch_count, valid_sample_cost_list[-1])
                if best_valid>valid_sample_cost_list[-1]:
                    best_valid = valid_sample_cost_list[-1]

            # periodic checkpoint: cost curves, audio samples, best model
            if train_batch_count%500==0:
                numpy.save(file=model_name+'_train_sample_cost', arr=numpy.asarray(train_sample_cost_list))
                numpy.save(file=model_name+'_train_regularizer_cost', arr=numpy.asarray(train_regularizer_cost_list))
                numpy.save(file=model_name+'_train_gradient_norm', arr=numpy.asarray(train_gradient_norm_list))
                numpy.save(file=model_name+'_train_lambda_value', arr=numpy.asarray(train_lambda_regularizer_list))
                numpy.save(file=model_name+'_valid_sample_cost', arr=numpy.asarray(valid_sample_cost_list))

                # generate num_sec seconds of audio from the seed data
                num_sec = 100
                sampling_length = num_sec*sampling_rate/feature_size
                seed_input_data = valid_seed_data
                [generated_sequence, ] = sampling_function(seed_input_data, sampling_length)
                sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_seeds, -1))
                sample_data = sample_data*(1.15*2.**13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')

                # persist parameters only while we are at the best valid cost
                if best_valid==valid_sample_cost_list[-1]:
                    save_model_params(generator_model, model_name+'_model.pkl')
def train_model( feature_size, hidden_size, num_layers, generator_rnn_model, generator_mean_model, generator_std_model, generator_optimizer, num_epochs, model_name, ): # generator updater print "DEBUGGING GENERATOR UPDATE FUNCTION " t = time() generator_updater = set_generator_update_function( generator_rnn_model=generator_rnn_model, generator_mean_model=generator_mean_model, generator_std_model=generator_std_model, generator_optimizer=generator_optimizer, grad_clipping=0.0, ) print "{}.sec".format(time() - t) # generator evaluator print "DEBUGGING GENERATOR EVALUATION FUNCTION " t = time() generator_evaluator = set_generator_evaluation_function( generator_rnn_model=generator_rnn_model, generator_mean_model=generator_mean_model, generator_std_model=generator_std_model, ) print "{}.sec".format(time() - t) # generator sampler print "DEBUGGING GENERATOR SAMPLING FUNCTION " t = time() generator_sampler = set_generator_sampling_function( generator_rnn_model=generator_rnn_model, generator_mean_model=generator_mean_model, generator_std_model=generator_std_model, ) print "{}.sec".format(time() - t) print "START TRAINING" # for each epoch generator_train_cost_list = [] generator_valid_cost_list = [] generator_grad_norm_mean = 0.0 init_window_size = 100 for e in xrange(num_epochs): window_size = init_window_size + 5 * e # set train data stream with proper length (window size) train_data_stream = set_train_datastream(feature_size=feature_size, window_size=window_size) # get train data iterator train_data_iterator = train_data_stream.get_epoch_iterator() # for each batch train_batch_count = 0 train_batch_size = 0 train_source_data = [] train_target_data = [] for batch_idx, batch_data in enumerate(train_data_iterator): if batch_idx < 100: continue if train_batch_size == 0: train_source_data = [] train_target_data = [] # source data single_data = batch_data[0] single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size) 
train_source_data.append(single_data) # target data single_data = batch_data[1] single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size) train_target_data.append(single_data) train_batch_size += 1 if train_batch_size < 128: continue else: # source data train_source_data = numpy.asarray(train_source_data, dtype=floatX) train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1) # target data train_target_data = numpy.asarray(train_target_data, dtype=floatX) train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1) train_batch_size = 0 # normalize train_source_data = (train_source_data / (1.15 * 2.0 ** 13)).astype(floatX) train_target_data = (train_target_data / (1.15 * 2.0 ** 13)).astype(floatX) # update generator generator_updater_input = [train_source_data, train_target_data] generator_updater_output = generator_updater(*generator_updater_input) generator_train_cost = generator_updater_output[0].mean() generator_grad_norm = generator_updater_output[1] generator_grad_norm_mean += generator_grad_norm train_batch_count += 1 sampling_seed_data = [] if train_batch_count % 100 == 0: # set valid data stream with proper length (window size) valid_window_size = window_size valid_data_stream = set_valid_datastream(feature_size=feature_size, window_size=valid_window_size) # get train data iterator valid_data_iterator = valid_data_stream.get_epoch_iterator() # for each batch valid_batch_count = 0 valid_batch_size = 0 valid_source_data = [] valid_target_data = [] valid_cost_mean = 0.0 for batch_idx, batch_data in enumerate(valid_data_iterator): if valid_batch_size == 0: valid_source_data = [] valid_target_data = [] # source data single_data = batch_data[0] single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size) valid_source_data.append(single_data) # target data single_data = batch_data[1] single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size) 
valid_target_data.append(single_data) valid_batch_size += 1 if valid_batch_size < 128: continue else: # source data valid_source_data = numpy.asarray(valid_source_data, dtype=floatX) valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1) # target data valid_target_data = numpy.asarray(valid_target_data, dtype=floatX) valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1) valid_batch_size = 0 # normalize valid_source_data = (valid_source_data / (1.15 * 2.0 ** 13)).astype(floatX) valid_target_data = (valid_target_data / (1.15 * 2.0 ** 13)).astype(floatX) generator_evaluator_input = [valid_source_data, valid_target_data] generator_evaluator_output = generator_evaluator(*generator_evaluator_input) generator_valid_cost = generator_evaluator_output[0].mean() valid_cost_mean += generator_valid_cost valid_batch_count += 1 if valid_batch_count > 100: sampling_seed_data = valid_source_data break valid_cost_mean = valid_cost_mean / valid_batch_count print "=============sample length {}=============================".format(window_size) print "epoch {}, batch_cnt {} => generator train cost {}".format( e, train_batch_count, generator_train_cost ) print "epoch {}, batch_cnt {} => generator valid cost {}".format(e, train_batch_count, valid_cost_mean) print "epoch {}, batch_cnt {} => generator grad norm {}".format( e, train_batch_count, generator_grad_norm_mean / train_batch_count ) generator_train_cost_list.append(generator_train_cost) generator_valid_cost_list.append(valid_cost_mean) plot_learning_curve( cost_values=[generator_train_cost_list, generator_valid_cost_list], cost_names=["Train Cost", "Valid Cost"], save_as=model_name + "_model_cost.png", legend_pos="upper left", ) if train_batch_count % 100 == 0: num_samples = 10 num_sec = 10 sampling_length = num_sec * sampling_rate / feature_size curr_input_data = sampling_seed_data[0][:num_samples] prev_hidden_data = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX) 
prev_hidden_data = numpy.tanh(prev_hidden_data) output_data = numpy.zeros(shape=(sampling_length, num_samples, feature_size)) for s in xrange(sampling_length): generator_input = [curr_input_data, prev_hidden_data] [curr_input_data, prev_hidden_data] = generator_sampler(*generator_input) output_data[s] = curr_input_data sample_data = numpy.swapaxes(output_data, axis1=0, axis2=1) sample_data = sample_data.reshape((num_samples, -1)) sample_data = sample_data * (1.15 * 2.0 ** 13) sample_data = sample_data.astype(numpy.int16) save_wavfile(sample_data, model_name + "_sample")
def train_model(recurrent_model, output_model, num_hiddens, model_optimizer, data_stream, num_epochs, model_name): update_function = set_update_function(recurrent_model=recurrent_model, output_model=output_model, optimizer=model_optimizer, grad_clip=1.0) generation_function = set_generation_function(recurrent_model=recurrent_model, output_model=output_model) # for each epoch cost_list = [] cnt = 0 for e in xrange(num_epochs): # get data iterator data_iterator = data_stream.get_epoch_iterator() # for each batch for batch_idx, batch_data in enumerate(data_iterator): input_data = numpy.swapaxes(batch_data[0], axis1=0, axis2=1) input_mask = numpy.ones(shape=input_data.shape[:2], dtype=floatX) target_data = numpy.swapaxes(batch_data[1], axis1=0, axis2=1) input_data = (input_data/(2.**15)).astype(floatX) target_data = (target_data/(2.**15)).astype(floatX) time_length = input_data.shape[0] num_samples = input_data.shape[1] truncate_grad_step = time_length cnt = cnt + 1 # update model update_input = [input_data, input_mask, None, None, target_data, truncate_grad_step] update_output = update_function(*update_input) # update result sample_cost = update_output[2].mean() if (batch_idx+1)%1000==0: print 'epoch {}, batch_idx {} : cost {} truncate({})'.format(e, batch_idx, sample_cost, truncate_grad_step) cost_list.append(sample_cost) if (batch_idx+1)%1000==0: plot_learning_curve(cost_values=[cost_list,], cost_names=['Input cost (train)',], save_as=model_name+'.png', legend_pos='upper left') if (batch_idx+1)%10000==0: generation_sample = 10 generation_length = 1000 input_data = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, input_feature_size)).astype(floatX) hidden_data = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, num_hiddens)).astype(floatX) cell_data = numpy.zeros(shape=(generation_sample, num_hiddens)).astype(floatX) output_data = numpy.zeros(shape=(generation_length, generation_sample, input_feature_size)) for t in 
xrange(generation_length): [hidden_data, cell_data, input_data] = generation_function(input_data, hidden_data, cell_data) output_data[t] = input_data output_data = numpy.swapaxes(output_data, axis1=0, axis2=1) output_data = output_data*(2.**15) output_data = output_data.astype(numpy.int16) save_wavfile(output_data, model_name+'_sample')
def train_model(feature_size,
                time_size,
                hidden_size,
                num_layers,
                recurrent_model,
                output_model,
                model_optimizer,
                data_stream,
                num_epochs,
                model_name):
    # Train a multi-layer recurrent audio model, one sequence per update
    # (batch dimension of 1 is added via expand_dims).  Every 100 batches the
    # cost is printed/recorded and the learning curve replotted; every 1000
    # batches the model is rolled forward from random state and the generated
    # audio is saved as a wav file.
    update_function = set_update_function(recurrent_model=recurrent_model,
                                          output_model=output_model,
                                          optimizer=model_optimizer,
                                          grad_clip=1.0)
    generation_function = set_generation_function(recurrent_model=recurrent_model,
                                                  output_model=output_model)

    # for each epoch
    cost_list = []
    cnt = 0  # NOTE(review): never incremented/used in this variant
    for e in xrange(num_epochs):
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()

        # for each batch (one sequence each)
        for batch_idx, batch_data in enumerate(data_iterator):
            # source data: (time, 1, feature), time-major with batch size 1
            source_data = batch_data[0]
            source_data = source_data.reshape(time_size, feature_size)
            source_data = numpy.expand_dims(source_data, axis=0)
            source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)

            # source mask: all ones (no padding)
            source_mask = numpy.ones(shape=source_data.shape[:2], dtype=floatX)

            # target data: same layout as source
            target_data = batch_data[1]
            target_data = target_data.reshape(time_size, feature_size)
            target_data = numpy.expand_dims(target_data, axis=0)
            target_data = numpy.swapaxes(target_data, axis1=0, axis2=1)

            # normalize from the int16 range into [-1, 1)
            source_data = (source_data/(2.**15)).astype(floatX)
            target_data = (target_data/(2.**15)).astype(floatX)

            # get time length; no gradient truncation (full BPTT)
            time_length = source_data.shape[0]
            truncate_grad_step = time_length

            # update model (None slots: initial hidden/cell supplied internally)
            update_input = [source_data, source_mask, None, None, target_data, truncate_grad_step]
            update_output = update_function(*update_input)

            # update result
            sample_cost = update_output[2].mean()

            # periodic report
            if (batch_idx+1)%100==0:
                print 'epoch {}, batch_idx {} : cost {} truncate({})'.format(e, batch_idx, sample_cost, truncate_grad_step)
                cost_list.append(sample_cost)

            # periodic learning-curve refresh
            if (batch_idx+1)%100==0:
                plot_learning_curve(cost_values=[cost_list,],
                                    cost_names=['Input cost (train)',],
                                    save_as=model_name+'.png',
                                    legend_pos='upper left')

            # periodic free-running generation from random initial state
            if (batch_idx+1)%1000==0:
                generation_sample = 10
                generation_length = 100
                input_data = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, feature_size)).astype(floatX)
                # one random hidden state and zero cell state per layer
                hidden_data_list = [numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                cell_data_list = [numpy.zeros(shape=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                output_data = numpy.zeros(shape=(generation_length, generation_sample, feature_size))
                # flat argument list: [input] + hiddens + cells, rebuilt each step
                input_list = [input_data, ] + hidden_data_list + cell_data_list
                for t in xrange(generation_length):
                    result_data = generation_function(*input_list)
                    # result layout: hiddens, then cells, then next input (last)
                    hidden_data_list = result_data[0:num_layers]
                    cell_data_list = result_data[num_layers:2*num_layers]
                    input_data = result_data[-1]
                    input_list = [input_data, ] + hidden_data_list + cell_data_list
                    output_data[t] = input_data
                # batch-major, back to int16 scale, write to disk
                output_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                output_data = output_data.reshape((generation_sample, -1))
                output_data = output_data*(2.**15)
                output_data = output_data.astype(numpy.int16)
                save_wavfile(output_data, model_name+'_sample')
def train_model(feature_size, hidden_size, generator_model, generator_gan_optimizer, generator_tf_optimizer, discriminator_model, discriminator_optimizer, num_epochs, model_name): # generator updater print 'COMPILING TEACHER FORCE UPDATE FUNCTION ' tf_generator_updater = set_teacher_force_update_function(generator_model=generator_model, generator_optimizer=generator_tf_optimizer, generator_grad_clipping=60.0) print 'COMPILING GAN UPDATE FUNCTION ' gan_generator_updater = set_gan_update_function(generator_model=generator_model, discriminator_model=discriminator_model, generator_optimizer=generator_gan_optimizer, discriminator_optimizer=discriminator_optimizer, generator_grad_clipping=60.0, discriminator_grad_clipping=40.0) # evaluator print 'COMPILING EVALUATION FUNCTION ' evaluator = set_evaluation_function(generator_model=generator_model) # sample generator print 'COMPILING SAMPLING FUNCTION ' sample_generator = set_sample_function(generator_model=generator_model) print 'START TRAINING' # for each epoch tf_generator_grad_list = [] tf_generator_cost_list = [] gan_generator_grad_list = [] gan_generator_cost_list = [] gan_discriminator_grad_list = [] gan_discriminator_cost_list = [] gan_true_score_list = [] gan_false_score_list = [] gan_mse_list = [] init_window_size = 100 for e in xrange(num_epochs): window_size = init_window_size + 5*e # set train data stream with proper length (window size) train_data_stream = set_train_datastream(feature_size=feature_size, window_size=window_size) # get train data iterator train_data_iterator = train_data_stream.get_epoch_iterator() # for each batch train_batch_count = 0 train_batch_size = 0 train_source_data = [] train_target_data = [] for batch_idx, batch_data in enumerate(train_data_iterator): # skip the beginning part if batch_idx<10000: continue # init train batch data if train_batch_size==0: train_source_data = [] train_target_data = [] # save source data single_data = batch_data[0] single_data = 
single_data.reshape(single_data.shape[0]/feature_size, feature_size) train_source_data.append(single_data) # save target data single_data = batch_data[1] single_data = single_data.reshape(single_data.shape[0]/feature_size, feature_size) train_target_data.append(single_data) train_batch_size += 1 if train_batch_size<128: continue else: # source data train_source_data = numpy.asarray(train_source_data, dtype=floatX) train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1) # target data train_target_data = numpy.asarray(train_target_data, dtype=floatX) train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1) train_batch_size = 0 # normalize train_source_data = (train_source_data/(1.15*2.**13)).astype(floatX) train_target_data = (train_target_data/(1.15*2.**13)).astype(floatX) # teacher force update tf_update_output = tf_generator_updater(train_source_data, train_target_data) tf_square_error = tf_update_output[0].mean() tf_generator_grad_norm = tf_update_output[1] # gan update gan_update_output = gan_generator_updater(train_source_data, train_target_data) generator_gan_cost = gan_update_output[0].mean() discriminator_gan_cost = gan_update_output[1].mean() discriminator_true_score = gan_update_output[2].mean() discriminator_false_score = gan_update_output[3].mean() gan_square_error = gan_update_output[4].mean() gan_generator_grad_norm = gan_update_output[5] gan_discriminator_grad_norm = gan_update_output[6] train_batch_count += 1 tf_generator_cost_list.append(tf_square_error) tf_generator_grad_list.append(tf_generator_grad_norm) gan_generator_grad_list.append(gan_generator_grad_norm) gan_generator_cost_list.append(generator_gan_cost) gan_discriminator_grad_list.append(gan_discriminator_grad_norm) gan_discriminator_cost_list.append(discriminator_gan_cost) gan_true_score_list.append(discriminator_true_score) gan_false_score_list.append(discriminator_false_score) gan_mse_list.append(gan_square_error) if train_batch_count%100==0: print 
'=============sample length {}============================='.format(window_size) print 'epoch {}, batch_cnt {} => TF generator mse cost {}'.format(e, train_batch_count, tf_generator_cost_list[-1]) print 'epoch {}, batch_cnt {} => GAN generator mse cost {}'.format(e, train_batch_count, gan_mse_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN generator cost {}'.format(e, train_batch_count, gan_generator_cost_list[-1]) print 'epoch {}, batch_cnt {} => GAN discriminator cost {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN input score {}'.format(e, train_batch_count, gan_true_score_list[-1]) print 'epoch {}, batch_cnt {} => GAN sample score {}'.format(e, train_batch_count, gan_false_score_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => TF generator grad norm {}'.format(e, train_batch_count, tf_generator_grad_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1]) print 'epoch {}, batch_cnt {} => GAN discrim. 
grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1]) if train_batch_count%100==0: stop_flag = True numpy.save(file=model_name+'tf_mse', arr=numpy.asarray(tf_generator_cost_list)) numpy.save(file=model_name+'gan_mse', arr=numpy.asarray(gan_mse_list)) numpy.save(file=model_name+'gan_gen_cost', arr=numpy.asarray(gan_generator_cost_list)) numpy.save(file=model_name+'gan_disc_cost', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_input_score', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_sample_score', arr=numpy.asarray(gan_false_score_list)) numpy.save(file=model_name+'tf_gen_grad', arr=numpy.asarray(tf_generator_grad_list)) numpy.save(file=model_name+'gan_gen_grad', arr=numpy.asarray(gan_generator_grad_list)) numpy.save(file=model_name+'gan_disc_grad', arr=numpy.asarray(gan_discriminator_grad_list)) num_samples = 10 if train_batch_count%100==0: valid_data_stream = set_valid_datastream(feature_size=feature_size, window_size=1) # get train data iterator valid_data_iterator = valid_data_stream.get_epoch_iterator() # for each batch valid_batch_size = 0 sampling_seed_data = [] for batch_idx, batch_data in enumerate(valid_data_iterator): # source data single_data = batch_data[0] single_data = single_data.reshape(single_data.shape[0]/feature_size, feature_size) sampling_seed_data.append(single_data) valid_batch_size += 1 if valid_batch_size<num_samples: continue else: # source data sampling_seed_data = numpy.asarray(sampling_seed_data, dtype=floatX) # normalize sampling_seed_data = (sampling_seed_data/(1.15*2.**13)).astype(floatX) break num_sec = 10 sampling_length = num_sec*sampling_rate/feature_size curr_input_data = sampling_seed_data.reshape(num_samples, feature_size) prev_hidden_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX) prev_hidden_data = numpy.tanh(prev_hidden_data) prev_cell_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX) output_data = 
numpy.zeros(shape=(sampling_length, num_samples, feature_size)) for s in xrange(sampling_length): generator_input = [curr_input_data, prev_hidden_data, prev_cell_data] [curr_input_data, prev_hidden_data, prev_cell_data] = sample_generator(*generator_input) output_data[s] = curr_input_data sample_data = numpy.swapaxes(output_data, axis1=0, axis2=1) sample_data = sample_data.reshape((num_samples, -1)) sample_data = sample_data*(1.15*2.**13) sample_data = sample_data.astype(numpy.int16) save_wavfile(sample_data, model_name+'_sample')
def train_model(feature_size, hidden_size, init_window_size, generator_rnn_model, generator_output_model, generator_gan_optimizer, generator_tf_optimizer, discriminator_rnn_model, discriminator_output_model, discriminator_optimizer, num_epochs, model_name): # generator updater print 'COMPILING GAN UPDATE FUNCTION ' gan_updater = set_gan_update_function(generator_rnn_model=generator_rnn_model, generator_output_model=generator_output_model, discriminator_rnn_model=discriminator_rnn_model, discriminator_output_model=discriminator_output_model, generator_optimizer=generator_gan_optimizer, discriminator_optimizer=discriminator_optimizer, generator_grad_clipping=.0, discriminator_grad_clipping=.0) # print 'COMPILING TF UPDATE FUNCTION ' # tf_updater = set_tf_update_function(generator_rnn_model=generator_rnn_model, # generator_output_model=generator_output_model, # generator_optimizer=generator_tf_optimizer, # generator_grad_clipping=.0) # evaluator # print 'COMPILING EVALUATION FUNCTION ' # evaluator = set_evaluation_function(generator_rnn_model=generator_rnn_model, # generator_output_model=generator_output_model) # sample generator print 'COMPILING SAMPLING FUNCTION ' sample_generator = set_sample_function(generator_rnn_model=generator_rnn_model, generator_output_model=generator_output_model) print 'READ RAW WAV DATA' _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav') valid_raw_data = train_raw_data[160000000:] train_raw_data = train_raw_data[:160000000] train_raw_data = train_raw_data[2000:] train_raw_data = (train_raw_data/(1.15*2.**13)).astype(floatX) valid_raw_data = (valid_raw_data/(1.15*2.**13)).astype(floatX) num_train_total_steps = train_raw_data.shape[0] num_valid_total_steps = valid_raw_data.shape[0] batch_size = 64 num_samples = 10 last_seq_idx = num_valid_total_steps-feature_size valid_seq_orders = np_rng.permutation(last_seq_idx) valid_seq_orders = valid_seq_orders[:last_seq_idx-last_seq_idx%num_samples] 
valid_seq_orders = valid_seq_orders.reshape((-1, num_samples)) valid_seq_orders = valid_seq_orders[0] valid_source_idx = valid_seq_orders.reshape((num_samples, 1)) + numpy.repeat(numpy.arange(feature_size).reshape((1, feature_size)), num_samples, axis=0) valid_source_data = valid_raw_data[valid_source_idx] valid_source_data = valid_source_data.reshape((num_samples, feature_size)) print 'START TRAINING' # for each epoch tf_mse_list = [] tf_generator_grad_list = [] gan_generator_grad_list = [] gan_generator_cost_list = [] gan_discriminator_grad_list = [] gan_discriminator_cost_list = [] gan_true_score_list = [] gan_false_score_list = [] gan_mse_list = [] train_batch_count = 0 for e in xrange(num_epochs): window_size = init_window_size + 5*e sequence_size = feature_size*window_size last_seq_idx = num_train_total_steps-(sequence_size+feature_size) train_seq_orders = np_rng.permutation(last_seq_idx) train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size] train_seq_orders = train_seq_orders.reshape((-1, batch_size)) # for each batch for batch_idx, batch_info in enumerate(train_seq_orders): # source data train_source_idx = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0) train_source_data = train_raw_data[train_source_idx] train_source_data = train_source_data.reshape((batch_size, window_size, feature_size)) train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1) # target data train_target_idx = train_source_idx + feature_size train_target_data = train_raw_data[train_target_idx] train_target_data = train_target_data.reshape((batch_size, window_size, feature_size)) train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1) # tf update # tf_update_output = tf_updater(train_source_data, # train_target_data) # tf_square_error = tf_update_output[0].mean() # tf_generator_grad_norm = tf_update_output[1] # tf_square_error = 0.0 tf_generator_grad_norm = 
0.0 # gan update gan_update_output = gan_updater(train_source_data, train_target_data) generator_gan_cost = gan_update_output[0].mean() discriminator_gan_cost = gan_update_output[1].mean() discriminator_true_score = gan_update_output[2].mean() discriminator_false_score = gan_update_output[3].mean() gan_square_error = gan_update_output[4].mean() gan_generator_grad_norm = gan_update_output[5] gan_discriminator_grad_norm = gan_update_output[6] # generator_gan_cost = 0.0 # discriminator_gan_cost = 0.0 # discriminator_true_score = 0.0 # discriminator_false_score = 0.0 # gan_square_error = 0.0 # gan_generator_grad_norm = 0.0 # gan_discriminator_grad_norm = 0.0 train_batch_count += 1 tf_generator_grad_list.append(tf_generator_grad_norm) tf_mse_list.append(tf_square_error) gan_generator_grad_list.append(gan_generator_grad_norm) gan_generator_cost_list.append(generator_gan_cost) gan_discriminator_grad_list.append(gan_discriminator_grad_norm) gan_discriminator_cost_list.append(discriminator_gan_cost) gan_true_score_list.append(discriminator_true_score) gan_false_score_list.append(discriminator_false_score) gan_mse_list.append(gan_square_error) if train_batch_count%10==0: print '============{}_LENGTH{}============'.format(model_name, window_size) # print 'epoch {}, batch_cnt {} => TF generator mse cost {}'.format(e, train_batch_count, tf_mse_list[-1]) print 'epoch {}, batch_cnt {} => GAN generator cost {}'.format(e, train_batch_count, gan_generator_cost_list[-1]) print 'epoch {}, batch_cnt {} => GAN discriminator cost {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN input score {}'.format(e, train_batch_count, gan_true_score_list[-1]) print 'epoch {}, batch_cnt {} => GAN sample score {}'.format(e, train_batch_count, gan_false_score_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN discrim. 
grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1]) print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1]) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => GAN generator mse cost {}'.format(e, train_batch_count, gan_mse_list[-1]) # print '----------------------------------------------------------' # print 'epoch {}, batch_cnt {} => TF generator grad norm {}'.format(e, train_batch_count, tf_generator_grad_list[-1]) if train_batch_count%100==0: # numpy.save(file=model_name+'tf_mse', # arr=numpy.asarray(tf_mse_list)) # numpy.save(file=model_name+'tf_gen_grad', # arr=numpy.asarray(tf_generator_grad_list)) numpy.save(file=model_name+'gan_mse', arr=numpy.asarray(gan_mse_list)) numpy.save(file=model_name+'gan_gen_cost', arr=numpy.asarray(gan_generator_cost_list)) numpy.save(file=model_name+'gan_disc_cost', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_input_score', arr=numpy.asarray(gan_true_score_list)) numpy.save(file=model_name+'gan_sample_score', arr=numpy.asarray(gan_false_score_list)) numpy.save(file=model_name+'gan_gen_grad', arr=numpy.asarray(gan_generator_grad_list)) numpy.save(file=model_name+'gan_disc_grad', arr=numpy.asarray(gan_discriminator_grad_list)) if train_batch_count%100==0: num_sec = 10 sampling_length = num_sec*sampling_rate/feature_size curr_input_data = valid_source_data prev_hidden_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX) prev_hidden_data = numpy.tanh(prev_hidden_data) prev_cell_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX) output_data = numpy.zeros(shape=(sampling_length, num_samples, feature_size)) for s in xrange(sampling_length): generator_input = [curr_input_data, prev_hidden_data, prev_cell_data] [curr_input_data, prev_hidden_data, prev_cell_data] = sample_generator(*generator_input) output_data[s] = curr_input_data sample_data = 
numpy.swapaxes(output_data, axis1=0, axis2=1) sample_data = sample_data.reshape((num_samples, -1)) sample_data = sample_data*(1.15*2.**13) sample_data = sample_data.astype(numpy.int16) save_wavfile(sample_data, model_name+'_sample')
def train_model(feature_size, hidden_size, num_layers, generator_rnn_model, generator_optimizer, discriminator_rnn_model, discriminator_output_model, discriminator_optimizer, num_epochs, model_name):
    # Alternating generator/discriminator training: the generator free-runs
    # from random initial input with zero hidden/cell state, the discriminator
    # scores real windows against generated ones. Every 500 batches costs are
    # printed and plotted; every 5000 batches samples are written as wav.
    # NOTE(review): divides by 2**15, so input is presumably 16-bit PCM — confirm.

    # generator updater
    print 'DEBUGGING GENERATOR UPDATE FUNCTION '
    generator_updater = set_generator_update_function(generator_rnn_model=generator_rnn_model,
                                                      discriminator_rnn_model=discriminator_rnn_model,
                                                      discriminator_output_model=discriminator_output_model,
                                                      generator_optimizer=generator_optimizer,
                                                      grad_clipping=3.6)

    # discriminator updater
    print 'DEBUGGING DISCRIMINATOR UPDATE FUNCTION '
    discriminator_updater = set_discriminator_update_function(generator_rnn_model=generator_rnn_model,
                                                              discriminator_rnn_model=discriminator_rnn_model,
                                                              discriminator_output_model=discriminator_output_model,
                                                              discriminator_optimizer=discriminator_optimizer,
                                                              grad_clipping=1.8)

    # sample generator
    print 'DEBUGGING SAMPLE GENERATOR FUNCTION '
    sample_generator = set_sample_generation_function(generator_rnn_model=generator_rnn_model)

    print 'START TRAINING'
    # for each epoch
    generator_cost_list = []
    discriminator_cost_list = []
    generator_grad_norm_mean = 0.0  # NOTE(review): only updated in commented-out code
    discriminator_grad_norm_mean = 0.0
    init_window_size = 20
    for e in xrange(num_epochs):
        # curriculum: sequences get 5 windows longer each epoch
        window_size = init_window_size + 5*e

        # set data stream with proper length (window size)
        data_stream = set_datastream(feature_size=feature_size,
                                     window_size=window_size)
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()

        # for each batch (accumulate 128 stream items per training batch)
        batch_count = 0
        batch_size = 0
        source_data = []
        for batch_idx, batch_data in enumerate(data_iterator):
            if batch_size==0:
                source_data = []

            # source data
            single_data = batch_data[0]
            single_data = single_data.reshape(window_size, feature_size)
            source_data.append(single_data)

            batch_size += 1
            if batch_size<128:
                continue
            else:
                # -> (time, batch, feature)
                source_data = numpy.asarray(source_data, dtype=floatX)
                source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)
                batch_size = 0

            # normalize
            source_data = (source_data/(2.**15)).astype(floatX)

            # set generator initial values: clipped-gaussian input,
            # zero hidden/cell state (gaussian init kept commented below)
            init_input_data = np_rng.normal(size=(source_data.shape[1], feature_size)).astype(floatX)
            init_input_data = numpy.clip(init_input_data, -1., 1.)
            # init_hidden_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
            # init_cell_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            init_hidden_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)
            init_cell_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)

            # update generator
            generator_updater_input = [init_input_data,
                                       init_hidden_data,
                                       init_cell_data,
                                       window_size]
            generator_updater_output = generator_updater(*generator_updater_input)
            generator_cost = generator_updater_output[1].mean()
            # generator_grad_norm = generator_updater_output[-1]

            # update discriminator (fresh random initial values)
            init_input_data = np_rng.normal(size=(source_data.shape[1], feature_size)).astype(floatX)
            init_input_data = numpy.clip(init_input_data, -1., 1.)
            # init_hidden_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
            # init_cell_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            init_hidden_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)
            init_cell_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)

            discriminator_updater_input = [source_data,
                                           init_input_data,
                                           init_hidden_data,
                                           init_cell_data]
            discriminator_updater_output = discriminator_updater(*discriminator_updater_input)
            input_cost_data = discriminator_updater_output[0]
            sample_cost_data = discriminator_updater_output[1]
            discriminator_cost = discriminator_updater_output[2].mean()
            # discriminator_grad_norm = discriminator_updater_output[-1]

            # generator_grad_norm_mean += generator_grad_norm
            # discriminator_grad_norm_mean += discriminator_grad_norm

            batch_count += 1
            if batch_count%500==0:
                print '=============sample length {}============================='.format(window_size)
                print 'epoch {}, batch_cnt {} => generator cost {}'.format(e, batch_count, generator_cost)
                print 'epoch {}, batch_cnt {} => discriminator cost {}'.format(e, batch_count, discriminator_cost)
                print 'epoch {}, batch_cnt {} => input data cost {}'.format(e, batch_count, input_cost_data.mean())
                print 'epoch {}, batch_cnt {} => sample data cost {}'.format(e, batch_count, sample_cost_data.mean())
                # print 'epoch {}, batch_cnt {} => generator grad norm{}'.format(e, batch_count, generator_grad_norm_mean/batch_count)
                # print 'epoch {}, batch_cnt {} => discriminator grad norm{}'.format(e, batch_count, discriminator_grad_norm_mean/batch_count)

                generator_cost_list.append(generator_cost)
                discriminator_cost_list.append(discriminator_cost)
                plot_learning_curve(cost_values=[generator_cost_list, discriminator_cost_list],
                                    cost_names=['Generator Cost', 'Discriminator Cost'],
                                    save_as=model_name+'_model_cost.png',
                                    legend_pos='upper left')
                # per-timestep cost curves for this batch
                plot_learning_curve(cost_values=[input_cost_data.mean(axis=(1, 2)), sample_cost_data.mean(axis=(1, 2))],
                                    cost_names=['Data Distribution', 'Model Distribution'],
                                    save_as=model_name+'_seq_cost{}.png'.format(batch_count),
                                    legend_pos='upper left')

            if batch_count%5000==0:
                # free-running sampling: num_sec seconds of audio
                num_samples = 10
                num_sec = 10
                sampling_length = num_sec*sampling_rate/feature_size

                # set generator initial values
                init_input_data = np_rng.normal(size=(num_samples, feature_size)).astype(floatX)
                init_input_data = numpy.clip(init_input_data, -1., 1.)
                # init_hidden_data = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX)
                # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
                # init_cell_data = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX)
                init_hidden_data = numpy.zeros(shape=(num_layers, num_samples, hidden_size), dtype=floatX)
                init_cell_data = numpy.zeros(shape=(num_layers, num_samples, hidden_size), dtype=floatX)

                generator_input = [init_input_data,
                                   init_hidden_data,
                                   init_cell_data,
                                   sampling_length]
                sample_data = sample_generator(*generator_input)[0]
                # back to (sample, time*feature), rescale to int16 PCM
                sample_data = numpy.swapaxes(sample_data, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_samples, -1))
                sample_data = sample_data*(2.**15)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')
def train_model(feature_size, time_size, hidden_size, num_layers, recurrent_model, output_model, model_optimizer, controller_optimizer, data_stream, num_epochs, model_name):
    # Teacher-forcing trainer variant with a separate controller optimizer.
    # Accumulates 128 stream items per batch; every 100 batches costs are
    # printed/plotted, and every 1000 batches a free-running sample is saved.
    # NOTE(review): divides by 2**15, so input is presumably 16-bit PCM — confirm.
    print 'DEBUGGING UPDATE FUNCTION'
    update_function = set_update_function(recurrent_model=recurrent_model,
                                          output_model=output_model,
                                          model_optimizer=model_optimizer,
                                          controller_optimizer=controller_optimizer,
                                          grad_clip=1.0)
    print 'DEBUGGING GENERATOR FUNCTION'
    generation_function = set_generation_function(recurrent_model=recurrent_model,
                                                  output_model=output_model)

    # for each epoch
    cost_list = []
    cnt = 0  # NOTE(review): never used in this function
    for e in xrange(num_epochs):
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()

        # for each batch (accumulate 128 stream items per training batch)
        batch_count = 0
        batch_size = 0
        source_data = []
        target_data = []
        for batch_idx, batch_data in enumerate(data_iterator):
            if batch_size==0:
                source_data = []
                target_data = []

            # source data
            single_data = batch_data[0]
            single_data = single_data.reshape(time_size, feature_size)
            source_data.append(single_data)

            # target data
            single_data = batch_data[1]
            single_data = single_data.reshape(time_size, feature_size)
            target_data.append(single_data)

            batch_size += 1
            if batch_size<128:
                continue
            else:
                # -> (time, batch, feature)
                source_data = numpy.asarray(source_data, dtype=floatX)
                source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)
                target_data = numpy.asarray(target_data, dtype=floatX)
                target_data = numpy.swapaxes(target_data, axis1=0, axis2=1)
                batch_size = 0

            # normalize
            source_data = (source_data/(2.**15)).astype(floatX)
            target_data = (target_data/(2.**15)).astype(floatX)

            # update model
            update_input = [source_data, target_data]
            update_output = update_function(*update_input)

            # update result (per-timestep cost tensor)
            sample_cost = update_output[2]

            batch_count += 1
            if batch_count%100==0:
                print 'epoch {}, batch_count {} : mean cost {} max cost {})'.format(e, batch_count, sample_cost.mean(), sample_cost.max(axis=0).mean())
            cost_list.append(sample_cost.mean())

            # periodically refresh the learning-curve plot
            if (batch_count+1)%100==0:
                plot_learning_curve(cost_values=[cost_list,],
                                    cost_names=['Input cost (train)',],
                                    save_as=model_name+'.png',
                                    legend_pos='upper left')

            # periodically generate free-running samples and save as wav
            if (batch_count+1)%1000==0:
                generation_sample = 10
                generation_length = 100
                # clipped-gaussian initial input and hidden states; zero cells
                input_data = numpy.clip(np_rng.normal(size=(generation_sample, feature_size)).astype(floatX), -1., 1.)
                hidden_data_list = [numpy.clip(np_rng.normal(size=(generation_sample, hidden_size)).astype(floatX), -1., 1.) for l in xrange(num_layers)]
                cell_data_list = [numpy.zeros(shape=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                output_data = numpy.zeros(shape=(generation_length, generation_sample, feature_size))
                input_list = [input_data, ] + hidden_data_list + cell_data_list
                for t in xrange(generation_length):
                    # feed the model its own previous output
                    result_data = generation_function(*input_list)
                    hidden_data_list = result_data[0:num_layers]
                    cell_data_list = result_data[num_layers:2*num_layers]
                    input_data = result_data[-1]
                    input_list = [input_data, ] + hidden_data_list + cell_data_list
                    output_data[t] = input_data
                # back to (sample, time*feature), rescale to int16 PCM
                output_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                output_data = output_data.reshape((generation_sample, -1))
                output_data = output_data*(2.**15)
                output_data = output_data.astype(numpy.int16)
                save_wavfile(output_data, model_name+'_sample')
def test_sampling(feature_size, hidden_size, seed_window_size, generator_model, model_name): print 'COMPILING SEEDING FUNCTION ' t = time() seeding_function = set_seeding_function(generator_model=generator_model) print '%.2f SEC '%(time()-t) print 'COMPILING SAMPLING FUNCTION ' t = time() sampling_function = set_sampling_function(generator_model=generator_model) print '%.2f SEC '%(time()-t) print 'READ RAW WAV DATA' _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav') valid_raw_data = train_raw_data[160000000:] train_raw_data = train_raw_data[:160000000] train_raw_data = train_raw_data[2000:] train_raw_data = (train_raw_data/(1.15*2.**13)).astype(floatX) valid_raw_data = (valid_raw_data/(1.15*2.**13)).astype(floatX) num_train_total_steps = train_raw_data.shape[0] num_valid_total_steps = valid_raw_data.shape[0] batch_size = 64 num_valid_sequences = num_valid_total_steps/(feature_size*seed_window_size)-1 valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*seed_window_size)] valid_source_data = valid_source_data.reshape((num_valid_sequences, seed_window_size, feature_size)) valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*seed_window_size)] valid_target_data = valid_target_data.reshape((num_valid_sequences, seed_window_size, feature_size)) valid_raw_data = None num_seeds = 10 valid_shuffle_idx = np_rng.permutation(num_valid_sequences) valid_source_data = valid_source_data[valid_shuffle_idx] valid_target_data = valid_target_data[valid_shuffle_idx] valid_seed_data = valid_source_data[:num_seeds][0][:] valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1) valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1) num_valid_batches = num_valid_sequences/batch_size print 'NUM OF VALID BATCHES : ', num_valid_sequences/batch_size best_valid = 10000. 
print 'FIRST GET SEEDING' valid_batch_count = 0 for valid_idx in xrange(num_valid_batches): start_idx = batch_size*valid_idx end_idx = batch_size*(valid_idx+1) evaluation_outputs = evaluation_function(valid_source_data[:][start_idx:end_idx][:], valid_target_data[:][start_idx:end_idx][:]) tf_valid_mse += evaluation_outputs[0].mean() valid_batch_count += 1 if valid_idx==0: recon_data = evaluation_outputs[1] recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1) recon_data = recon_data[:10] recon_data = recon_data.reshape((10, -1)) recon_data = recon_data*(1.15*2.**13) recon_data = recon_data.astype(numpy.int16) save_wavfile(recon_data, model_name+'_recon') orig_data = valid_target_data[:][start_idx:end_idx][:] orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1) orig_data = orig_data[:10] orig_data = orig_data.reshape((10, -1)) orig_data = orig_data*(1.15*2.**13) orig_data = orig_data.astype(numpy.int16) save_wavfile(orig_data, model_name+'_orig') valid_sample_cost_list.append(tf_valid_mse/valid_batch_count) print '----------------------------------------------------------' print 'epoch {}, batch_cnt {} => valid sample cost {}'.format(e, train_batch_count, valid_sample_cost_list[-1]) if best_valid>valid_sample_cost_list[-1]: best_valid = valid_sample_cost_list[-1]
arr=numpy.asarray(train_lambda_regularizer_list)) numpy.save(file=model_name+'_valid_sample_cost', arr=numpy.asarray(valid_sample_cost_list)) num_sec = 100 sampling_length = num_sec*sampling_rate/feature_size seed_input_data = valid_seed_data [generated_sequence, ] = sampling_function(seed_input_data, sampling_length) sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1) sample_data = sample_data.reshape((num_seeds, -1)) sample_data = sample_data*(1.15*2.**13) sample_data = sample_data.astype(numpy.int16) save_wavfile(sample_data, model_name+'_sample') if best_valid==valid_sample_cost_list[-1]: save_model_params(generator_model, model_name+'_model.pkl') if __name__=="__main__": feature_size = 1600 hidden_size = 800 model_name = 'LSTM_REGULARIZER_LAMBDA' \ + '_FEATURE{}'.format(int(feature_size)) \ + '_HIDDEN{}'.format(int(hidden_size)) \ # generator model generator_model = set_generator_model(input_size=feature_size,