Example #1
def train_model(feature_size,
                hidden_size,
                init_window_size,
                generator_model,
                generator_gan_optimizer,
                generator_tf_optimizer,
                discriminator_feature_model,
                discriminator_output_model,
                discriminator_gan_optimizer,
                num_epochs,
                model_name):

    # generator updater
    print 'COMPILING GAN UPDATE FUNCTION '
    gan_updater = set_gan_update_function(generator_model=generator_model,
                                          discriminator_feature_model=discriminator_feature_model,
                                          discriminator_output_model=discriminator_output_model,
                                          generator_optimizer=generator_gan_optimizer,
                                          discriminator_optimizer=discriminator_gan_optimizer,
                                          generator_grad_clipping=.0,
                                          discriminator_grad_clipping=.0)

    print 'COMPILING TF UPDATE FUNCTION '
    tf_updater = set_tf_update_function(generator_model=generator_model,
                                        generator_optimizer=generator_tf_optimizer,
                                        generator_grad_clipping=.0)

    # evaluator
    print 'COMPILING EVALUATION FUNCTION '
    evaluator = set_evaluation_function(generator_model=generator_model)

    # sample generator
    print 'COMPILING SAMPLING FUNCTION '
    sample_generator = set_sample_function(generator_model=generator_model)

    print 'READ RAW WAV DATA'
    _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav')
    valid_raw_data  = train_raw_data[160000000:]
    train_raw_data  = train_raw_data[:160000000]
    train_raw_data  = train_raw_data[2000:]
    train_raw_data  = (train_raw_data/(1.15*2.**13)).astype(floatX)
    valid_raw_data  = (valid_raw_data/(1.15*2.**13)).astype(floatX)

    num_train_total_steps = train_raw_data.shape[0]
    num_valid_total_steps = valid_raw_data.shape[0]
    batch_size      = 64

    num_valid_sequences = num_valid_total_steps/(feature_size*init_window_size)-1
    valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*init_window_size)]
    valid_source_data = valid_source_data.reshape((num_valid_sequences, init_window_size, feature_size))
    valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*init_window_size)]
    valid_target_data = valid_target_data.reshape((num_valid_sequences, init_window_size, feature_size))

    valid_raw_data = None
    num_seeds = 10
    valid_shuffle_idx = np_rng.permutation(num_valid_sequences)
    valid_source_data = valid_source_data[valid_shuffle_idx]
    valid_target_data = valid_target_data[valid_shuffle_idx]
    valid_seed_data   = valid_source_data[:num_seeds, 0, :]  # first frame of the first num_seeds sequences (the original [:num_seeds][0][:] kept only one sequence)
    valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1)
    valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1)
    num_valid_batches = num_valid_sequences/batch_size


    print 'NUM OF VALID BATCHES : ', num_valid_batches
    best_valid = 10000.

    print 'START TRAINING'
    # for each epoch
    tf_mse_list                = []
    tf_generator_grad_list     = []

    gan_generator_grad_list     = []
    gan_generator_cost_list     = []
    gan_discriminator_grad_list = []
    gan_discriminator_cost_list = []
    gan_true_score_list         = []
    gan_false_score_list        = []
    gan_mse_list                = []

    valid_mse_list = []

    train_batch_count = 0
    for e in xrange(num_epochs):
        window_size      = init_window_size + 5*e
        sequence_size    = feature_size*window_size
        last_seq_idx     = num_train_total_steps-(sequence_size+feature_size)
        train_seq_orders = np_rng.permutation(last_seq_idx)
        train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size]
        train_seq_orders = train_seq_orders.reshape((-1, batch_size))

        print 'NUM OF TRAIN BATCHES : ', train_seq_orders.shape[0]
        # for each batch
        for batch_idx, batch_info in enumerate(train_seq_orders):
            # source data
            train_source_idx  = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0)
            train_source_data = train_raw_data[train_source_idx]
            train_source_data = train_source_data.reshape((batch_size, window_size, feature_size))
            train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)

            # target data
            train_target_idx  = train_source_idx + feature_size
            train_target_data = train_raw_data[train_target_idx]
            train_target_data = train_target_data.reshape((batch_size, window_size, feature_size))
            train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)

            # tf update
            tf_update_output = tf_updater(train_source_data,
                                          train_target_data)
            tf_square_error        = tf_update_output[0].mean()
            tf_generator_grad_norm = tf_update_output[1]

            # gan update
            gan_update_output = gan_updater(train_source_data,
                                            train_target_data)
            generator_gan_cost               = gan_update_output[0].mean()
            discriminator_gan_cost           = gan_update_output[1].mean()
            discriminator_true_score         = gan_update_output[2].mean()
            discriminator_false_score        = gan_update_output[3].mean()
            gan_square_error                 = gan_update_output[4].mean()
            gan_generator_grad_norm          = gan_update_output[5]
            gan_discriminator_grad_norm      = gan_update_output[6]

            train_batch_count += 1

            tf_generator_grad_list.append(tf_generator_grad_norm)
            tf_mse_list.append(tf_square_error)

            gan_generator_grad_list.append(gan_generator_grad_norm)
            gan_generator_cost_list.append(generator_gan_cost)

            gan_discriminator_grad_list.append(gan_discriminator_grad_norm)
            gan_discriminator_cost_list.append(discriminator_gan_cost)

            gan_true_score_list.append(discriminator_true_score)
            gan_false_score_list.append(discriminator_false_score)

            gan_mse_list.append(gan_square_error)

            if train_batch_count%10==0:
                print '============{}_LENGTH{}============'.format(model_name, window_size)
                print 'epoch {}, batch_cnt {} => TF  generator mse cost  {}'.format(e, train_batch_count, tf_mse_list[-1])
                print 'epoch {}, batch_cnt {} => GAN generator mse cost  {}'.format(e, train_batch_count, gan_mse_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN generator     cost  {}'.format(e, train_batch_count, gan_generator_cost_list[-1])
                print 'epoch {}, batch_cnt {} => GAN discriminator cost  {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN input score         {}'.format(e, train_batch_count, gan_true_score_list[-1])
                print 'epoch {}, batch_cnt {} => GAN sample score        {}'.format(e, train_batch_count, gan_false_score_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN discrim.  grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1])
                print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => TF  generator grad norm {}'.format(e, train_batch_count, tf_generator_grad_list[-1])

            if train_batch_count%100==0:
                tf_valid_mse = 0.0
                valid_batch_count = 0
                for valid_idx in xrange(num_valid_batches):
                    start_idx = batch_size*valid_idx
                    end_idx   = batch_size*(valid_idx+1)
                    evaluation_outputs = evaluator(valid_source_data[:, start_idx:end_idx, :],
                                                   valid_target_data[:, start_idx:end_idx, :])
                    tf_valid_mse += evaluation_outputs[0].mean()
                    valid_batch_count += 1

                    if valid_idx==0:
                        recon_data = evaluation_outputs[1]
                        recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1)
                        recon_data = recon_data[:10]
                        recon_data = recon_data.reshape((10, -1))
                        recon_data = recon_data*(1.15*2.**13)
                        recon_data = recon_data.astype(numpy.int16)
                        save_wavfile(recon_data, model_name+'_recon')

                        orig_data = valid_target_data[:, start_idx:end_idx, :]
                        orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1)
                        orig_data = orig_data[:10]
                        orig_data = orig_data.reshape((10, -1))
                        orig_data = orig_data*(1.15*2.**13)
                        orig_data = orig_data.astype(numpy.int16)
                        save_wavfile(orig_data, model_name+'_orig')

                valid_mse_list.append(tf_valid_mse/valid_batch_count)
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => TF  valid mse cost  {}'.format(e, train_batch_count, valid_mse_list[-1])

                if best_valid>valid_mse_list[-1]:
                    best_valid = valid_mse_list[-1]


            if train_batch_count%500==0:
                numpy.save(file=model_name+'tf_mse',
                           arr=numpy.asarray(tf_mse_list))
                numpy.save(file=model_name+'tf_gen_grad',
                           arr=numpy.asarray(tf_generator_grad_list))
                numpy.save(file=model_name+'gan_mse',
                           arr=numpy.asarray(gan_mse_list))
                numpy.save(file=model_name+'gan_gen_cost',
                           arr=numpy.asarray(gan_generator_cost_list))
                numpy.save(file=model_name+'gan_disc_cost',
                           arr=numpy.asarray(gan_discriminator_cost_list))
                numpy.save(file=model_name+'gan_input_score',
                           arr=numpy.asarray(gan_true_score_list))
                numpy.save(file=model_name+'gan_sample_score',
                           arr=numpy.asarray(gan_false_score_list))
                numpy.save(file=model_name+'gan_gen_grad',
                           arr=numpy.asarray(gan_generator_grad_list))
                numpy.save(file=model_name+'gan_disc_grad',
                           arr=numpy.asarray(gan_discriminator_grad_list))
                numpy.save(file=model_name+'valid_mse',
                           arr=numpy.asarray(valid_mse_list))

                num_sec = 100
                sampling_length = num_sec*sampling_rate/feature_size
                seed_input_data = valid_seed_data

                [generated_sequence, ] = sample_generator(seed_input_data,
                                                          sampling_length)

                sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_seeds, -1))
                sample_data = sample_data*(1.15*2.**13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')

                if best_valid==valid_mse_list[-1]:
                    save_model_params(generator_model, model_name+'_gen_model.pkl')
                    save_model_params(discriminator_feature_model, model_name+'_disc_feat_model.pkl')
                    save_model_params(discriminator_output_model, model_name+'_disc_output_model.pkl')
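Note on the windowed batching above: every source window of a batch is gathered in one fancy-indexing step by broadcasting the per-row start offsets in batch_info against an arange over the window, giving a (batch_size, sequence_size) index matrix into the raw waveform (broadcasting alone suffices, so the explicit numpy.repeat is not strictly required). A minimal self-contained sketch of the same trick, with toy sizes standing in for the real ones:

import numpy

# toy stand-ins for the real sizes used above
batch_size, window_size, feature_size = 4, 3, 2
sequence_size = feature_size * window_size

raw    = numpy.arange(100, dtype=numpy.float32)  # fake waveform
starts = numpy.array([0, 10, 20, 30])            # one start offset per batch row

# row i becomes raw[starts[i] : starts[i] + sequence_size]
idx    = starts.reshape((batch_size, 1)) + numpy.arange(sequence_size).reshape((1, sequence_size))
source = raw[idx]                                # (batch, sequence_size)
source = source.reshape((batch_size, window_size, feature_size))
source = numpy.swapaxes(source, 0, 1)            # (window, batch, feature), as the updaters expect
print(source.shape)                              # (3, 4, 2)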
Example #2
def train_model(feature_size,
                hidden_size,
                init_window_size,
                generator_model,
                generator_optimizer,
                num_epochs,
                model_name):

    # model updater
    print 'COMPILING UPDATER FUNCTION '
    t = time()
    updater_function = set_updater_function(generator_model=generator_model,
                                            generator_optimizer=generator_optimizer,
                                            generator_grad_clipping=.0)
    print '%.2f SEC '%(time()-t)

    # evaluator
    print 'COMPILING EVALUATION FUNCTION '
    t = time()
    evaluation_function = set_evaluation_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    # sample generator
    print 'COMPILING SAMPLING FUNCTION '
    t = time()
    sampling_function = set_sampling_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    print 'READ RAW WAV DATA'
    _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav')
    valid_raw_data  = train_raw_data[160000000:]
    train_raw_data  = train_raw_data[:160000000]
    train_raw_data  = train_raw_data[2000:]
    train_raw_data  = (train_raw_data/(1.15*2.**13)).astype(floatX)
    valid_raw_data  = (valid_raw_data/(1.15*2.**13)).astype(floatX)

    num_train_total_steps = train_raw_data.shape[0]
    num_valid_total_steps = valid_raw_data.shape[0]
    batch_size      = 64

    num_valid_sequences = num_valid_total_steps/(feature_size*init_window_size)-1
    valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*init_window_size)]
    valid_source_data = valid_source_data.reshape((num_valid_sequences, init_window_size, feature_size))
    valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*init_window_size)]
    valid_target_data = valid_target_data.reshape((num_valid_sequences, init_window_size, feature_size))

    valid_raw_data = None
    num_seeds = 10
    valid_shuffle_idx = np_rng.permutation(num_valid_sequences)
    valid_source_data = valid_source_data[valid_shuffle_idx]
    valid_target_data = valid_target_data[valid_shuffle_idx]
    valid_seed_data   = valid_source_data[:num_seeds, 0, :]  # first frame of the first num_seeds sequences (the original [:num_seeds][0][:] kept only one sequence)
    valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1)
    valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1)
    num_valid_batches = num_valid_sequences/batch_size


    print 'NUM OF VALID BATCHES : ', num_valid_batches
    best_valid = 10000.

    print 'START TRAINING'
    # for each epoch
    train_sample_cost_list        = []
    train_regularizer_cost_list   = []
    train_gradient_norm_list      = []
    train_lambda_regularizer_list = []
    valid_sample_cost_list        = []


    train_batch_count = 0
    for e in xrange(num_epochs):
        window_size      = init_window_size + 5*e
        sequence_size    = feature_size*window_size
        last_seq_idx     = num_train_total_steps-(sequence_size+feature_size)
        train_seq_orders = np_rng.permutation(last_seq_idx)
        train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size]
        train_seq_orders = train_seq_orders.reshape((-1, batch_size))

        print 'NUM OF TRAIN BATCHES : ', train_seq_orders.shape[0]
        # for each batch
        for batch_idx, batch_info in enumerate(train_seq_orders):
            # source data
            train_source_idx  = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0)
            train_source_data = train_raw_data[train_source_idx]
            train_source_data = train_source_data.reshape((batch_size, window_size, feature_size))
            train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)

            # target data
            train_target_idx  = train_source_idx + feature_size
            train_target_data = train_raw_data[train_target_idx]
            train_target_data = train_target_data.reshape((batch_size, window_size, feature_size))
            train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)


            # update model
            lambda_regularizer = 0.1
            updater_outputs = updater_function(train_source_data,
                                               train_target_data,
                                               lambda_regularizer)
            train_sample_cost      = updater_outputs[0].mean()
            train_regularizer_cost = updater_outputs[1].mean()
            train_gradient_norm    = updater_outputs[2]

            train_batch_count += 1

            train_sample_cost_list.append(train_sample_cost)
            train_regularizer_cost_list.append(train_regularizer_cost)
            train_gradient_norm_list.append(train_gradient_norm)
            train_lambda_regularizer_list.append(lambda_regularizer)

            if train_batch_count%10==0:
                print '============{}_LENGTH{}============'.format(model_name, window_size)
                print 'epoch {}, batch_cnt {} => train sample      cost   {}'.format(e, train_batch_count, train_sample_cost_list[-1])
                print 'epoch {}, batch_cnt {} => train regularizer cost   {}'.format(e, train_batch_count, train_regularizer_cost_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => train gradient    norm   {}'.format(e, train_batch_count, train_gradient_norm_list[-1])
                print 'epoch {}, batch_cnt {} => train regularizer lambda {}'.format(e, train_batch_count, train_lambda_regularizer_list[-1])


            if train_batch_count%100==0:
                tf_valid_mse = 0.0
                valid_batch_count = 0
                for valid_idx in xrange(num_valid_batches):
                    start_idx = batch_size*valid_idx
                    end_idx   = batch_size*(valid_idx+1)
                    evaluation_outputs = evaluation_function(valid_source_data[:, start_idx:end_idx, :],
                                                             valid_target_data[:, start_idx:end_idx, :])
                    tf_valid_mse += evaluation_outputs[0].mean()
                    valid_batch_count += 1

                    if valid_idx==0:
                        recon_data = evaluation_outputs[1]
                        recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1)
                        recon_data = recon_data[:10]
                        recon_data = recon_data.reshape((10, -1))
                        recon_data = recon_data*(1.15*2.**13)
                        recon_data = recon_data.astype(numpy.int16)
                        save_wavfile(recon_data, model_name+'_recon')

                        orig_data = valid_target_data[:, start_idx:end_idx, :]
                        orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1)
                        orig_data = orig_data[:10]
                        orig_data = orig_data.reshape((10, -1))
                        orig_data = orig_data*(1.15*2.**13)
                        orig_data = orig_data.astype(numpy.int16)
                        save_wavfile(orig_data, model_name+'_orig')

                valid_sample_cost_list.append(tf_valid_mse/valid_batch_count)
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => valid sample      cost   {}'.format(e, train_batch_count, valid_sample_cost_list[-1])

                if best_valid>valid_sample_cost_list[-1]:
                    best_valid = valid_sample_cost_list[-1]


            if train_batch_count%500==0:
                numpy.save(file=model_name+'_train_sample_cost',
                           arr=numpy.asarray(train_sample_cost_list))
                numpy.save(file=model_name+'_train_regularizer_cost',
                           arr=numpy.asarray(train_regularizer_cost_list))
                numpy.save(file=model_name+'_train_gradient_norm',
                           arr=numpy.asarray(train_gradient_norm_list))
                numpy.save(file=model_name+'_train_lambda_value',
                           arr=numpy.asarray(train_lambda_regularizer_list))
                numpy.save(file=model_name+'_valid_sample_cost',
                           arr=numpy.asarray(valid_sample_cost_list))

                num_sec = 100
                sampling_length = num_sec*sampling_rate/feature_size
                seed_input_data = valid_seed_data

                [generated_sequence, ] = sampling_function(seed_input_data,
                                                           sampling_length)

                sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_seeds, -1))
                sample_data = sample_data*(1.15*2.**13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')

                if best_valid==valid_sample_cost_list[-1]:
                    save_model_params(generator_model, model_name+'_model.pkl')
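Note on the normalization used in these examples: 16-bit PCM samples are divided by the constant 1.15*2.**13 (about 9420) before training and multiplied back before writing WAV files; presumably the extra 1.15 factor leaves headroom so typical samples land within roughly [-1, 1] (that rationale is an assumption, not stated in the code). A quick round-trip sketch:

import numpy

SCALE = 1.15 * 2.0 ** 13   # same constant as above, ~9420

pcm        = numpy.array([-9000, -128, 0, 512, 9000], dtype=numpy.int16)
normalized = (pcm / SCALE).astype(numpy.float32)     # what the trainer feeds the model
restored   = (normalized * SCALE).astype(numpy.int16)  # what save_wavfile receives
print(normalized)
print(restored)   # approximately recovers pcm (astype truncates toward zero)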
Example #3
def train_model(
    feature_size,
    hidden_size,
    num_layers,
    generator_rnn_model,
    generator_mean_model,
    generator_std_model,
    generator_optimizer,
    num_epochs,
    model_name,
):

    # generator updater
    print "DEBUGGING GENERATOR UPDATE FUNCTION "
    t = time()
    generator_updater = set_generator_update_function(
        generator_rnn_model=generator_rnn_model,
        generator_mean_model=generator_mean_model,
        generator_std_model=generator_std_model,
        generator_optimizer=generator_optimizer,
        grad_clipping=0.0,
    )
    print "{}.sec".format(time() - t)

    # generator evaluator
    print "DEBUGGING GENERATOR EVALUATION FUNCTION "
    t = time()
    generator_evaluator = set_generator_evaluation_function(
        generator_rnn_model=generator_rnn_model,
        generator_mean_model=generator_mean_model,
        generator_std_model=generator_std_model,
    )
    print "{}.sec".format(time() - t)

    # generator sampler
    print "DEBUGGING GENERATOR SAMPLING FUNCTION "
    t = time()
    generator_sampler = set_generator_sampling_function(
        generator_rnn_model=generator_rnn_model,
        generator_mean_model=generator_mean_model,
        generator_std_model=generator_std_model,
    )
    print "{}.sec".format(time() - t)

    print "START TRAINING"
    # for each epoch
    generator_train_cost_list = []
    generator_valid_cost_list = []

    generator_grad_norm_mean = 0.0

    init_window_size = 100
    for e in xrange(num_epochs):
        window_size = init_window_size + 5 * e

        # set train data stream with proper length (window size)
        train_data_stream = set_train_datastream(feature_size=feature_size, window_size=window_size)
        # get train data iterator
        train_data_iterator = train_data_stream.get_epoch_iterator()

        # for each batch
        train_batch_count = 0
        train_batch_size = 0
        train_source_data = []
        train_target_data = []
        for batch_idx, batch_data in enumerate(train_data_iterator):
            if batch_idx < 100:
                continue
            if train_batch_size == 0:
                train_source_data = []
                train_target_data = []

            # source data
            single_data = batch_data[0]
            single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size)
            train_source_data.append(single_data)

            # target data
            single_data = batch_data[1]
            single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size)
            train_target_data.append(single_data)

            train_batch_size += 1

            if train_batch_size < 128:
                continue
            else:
                # source data
                train_source_data = numpy.asarray(train_source_data, dtype=floatX)
                train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)
                # target data
                train_target_data = numpy.asarray(train_target_data, dtype=floatX)
                train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)
                train_batch_size = 0

            # normalize
            train_source_data = (train_source_data / (1.15 * 2.0 ** 13)).astype(floatX)
            train_target_data = (train_target_data / (1.15 * 2.0 ** 13)).astype(floatX)

            # update generator
            generator_updater_input = [train_source_data, train_target_data]

            generator_updater_output = generator_updater(*generator_updater_input)
            generator_train_cost = generator_updater_output[0].mean()
            generator_grad_norm = generator_updater_output[1]

            generator_grad_norm_mean += generator_grad_norm
            train_batch_count += 1

            sampling_seed_data = []
            if train_batch_count % 100 == 0:
                # set valid data stream with proper length (window size)
                valid_window_size = window_size
                valid_data_stream = set_valid_datastream(feature_size=feature_size, window_size=valid_window_size)
                # get valid data iterator
                valid_data_iterator = valid_data_stream.get_epoch_iterator()

                # for each batch
                valid_batch_count = 0
                valid_batch_size = 0
                valid_source_data = []
                valid_target_data = []
                valid_cost_mean = 0.0
                for batch_idx, batch_data in enumerate(valid_data_iterator):
                    if valid_batch_size == 0:
                        valid_source_data = []
                        valid_target_data = []

                    # source data
                    single_data = batch_data[0]
                    single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size)
                    valid_source_data.append(single_data)

                    # target data
                    single_data = batch_data[1]
                    single_data = single_data.reshape(single_data.shape[0] / feature_size, feature_size)
                    valid_target_data.append(single_data)

                    valid_batch_size += 1

                    if valid_batch_size < 128:
                        continue
                    else:
                        # source data
                        valid_source_data = numpy.asarray(valid_source_data, dtype=floatX)
                        valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1)
                        # target data
                        valid_target_data = numpy.asarray(valid_target_data, dtype=floatX)
                        valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1)
                        valid_batch_size = 0

                    # normalize
                    valid_source_data = (valid_source_data / (1.15 * 2.0 ** 13)).astype(floatX)
                    valid_target_data = (valid_target_data / (1.15 * 2.0 ** 13)).astype(floatX)

                    generator_evaluator_input = [valid_source_data, valid_target_data]

                    generator_evaluator_output = generator_evaluator(*generator_evaluator_input)
                    generator_valid_cost = generator_evaluator_output[0].mean()

                    valid_cost_mean += generator_valid_cost
                    valid_batch_count += 1

                    if valid_batch_count > 100:
                        sampling_seed_data = valid_source_data
                        break

                valid_cost_mean = valid_cost_mean / valid_batch_count

                print "=============sample length {}=============================".format(window_size)
                print "epoch {}, batch_cnt {} => generator train cost {}".format(
                    e, train_batch_count, generator_train_cost
                )
                print "epoch {}, batch_cnt {} => generator valid cost {}".format(e, train_batch_count, valid_cost_mean)
                print "epoch {}, batch_cnt {} => generator grad norm  {}".format(
                    e, train_batch_count, generator_grad_norm_mean / train_batch_count
                )

                generator_train_cost_list.append(generator_train_cost)
                generator_valid_cost_list.append(valid_cost_mean)

                plot_learning_curve(
                    cost_values=[generator_train_cost_list, generator_valid_cost_list],
                    cost_names=["Train Cost", "Valid Cost"],
                    save_as=model_name + "_model_cost.png",
                    legend_pos="upper left",
                )

            if train_batch_count % 100 == 0:
                num_samples = 10
                num_sec = 10
                sampling_length = num_sec * sampling_rate / feature_size

                curr_input_data = sampling_seed_data[0][:num_samples]
                prev_hidden_data = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX)
                prev_hidden_data = numpy.tanh(prev_hidden_data)
                output_data = numpy.zeros(shape=(sampling_length, num_samples, feature_size))
                for s in xrange(sampling_length):

                    generator_input = [curr_input_data, prev_hidden_data]

                    [curr_input_data, prev_hidden_data] = generator_sampler(*generator_input)

                    output_data[s] = curr_input_data
                sample_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_samples, -1))
                sample_data = sample_data * (1.15 * 2.0 ** 13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name + "_sample")
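Note on batch assembly in Example #3: single sequences are accumulated in a Python list and converted to an array only once the batch is full; numpy.asarray stacks them along a new leading batch axis, and swapaxes then moves time to the front, producing the (time, batch, feature) layout every updater here expects. A minimal sketch with toy shapes:

import numpy

window_size, feature_size = 5, 3
batch = []
for _ in range(4):   # pretend 4 sequences arrive one at a time
    batch.append(numpy.zeros((window_size, feature_size), dtype=numpy.float32))

batch = numpy.asarray(batch, dtype=numpy.float32)  # (batch, time, feature)
batch = numpy.swapaxes(batch, 0, 1)                # (time, batch, feature) for the recurrent updater
print(batch.shape)                                 # (5, 4, 3)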
Example #4
def train_model(recurrent_model,
                output_model,
                num_hiddens,
                model_optimizer,
                data_stream,
                num_epochs,
                model_name):

    update_function = set_update_function(recurrent_model=recurrent_model,
                                          output_model=output_model,
                                          optimizer=model_optimizer,
                                          grad_clip=1.0)

    generation_function = set_generation_function(recurrent_model=recurrent_model,
                                                  output_model=output_model)

    # for each epoch
    cost_list = []
    cnt = 0
    for e in xrange(num_epochs):
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()
        # for each batch
        for batch_idx, batch_data in enumerate(data_iterator):
            input_data  = numpy.swapaxes(batch_data[0], axis1=0, axis2=1)
            input_mask  = numpy.ones(shape=input_data.shape[:2], dtype=floatX)
            target_data = numpy.swapaxes(batch_data[1], axis1=0, axis2=1)

            input_data  = (input_data/(2.**15)).astype(floatX)
            target_data = (target_data/(2.**15)).astype(floatX)

            time_length = input_data.shape[0]
            num_samples = input_data.shape[1]

            truncate_grad_step = time_length
            cnt = cnt + 1

            # update model
            update_input  = [input_data,
                             input_mask,
                             None,
                             None,
                             target_data,
                             truncate_grad_step]
            update_output = update_function(*update_input)

            # update result
            sample_cost = update_output[2].mean()
            if (batch_idx+1)%1000==0:
                print 'epoch {}, batch_idx {} : cost {} truncate({})'.format(e, batch_idx, sample_cost, truncate_grad_step)
                cost_list.append(sample_cost)

            if (batch_idx+1)%1000==0:
                plot_learning_curve(cost_values=[cost_list,],
                                    cost_names=['Input cost (train)',],
                                    save_as=model_name+'.png',
                                    legend_pos='upper left')

            if (batch_idx+1)%10000==0:
                generation_sample = 10
                generation_length = 1000
                input_data  = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, input_feature_size)).astype(floatX)
                hidden_data = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, num_hiddens)).astype(floatX)
                cell_data   = numpy.zeros(shape=(generation_sample, num_hiddens)).astype(floatX)
                output_data = numpy.zeros(shape=(generation_length, generation_sample, input_feature_size))
                for t in xrange(generation_length):
                    [hidden_data, cell_data, input_data] = generation_function(input_data, hidden_data, cell_data)
                    output_data[t] = input_data

                output_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                output_data = output_data*(2.**15)
                output_data = output_data.astype(numpy.int16)
                save_wavfile(output_data, model_name+'_sample')
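Note on the sampling block above: it is a plain autoregressive loop in which each generated frame is fed back as the next input while the LSTM hidden and cell states are threaded through. A self-contained sketch with a dummy step function standing in for the compiled generation_function (the real one is built by set_generation_function and is not shown in this snippet):

import numpy

num_hiddens, feature_size = 8, 4
generation_sample, generation_length = 2, 5

def dummy_step(input_data, hidden_data, cell_data):
    # stand-in for generation_function: returns new state plus the next frame
    new_hidden = numpy.tanh(hidden_data + 0.1)
    new_cell   = cell_data + 0.1
    next_input = numpy.tanh(input_data)
    return new_hidden, new_cell, next_input

input_data  = numpy.random.uniform(-1.0, 1.0, size=(generation_sample, feature_size)).astype('float32')
hidden_data = numpy.random.uniform(-1.0, 1.0, size=(generation_sample, num_hiddens)).astype('float32')
cell_data   = numpy.zeros((generation_sample, num_hiddens), dtype='float32')
output_data = numpy.zeros((generation_length, generation_sample, feature_size))

for t in range(generation_length):
    hidden_data, cell_data, input_data = dummy_step(input_data, hidden_data, cell_data)
    output_data[t] = input_data   # collect one generated frame per step
print(output_data.shape)          # (5, 2, 4)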
Example #5
def train_model(feature_size,
                time_size,
                hidden_size,
                num_layers,
                recurrent_model,
                output_model,
                model_optimizer,
                data_stream,
                num_epochs,
                model_name):

    update_function = set_update_function(recurrent_model=recurrent_model,
                                          output_model=output_model,
                                          optimizer=model_optimizer,
                                          grad_clip=1.0)

    generation_function = set_generation_function(recurrent_model=recurrent_model,
                                                  output_model=output_model)

    # for each epoch
    cost_list = []
    cnt = 0
    for e in xrange(num_epochs):
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()
        # for each batch
        for batch_idx, batch_data in enumerate(data_iterator):
            # source data
            source_data = batch_data[0]
            source_data = source_data.reshape(time_size, feature_size)
            source_data = numpy.expand_dims(source_data, axis=0)
            source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)

            # source mask
            source_mask = numpy.ones(shape=source_data.shape[:2], dtype=floatX)

            # target data
            target_data = batch_data[1]
            target_data = target_data.reshape(time_size, feature_size)
            target_data = numpy.expand_dims(target_data, axis=0)
            target_data = numpy.swapaxes(target_data, axis1=0, axis2=1)

            # normalize
            source_data = (source_data/(2.**15)).astype(floatX)
            target_data = (target_data/(2.**15)).astype(floatX)

            # get time length
            time_length = source_data.shape[0]
            truncate_grad_step = time_length

            # update model
            update_input  = [source_data,
                             source_mask,
                             None,
                             None,
                             target_data,
                             truncate_grad_step]
            update_output = update_function(*update_input)

            # update result
            sample_cost = update_output[2].mean()
            if (batch_idx+1)%100==0:
                print 'epoch {}, batch_idx {} : cost {} truncate({})'.format(e, batch_idx, sample_cost, truncate_grad_step)
                cost_list.append(sample_cost)

            if (batch_idx+1)%100==0:
                plot_learning_curve(cost_values=[cost_list,],
                                    cost_names=['Input cost (train)',],
                                    save_as=model_name+'.png',
                                    legend_pos='upper left')

            if (batch_idx+1)%1000==0:
                generation_sample = 10
                generation_length = 100
                input_data  = numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, feature_size)).astype(floatX)
                hidden_data_list = [numpy.random.uniform(low=-1.0, high=1.0, size=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                cell_data_list   = [numpy.zeros(shape=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                output_data = numpy.zeros(shape=(generation_length, generation_sample, feature_size))

                input_list = [input_data, ] + hidden_data_list + cell_data_list
                for t in xrange(generation_length):
                    result_data = generation_function(*input_list)

                    hidden_data_list = result_data[0:num_layers]
                    cell_data_list   = result_data[num_layers:2*num_layers]
                    input_data       = result_data[-1]
                    input_list = [input_data, ] + hidden_data_list + cell_data_list

                    output_data[t] = input_data
                output_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                output_data = output_data.reshape((generation_sample, -1))
                output_data = output_data*(2.**15)
                output_data = output_data.astype(numpy.int16)
                save_wavfile(output_data, model_name+'_sample')
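Note on the multi-layer state handling in Example #5: the input plus every layer's hidden and cell state are flattened into one argument list, and the flat result is sliced back apart by position. A minimal sketch of that packing convention, assuming the compiled function returns hiddens first, then cells, then the next input (the order used in the loop above):

import numpy

num_layers, hidden_size, feature_size, n = 2, 6, 3, 4

def dummy_generation_function(*args):
    # stand-in: echo the states and produce a fake next frame
    input_data = args[0]
    hiddens = list(args[1:1 + num_layers])
    cells   = list(args[1 + num_layers:1 + 2 * num_layers])
    return hiddens + cells + [numpy.tanh(input_data)]

input_data  = numpy.zeros((n, feature_size), dtype='float32')
hidden_list = [numpy.zeros((n, hidden_size), dtype='float32') for _ in range(num_layers)]
cell_list   = [numpy.zeros((n, hidden_size), dtype='float32') for _ in range(num_layers)]

result      = dummy_generation_function(*([input_data] + hidden_list + cell_list))
hidden_list = result[0:num_layers]               # same slicing as in the loop above
cell_list   = result[num_layers:2 * num_layers]
input_data  = result[-1]
print(len(hidden_list), len(cell_list), input_data.shape)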
Example #6
def train_model(feature_size,
                hidden_size,
                generator_model,
                generator_gan_optimizer,
                generator_tf_optimizer,
                discriminator_model,
                discriminator_optimizer,
                num_epochs,
                model_name):

    # generator updater
    print 'COMPILING TEACHER FORCE UPDATE FUNCTION '
    tf_generator_updater = set_teacher_force_update_function(generator_model=generator_model,
                                                             generator_optimizer=generator_tf_optimizer,
                                                             generator_grad_clipping=60.0)

    print 'COMPILING GAN UPDATE FUNCTION '
    gan_generator_updater = set_gan_update_function(generator_model=generator_model,
                                                    discriminator_model=discriminator_model,
                                                    generator_optimizer=generator_gan_optimizer,
                                                    discriminator_optimizer=discriminator_optimizer,
                                                    generator_grad_clipping=60.0,
                                                    discriminator_grad_clipping=40.0)

    # evaluator
    print 'COMPILING EVALUATION FUNCTION '
    evaluator = set_evaluation_function(generator_model=generator_model)

    # sample generator
    print 'COMPILING SAMPLING FUNCTION '
    sample_generator = set_sample_function(generator_model=generator_model)


    print 'START TRAINING'
    # for each epoch
    tf_generator_grad_list = []
    tf_generator_cost_list = []

    gan_generator_grad_list     = []
    gan_generator_cost_list     = []
    gan_discriminator_grad_list = []
    gan_discriminator_cost_list = []
    gan_true_score_list         = []
    gan_false_score_list        = []
    gan_mse_list                = []

    init_window_size = 100
    for e in xrange(num_epochs):
        window_size = init_window_size + 5*e

        # set train data stream with proper length (window size)
        train_data_stream = set_train_datastream(feature_size=feature_size,
                                                 window_size=window_size)
        # get train data iterator
        train_data_iterator = train_data_stream.get_epoch_iterator()

        # for each batch
        train_batch_count = 0
        train_batch_size = 0
        train_source_data = []
        train_target_data = []
        for batch_idx, batch_data in enumerate(train_data_iterator):
            # skip the beginning part
            if batch_idx<10000:
                continue

            # init train batch data
            if train_batch_size==0:
                train_source_data = []
                train_target_data = []

            # save source data
            single_data = batch_data[0]
            single_data = single_data.reshape(single_data.shape[0]/feature_size, feature_size)
            train_source_data.append(single_data)

            # save target data
            single_data = batch_data[1]
            single_data = single_data.reshape(single_data.shape[0]/feature_size, feature_size)
            train_target_data.append(single_data)
            train_batch_size += 1


            if train_batch_size<128:
                continue
            else:
                # source data
                train_source_data = numpy.asarray(train_source_data, dtype=floatX)
                train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)
                # target data
                train_target_data = numpy.asarray(train_target_data, dtype=floatX)
                train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)
                train_batch_size = 0

            # normalize
            train_source_data = (train_source_data/(1.15*2.**13)).astype(floatX)
            train_target_data = (train_target_data/(1.15*2.**13)).astype(floatX)

            # teacher force update
            tf_update_output = tf_generator_updater(train_source_data, train_target_data)
            tf_square_error = tf_update_output[0].mean()
            tf_generator_grad_norm = tf_update_output[1]

            # gan update
            gan_update_output = gan_generator_updater(train_source_data, train_target_data)
            generator_gan_cost               = gan_update_output[0].mean()
            discriminator_gan_cost           = gan_update_output[1].mean()
            discriminator_true_score         = gan_update_output[2].mean()
            discriminator_false_score        = gan_update_output[3].mean()
            gan_square_error                 = gan_update_output[4].mean()
            gan_generator_grad_norm          = gan_update_output[5]
            gan_discriminator_grad_norm      = gan_update_output[6]

            train_batch_count += 1

            tf_generator_cost_list.append(tf_square_error)
            tf_generator_grad_list.append(tf_generator_grad_norm)

            gan_generator_grad_list.append(gan_generator_grad_norm)
            gan_generator_cost_list.append(generator_gan_cost)
            gan_discriminator_grad_list.append(gan_discriminator_grad_norm)
            gan_discriminator_cost_list.append(discriminator_gan_cost)
            gan_true_score_list.append(discriminator_true_score)
            gan_false_score_list.append(discriminator_false_score)
            gan_mse_list.append(gan_square_error)

            if train_batch_count%100==0:
                print '=============sample length {}============================='.format(window_size)
                print 'epoch {}, batch_cnt {} => TF  generator mse cost  {}'.format(e, train_batch_count, tf_generator_cost_list[-1])
                print 'epoch {}, batch_cnt {} => GAN generator mse cost  {}'.format(e, train_batch_count, gan_mse_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN generator     cost  {}'.format(e, train_batch_count, gan_generator_cost_list[-1])
                print 'epoch {}, batch_cnt {} => GAN discriminator cost  {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN input score         {}'.format(e, train_batch_count, gan_true_score_list[-1])
                print 'epoch {}, batch_cnt {} => GAN sample score        {}'.format(e, train_batch_count, gan_false_score_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => TF generator grad norm  {}'.format(e, train_batch_count, tf_generator_grad_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1])
                print 'epoch {}, batch_cnt {} => GAN discrim.  grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1])


            if train_batch_count%100==0:
                numpy.save(file=model_name+'tf_mse',
                           arr=numpy.asarray(tf_generator_cost_list))
                numpy.save(file=model_name+'gan_mse',
                           arr=numpy.asarray(gan_mse_list))
                numpy.save(file=model_name+'gan_gen_cost',
                           arr=numpy.asarray(gan_generator_cost_list))
                numpy.save(file=model_name+'gan_disc_cost',
                           arr=numpy.asarray(gan_discriminator_cost_list))
                numpy.save(file=model_name+'gan_input_score',
                           arr=numpy.asarray(gan_true_score_list))
                numpy.save(file=model_name+'gan_sample_score',
                           arr=numpy.asarray(gan_false_score_list))
                numpy.save(file=model_name+'tf_gen_grad',
                           arr=numpy.asarray(tf_generator_grad_list))
                numpy.save(file=model_name+'gan_gen_grad',
                           arr=numpy.asarray(gan_generator_grad_list))
                numpy.save(file=model_name+'gan_disc_grad',
                           arr=numpy.asarray(gan_discriminator_grad_list))

            num_samples = 10
            if train_batch_count%100==0:
                valid_data_stream = set_valid_datastream(feature_size=feature_size,
                                                         window_size=1)
                # get valid data iterator
                valid_data_iterator = valid_data_stream.get_epoch_iterator()

                # for each batch
                valid_batch_size  = 0
                sampling_seed_data = []
                for batch_idx, batch_data in enumerate(valid_data_iterator):
                    # source data
                    single_data = batch_data[0]
                    single_data = single_data.reshape(single_data.shape[0]/feature_size, feature_size)
                    sampling_seed_data.append(single_data)

                    valid_batch_size += 1

                    if valid_batch_size<num_samples:
                        continue
                    else:
                        # source data
                        sampling_seed_data = numpy.asarray(sampling_seed_data, dtype=floatX)

                    # normalize
                    sampling_seed_data = (sampling_seed_data/(1.15*2.**13)).astype(floatX)
                    break

                num_sec     = 10
                sampling_length = num_sec*sampling_rate/feature_size

                curr_input_data  = sampling_seed_data.reshape(num_samples, feature_size)
                prev_hidden_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX)
                prev_hidden_data = numpy.tanh(prev_hidden_data)
                prev_cell_data   = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX)
                output_data      = numpy.zeros(shape=(sampling_length, num_samples, feature_size))
                for s in xrange(sampling_length):
                    generator_input = [curr_input_data,
                                       prev_hidden_data,
                                       prev_cell_data]

                    [curr_input_data, prev_hidden_data, prev_cell_data] = sample_generator(*generator_input)

                    output_data[s] = curr_input_data
                sample_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_samples, -1))
                sample_data = sample_data*(1.15*2.**13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')
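Note on the sampling length used above: each sampling step emits feature_size raw samples, so num_sec seconds of audio take num_sec*sampling_rate/feature_size steps (sampling_rate is a module-level constant not shown in these snippets). A quick check with assumed values:

sampling_rate = 16000   # assumed; the real value lives at module scope
feature_size  = 160     # assumed frame size
num_sec       = 10
sampling_length = num_sec * sampling_rate // feature_size
print(sampling_length)  # 1000 steps * 160 samples/step = 160000 samples = 10 s at 16 kHz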
Example #7
def train_model(feature_size,
                hidden_size,
                init_window_size,
                generator_rnn_model,
                generator_output_model,
                generator_gan_optimizer,
                generator_tf_optimizer,
                discriminator_rnn_model,
                discriminator_output_model,
                discriminator_optimizer,
                num_epochs,
                model_name):

    # generator updater
    print 'COMPILING GAN UPDATE FUNCTION '
    gan_updater = set_gan_update_function(generator_rnn_model=generator_rnn_model,
                                          generator_output_model=generator_output_model,
                                          discriminator_rnn_model=discriminator_rnn_model,
                                          discriminator_output_model=discriminator_output_model,
                                          generator_optimizer=generator_gan_optimizer,
                                          discriminator_optimizer=discriminator_optimizer,
                                          generator_grad_clipping=.0,
                                          discriminator_grad_clipping=.0)

    # print 'COMPILING TF UPDATE FUNCTION '
    # tf_updater = set_tf_update_function(generator_rnn_model=generator_rnn_model,
    #                                     generator_output_model=generator_output_model,
    #                                     generator_optimizer=generator_tf_optimizer,
    #                                     generator_grad_clipping=.0)

    # evaluator
    # print 'COMPILING EVALUATION FUNCTION '
    # evaluator = set_evaluation_function(generator_rnn_model=generator_rnn_model,
    #                                     generator_output_model=generator_output_model)

    # sample generator
    print 'COMPILING SAMPLING FUNCTION '
    sample_generator = set_sample_function(generator_rnn_model=generator_rnn_model,
                                           generator_output_model=generator_output_model)

    print 'READ RAW WAV DATA'
    _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav')
    valid_raw_data  = train_raw_data[160000000:]
    train_raw_data  = train_raw_data[:160000000]
    train_raw_data  = train_raw_data[2000:]
    train_raw_data  = (train_raw_data/(1.15*2.**13)).astype(floatX)
    valid_raw_data  = (valid_raw_data/(1.15*2.**13)).astype(floatX)

    num_train_total_steps = train_raw_data.shape[0]
    num_valid_total_steps = valid_raw_data.shape[0]
    batch_size      = 64

    num_samples      = 10
    last_seq_idx     = num_valid_total_steps-feature_size
    valid_seq_orders = np_rng.permutation(last_seq_idx)
    valid_seq_orders = valid_seq_orders[:last_seq_idx-last_seq_idx%num_samples]
    valid_seq_orders = valid_seq_orders.reshape((-1, num_samples))
    valid_seq_orders = valid_seq_orders[0]
    valid_source_idx  = valid_seq_orders.reshape((num_samples, 1)) + numpy.repeat(numpy.arange(feature_size).reshape((1, feature_size)), num_samples, axis=0)
    valid_source_data = valid_raw_data[valid_source_idx]
    valid_source_data = valid_source_data.reshape((num_samples, feature_size))

    print 'START TRAINING'
    # for each epoch
    tf_mse_list                = []
    tf_generator_grad_list     = []

    gan_generator_grad_list     = []
    gan_generator_cost_list     = []
    gan_discriminator_grad_list = []
    gan_discriminator_cost_list = []
    gan_true_score_list         = []
    gan_false_score_list        = []
    gan_mse_list                = []

    train_batch_count = 0
    for e in xrange(num_epochs):
        window_size      = init_window_size + 5*e
        sequence_size    = feature_size*window_size
        last_seq_idx     = num_train_total_steps-(sequence_size+feature_size)
        train_seq_orders = np_rng.permutation(last_seq_idx)
        train_seq_orders = train_seq_orders[:last_seq_idx-last_seq_idx%batch_size]
        train_seq_orders = train_seq_orders.reshape((-1, batch_size))

        # for each batch
        for batch_idx, batch_info in enumerate(train_seq_orders):
            # source data
            train_source_idx  = batch_info.reshape((batch_size, 1)) + numpy.repeat(numpy.arange(sequence_size).reshape((1, sequence_size)), batch_size, axis=0)
            train_source_data = train_raw_data[train_source_idx]
            train_source_data = train_source_data.reshape((batch_size, window_size, feature_size))
            train_source_data = numpy.swapaxes(train_source_data, axis1=0, axis2=1)

            # target data
            train_target_idx  = train_source_idx + feature_size
            train_target_data = train_raw_data[train_target_idx]
            train_target_data = train_target_data.reshape((batch_size, window_size, feature_size))
            train_target_data = numpy.swapaxes(train_target_data, axis1=0, axis2=1)

            # tf update
            # tf_update_output = tf_updater(train_source_data,
            #                               train_target_data)
            # tf_square_error        = tf_update_output[0].mean()
            # tf_generator_grad_norm = tf_update_output[1]
            #
            tf_square_error        = 0.0
            tf_generator_grad_norm = 0.0

            # gan update
            gan_update_output = gan_updater(train_source_data,
                                            train_target_data)
            generator_gan_cost               = gan_update_output[0].mean()
            discriminator_gan_cost           = gan_update_output[1].mean()
            discriminator_true_score         = gan_update_output[2].mean()
            discriminator_false_score        = gan_update_output[3].mean()
            gan_square_error                 = gan_update_output[4].mean()
            gan_generator_grad_norm          = gan_update_output[5]
            gan_discriminator_grad_norm      = gan_update_output[6]
            # generator_gan_cost               = 0.0
            # discriminator_gan_cost           = 0.0
            # discriminator_true_score         = 0.0
            # discriminator_false_score        = 0.0
            # gan_square_error                 = 0.0
            # gan_generator_grad_norm          = 0.0
            # gan_discriminator_grad_norm      = 0.0

            train_batch_count += 1

            tf_generator_grad_list.append(tf_generator_grad_norm)
            tf_mse_list.append(tf_square_error)

            gan_generator_grad_list.append(gan_generator_grad_norm)
            gan_generator_cost_list.append(generator_gan_cost)

            gan_discriminator_grad_list.append(gan_discriminator_grad_norm)
            gan_discriminator_cost_list.append(discriminator_gan_cost)

            gan_true_score_list.append(discriminator_true_score)
            gan_false_score_list.append(discriminator_false_score)

            gan_mse_list.append(gan_square_error)

            if train_batch_count%10==0:
                print '============{}_LENGTH{}============'.format(model_name, window_size)
                # print 'epoch {}, batch_cnt {} => TF  generator mse cost  {}'.format(e, train_batch_count, tf_mse_list[-1])
                print 'epoch {}, batch_cnt {} => GAN generator     cost  {}'.format(e, train_batch_count, gan_generator_cost_list[-1])
                print 'epoch {}, batch_cnt {} => GAN discriminator cost  {}'.format(e, train_batch_count, gan_discriminator_cost_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN input score         {}'.format(e, train_batch_count, gan_true_score_list[-1])
                print 'epoch {}, batch_cnt {} => GAN sample score        {}'.format(e, train_batch_count, gan_false_score_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN discrim.  grad norm {}'.format(e, train_batch_count, gan_discriminator_grad_list[-1])
                print 'epoch {}, batch_cnt {} => GAN generator grad norm {}'.format(e, train_batch_count, gan_generator_grad_list[-1])
                print '----------------------------------------------------------'
                print 'epoch {}, batch_cnt {} => GAN generator mse cost  {}'.format(e, train_batch_count, gan_mse_list[-1])
                # print '----------------------------------------------------------'
                # print 'epoch {}, batch_cnt {} => TF  generator grad norm {}'.format(e, train_batch_count, tf_generator_grad_list[-1])


            if train_batch_count%100==0:
                # numpy.save(file=model_name+'tf_mse',
                #            arr=numpy.asarray(tf_mse_list))
                # numpy.save(file=model_name+'tf_gen_grad',
                #            arr=numpy.asarray(tf_generator_grad_list))
                numpy.save(file=model_name+'gan_mse',
                           arr=numpy.asarray(gan_mse_list))
                numpy.save(file=model_name+'gan_gen_cost',
                           arr=numpy.asarray(gan_generator_cost_list))
                numpy.save(file=model_name+'gan_disc_cost',
                           arr=numpy.asarray(gan_discriminator_cost_list))
                numpy.save(file=model_name+'gan_input_score',
                           arr=numpy.asarray(gan_true_score_list))
                numpy.save(file=model_name+'gan_sample_score',
                           arr=numpy.asarray(gan_false_score_list))
                numpy.save(file=model_name+'gan_gen_grad',
                           arr=numpy.asarray(gan_generator_grad_list))
                numpy.save(file=model_name+'gan_disc_grad',
                           arr=numpy.asarray(gan_discriminator_grad_list))


            if train_batch_count%100==0:
                num_sec = 10
                num_samples = 10
                sampling_length = num_sec*sampling_rate/feature_size

                # seed the sampler with the last validation frame of the
                # first num_samples sequences
                curr_input_data  = valid_source_data[-1][:num_samples]
                prev_hidden_data = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX)
                prev_hidden_data = numpy.tanh(prev_hidden_data)
                prev_cell_data   = np_rng.normal(size=(num_samples, hidden_size)).astype(floatX)
                output_data      = numpy.zeros(shape=(sampling_length, num_samples, feature_size))
                for s in xrange(sampling_length):
                    generator_input = [curr_input_data,
                                       prev_hidden_data,
                                       prev_cell_data]

                    [curr_input_data, prev_hidden_data, prev_cell_data] = sample_generator(*generator_input)
                    output_data[s] = curr_input_data
                sample_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_samples, -1))
                sample_data = sample_data*(1.15*2.**13)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')
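
# save_wavfile is not shown in these snippets. A minimal sketch, assuming it
# writes each row of an int16 (num_samples, length) array to its own WAV file
# at the module-level sampling_rate; the real helper may behave differently:
from scipy.io import wavfile as pcm_wavfile

def save_wavfile(sample_data, file_prefix):
    # one WAV file per generated sequence
    for idx in xrange(sample_data.shape[0]):
        pcm_wavfile.write('{}_{}.wav'.format(file_prefix, idx),
                          sampling_rate,
                          sample_data[idx])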
Example #8
def train_model(feature_size,
                hidden_size,
                num_layers,
                generator_rnn_model,
                generator_optimizer,
                discriminator_rnn_model,
                discriminator_output_model,
                discriminator_optimizer,
                num_epochs,
                model_name):

    # generator updater
    print 'DEBUGGING GENERATOR UPDATE FUNCTION '
    generator_updater = set_generator_update_function(generator_rnn_model=generator_rnn_model,
                                                      discriminator_rnn_model=discriminator_rnn_model,
                                                      discriminator_output_model=discriminator_output_model,
                                                      generator_optimizer=generator_optimizer,
                                                      grad_clipping=3.6)

    # discriminator updater
    print 'DEBUGGING DISCRIMINATOR UPDATE FUNCTION '
    discriminator_updater = set_discriminator_update_function(generator_rnn_model=generator_rnn_model,
                                                              discriminator_rnn_model=discriminator_rnn_model,
                                                              discriminator_output_model=discriminator_output_model,
                                                              discriminator_optimizer=discriminator_optimizer,
                                                              grad_clipping=1.8)

    # sample generator
    print 'DEBUGGING SAMPLE GENERATOR FUNCTION '
    sample_generator = set_sample_generation_function(generator_rnn_model=generator_rnn_model)



    print 'START TRAINING'
    # for each epoch
    generator_cost_list = []
    discriminator_cost_list = []

    generator_grad_norm_mean     = 0.0
    discriminator_grad_norm_mean = 0.0

    init_window_size = 20
    for e in xrange(num_epochs):
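        # curriculum over sequence length: the training window grows by
        # five frames every epoch, so later epochs see longer sequences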
        window_size = init_window_size + 5*e

        # set data stream with proper length (window size)
        data_stream = set_datastream(feature_size=feature_size,
                                     window_size=window_size)
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()

        # for each batch
        batch_count = 0
        batch_size = 0
        source_data = []
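        # accumulate 128 single windows from the stream, then process them
        # together as one (time, batch, feature) array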
        for batch_idx, batch_data in enumerate(data_iterator):
            if batch_size==0:
                source_data = []
            # source data
            single_data = batch_data[0]
            single_data = single_data.reshape(window_size, feature_size)
            source_data.append(single_data)
            batch_size += 1

            if batch_size<128:
                continue
            else:
                source_data = numpy.asarray(source_data, dtype=floatX)
                source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)
                batch_size = 0

            # normalize
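            # int16 full scale is 2**15, so this maps samples into [-1, 1)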
            source_data = (source_data/(2.**15)).astype(floatX)

            # set generator initial values
            init_input_data  = np_rng.normal(size=(source_data.shape[1], feature_size)).astype(floatX)
            init_input_data  = numpy.clip(init_input_data, -1., 1.)
            # init_hidden_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
            # init_cell_data   = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
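            # hidden and cell states start from zeros; the commented lines
            # above are an alternative noisy initialization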
            init_hidden_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)
            init_cell_data   = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)

            # update generator
            generator_updater_input = [init_input_data,
                                       init_hidden_data,
                                       init_cell_data,
                                       window_size]

            generator_updater_output = generator_updater(*generator_updater_input)
            generator_cost = generator_updater_output[1].mean()
            # generator_grad_norm = generator_updater_output[-1]

            # update discriminator
            init_input_data  = np_rng.normal(size=(source_data.shape[1], feature_size)).astype(floatX)
            init_input_data  = numpy.clip(init_input_data, -1., 1.)
            # init_hidden_data = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
            # init_cell_data   = np_rng.normal(size=(num_layers, source_data.shape[1], hidden_size)).astype(floatX)
            init_hidden_data = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)
            init_cell_data   = numpy.zeros(shape=(num_layers, source_data.shape[1], hidden_size), dtype=floatX)

            discriminator_updater_input = [source_data,
                                           init_input_data,
                                           init_hidden_data,
                                           init_cell_data]

            discriminator_updater_output = discriminator_updater(*discriminator_updater_input)
            input_cost_data    = discriminator_updater_output[0]
            sample_cost_data   = discriminator_updater_output[1]
            discriminator_cost = discriminator_updater_output[2].mean()
            # discriminator_grad_norm = discriminator_updater_output[-1]

            # generator_grad_norm_mean     += generator_grad_norm
            # discriminator_grad_norm_mean += discriminator_grad_norm

            batch_count += 1

            if batch_count%500==0:
                print '=============sample length {}============================='.format(window_size)
                print 'epoch {}, batch_cnt {} => generator     cost {}'.format(e, batch_count, generator_cost)
                print 'epoch {}, batch_cnt {} => discriminator cost {}'.format(e, batch_count, discriminator_cost)
                print 'epoch {}, batch_cnt {} => input data    cost {}'.format(e, batch_count, input_cost_data.mean())
                print 'epoch {}, batch_cnt {} => sample data   cost {}'.format(e, batch_count, sample_cost_data.mean())
                # print 'epoch {}, batch_cnt {} => generator     grad norm{}'.format(e, batch_count, generator_grad_norm_mean/batch_count)
                # print 'epoch {}, batch_cnt {} => discriminator grad norm{}'.format(e, batch_count, discriminator_grad_norm_mean/batch_count)

                generator_cost_list.append(generator_cost)
                discriminator_cost_list.append(discriminator_cost)
                plot_learning_curve(cost_values=[generator_cost_list, discriminator_cost_list],
                                    cost_names=['Generator Cost', 'Discriminator Cost'],
                                    save_as=model_name+'_model_cost.png',
                                    legend_pos='upper left')

                plot_learning_curve(cost_values=[input_cost_data.mean(axis=(1, 2)), sample_cost_data.mean(axis=(1, 2))],
                                    cost_names=['Data Distribution', 'Model Distribution'],
                                    save_as=model_name+'_seq_cost{}.png'.format(batch_count),
                                    legend_pos='upper left')


            if batch_count%5000==0:
                num_samples = 10
                num_sec     = 10
                sampling_length = num_sec*sampling_rate/feature_size
                # set generator initial values
                init_input_data  = np_rng.normal(size=(num_samples, feature_size)).astype(floatX)
                init_input_data  = numpy.clip(init_input_data, -1., 1.)
                # init_hidden_data = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX)
                # init_hidden_data = numpy.clip(init_hidden_data, -1., 1.)
                # init_cell_data   = np_rng.normal(size=(num_layers, num_samples, hidden_size)).astype(floatX)
                init_hidden_data = numpy.zeros(shape=(num_layers, num_samples, hidden_size), dtype=floatX)
                init_cell_data   = numpy.zeros(shape=(num_layers, num_samples, hidden_size), dtype=floatX)

                generator_input = [init_input_data,
                                   init_hidden_data,
                                   init_cell_data,
                                   sampling_length]

                sample_data = sample_generator(*generator_input)[0]

                sample_data = numpy.swapaxes(sample_data, axis1=0, axis2=1)
                sample_data = sample_data.reshape((num_samples, -1))
                sample_data = sample_data*(2.**15)
                sample_data = sample_data.astype(numpy.int16)
                save_wavfile(sample_data, model_name+'_sample')
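
# plot_learning_curve is not shown in these snippets. A minimal sketch with
# matplotlib, assuming each entry of cost_values is a 1-D series of cost
# values; the real helper may differ:
import matplotlib
matplotlib.use('Agg')  # render to file, no display needed
import matplotlib.pyplot as plt

def plot_learning_curve(cost_values, cost_names, save_as, legend_pos='upper left'):
    plt.figure()
    for values, name in zip(cost_values, cost_names):
        plt.plot(values, label=name)
    plt.xlabel('updates')
    plt.ylabel('cost')
    plt.legend(loc=legend_pos)
    plt.savefig(save_as)
    plt.close()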
Example #9
def train_model(feature_size,
                time_size,
                hidden_size,
                num_layers,
                recurrent_model,
                output_model,
                model_optimizer,
                controller_optimizer,
                data_stream,
                num_epochs,
                model_name):

    print 'DEBUGGING UPDATE FUNCTION'
    update_function = set_update_function(recurrent_model=recurrent_model,
                                          output_model=output_model,
                                          model_optimizer=model_optimizer,
                                          controller_optimizer=controller_optimizer,
                                          grad_clip=1.0)

    print 'DEBUGGING GENERATOR FUNCTION'
    generation_function = set_generation_function(recurrent_model=recurrent_model,
                                                  output_model=output_model)

    # for each epoch
    cost_list = []
    cnt = 0
    for e in xrange(num_epochs):
        # get data iterator
        data_iterator = data_stream.get_epoch_iterator()
        # for each batch
        batch_count = 0
        batch_size = 0
        source_data = []
        target_data = []
        for batch_idx, batch_data in enumerate(data_iterator):
            if batch_size==0:
                source_data = []
                target_data = []
            # source data
            single_data = batch_data[0]
            single_data = single_data.reshape(time_size, feature_size)
            source_data.append(single_data)
            # target data
            single_data = batch_data[1]
            single_data = single_data.reshape(time_size, feature_size)
            target_data.append(single_data)
            batch_size += 1

            if batch_size<128:
                continue
            else:
                source_data = numpy.asarray(source_data, dtype=floatX)
                source_data = numpy.swapaxes(source_data, axis1=0, axis2=1)
                target_data = numpy.asarray(target_data, dtype=floatX)
                target_data = numpy.swapaxes(target_data, axis1=0, axis2=1)
                batch_size = 0

            # normalize
            source_data = (source_data/(2.**15)).astype(floatX)
            target_data = (target_data/(2.**15)).astype(floatX)

            # update model
            update_input  = [source_data,
                             target_data]
            update_output = update_function(*update_input)

            # update result
            sample_cost = update_output[2]


            batch_count += 1

            if batch_count%100==0:
                print 'epoch {}, batch_count {} : mean cost {} max cost {}'.format(e, batch_count, sample_cost.mean(), sample_cost.max(axis=0).mean())
                cost_list.append(sample_cost.mean())

            if batch_count%100==0:
                plot_learning_curve(cost_values=[cost_list,],
                                    cost_names=['Input cost (train)',],
                                    save_as=model_name+'.png',
                                    legend_pos='upper left')

            if batch_count%1000==0:
                generation_sample = 10
                generation_length = 100
                input_data  = numpy.clip(np_rng.normal(size=(generation_sample, feature_size)).astype(floatX), -1., 1.)
                hidden_data_list = [numpy.clip(np_rng.normal(size=(generation_sample, hidden_size)).astype(floatX), -1., 1.) for l in xrange(num_layers)]
                cell_data_list   = [numpy.zeros(shape=(generation_sample, hidden_size)).astype(floatX) for l in xrange(num_layers)]
                output_data = numpy.zeros(shape=(generation_length, generation_sample, feature_size))

                input_list = [input_data, ] + hidden_data_list + cell_data_list
                for t in xrange(generation_length):
                    result_data = generation_function(*input_list)
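                    # result_data holds, in order: the updated hidden states
                    # (num_layers entries), the updated cell states
                    # (num_layers entries), and the generated frame (last entry)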

                    hidden_data_list = result_data[0:num_layers]
                    cell_data_list   = result_data[num_layers:2*num_layers]
                    input_data       = result_data[-1]
                    input_list = [input_data, ] + hidden_data_list + cell_data_list

                    output_data[t] = input_data
                output_data = numpy.swapaxes(output_data, axis1=0, axis2=1)
                output_data = output_data.reshape((generation_sample, -1))
                output_data = output_data*(2.**15)
                output_data = output_data.astype(numpy.int16)
                save_wavfile(output_data, model_name+'_sample')
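
# These snippets rely on module-level globals defined elsewhere in the
# project. Plausible definitions; the RNG seed and the sampling rate here
# are assumptions, not values taken from the source:
import numpy
import theano

floatX        = theano.config.floatX            # typically 'float32'
np_rng        = numpy.random.RandomState(1234)  # seed is an assumption
sampling_rate = 16000                           # assumed audio sampling rate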
Example #10
def test_sampling(feature_size,
                  hidden_size,
                  seed_window_size,
                  generator_model,
                  model_name):


    print 'COMPILING SEEDING FUNCTION '
    t = time()
    seeding_function = set_seeding_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    print 'COMPILING SAMPLING FUNCTION '
    t = time()
    sampling_function = set_sampling_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    # the validation loop below also needs the evaluation function
    print 'COMPILING EVALUATION FUNCTION '
    t = time()
    evaluation_function = set_evaluation_function(generator_model=generator_model)
    print '%.2f SEC '%(time()-t)

    print 'READ RAW WAV DATA'
    _, train_raw_data = wavfile.read('/data/lisatmp4/taesup/data/YouTubeAudio/XqaJ2Ol5cC4.wav')
    valid_raw_data  = train_raw_data[160000000:]
    train_raw_data  = train_raw_data[:160000000]
    train_raw_data  = train_raw_data[2000:]
    train_raw_data  = (train_raw_data/(1.15*2.**13)).astype(floatX)
    valid_raw_data  = (valid_raw_data/(1.15*2.**13)).astype(floatX)

    num_train_total_steps = train_raw_data.shape[0]
    num_valid_total_steps = valid_raw_data.shape[0]
    batch_size      = 64

    num_valid_sequences = num_valid_total_steps/(feature_size*seed_window_size)-1
    valid_source_data = valid_raw_data[:num_valid_sequences*(feature_size*seed_window_size)]
    valid_source_data = valid_source_data.reshape((num_valid_sequences, seed_window_size, feature_size))
    valid_target_data = valid_raw_data[feature_size:feature_size+num_valid_sequences*(feature_size*seed_window_size)]
    valid_target_data = valid_target_data.reshape((num_valid_sequences, seed_window_size, feature_size))

    valid_raw_data = None
    num_seeds = 10
    valid_shuffle_idx = np_rng.permutation(num_valid_sequences)
    valid_source_data = valid_source_data[valid_shuffle_idx]
    valid_target_data = valid_target_data[valid_shuffle_idx]
    valid_seed_data   = valid_source_data[:num_seeds][0][:]
    valid_source_data = numpy.swapaxes(valid_source_data, axis1=0, axis2=1)
    valid_target_data = numpy.swapaxes(valid_target_data, axis1=0, axis2=1)
    num_valid_batches = num_valid_sequences/batch_size


    print 'NUM OF VALID BATCHES : ', num_valid_sequences/batch_size
    best_valid = 10000.

    print 'FIRST GET SEEDING'

    valid_batch_count = 0
    tf_valid_mse = 0.
    valid_sample_cost_list = []
    for valid_idx in xrange(num_valid_batches):
        start_idx = batch_size*valid_idx
        end_idx   = batch_size*(valid_idx+1)
        evaluation_outputs = evaluation_function(valid_source_data[:, start_idx:end_idx, :],
                                                 valid_target_data[:, start_idx:end_idx, :])
        tf_valid_mse += evaluation_outputs[0].mean()
        valid_batch_count += 1

        if valid_idx==0:
            recon_data = evaluation_outputs[1]
            recon_data = numpy.swapaxes(recon_data, axis1=0, axis2=1)
            recon_data = recon_data[:10]
            recon_data = recon_data.reshape((10, -1))
            recon_data = recon_data*(1.15*2.**13)
            recon_data = recon_data.astype(numpy.int16)
            save_wavfile(recon_data, model_name+'_recon')

            orig_data = valid_target_data[:, start_idx:end_idx, :]
            orig_data = numpy.swapaxes(orig_data, axis1=0, axis2=1)
            orig_data = orig_data[:10]
            orig_data = orig_data.reshape((10, -1))
            orig_data = orig_data*(1.15*2.**13)
            orig_data = orig_data.astype(numpy.int16)
            save_wavfile(orig_data, model_name+'_orig')

    valid_sample_cost_list.append(tf_valid_mse/valid_batch_count)
    print '----------------------------------------------------------'
    print 'valid sample cost {}'.format(valid_sample_cost_list[-1])

    if best_valid>valid_sample_cost_list[-1]:
        best_valid = valid_sample_cost_list[-1]
Example #11
    # the opening line of this call is missing; the filename is an assumption
    numpy.save(file=model_name+'_train_lambda_regularizer',
               arr=numpy.asarray(train_lambda_regularizer_list))
    numpy.save(file=model_name+'_valid_sample_cost',
               arr=numpy.asarray(valid_sample_cost_list))

    num_sec = 100
    sampling_length = num_sec*sampling_rate/feature_size
    seed_input_data = valid_seed_data

    [generated_sequence, ] = sampling_function(seed_input_data,
                                               sampling_length)

    sample_data = numpy.swapaxes(generated_sequence, axis1=0, axis2=1)
    sample_data = sample_data.reshape((num_seeds, -1))
    sample_data = sample_data*(1.15*2.**13)
    sample_data = sample_data.astype(numpy.int16)
    save_wavfile(sample_data, model_name+'_sample')

    if best_valid==valid_sample_cost_list[-1]:
        save_model_params(generator_model, model_name+'_model.pkl')
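
# save_model_params is not shown in these snippets. A minimal sketch with
# cPickle; the get_parameters accessor on the model is an assumption:
import cPickle

def save_model_params(model, filename):
    # persist raw parameter arrays so the model can be reloaded later
    params = [param.get_value() for param in model.get_parameters()]
    with open(filename, 'wb') as f:
        cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)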


if __name__=="__main__":
    feature_size  = 1600
    hidden_size   =  800

    model_name = 'LSTM_REGULARIZER_LAMBDA' \
                + '_FEATURE{}'.format(int(feature_size)) \
                + '_HIDDEN{}'.format(int(hidden_size))

    # generator model
    generator_model = set_generator_model(input_size=feature_size,