예제 #1
0
 def setUp(self):
     """Build the coordinator and the AudioReader used by each test."""
     coordinator = tf.train.Coordinator()
     self.coord = coordinator
     # Reader settings come from constants defined outside this method.
     reader_kwargs = {
         'sample_rate': SAMPLE_RATE,
         'sample_size': SAMPLE_SIZE,
         'silence_threshold': SILENCE_THRESHOLD,
     }
     self.reader = AudioReader(DATA_DIR, DATA_OUT_DIR, coordinator,
                               **reader_kwargs)
예제 #2
0
def main():
    """Create an AudioReader over ./input and ./output and start its threads.

    Neither a coordinator nor a session is supplied: ``None`` is passed for
    both.
    """
    audio_reader = AudioReader(
        './input',
        './output',
        None,  # third positional slot; other call sites pass a Coordinator
        16000,  # presumably the sample rate -- confirm against AudioReader
        sample_size=10000,
        silence_threshold=0.3)
    audio_reader.start_threads(None)
예제 #3
0
class TestAudioReader(tf.test.TestCase):
    """Checks that AudioReader yields input/output audio that nearly match."""

    def setUp(self):
        """Create the coordinator and the reader used by the test."""
        self.coord = tf.train.Coordinator()
        self.reader = AudioReader(DATA_DIR,
                                  DATA_OUT_DIR,
                                  self.coord,
                                  sample_rate=SAMPLE_RATE,
                                  sample_size=SAMPLE_SIZE,
                                  silence_threshold=SILENCE_THRESHOLD)

    def testAudioThread(self):
        """Dequeue one input/output pair and bound its mean squared error.

        Five further batches are dequeued and printed for inspection before
        the assertion on the first pair is made.
        """
        max_allowed_mse = 1.0
        extra_batches_to_print = 5
        with self.test_session() as sess:
            tf.train.start_queue_runners(sess=sess, coord=self.coord)
            self.reader.start_threads(sess)
            try:
                input_batch = self.reader.dequeue(1)
                input_audio, output_audio = sess.run(input_batch)
                print(input_audio)

                # Debug output: show the first element of the next batches.
                for _ in range(extra_batches_to_print):
                    print(sess.run(self.reader.dequeue(1))[0])

                mse = mean_squared_error(input_audio.flatten(),
                                         output_audio.flatten())
                self.assertLess(mse, max_allowed_mse)
            finally:
                # Ask the background threads to stop even when a run or the
                # assertion above fails; otherwise they would keep running.
                self.coord.request_stop()
예제 #4
0
# Script fragment: builds the two optimizer steps, starts an AudioReader over
# ./sampleTrue, initializes variables and enters the training loop.
# D_loss, G_loss, theta_D and theta_G are defined outside this excerpt.

# Only update D(X)'s parameters, so var_list = theta_D
D_solver = tf.train.GradientDescentOptimizer(learning_rate=3e-3).minimize(
    D_loss, var_list=theta_D)
# Only update G(X)'s parameters, so var_list = theta_G
G_solver = tf.train.GradientDescentOptimizer(learning_rate=3e-3).minimize(
    G_loss, var_list=theta_G)

# One coordinator is handed to both the reader and the queue runners.
coord = tf.train.Coordinator()
sess = tf.Session()

# Reader over the ./sampleTrue directory.
directory = './sampleTrue'
reader = AudioReader(directory,
                     coord,
                     sample_rate=22000,
                     gc_enabled=False,
                     receptive_field=1000,
                     sample_size=21000,
                     silence_threshold=0.05)
# NOTE(review): queue runners and reader threads are started before the
# variable initializer below is run -- confirm nothing they touch needs
# initialized variables.
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
reader.start_threads(sess)

init = tf.global_variables_initializer()
sess.run(init)

# NOTE(review): prevA is not used in the visible part of this fragment.
prevA = []
# Training loop; the rest of the loop body lies outside this excerpt.
for it in range(1000):
    batch_data = []

    # Wall-clock start of this iteration.
    start_time = time.time()
예제 #5
0
def main():
    """Train the network on one or more GPUs with periodic checkpoints.

    Parses the command-line arguments, builds one tower per GPU that shares
    variables with the others, averages the per-tower gradients when more
    than one GPU is used, and then runs the training loop. A checkpoint is
    written every ``args.checkpoint_every`` steps and once more on exit if
    steps were taken since the last save.

    Returns early (after printing the problem) when the directory arguments
    are invalid. Any error raised while restoring a checkpoint is re-raised
    so that an existing model is not overwritten by accident.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        # Pass the adjusted threshold; the raw argument used to be passed
        # here, which left the near-zero opt-out above without effect.
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)

    tower_grads = []
    tower_losses = []
    # range() works on both Python 2 and 3 (xrange exists only on Python 2).
    for device_index in range(args.num_gpus):
        with tf.device('/gpu:%d' % device_index), tf.name_scope(
                'tower_%d' % device_index) as scope:
            audio_batch = reader.dequeue(args.batch_size)
            loss, optimizer, trainable = make_net(args,
                                                  wavenet_params,
                                                  audio_batch,
                                                  reuse_variables=True)
            grads = optimizer.compute_gradients(loss, var_list=trainable)
            tower_losses.append(loss)
            tower_grads.append(grads)
            # Only the summaries of the last tower built are kept.
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
            tf.get_variable_scope().reuse_variables()

    if args.num_gpus == 1:
        optim = optimizer.minimize(loss, var_list=trainable)
    else:
        loss = tf.reduce_mean(tower_losses)
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            # The variable is taken from the first tower's entry. It is read
            # up front so the "no gradient" branch below can use it too
            # (it used to be referenced there before being assigned).
            v = grad_and_vars[0][1]
            grads = [tf.expand_dims(g, 0)
                     for g, _ in grad_and_vars if g is not None]

            if not grads:
                average_grads.append((None, v))
                continue

            # Stack the per-tower gradients and average over the tower axis.
            grad = tf.concat(0, grads)
            grad = tf.reduce_mean(grad, 0)
            average_grads.append((grad, v))
        optim = optimizer.apply_gradients(average_grads)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_summary(summaries)
    #summaries = tf.merge_all_summaries()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            allow_soft_placement=True))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    # step stays None when the loop below never runs (for example when the
    # restored step already reached num_steps); the finally clause checks it.
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Save only when at least one step ran since the last checkpoint.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #6
0
def main():
    """Train a WaveNetModel with Adam, checkpointing periodically.

    Parses the command-line arguments, builds the audio input pipeline and
    the network, then runs the training loop. A checkpoint is written every
    ``args.checkpoint_every`` steps and once more on exit if steps were taken
    since the last save.

    Returns early (after printing the problem) when the directory arguments
    are invalid. Any error raised while restoring a checkpoint is re-raised
    so that an existing model is not overwritten by accident.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        # Pass the adjusted threshold; the raw argument used to be passed
        # here, which left the near-zero opt-out above without effect.
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"])
    # A strength of exactly 0 is forwarded to net.loss as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(audio_batch, args.l2_regularization_strength)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver()

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    # step stays None when the loop below never runs (for example when the
    # restored step already reached num_steps); without this the finally
    # clause would fail on an unbound name.
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                  .format(step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Save only when at least one step ran since the last checkpoint.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #7
0
def main():
    """Train a WaveNetModel with a selectable optimizer and checkpoints.

    Parses the command-line arguments, builds the audio input pipeline and
    the network, picks Adam or momentum SGD from ``args.optimizer``, then
    runs the training loop. A checkpoint is written every
    ``args.checkpoint_every`` steps and once more on exit if steps were taken
    since the last save.

    Returns early (after printing the problem) when the directory arguments
    are invalid.

    Raises:
        RuntimeError: if ``args.optimizer`` is neither ADAM_OPTIMIZER nor
            SGD_OPTIMIZER.
        Exception: anything raised while restoring a checkpoint is re-raised
            so that an existing model is not overwritten by accident.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        # Pass the adjusted threshold; the raw argument used to be passed
        # here, which left the near-zero opt-out above without effect.
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"])
    # A strength of exactly 0 is forwarded to net.loss as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(audio_batch, args.l2_regularization_strength)
    if args.optimizer == ADAM_OPTIMIZER:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif args.optimizer == SGD_OPTIMIZER:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=args.learning_rate, momentum=args.sgd_momentum)
    else:
        # This shouldn't happen, given the choices specified in argument
        # specification.
        raise RuntimeError('Invalid optimizer option.')
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    # step stays None when the loop below never runs (for example when the
    # restored step already reached num_steps); without this the finally
    # clause would fail on an unbound name.
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Save only when at least one step ran since the last checkpoint.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #8
0
def main():
    """Pre-train a feed-forward classifier, then run a GAN-style loop.

    The function works in two phases inside one graph and session:

    1. Build the ``ffnn`` network (used below as the discriminator) and
       train it for ``max_training_steps`` steps on samples drawn at random
       from ``./sampleTrue`` (label 1) and ``./sampleFalse`` (label 0).
    2. Build a WaveNetModel generator and, for ``NUM_EPOCHS`` iterations,
       train the discriminator on real audio, then on generated audio, and
       finally run the generator's optimizer step.

    Results are only printed; the checkpoint-saving code is disabled.
    """

    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        # Discriminator hyper-parameters.
        batch_size = 1
        hidden1_units = 5202
        hidden2_units = 2601
        hidden3_units = 1300
        hidden4_units = 650
        hidden5_units = 325
        max_training_steps = 1

        # Learning rate starts at 3e-2 and is multiplied by 0.9 every 100
        # global steps (staircase decay).
        global_step = tf.Variable(0, name='global_step', trainable=False)
        initial_training_learning_rate = 3e-2
        training_learning_rate = tf.train.exponential_decay(
            initial_training_learning_rate,
            global_step,
            100,
            0.9,
            staircase=True)

        inputs_placeholder, labels_placeholder = placeholder_inputs(batch_size)

        # Discriminator graph: logits, loss, training op and evaluation op.
        logits = ffnn.inference(inputs_placeholder, hidden1_units,
                                hidden2_units, hidden3_units, hidden4_units,
                                hidden5_units)
        loss = ffnn.loss(logits, labels_placeholder)
        train_op = ffnn.training(loss, training_learning_rate, global_step)
        # NOTE(review): eval_correct, summary and summary_writer are not used
        # again in this function.
        eval_correct = ffnn.evaluation(logits, labels_placeholder)

        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        summary_writer = tf.summary.FileWriter('./logdir', sess.graph)

        sess.run(init)

        args = get_arguments()

        # Load parameters from wavenet params json file
        with open(args.wavenet_params, 'r') as f:
            wavenet_params = json.load(f)

        quantization_channels = wavenet_params['quantization_channels']

        # NOTE(review): `is not None` is the idiomatic form of this test.
        if args.restore_from != None:
            restore_from = args.restore_from
            print("Restoring from: ")
            print(restore_from)

        else:
            restore_from = ""

        try:
            saved_global_step = load(saver, sess, restore_from)
            if saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1
            else:
                # NOTE(review): label_batch_size is not defined anywhere in
                # this function, and counter is never read afterwards --
                # confirm whether this branch can run without a NameError.
                counter = saved_global_step % label_batch_size

        except:
            print(
                "Something went wrong while restoring checkpoint. "
                "We will terminate training to avoid accidentally overwriting "
                "the previous model.")
            raise

        # TODO: Find a more robust way to find different data sets

        # Training data
        # `reader` serves the label-1 samples, `reader2` the label-0 samples.
        directory = './sampleTrue'
        reader = AudioReader(directory,
                             coord,
                             sample_rate=16000,
                             gc_enabled=False,
                             receptive_field=5117,
                             sample_size=15117,
                             silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        directory = './sampleFalse'
        reader2 = AudioReader(directory,
                              coord,
                              sample_rate=16000,
                              gc_enabled=False,
                              receptive_field=5117,
                              sample_size=15117,
                              silence_threshold=0.05)
        # NOTE(review): queue runners are started a second time here, and
        # neither `threads` nor `threads2` is joined in this function.
        threads2 = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader2.start_threads(sess)

        # Phase 1: pre-train the discriminator.
        total_loss = 0
        for step in range(saved_global_step + 1, max_training_steps):
            start_time = time.time()

            batch_data = []
            label_data = []

            if (step % 100 == 0):
                print('Current learning rate: %6f' %
                      (sess.run(training_learning_rate)))

            for b in range(batch_size):
                # Pick the real (1) or fake (0) reader at random.
                label = randint(0, 1)

                # Keep dequeuing until the sample has at least
                # ffnn.INPUT_SIZE entries.
                # NOTE(review): dequeue(1) is called inside the loop;
                # presumably each call adds a new op to the graph -- confirm.
                if label == 1:
                    data = sess.run(reader.dequeue(1))
                    while (len(data[0]) < ffnn.INPUT_SIZE):
                        data = sess.run(reader.dequeue(1))
                else:
                    data = sess.run(reader2.dequeue(1))
                    while (len(data[0]) < ffnn.INPUT_SIZE):
                        data = sess.run(reader2.dequeue(1))
                data = np.array(data[0])

                # Keep only the first ffnn.INPUT_SIZE entries.
                cut = []
                for i in range(ffnn.INPUT_SIZE):
                    cut.append(data[i])

                data = cut

                # processing
                samples = process(data, quantization_channels, 1)

                batch_data.append(samples)
                label_data.append(label)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time
            total_loss = total_loss + loss_value

            print('Step %d: loss = %.7f (%.3f sec)' %
                  (step, loss_value, duration))
            # The string literal below is disabled checkpointing code; it is
            # evaluated as an expression and discarded.
            '''
            if step % 100 == 0 or (step + 1) == max_training_steps:
                average = total_loss / (step + 1)
                print('Cumulative average loss: %6f' % (average))
                # TODO: Update train script to add data to new directory
                checkpoint_file = os.path.join('./logdir/init-train/', 'model.ckpt')
                print("Generating checkpoint file...")
                saver.save(sess, checkpoint_file, global_step=step)
            '''

        # Phase 2: build the generator.
        # Lambda for white noise sampler
        gi_sampler = get_generator_input_sampler()

        # Initialize generator WaveNet
        G = WaveNetModel(
            batch_size=1,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            initial_filter_width=wavenet_params["initial_filter_width"])

        # White noise generator params
        white_mean = 0
        white_sigma = 1
        white_length = ffnn.INPUT_SIZE

        white_noise = gi_sampler(white_mean, white_sigma, white_length)
        white_noise = process(white_noise, quantization_channels, 1)
        white_noise_t = tf.convert_to_tensor(white_noise)

        # initialize generator
        # The generator loss is built once, from the white-noise tensor.
        w_loss, w_prediction = G.loss(input_batch=white_noise_t,
                                      name='generator')

        G_variables = tf.trainable_variables(scope='wavenet')
        optimizer = optimizer_factory[args.optimizer](learning_rate=3e-2,
                                                      momentum=args.momentum)
        optim = optimizer.minimize(w_loss, var_list=G_variables)

        # NOTE(review): running the global initializer again presumably
        # resets the discriminator variables trained in phase 1 -- confirm
        # this is intended.
        init = tf.global_variables_initializer()
        sess.run(init)

        print(sess.run(tf.shape(w_prediction)))

        # main GAN training loop
        for step in range(NUM_EPOCHS):
            batch_data = []
            label_data = []

            # train D on real
            for d_index in range(batch_size):
                data = sess.run(reader.dequeue(1))
                data = data[0]

                d_real_data = process(data, quantization_channels, 1)

                batch_data.append(d_real_data)
                label_data.append(1)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)

            _, d_real_loss = sess.run([train_op, loss], feed_dict=feed_dict)

            print("Real loss")
            print(d_real_loss)

            batch_data = []
            label_data = []

            # train D on fake
            for d_index in range(batch_size):
                # NOTE(review): a new placeholder and prediction op are
                # created on every pass, so the graph presumably grows with
                # each iteration -- confirm.
                samples = tf.placeholder(tf.int32)

                if args.fast_generation:
                    next_sample = G.predict_proba_incremental(
                        samples, args.gc_id)
                else:
                    next_sample = G.predict_proba(samples, args.gc_id)

                if args.fast_generation:
                    # NOTE(review): re-running the global initializer here
                    # presumably resets every variable, including trained
                    # ones -- confirm.
                    sess.run(tf.global_variables_initializer())
                    sess.run(G.init_ops)

                # Seed value; removed again once generation is done.
                waveform = [0]

                # NOTE(review): this loop rebinds `step`, the variable of the
                # enclosing GAN loop.
                for step in range(ffnn.INPUT_SIZE):
                    if args.fast_generation:
                        # Incremental mode feeds only the latest sample.
                        outputs = [next_sample]
                        outputs.extend(G.push_ops)
                        window = waveform[-1]
                    else:
                        # Otherwise feed at most the last
                        # G.receptive_field samples.
                        if len(waveform) > G.receptive_field:
                            window = waveform[-G.receptive_field:]
                        else:
                            window = waveform
                        outputs = [next_sample]

                    # Run the WaveNet to predict the next sample.
                    prediction = sess.run(outputs, feed_dict={samples:
                                                              window})[0]

                    # Scale prediction distribution using temperature.
                    # The divisor is the temperature, fixed at 1 here. The
                    # log-probabilities are renormalized with logaddexp and
                    # divide warnings are silenced for log(0).
                    np.seterr(divide='ignore')
                    scaled_prediction = np.log(prediction) / 1
                    scaled_prediction = (
                        scaled_prediction -
                        np.logaddexp.reduce(scaled_prediction))
                    scaled_prediction = np.exp(scaled_prediction)
                    np.seterr(divide='warn')

                    # Draw the next quantized value from the distribution.
                    sample = np.random.choice(np.arange(quantization_channels),
                                              p=scaled_prediction)
                    waveform.append(sample)

                # Drop the seed value.
                del waveform[0]

                d_fake_data = process(waveform, quantization_channels, 0)

                batch_data.append(d_fake_data)
                label_data.append(0)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)

            _, d_fake_loss = sess.run([train_op, loss], feed_dict=feed_dict)

            print("Fake loss")
            print(d_fake_loss)

            batch_data = []
            label_data = []

            # train G, but don't train D
            # Same generation procedure as the "train D on fake" section
            # above, but the generated batch is labelled 1.
            for g_index in range(batch_size):
                samples = tf.placeholder(tf.int32)

                if args.fast_generation:
                    next_sample = G.predict_proba_incremental(
                        samples, args.gc_id)
                else:
                    next_sample = G.predict_proba(samples, args.gc_id)

                if args.fast_generation:
                    sess.run(tf.global_variables_initializer())
                    sess.run(G.init_ops)

                waveform = [0]

                # NOTE(review): rebinds the outer `step` here as well.
                for step in range(ffnn.INPUT_SIZE):
                    if args.fast_generation:
                        outputs = [next_sample]
                        outputs.extend(G.push_ops)
                        window = waveform[-1]
                    else:
                        if len(waveform) > G.receptive_field:
                            window = waveform[-G.receptive_field:]
                        else:
                            window = waveform
                        outputs = [next_sample]

                    # Run the WaveNet to predict the next sample.
                    prediction = sess.run(outputs, feed_dict={samples:
                                                              window})[0]

                    # Scale prediction distribution using temperature.
                    np.seterr(divide='ignore')
                    scaled_prediction = np.log(prediction) / 1
                    scaled_prediction = (
                        scaled_prediction -
                        np.logaddexp.reduce(scaled_prediction))
                    scaled_prediction = np.exp(scaled_prediction)
                    np.seterr(divide='warn')

                    sample = np.random.choice(np.arange(quantization_channels),
                                              p=scaled_prediction)
                    waveform.append(sample)

                del waveform[0]

                g_data = process(waveform, quantization_channels, 0)

                batch_data.append(g_data)
                label_data.append(1)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)

            # NOTE(review): `optim` minimizes w_loss, which was built from
            # white_noise_t above; as far as this function shows, the fed
            # batch only influences the fetched discriminator `loss` --
            # confirm that the generator update is wired as intended.
            _, g_loss = sess.run([optim, loss], feed_dict=feed_dict)

            print("Generator loss")
            print(g_loss)
        '''
예제 #9
0
def main():
    """Build a WaveNet generator graph driven by white noise and initialize it.

    Reads the WaveNet hyper-parameters from the JSON file named by
    ``args.wavenet_params``, constructs a ``WaveNetModel`` with batch size 1,
    feeds it a processed white-noise sequence of length ``ffnn.INPUT_SIZE``,
    builds an optimizer step over the variables in the ``'wavenet'`` scope and
    runs the global variable initializer.

    The actual training loop and the logit-plotting code are disabled: they
    are kept below as bare string literals, so this function currently only
    builds and initializes the graph.
    """
    args = get_arguments()

    # Load parameters from wavenet params json file
    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)  

    quantization_channels = wavenet_params['quantization_channels']

    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        # Lambda for white noise sampler
        gi_sampler = get_generator_input_sampler()

        # Initialize generator WaveNet
        G = WaveNetModel(
            batch_size=1,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            initial_filter_width=wavenet_params["initial_filter_width"])

        # NOTE(review): the sampler is requested a second time here; the
        # first assignment above is overwritten. Presumably redundant --
        # confirm get_generator_input_sampler() has no side effects before
        # removing either call.
        gi_sampler = get_generator_input_sampler()

        # White noise generator params
        white_mean = 0
        white_sigma = 1
        white_length = ffnn.INPUT_SIZE

        # Sample the noise, run it through process(), and wrap the result
        # in a tensor so it can be used as the model's input batch.
        white_noise = gi_sampler(white_mean, white_sigma, white_length)
        white_noise = process(white_noise, quantization_channels, 1)
        white_noise_t = tf.convert_to_tensor(white_noise)

        # Reader over the './sampleTrue' directory. Its dequeued batch is
        # only referenced by the commented-out G.loss call below.
        directory = './sampleTrue'
        reader = AudioReader(directory, coord, sample_rate = 16000, gc_enabled=False, receptive_field=5117, sample_size=15117, silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        audio_batch = reader.dequeue(1)

        # initialize generator
        # G.loss is unpacked into two values here: the loss tensor and the
        # prediction tensor.
        w_loss, w_prediction = G.loss(input_batch=white_noise_t, name='generator')
        #w_loss, w_prediction = G.loss(input_batch=audio_batch, name='generator')

        # Only variables under the 'wavenet' scope are handed to the optimizer.
        G_variables = tf.trainable_variables(scope='wavenet')
        optimizer = optimizer_factory[args.optimizer](
                    learning_rate=1e-3,
                    momentum=args.momentum)
        optim = optimizer.minimize(w_loss, var_list=G_variables)

        # Created after minimize() so it covers every variable defined so far.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Disabled: 300-step training loop followed by a prediction run.
        '''
        for step in range(300):
            loss_value, _ = sess.run([w_loss, optim])
            print('step {:d} - loss = {:.3f}'.format(step, loss_value))

        prediction = sess.run(w_prediction)
        '''

        # Disabled: plots the three largest logits per sample and prints the
        # softmax of the prediction.
        '''
        maxs = []
        maxs_2 = []
        maxs_3 = []

        for i in range(0, 10000):
            temp = prediction[i]
            temp.sort()
            maxs_3.append(temp[253])
            maxs_2.append(temp[254])
            maxs.append(temp[255])
        
        plt.plot(maxs)
        plt.plot(maxs_2)
        plt.plot(maxs_3)
        plt.ylabel('Value')
        plt.xlabel('Sample')
        plt.savefig('logits_after.png')
        
        np.set_printoptions(threshold=np.nan)
        print(sess.run(tf.nn.softmax(w_prediction)))
        ''' 
        
        '''
예제 #10
0
파일: train.py 프로젝트: weixsong/Codeblock
def main():
    """Train a WaveNet model using one tower per GPU with averaged gradients.

    Steps performed:

    1. Parse and validate the command-line arguments; return early (after
       printing the problem) when the directory arguments are invalid.
    2. Build an ``AudioReader`` input pipeline and a ``WaveNetModel``.
    3. For each of ``args.gpu_nums`` GPUs build a tower that dequeues a
       batch, computes the loss and its gradients; gradients are combined
       with ``average_gradients`` and applied once.
    4. Restore a checkpoint from ``restore_from`` if one is available.
    5. Run the training loop up to ``args.num_steps``, writing summaries every
       step, a full trace every 50 steps when ``args.store_metadata`` is set,
       and a checkpoint every ``args.checkpoint_every`` steps.
    6. On exit (including Ctrl-C) save a final checkpoint if any step ran
       since the last save, then stop and join the reader threads.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; it is re-raised
        after printing a warning so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    logdir_root = directories['logdir_root']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    with tf.device("/cpu:0"):
        # Create coordinator.
        coord = tf.train.Coordinator()

        # Load raw waveform from VCTK corpus.
        with tf.name_scope('create_inputs'):
            # Allow silence trimming to be skipped by specifying a threshold
            # near zero.
            silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                          EPSILON else None
            gc_enabled = args.gc_channels is not None
            reader = AudioReader(
                args.data_dir,
                coord,
                sample_rate=wavenet_params['sample_rate'],
                gc_enabled=gc_enabled,
                sample_size=args.sample_size,
                silence_threshold=silence_threshold)

        # Create network.
        net = WaveNetModel(
            batch_size=args.batch_size,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            scalar_input=wavenet_params["scalar_input"],
            initial_filter_width=wavenet_params["initial_filter_width"],
            histograms=args.histograms,
            global_condition_channels=args.gc_channels,
            global_condition_cardinality=reader.gc_category_cardinality)

        # A strength of exactly 0 is passed on as None.
        if args.l2_regularization_strength == 0:
            args.l2_regularization_strength = None

        global_step = tf.get_variable("global_step", [], initializer=tf.constant_initializer(0), trainable=False)

        optimizer = optimizer_factory[args.optimizer](
            learning_rate=args.learning_rate,
            momentum=args.momentum)

        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(args.gpu_nums):
                with tf.device("/gpu:%d" % i), tf.name_scope("tower_%d" % i) as scope:
                    audio_batch = reader.dequeue(args.batch_size)
                    if gc_enabled:
                        gc_id_batch = reader.dequeue_gc(args.batch_size)
                    else:
                        gc_id_batch = None

                    loss = net.loss(input_batch=audio_batch,
                                    global_condition_batch=gc_id_batch,
                                    l2_regularization_strength=args.l2_regularization_strength)
                    tower_losses.append(loss)

                    trainable = tf.trainable_variables()
                    grads = optimizer.compute_gradients(loss, var_list=trainable)
                    tower_grads.append(grads)

                    # Only the summaries of the last tower built are kept
                    # (this name is rebound on every iteration).
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    # Later towers reuse the variables created by the first.
                    tf.get_variable_scope().reuse_variables()

        # calculate the mean of each gradient. Synchronization point across all towers
        grads = average_gradients(tower_grads)
        train_ops = optimizer.apply_gradients(grads, global_step=global_step)

        # calculate the mean loss
        loss = tf.reduce_mean(tower_losses)

        # Set up logging for TensorBoard.
        writer = tf.summary.FileWriter(logdir)
        writer.add_graph(tf.get_default_graph())
        run_metadata = tf.RunMetadata()
        summaries_ops = tf.summary.merge(summaries)

        # Set up session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False, allow_soft_placement=True))
        init = tf.global_variables_initializer()
        sess.run(init)

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.trainable_variables())

        try:
            saved_global_step = load(saver, sess, restore_from)
            if is_overwritten_training or saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1

        except:
            print("Something went wrong while restoring checkpoint. "
                  "We will terminate training to avoid accidentally overwriting "
                  "the previous model.")
            raise

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        # `step` stays None until the first loop iteration starts; both names
        # are bound before the `try` so the `finally` clause can always read
        # them.
        step = None
        last_saved_step = saved_global_step
        try:
            for step in range(saved_global_step + 1, args.num_steps):
                start_time = time.time()
                if args.store_metadata and step % 50 == 0:
                    # Slow run that stores extra information for debugging.
                    print('Storing metadata')
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    summary, loss_value, _ = sess.run(
                        [summaries_ops, loss, train_ops],
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_summary(summary, step)
                    writer.add_run_metadata(run_metadata,
                                            'step_{:04d}'.format(step))
                    tl = timeline.Timeline(run_metadata.step_stats)
                    timeline_path = os.path.join(logdir, 'timeline.trace')
                    with open(timeline_path, 'w') as f:
                        f.write(tl.generate_chrome_trace_format(show_memory=True))
                else:
                    summary, loss_value, _ = sess.run([summaries_ops, loss, train_ops])
                    writer.add_summary(summary, step)

                duration = time.time() - start_time
                print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                      .format(step, loss_value, duration))

                if step % args.checkpoint_every == 0:
                    save(saver, sess, logdir, step)
                    last_saved_step = step

        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print()
        finally:
            # Guard against `step` still being None (empty step range or an
            # interrupt before the first iteration): comparing None with an
            # int raises TypeError on Python 3, which would also skip the
            # thread shutdown below.
            if step is not None and step > last_saved_step:
                save(saver, sess, logdir, step)
            coord.request_stop()
            coord.join(threads)
예제 #11
0
def main():
    """Train a WaveNet model on a single device or on several GPUs.

    After validating the command-line arguments (returning early when the
    directory arguments are invalid) this builds an ``AudioReader`` input
    pipeline, then either:

    * a single model/optimizer when ``args.num_gpus <= 1``, or
    * one model tower per GPU whose gradients are averaged per variable and
      applied once.

    It then restores a checkpoint if available, runs the training loop up to
    ``args.num_steps`` (writing a summary every step, a full trace every 50
    steps when ``args.store_metadata`` is set, and a checkpoint every
    ``args.checkpoint_every`` steps) and, on exit, saves a final checkpoint
    if needed before stopping the reader threads.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
                                                      EPSILON else None
        gc_enabled = args.gc_channels is not None
        # The queue size is 32 entries per GPU (at least 32).
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"], wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold,
            normalize_peak=args.normalize_peak,
            queue_size=32 * max(args.num_gpus, 1))
        # NOTE(review): this global-condition batch is dequeued once here and
        # then passed to every tower below, while each tower dequeues its own
        # audio batch -- confirm the ids still line up with the audio in the
        # multi-GPU branch.
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # A strength of exactly 0 is passed on as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    if args.num_gpus <= 1:
        print("Falling back to single computation unit.")
        audio_batch = reader.dequeue(args.batch_size)
        net = make_model(args, wavenet_params, reader)
        loss = net.loss(
            input_batch=audio_batch,
            global_condition_batch=gc_id_batch,
            l2_regularization_strength=args.l2_regularization_strength)
        optimizer = optimizer_factory[args.optimizer](
            learning_rate=args.learning_rate, momentum=args.momentum)
        trainable = tf.trainable_variables()
        gradients = optimizer.compute_gradients(loss, var_list=trainable)
        # Log the norm of every gradient that exists.
        for gradient, variable in gradients:
            if gradient is not None:
                tf.summary.scalar(variable.name + '/gradient',
                                  tf.norm(gradient))
        optim = optimizer.apply_gradients(gradients)
    else:
        print("Using {} GPUs for compuation.".format(args.num_gpus))
        with tf.device('/gpu:0'), tf.name_scope('tower_0'):
            optimizer = optimizer_factory[args.optimizer](
                learning_rate=args.learning_rate, momentum=args.momentum)
        losses = []
        gradients = []
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            for i in range(args.num_gpus):
                with tf.device('/gpu:%d' % i), tf.name_scope('tower_%d' % i):
                    audio_batch = reader.dequeue(args.batch_size)
                    net = make_model(args, wavenet_params, reader, i)
                    loss = net.loss(input_batch=audio_batch,
                                    global_condition_batch=gc_id_batch,
                                    l2_regularization_strength=args.
                                    l2_regularization_strength)
                    trainable = tf.trainable_variables()
                    gradient = optimizer.compute_gradients(loss,
                                                           var_list=trainable)
                    losses.append(loss)
                    gradients.append(gradient)
                    # Later towers reuse the variables created by the first.
                    scope.reuse_variables()

        with tf.device('/gpu:0'), tf.name_scope('tower_0'):
            loss = tf.reduce_mean(losses)
            tf.summary.scalar('mean_total_loss', loss)
            average_gradients = []
            # zip(*gradients) groups the (gradient, variable) pairs of all
            # towers by position, i.e. one group per variable.
            for grouped_gradients in zip(*gradients):
                expanded_gradients = []
                for gradient, _ in grouped_gradients:
                    if gradient is not None:
                        # Add a leading axis so the tower gradients can be
                        # concatenated and averaged along it.
                        expanded_gradients.append(tf.expand_dims(gradient, 0))

                # Since all GPUs share the same variable we can just use the one from gpu:0
                _, variable = grouped_gradients[0]
                if len(expanded_gradients) == 0:
                    print('No gradient for %s' % variable.name)
                    average_gradients.append((None, variable))
                    continue

                merged_gradients = tf.concat(expanded_gradients, 0)
                average_gradient = tf.reduce_mean(merged_gradients, 0)
                average_gradients.append((average_gradient, variable))

                tf.summary.scalar(variable.name + '/gradient',
                                  tf.norm(average_gradient))
            optim = optimizer.apply_gradients(average_gradients)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    # Workaround for avoiding allocating memory on all GPUs due to tensorflow#8021
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    # `step` stays None until the first loop iteration starts; the `finally`
    # clause below checks for that before comparing it.
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Save once more if steps ran since the last checkpoint, then shut
        # the input threads down.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #12
0
def main():
    """Train the feed-forward network to separate two audio data sets.

    Builds the ``ffnn`` graph (inference, loss, training and evaluation ops)
    with an exponentially decaying learning rate, optionally restores a
    checkpoint from ``args.restore_from``, and then trains for up to
    ``max_steps`` steps. Each batch element is drawn at random from either
    ``./sampleTrue`` (label 1) or ``./sampleFalse`` (label 0), standardized
    to zero mean / unit standard deviation, and fed to the network. A
    summary is written every step and a checkpoint every 100 steps and on the
    final step.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; it is re-raised
        after printing a warning so an existing model is not overwritten.
    """

    with tf.Graph().as_default():
        coord = tf.train.Coordinator()
        sess = tf.Session()

        batch_size = 10
        hidden1_units = 5202
        hidden2_units = 2601
        hidden3_units = 1300
        hidden4_units = 650
        hidden5_units = 325
        max_steps = 1000
        """
        learning_rate = 1e-2
        print('Learning Rate:')
        print(learning_rate)
        print('Layers')
        print(5)
        """
        global_step = tf.Variable(0, name='global_step', trainable=False)
        initial_learning_rate = 4e-2
        # Multiply the rate by 0.95 every 100 steps (staircase decay).
        learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                                   global_step,
                                                   100,
                                                   0.95,
                                                   staircase=True)

        inputs_placeholder, labels_placeholder = placeholder_inputs(batch_size)

        logits = ffnn.inference(inputs_placeholder, hidden1_units,
                                hidden2_units, hidden3_units, hidden4_units,
                                hidden5_units)
        loss = ffnn.loss(logits, labels_placeholder)
        train_op = ffnn.training(loss, learning_rate, global_step)
        eval_correct = ffnn.evaluation(logits, labels_placeholder)

        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        summary_writer = tf.summary.FileWriter('./logdir', sess.graph)

        sess.run(init)

        args = get_arguments()

        if args.restore_from is not None:
            restore_from = args.restore_from
            print("Restoring from: ")
            print(restore_from)

        else:
            restore_from = ""

        try:
            saved_global_step = load(saver, sess, restore_from)
            if saved_global_step is None:
                # The first training step will be saved_global_step + 1,
                # therefore we put -1 here for new or overwritten trainings.
                saved_global_step = -1
            else:
                # NOTE(review): `label_batch_size` is not defined in this
                # function and `counter` is not read here -- confirm a
                # module-level `label_batch_size` exists, otherwise resuming
                # from a checkpoint fails with NameError.
                counter = saved_global_step % label_batch_size

        except:
            print(
                "Something went wrong while restoring checkpoint. "
                "We will terminate training to avoid accidentally overwriting "
                "the previous model.")
            raise

        # TODO: Find a more robust way to find different data sets

        # Training data
        directory = './sampleTrue'
        reader = AudioReader(directory,
                             coord,
                             sample_rate=16000,
                             gc_enabled=False,
                             receptive_field=5117,
                             sample_size=10000,
                             silence_threshold=0.05)
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

        directory = './sampleFalse'
        reader2 = AudioReader(directory,
                              coord,
                              sample_rate=16000,
                              gc_enabled=False,
                              receptive_field=5117,
                              sample_size=10000,
                              silence_threshold=0.05)
        threads2 = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader2.start_threads(sess)

        # Build the dequeue ops once. Calling reader.dequeue() inside the
        # training loop would add a new op to the graph for every sample,
        # making the graph (and memory use) grow without bound.
        true_dequeue = reader.dequeue(1)
        false_dequeue = reader2.dequeue(1)

        total_loss = 0
        for step in range(saved_global_step + 1, max_steps):
            start_time = time.time()

            batch_data = []
            label_data = []

            if step % 100 == 0:
                print('Current learning rate: %6f' % (sess.run(learning_rate)))

            for b in range(batch_size):
                # Label 1 draws from './sampleTrue', label 0 from
                # './sampleFalse'.
                label = randint(0, 1)
                dequeue_op = true_dequeue if label == 1 else false_dequeue

                # Keep drawing until the sample is long enough for the
                # network input.
                data = sess.run(dequeue_op)
                while len(data[0]) < ffnn.INPUT_SIZE:
                    data = sess.run(dequeue_op)

                data = np.array(data[0])
                mean = np.mean(data)
                std = np.std(data)

                # Standardize element by element so each entry ends up as a
                # plain Python/NumPy scalar in the feed list.
                standardized = []

                for d in data:
                    standardized.append(float(d - mean) / std)

                batch_data.append(standardized)
                label_data.append(label)

            feed_dict = fill_feed_dict(batch_data, label_data,
                                       inputs_placeholder, labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time
            total_loss = total_loss + loss_value

            print('Step %d: loss = %.7f (%.3f sec)' %
                  (step, loss_value, duration))

            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

            if step % 100 == 0 or (step + 1) == max_steps:
                average = total_loss / (step + 1)
                print('Cumulative average loss: %6f' % (average))
                # TODO: Update train script to add data to new directory
                checkpoint_file = os.path.join('./logdir/init-train/',
                                               'model.ckpt')
                print("Generating checkpoint file...")
                saver.save(sess, checkpoint_file, global_step=step)
            """
예제 #13
0
def main():
    """Train a WaveNet model with optional global and local conditioning.

    Validates the command-line arguments (returning early when the directory
    arguments are invalid), builds the ``AudioReader`` input batch and the
    ``WaveNetModel`` loss, sets up a piecewise-constant learning rate from
    ``LEARNING_RATE_TRANSITION_STEPS`` / ``LEARNING_RATE_SCHEDULE``, restores
    a checkpoint if one is available, and runs the training loop up to
    ``args.num_steps``. A checkpoint is written every
    ``args.checkpoint_every`` steps and once more on exit if steps ran since
    the last save.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; it is re-raised
        after printing a warning so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Set up session
    sess = tf.Session()

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from corpus.
    with tf.name_scope('create_inputs'):
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             gc_channels=args.gc_channels,
                             lc_channels=args.lc_channels)
        inputs_dict = reader.get_batch(args.batch_size)

    # Create network.
    audio_batch = inputs_dict['audio_batch']
    gc_batch = inputs_dict['gc_batch']
    lc_batch = inputs_dict['lc_batch']
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        skip_channels=wavenet_params["skip_channels"],
        # The input channel count is read from axis 2 of the audio batch.
        input_channels=audio_batch.get_shape().as_list()[2],
        quantization_channels=wavenet_params["quantization_channels"],
        gc_channels=args.gc_channels,
        gc_cardinality=reader.gc_cardinality,
        lc_channels=args.lc_channels)

    # Conditioning batches are only passed on when the matching channel
    # argument was given.
    output_dict = net.loss(input_batch=audio_batch,
                           gc_batch=gc_batch \
                               if args.gc_channels is not None else None,
                           lc_batch=lc_batch \
                               if args.lc_channels is not None else None)

    loss = output_dict['loss']
    tf.summary.scalar('train_loss', loss)

    global_step = tf.get_variable("global_step", [],
                                  tf.int32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    # Build the learning-rate schedule as a chain of conditionals: for each
    # (transition step, rate) pair, keep the rate chosen so far while
    # global_step is below the transition step, otherwise switch to the
    # pair's rate.
    assert len(LEARNING_RATE_SCHEDULE) == len(LEARNING_RATE_TRANSITION_STEPS)
    lr = tf.constant(LEARNING_RATE_SCHEDULE[0])
    for s, v in zip(LEARNING_RATE_TRANSITION_STEPS, LEARNING_RATE_SCHEDULE):
        lr = tf.cond(tf.less(global_step, s), lambda: lr,
                     lambda: tf.constant(v))
    tf.summary.scalar("learning_rate", lr)

    optimizer = optimizer_factory[args.optimizer](learning_rate=lr,
                                                  momentum=args.momentum)
    train_op = optimizer.minimize(loss, global_step=global_step, name='train')

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    summary_op = tf.summary.merge_all()

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    # Initialize every variable BEFORE restoring. Running the initializer
    # after `load` would overwrite the weights just read from the checkpoint
    # with fresh initial values.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Start enqueue op
    # The thread is a daemon so it cannot keep the process alive on exit.
    enqueue_thread = threading.Thread(target=reader.enqueue, args=[sess])
    enqueue_thread.daemon = True
    enqueue_thread.start()

    # `step` stays None until the first loop iteration starts.
    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()

            summary, loss_value, _ = sess.run([summary_op, loss, train_op])
            writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # Guard against `step` still being None (empty step range or an
        # interrupt before the first iteration): comparing None with an int
        # raises TypeError on Python 3, which would also skip the thread
        # shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def main():
    """Train a WaveNet model, decaying the learning rate as the loss drops.

    Steps performed:

    1. Parse the command line (``get_arguments``) and resolve the log /
       restore directories (``validate_directories``).  A ``ValueError``
       from the validation is reported on stdout and the function returns.
    2. Load the network hyper-parameters from the JSON file named by
       ``args.wavenet_params``.
    3. Build the ``AudioReader`` input queue, the ``WaveNetModel`` and an
       RMSProp training op whose learning rate is fed through a placeholder.
    4. Restore the latest checkpoint via ``load`` and run the training loop,
       checkpointing every ``args.checkpoint_every`` steps.

    The learning rate starts at ``args.learning_rate`` and is multiplied by
    0.1 at most twice: once the first time the loss is in ``(0.5, 1.5]`` and
    once more the first time it is ``<= 0.5`` after that.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; the error is
        announced and re-raised so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        gc_enabled = args.gc_channels is not None
        # A threshold at or below EPSILON is handed to the reader as None.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        receptive_field = WaveNetModel.calculate_receptive_field(
            wavenet_params["filter_width"], wavenet_params["dilations"],
            wavenet_params["scalar_input"],
            wavenet_params["initial_filter_width"])
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            max_samples=get_max_samples(args.data_dir,
                                        wavenet_params['sample_rate']),
            receptive_field=receptive_field,
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    # A strength of exactly 0 is passed to the model as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)

    # The learning rate is a placeholder so it can be changed between steps
    # without rebuilding the graph.
    learning_rate_placeholder = tf.placeholder(tf.float32, [])
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate_placeholder, momentum=args.momentum)
    train_op = optimizer.minimize(loss)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately broad: the error is only announced here and then
        # re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    loss_value = None
    # Number of learning-rate decays applied so far (0, 1 or 2).
    update = 0
    last_saved_step = saved_global_step
    learning_rate = args.learning_rate
    print('learning_rate {:f})'.format(learning_rate))
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            feed = {learning_rate_placeholder: learning_rate}

            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, train_op],
                    feed_dict=feed,
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run(
                    [summaries, loss, train_op], feed_dict=feed)
                writer.add_summary(summary, step)

            # Two one-shot decays: the first when the loss enters
            # (0.5, 1.5], the second when it reaches 0.5 or below.
            first_decay_due = update == 0 and 0.5 < loss_value <= 1.5
            second_decay_due = update == 1 and loss_value <= 0.5
            if first_decay_due or second_decay_due:
                learning_rate = learning_rate * 0.1
                update += 1
                print('learning_rate {:f})'.format(learning_rate))

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (for example
        # when resuming at or beyond num_steps).  Comparing None with an int
        # raises TypeError on Python 3, which would skip the shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #15
0
def main():
    """Train a WaveNet model using an explicit compute/apply-gradients tower.

    Steps performed:

    1. Parse the command line (``get_arguments``) and resolve the log /
       restore directories (``validate_directories``).  A ``ValueError``
       from the validation is reported on stdout and the function returns.
    2. Load the network hyper-parameters from the JSON file named by
       ``args.wavenet_params``.
    3. Build the ``AudioReader`` queue and the ``WaveNetModel``, compute the
       gradients of the total loss on ``/gpu:0`` and apply them on
       ``/cpu:0``.
    4. Restore the latest checkpoint via ``load`` and run the training loop.
       Running averages of the two loss terms are printed whenever
       ``step % args.checkloss_every == 0`` and a checkpoint is written
       whenever ``step % args.checkpoint_every == 0``.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; the error is
        announced and re-raised so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        # Pass the normalised value; handing over the raw argument would
        # make the near-zero check above a no-op.
        reader = AudioReader(args.data_dir,
                             coord,
                             sample_rate=wavenet_params['sample_rate'],
                             sample_size=args.sample_size,
                             silence_threshold=silence_threshold)
        # The single-GPU dequeue that used to live here was moved into the
        # tower scope below when the graph was reworked for multiple GPUs.

    # Create network.
    # NOTE(review): the batch is divided by args.num_gpus although only one
    # tower (/gpu:0) is built below -- confirm this is intended.
    batch_size_single_GPU = int(1.0 * args.batch_size / args.num_gpus)
    net = WaveNetModel(
        batch_size=batch_size_single_GPU,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        ID_channels=wavenet_params["ID_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],  # scalar vs. vector input?
        initial_filter_width=wavenet_params["initial_filter_width"])
    # A strength of exactly 0 is passed to the model as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)
    trainable = tf.trainable_variables()

    # One (gradient, variable) list per tower.  The loop over
    # args.num_gpus is currently disabled, so there is exactly one tower.
    tower_grads = []
    with tf.device('/gpu:0'):
        with tf.name_scope('losstower_0') as scope:
            audio_batch, input_IDs = reader.dequeue(batch_size_single_GPU)
            all_loss = net.loss(audio_batch, input_IDs,
                                args.l2_regularization_strength)
            # First element is the total loss that is optimised; the second
            # (L1) is only reported.
            loss, L1 = all_loss
            tf.get_variable_scope().reuse_variables()
            grads_vars = optimizer.compute_gradients(loss, var_list=trainable)
            tower_grads.append(grads_vars)

    update_wei_op = []
    with tf.device('/cpu:0'):
        for gv in tower_grads:
            app_grad = optimizer.apply_gradients(gv)
            update_wei_op.append(app_grad)

    # A no-op that depends on every apply-gradients op serves as train op.
    with tf.control_dependencies(update_wei_op):
        train_op = tf.no_op()

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately broad: the error is only announced here and then
        # re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess, N_THREADS)

    step = None
    last_saved_step = saved_global_step
    try:
        # Accumulators for the current reporting window.
        loss_sum = 0.0
        l1_sum = 0.0
        steps_in_window = 0
        window_start = time.time()
        for step in range(saved_global_step + 1, args.num_steps):
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, all_loss_value, _ = sess.run(
                    [summaries, all_loss, train_op],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                # Summaries are not evaluated on regular steps.
                all_loss_value, _ = sess.run([all_loss, train_op])
            loss_value, L1_value = all_loss_value
            loss_sum += loss_value
            l1_sum += L1_value
            steps_in_window += 1

            if step % args.checkloss_every == 0:
                # Divide by the number of steps actually accumulated: the
                # first window after a (re)start may be shorter than
                # checkloss_every.
                avg_loss_value = loss_sum / steps_in_window
                avg_L1_value = l1_sum / steps_in_window
                duration = (time.time() - window_start) / steps_in_window
                print(
                    'step {:d} - avg_loss = {:.3f}, avg_L1 = {:.3f}, ({:.3f} sec/step)'
                    .format(step, avg_loss_value, avg_L1_value, duration))
                sys.stdout.flush()
                loss_sum = 0.0
                l1_sum = 0.0
                steps_in_window = 0
                window_start = time.time()

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (for example
        # when resuming at or beyond num_steps).  Comparing None with an int
        # raises TypeError on Python 3, which would skip the shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #16
0
def main():
    """Train a WaveNet model and periodically checkpoint it.

    Steps performed:

    1. Parse the command line (``get_arguments``) and resolve the log /
       restore directories (``validate_directories``).  A ``ValueError``
       from the validation is reported on stdout and the function returns.
    2. Load the network hyper-parameters from the JSON file named by
       ``args.wavenet_params``.
    3. Build the ``AudioReader`` input queue, the ``WaveNetModel`` and the
       training op from ``optimizer_factory[args.optimizer]``.
    4. Restore the latest checkpoint via ``load`` and run the training loop,
       writing a summary every step and a checkpoint whenever
       ``step % args.checkpoint_every == 0``.  When ``args.store_metadata``
       is set, every 50th step also records run metadata and a timeline
       trace in ``logdir``.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; the error is
        announced and re-raised so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        gc_enabled = args.gc_channels is not None
        receptive_field = WaveNetModel.calculate_receptive_field(
            wavenet_params["filter_width"], wavenet_params["dilations"],
            wavenet_params["scalar_input"],
            wavenet_params["initial_filter_width"])
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=receptive_field,
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    # A strength of exactly 0 is passed to the model as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Debugging notes (tfdbg), kept for reference.
    #
    # Wrap the session in the command-line debugger:
    #   sess = tf_debug.LocalCLIDebugWrapperSession(sess, thread_name_filter="MainThread$", dump_root="C:\\MProjects\\WaveNet\\tensorflow-wavenet-master\\debugDump")
    # Useful debugger commands:
    #   run --node_name_filter wavenet_1/loss/Reshape_1:0 -- (36352, 256)
    #   run --node_name_filter (.*loss.*)|(.*encode.*)
    #   pt -a tensorName > C:/Users/russkov.alexander/Desktop/WaveNet/tensorflow-wavenet-master/myDebugInfo/file.txt
    # Shapes observed while debugging:
    #   encoded_input = Tensor("wavenet_1/encode/ToInt32:0", shape=(1, ?, 1), dtype=int32)  -- (1, 59901, 1)
    #   encoded = Tensor("wavenet_1/one_hot_encode/Reshape:0", shape=(1, ?, 256), dtype=float32) -- (1, 59901, 256)
    # If the tfdbg dumps fill up the disk, see the FAQ entry "The model I am
    # debugging is very large ..." at
    #   https://www.tensorflow.org/guide/debugger#frequently_asked_questions
    #   https://github.com/tensorflow/tensorflow/issues/8753
    # Alternative: stream to the TensorBoard debugger plugin instead:
    #   sess = tf_debug.TensorBoardDebugWrapperSession(sess, "RUSSKOV-NB-W10:6064", send_traceback_and_source_code=False)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately broad: the error is only announced here and then
        # re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (for example
        # when resuming at or beyond num_steps).  Comparing None with an int
        # raises TypeError on Python 3, which would skip the shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #17
0
def main():
    """Train a WaveNet model and dump the loss history as a plot and text.

    Steps performed:

    1. Parse the command line (``get_arguments``) and resolve the log /
       restore directories (``validate_directories``).  A ``ValueError``
       from the validation is reported on stdout and the function returns.
    2. Load the network hyper-parameters from the JSON file named by
       ``args.wavenet_params``.
    3. Build the ``AudioReader`` input queue, the ``WaveNetModel`` and the
       training op.  ``net.loss`` here returns the loss followed by nine
       intermediate tensors that are fetched on regular steps for
       debugging.
    4. Restore the latest checkpoint via ``load`` and run the training loop,
       checkpointing whenever ``step % args.checkpoint_every == 0``.
    5. When the loop finishes or is interrupted with Ctrl-C, write the
       per-step losses to ``loss.png`` and ``loss.txt`` in ``args.data_dir``.

    Raises:
        Whatever ``load`` raises while restoring a checkpoint; the error is
        announced and re-raised so an existing model is not overwritten.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = (args.silence_threshold
                             if args.silence_threshold > EPSILON else None)
        gc_enabled = args.gc_channels is not None
        receptive_field = WaveNetModel.calculate_receptive_field(
            wavenet_params["filter_width"], wavenet_params["dilations"],
            wavenet_params["scalar_input"],
            wavenet_params["initial_filter_width"])
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=receptive_field,
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    # A strength of exactly 0 is passed to the model as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    # (aleix) net.loss also hands back intermediate tensors for inspection.
    loss, global_condition_batch, gc_embedding, conv_filter, conv_filter0, conv_filter1, conv_gate, \
    embedding_table, weights_gc_filter, input_batch = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately broad: the error is only announced here and then
        # re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    loss_plot = []  # per-step loss values (aleix)

    def _dump_loss_history(plot_message):
        """Save loss_plot as loss.png and loss.txt inside args.data_dir.

        plot_message is printed once the figure has been written; the two
        call sites historically use slightly different wording.
        """
        plt.figure(1)
        plt.plot(loss_plot)
        plt.savefig(os.path.join(args.data_dir, 'loss.png'))
        print()
        print(plot_message)
        # Context manager so the file is closed even if a write fails.
        with open(os.path.join(args.data_dir, 'loss.txt'), 'w') as loss_file:
            for item in loss_plot:
                loss_file.write("%s\n" % item)
        print('Loss .txt saved')
        print()

    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                # (aleix) The intermediate tensors are fetched together with
                # the loss purely for debugging; print a value or its .shape
                # (e.g. gc_embedding0.shape) here to inspect it.
                # NOTE(review): none of these extra values is used further
                # down -- consider dropping the extra fetches once debugging
                # is finished.
                summary, loss_value, global_condition_batch0, gc_embedding0, conv_filter_end, conv_filter0_0, \
                conv_filter0_1, conv_gate0, embedding_table0, weights_gc_filter0, input_batch0, _ = sess.run([
                    summaries, loss, global_condition_batch, gc_embedding, conv_filter, conv_filter0, conv_filter1,
                    conv_gate, embedding_table, weights_gc_filter, input_batch, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            loss_plot.append(loss_value)
            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
        # Training ran to completion: persist the loss history (aleix).
        _dump_loss_history('Loss .plot saved')
    except KeyboardInterrupt:
        # Interrupted: still persist what has been collected so far (aleix).
        _dump_loss_history('Loss plot saved')

        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (for example
        # when resuming at or beyond num_steps).  Comparing None with an int
        # raises TypeError on Python 3, which would skip the shutdown below.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
예제 #18
0
def main():
    """Build the input pipeline and WaveNet graph, then run the training loop.

    Command-line options come from ``get_arguments()``; network
    hyper-parameters are read from the JSON file named by
    ``args.wavenet_params``.  A summary is written to ``logdir`` on every
    step and a checkpoint is saved every ``args.checkpoint_every`` steps.

    However the loop ends (normal completion, Ctrl-C, or an error raised
    inside it), the most recent unsaved step is checkpointed, the
    coordinator is asked to stop, the queue-runner threads are joined and
    the summary writer is closed.

    Returns:
        None.  Returns early, after printing the problem, when
        ``validate_directories`` raises ``ValueError``.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        if args.silence_threshold > EPSILON:
            silence_threshold = args.silence_threshold
        else:
            silence_threshold = None
        gc_enabled = args.gc_channels is not None
        receptive_field = WaveNetModel.calculate_receptive_field(
            wavenet_params["filter_width"],
            wavenet_params["dilations"],
            wavenet_params["scalar_input"],
            wavenet_params["initial_filter_width"])
        reader = AudioReader(
            audio_dir=args.data_dir,
            coord=coord,
            sample_rate=wavenet_params["sample_rate"],
            gc_enabled=gc_enabled,
            receptive_field=receptive_field,
            sample_size=args.sample_size,
            mfsc_dim=wavenet_params["MFSC_channels"],
            ap_dim=wavenet_params["AP_channels"],
            F0_dim=wavenet_params["F0_channels"],
            phone_dim=wavenet_params["phones_channels"],
            phone_pos_dim=wavenet_params["phone_pos_channels"],
            silence_threshold=silence_threshold)

        ap_batch, lc_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        MFSC_channels=wavenet_params["MFSC_channels"],
        F0_channels=wavenet_params["F0_channels"],
        phone_channels=wavenet_params["phones_channels"],
        phone_pos_channels=wavenet_params["phone_pos_channels"])

    # A strength of exactly 0 is passed on as None instead.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    loss = net.loss(
        input_batch=ap_batch,
        lc_batch=lc_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.summary.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=args.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately bare: the message is printed for any failure and the
        # exception is re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)
    print("========================================")
    print(
        "Total number of parameteres for mfsc model:",
        np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ]))

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()

            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run([summaries, loss, optim],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            if step % 10 == 0:
                print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                    step, loss_value, duration))
            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (the restored
        # step is already at or past num_steps); comparing None with an int
        # would raise TypeError on Python 3, so guard against it.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        # Always stop and join the input threads, even when training failed.
        coord.request_stop()
        coord.join(threads)
        # Flush pending summary events to disk.
        writer.close()
예제 #19
0
def main():
    """Parse the command line, build the WaveNet graph and run training.

    Directory and checkpoint-interval options are parsed here; every other
    setting is read from the ``hparams`` object that must already be in
    scope at module level.  A checkpoint is saved every
    ``--checkpoint_every`` steps.  However the loop ends (normal completion,
    Ctrl-C, or an error raised inside it), the most recent unsaved step is
    checkpointed, the coordinator is asked to stop and the queue-runner
    threads are joined.

    Returns:
        None.  Returns early, after printing the problem, when
        ``validate_directories`` raises ``ValueError``.
    """
    parser = argparse.ArgumentParser(description='WaveNet example network')

    # Alternative default: './VCTK-Corpus'
    DATA_DIRECTORY = 'D:\\hccho\\multi-speaker-tacotron-tensorflow-master\\datasets\\son\\audio'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    # LOGDIR = './/logdir//train//2018-11-25T14-10-48'   # son
    # LOGDIR = './/logdir//train//2018-11-30T22-22-58'   # test
    parser.add_argument(
        '--logdir', type=str, default=LOGDIR,
        help='Directory in which to store the logging information for TensorBoard. '
             'If the model already exists, it will restore the state and will continue training. '
             'Cannot use with --logdir_root and --restore_from.')
    parser.add_argument(
        '--logdir_root', type=str, default=None,
        help='Root directory to place the logging output and generated model. '
             'These are stored under the dated subdirectory of --logdir_root. '
             'Cannot use with --logdir.')
    parser.add_argument(
        '--restore_from', type=str, default=None,
        help='Directory in which to restore the model from. '
             'This creates the new model under the dated directory in --logdir_root. '
             'Cannot use with --logdir.')

    CHECKPOINT_EVERY = 20   # checkpoint save interval, in steps
    parser.add_argument(
        '--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' + str(CHECKPOINT_EVERY) + '.')

    # Options that can be supplied on the command line.
    config = parser.parse_args()

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        if hparams.silence_threshold > EPSILON:
            silence_threshold = hparams.silence_threshold
        else:
            silence_threshold = None
        gc_enabled = hparams.gc_channels is not None

        # Original author's note (not verifiable from this function; see
        # AudioReader): the reader cuts each wav into pieces of
        # (receptive_field + sample_size) samples, where the leading
        # receptive_field samples are either padding or carried over from
        # the previous piece.
        receptive_field = WaveNetModel.calculate_receptive_field(
            hparams.filter_width, hparams.dilations,
            hparams.scalar_input, hparams.initial_filter_width)
        reader = AudioReader(config.data_dir, coord,
                             sample_rate=hparams.sample_rate,
                             gc_enabled=gc_enabled,
                             receptive_field=receptive_field,
                             sample_size=hparams.sample_size,
                             silence_threshold=silence_threshold)
        if gc_enabled:
            # Original note: audio_batch is (batch_size, ?, 1) - TODO confirm.
            audio_batch, gc_id_batch = reader.dequeue(hparams.batch_size)
        else:
            audio_batch = reader.dequeue(hparams.batch_size)
            # Must be bound on this path too: net.loss() below receives it
            # unconditionally, and leaving it undefined raised NameError.
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=hparams.batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        histograms=hparams.histograms,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality,
        train_mode=True)

    # A strength of exactly 0 is passed on as None instead.
    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    loss = net.loss(
        input_batch=audio_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=hparams.l2_regularization_strength)

    optimizer = optimizer_factory[hparams.optimizer](
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    run_metadata = tf.RunMetadata()

    # Set up session.  log_device_placement=False: do not log which device
    # each op is placed on.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model; max_to_keep caps how many
    # checkpoints are retained.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)

    try:
        saved_global_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1

    except:
        # Deliberately bare: the message is printed for any failure and the
        # exception is re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    last_saved_step = saved_global_step
    try:
        for step in range(saved_global_step + 1, hparams.num_steps + 1):
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                loss_value, _ = sess.run([loss, optim],
                                         options=run_options,
                                         run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                loss_value, _ = sess.run([loss, optim])

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` is still None when the loop body never ran (the restored
        # step is already past num_steps); comparing None with an int would
        # raise TypeError on Python 3 and hide the real error, so guard it.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)