Example #1
0
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower of the autoencoder-G graph with an L1 loss.

        Tower 0 also creates the input-pipeline nodes and the shared
        accumulator lists; every tower then appends its own batch and loss
        tensors to those lists.
        """
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            # 2**14-sample canvas — assumes the TFRecords were written with
            # this length; TODO confirm against the dataset writer
            self.get_wav, self.get_noisy = read_and_decode(
                filename_queue, 2**14)
        # load the data to input pipeline
        wavbatch, \
        noisybatch = tf.train.shuffle_batch([self.get_wav,
                                             self.get_noisy],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='wav_and_noisy')
        if gpu_idx == 0:
            # per-tower accumulators, created once
            self.Gs = []
            self.zs = []
            self.gtruth_wavs = []
            self.gtruth_noisy = []

        self.gtruth_wavs.append(wavbatch)
        self.gtruth_noisy.append(noisybatch)

        # add channels dimension to manipulate in D and G
        wavbatch = tf.expand_dims(wavbatch, -1)
        noisybatch = tf.expand_dims(noisybatch, -1)
        if gpu_idx == 0:
            self.reference_G = self.generator(noisybatch,
                                              is_ref=True,
                                              spk=None,
                                              z_on=False)
            # generator returns a sequence; keep only the output wave
            self.reference_G = self.reference_G[0]

        G = self.generator(noisybatch, is_ref=False, spk=None, z_on=False)
        print('GAE shape: ', G.get_shape())
        self.Gs.append(G)

        # audio/histogram summaries for TensorBoard monitoring
        self.rl_audio_summ = audio_summary('real_audio', wavbatch)
        self.real_w_summ = histogram_summary('real_wav', wavbatch)
        self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
        self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
        self.gen_audio_summ = audio_summary('G_audio', G)
        self.gen_summ = histogram_summary('G_wav', G)

        if gpu_idx == 0:
            self.g_losses = []

        # Add the L1 loss to G.
        # BUGFIX: tf.sub was removed in TF 1.0; tf.subtract is the API the
        # other build functions in this file already use.
        g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

        self.g_losses.append(g_loss)

        self.g_loss_sum = scalar_summary("g_loss", g_loss)

        if gpu_idx == 0:
            self.get_vars()
Example #2
0
File: model.py  Project: cc-cherie/segan
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower of the autoencoder-G graph with an L1 loss.

        Tower 0 also creates the input-pipeline nodes and the shared
        accumulator lists; every tower then appends its own batch and loss
        tensors to those lists.
        """
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                           2 ** 14)
        # load the data to input pipeline
        wavbatch, \
        noisybatch = tf.train.shuffle_batch([self.get_wav,
                                             self.get_noisy],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='wav_and_noisy')
        if gpu_idx == 0:
            # per-tower accumulators, created once
            self.Gs = []
            self.zs = []
            self.gtruth_wavs = []
            self.gtruth_noisy = []

        self.gtruth_wavs.append(wavbatch)
        self.gtruth_noisy.append(noisybatch)

        # add channels dimension to manipulate in D and G
        wavbatch = tf.expand_dims(wavbatch, -1)
        noisybatch = tf.expand_dims(noisybatch, -1)
        if gpu_idx == 0:
            # NOTE(review): unlike the sibling implementation, the full
            # generator return value is kept here (not indexed with [0]) —
            # confirm downstream code expects the whole return
            self.reference_G = self.generator(noisybatch, is_ref=True,
                                              spk=None, z_on=False)

        G = self.generator(noisybatch, is_ref=False, spk=None, z_on=False)
        print('GAE shape: ', G.get_shape())
        self.Gs.append(G)

        # audio/histogram summaries for TensorBoard monitoring
        self.rl_audio_summ = audio_summary('real_audio', wavbatch)
        self.real_w_summ = histogram_summary('real_wav', wavbatch)
        self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
        self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
        self.gen_audio_summ = audio_summary('G_audio', G)
        self.gen_summ = histogram_summary('G_wav', G)

        if gpu_idx == 0:
            self.g_losses = []

        # Add the L1 loss to G.
        # BUGFIX: tf.sub was removed in TF 1.0; tf.subtract is the API the
        # other build functions in this file already use.
        g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

        self.g_losses.append(g_loss)

        self.g_loss_sum = scalar_summary("g_loss", g_loss)

        if gpu_idx == 0:
            self.get_vars()
Example #3
0
def input_fn(dataset_dir='', num_epochs=1, canvas_size=32, preemph=0.1, batch_size=32):
    """Estimator-style input function over a (wav, noisy) TFRecord file.

    Args:
        dataset_dir: path to the TFRecord file to read.
        num_epochs: number of epochs the filename queue repeats the file.
        canvas_size: samples per example, passed to read_and_decode.
        preemph: pre-emphasis factor, passed to read_and_decode.
        batch_size: size of each shuffled batch.

    Returns:
        (features, labels) where features is {'wav_and_noisy': noisy_batch}
        and labels is the corresponding clean-speech batch.
    """
    filename_queue = tf.train.string_input_producer([dataset_dir], num_epochs=num_epochs)
    get_wav, get_noisy = read_and_decode(filename_queue, canvas_size, preemph)

    # load the data to input pipeline
    print(get_wav)
    wavbatch, \
        noisybatch = tf.train.shuffle_batch([get_wav,
                                             get_noisy],
                                            batch_size=batch_size,
                                            num_threads=2,
                                            capacity=1000 + 3 * batch_size,
                                            min_after_dequeue=1000,
                                            name='wav_and_noisy')
    print(wavbatch)

    # Count records with a generator expression instead of a manual
    # counter loop (the loop variable was unused).
    num_examples = sum(1 for _ in tf.python_io.tf_record_iterator(dataset_dir))
    print('!!!!!!!!!!!!!!!!total examples in TFRecords {}: {}'.format(dataset_dir,
                                                        num_examples))
    ################################################################
    # Clean speech is the regression target.
    labels = wavbatch
    return {"wav_and_noisy": noisybatch}, labels
Example #4
0
def main(_):
    """Train a 'deep LP' model and periodically compare it against LPC.

    An AE generator maps Hankel-framed waveform context to features; a
    linear predictor W (fed, not trained) maps those features to the next
    sample. Every `maxstep` steps the learned coefficients are refit by
    least squares and plotted against classic order-p LPC analysis.
    """
    file_queue = tf.train.string_input_producer([FLAGS.e2e_dataset])
    get_wav = read_and_decode(file_queue, FLAGS.canvas_size)
    wavbatch = tf.train.shuffle_batch([get_wav],
                                      batch_size=FLAGS.batch_size,
                                      num_threads=2,
                                      capacity=1000 + 3 * FLAGS.batch_size,
                                      min_after_dequeue=1000,
                                      name='wav_and_noisy')
    savefile = 'DeepLPcoeff.npz'
    learning_rate = 0.00001  # 0.0001

    deltamaxstep = 50  # how far each save pushes the next checkpoint out
    maxstep = 5000  # 10000
    test_epochs = 100

    training_epochs = 10  # inner optimizer passes per batch (was 5000)
    display_step = int(maxstep / 10)  # 500
    # LP order. (An earlier dead assignment `p = 8` was removed.)
    p = 18

    # Pad the canvas so the Hankel-framed input keeps p context samples.
    FLAGS.canvas_size = FLAGS.canvas_size + p

    # Graph inputs: framed context X, target samples Y.
    X = tf.placeholder(tf.float32, [FLAGS.canvas_size, p])
    Y = tf.placeholder(tf.float32, [FLAGS.canvas_size, 1])

    class param:
        """Lightweight config object for AEGenerator."""
        def __init__(self):
            self.g_enc_depths = ''  # encoder feature-map depths (set below)
            self.d_num_fmaps = ''  # discriminator feature maps (unused here)
            self.bias_downconv = False
            self.deconv_type = 'deconv'
            self.bias_deconv = False

    aparam = param()  # build the config object

    aparam.g_enc_depths = [
        16
    ]  # , 32]#, 32, 64]#, 64, 128, 128, 256, 256, 512, 1024]

    generator = AEGenerator(aparam)

    G = generator(X, is_ref=False, z_on=False)
    G = tf.squeeze(G)

    # LP coefficients are fed at run time, not trained, so W is a placeholder.
    W = tf.placeholder(tf.float32, [p, 1])

    # Prediction: linear combination of the generator's output frames.
    y_pred = tf.matmul(G, W)

    # Minimize the squared prediction error.
    cost = tf.reduce_mean(tf.pow(Y - y_pred, 2))
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        sess.run(tf.global_variables_initializer())

        state = load_trainable_vars(
            sess, savefile)  # must load AFTER the initializer

        # must use this same Session to perform all training
        log = str(state.get('log', ''))

        step = 1
        training_cost = 0

        try:
            while not coord.should_stop():
                inputdata = sess.run([wavbatch])
                inputdata = np.squeeze(inputdata)

                # Hankel matrix of the zero-padded signal: each row holds
                # the p previous samples used to predict the current one.
                train_X = np.asarray(
                    hankel(np.append(np.zeros(p), inputdata), np.zeros(
                        (p, 1))))
                train_Y = np.asarray([np.append(inputdata, np.zeros((p, 1)))])

                # Classic order-p LPC coefficients for the same frame,
                # reversed to match the Hankel row layout.
                a, _, _ = lpc2(inputdata, p)
                b = -a[1:]
                lpca = np.asarray([b[::-1]]).T

                train_Y = train_Y.T

                # Several optimizer passes on this batch.
                for epoch in range(training_epochs):
                    sess.run(optimizer,
                             feed_dict={
                                 X: train_X,
                                 Y: train_Y,
                                 W: lpca
                             })

                training_cost += sess.run(cost,
                                          feed_dict={
                                              X: train_X,
                                              Y: train_Y,
                                              W: lpca
                                          })

                # Running-average cost in dB.
                averagecost = 10 * np.log10(training_cost / step)

                if step % display_step == 0:
                    print("step ", step, "Training cost=", averagecost, '\n')

                step += 1

                if step >= maxstep:
                    log = log + '\n cost={nmse:.6f} dB in {i} iterations'.format(
                        nmse=averagecost, i=step)

                    state['log'] = log

                    save_trainable_vars(sess, savefile, **state)

                    # Push the next checkpoint/test pass further out.
                    maxstep = maxstep + deltamaxstep

                    for i in range(test_epochs):
                        # BUGFIX: the original ran
                        #   sess.run([wavbatch, noisybatch])
                        # but no `noisybatch` exists in this script (the
                        # queue yields clean waves only), so the first test
                        # pass raised NameError and aborted training.
                        inputdata = sess.run(wavbatch)
                        inputdata = np.squeeze(inputdata)
                        xt = inputdata
                        num_sample = len(xt)

                        def nextpow2(x):
                            # smallest e with 2**e >= x
                            return np.ceil(np.log2(x))

                        zpf = 3  # zero-padding factor for the FFT
                        Nfft = int(2**nextpow2(num_sample * zpf))

                        Org_XW = sp.fft(xt, Nfft)

                        test_X = np.asarray(
                            hankel(np.append(np.zeros(p), inputdata),
                                   np.zeros((p, 1))))

                        test_Y = np.asarray(
                            [np.append(inputdata, np.zeros((p, 1)))])

                        a, _, _ = lpc2(inputdata, p)
                        b = -a[1:]
                        lpca = np.asarray([b[::-1]]).T

                        test_Y = test_Y.T
                        test_G = sess.run(G, feed_dict={X: test_X})

                        # Least-squares refit of W on the generator output.
                        invX = np.linalg.pinv(test_G)
                        myW = np.dot(invX, test_Y)
                        my_est = np.dot(test_G, myW)
                        my_est = my_est[0:-p]

                        plt.figure(1)
                        plt.subplot(221)
                        plt.plot(test_Y[0:-p], label='Original data')
                        plt.plot(test_Y[0:-p] - my_est,
                                 'r',
                                 label='my residue line')
                        plt.plot(test_Y[0:-p] - np.matmul(test_X[0:-p], lpca),
                                 'b--',
                                 label='LP residue line')
                        plt.legend()
                        print(
                            "LPC error is ",
                            np.mean(
                                np.square(test_Y[0:-p] -
                                          np.matmul(test_X[0:-p], lpca))))
                        print("my error is",
                              np.mean(np.square(test_Y[0:-p] - my_est)))

                        plt.subplot(222)
                        plt.plot(lpca, 'r--', label='LP coef')
                        plt.plot(myW, 'b', label='deep LP coef')
                        plt.legend()

                        Fs = 16000
                        myDLPcoef = np.append(1, -myW[::-1])

                        # Frequency responses of 1/A(z) for both sets.
                        w0, Org_h0 = sig.freqz(1, myDLPcoef, Nfft, whole=True)
                        Org_F0 = Fs * w0 / (2 * np.pi)
                        Org_LP_coef = a
                        w, Org_h = sig.freqz(1, Org_LP_coef, Nfft, whole=True)
                        Org_F = Fs * w / (2 * np.pi)

                        Org_mag = abs(Org_XW)
                        Org_mag = 20 * np.log10(Org_mag)

                        f = np.asarray(range(Nfft)).astype(
                            np.float32) * Fs / Nfft

                        plt.subplot(212)
                        plt.plot(f, Org_mag, 'k-', label='signal')

                        plt.plot(Org_F,
                                 20 * np.log10(abs(Org_h)),
                                 'b--',
                                 label='lpc')
                        plt.plot(Org_F0,
                                 20 * np.log10(abs(Org_h0)),
                                 label='mylpc')

                        plt.xlim((0, Fs / 2))
                        plt.legend()

                        # One-step LP prediction residual (diagnostic only;
                        # renamed from `e` to avoid shadowing the handlers'
                        # exception variable).
                        filtercoeff = np.append(0, -Org_LP_coef[1:])
                        est_x = sig.lfilter(filtercoeff, 1,
                                            xt)  # Estimated signal
                        residual = xt - est_x

                        plt.show()
                        plt.close('all')

        # BUGFIX: IOError (a subclass of Exception) was listed *after* the
        # generic handler and was therefore unreachable; it also called
        # coord.should_stop(), which only queries the flag. Put the specific
        # handler first and actually request the stop.
        except IOError as e:
            print(e)
            coord.request_stop()
        except Exception as e:
            print(e)
            coord.request_stop()

        coord.request_stop()

        coord.join(threads)
Example #5
0
File: model.py  Project: zhuzigege/segan
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower of the SEGAN graph.

        Tower 0 creates the input pipeline, the reference generator pass, a
        dummy discriminator (to create variables for later reuse) and the
        shared accumulator lists. Every tower wires its generator,
        real/fake discriminators, LSGAN + L1 losses and summaries.
        """
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_wav, self.get_noisy = read_and_decode(
                filename_queue, self.canvas_size, self.preemph)
        # load the data to input pipeline
        wavbatch, \
        noisybatch = tf.train.shuffle_batch([self.get_wav,
                                             self.get_noisy],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='wav_and_noisy')
        if gpu_idx == 0:
            # per-tower accumulators, created once
            self.Gs = []
            self.zs = []
            self.gtruth_wavs = []
            self.gtruth_noisy = []

        self.gtruth_wavs.append(wavbatch)
        self.gtruth_noisy.append(noisybatch)

        # add channels dimension to manipulate in D and G
        wavbatch = tf.expand_dims(wavbatch, -1)
        noisybatch = tf.expand_dims(noisybatch, -1)
        # by default leaky relu is used
        do_prelu = False
        if self.g_nl == 'prelu':
            do_prelu = True
        if gpu_idx == 0:
            ref_Gs = self.generator(noisybatch,
                                    is_ref=True,
                                    spk=None,
                                    do_prelu=do_prelu)
            print('num of G returned: ', len(ref_Gs))
            self.reference_G = ref_Gs[0]
            self.ref_z = ref_Gs[1]
            if do_prelu:
                self.ref_alpha = ref_Gs[2:]
                self.alpha_summ = []
                for m, ref_alpha in enumerate(self.ref_alpha):
                    # add a summary per alpha
                    self.alpha_summ.append(
                        histogram_summary('alpha_{}'.format(m), ref_alpha))
            # make a dummy copy of discriminator to have variables and then
            # be able to set up the variable reuse for all other devices
            # merge along channels and this would be a real batch
            # BUGFIX: tf.concat takes (values, axis) since TF 1.0; the old
            # (axis, values) order used here was removed (example 6 in this
            # file already uses the new signature).
            dummy_joint = tf.concat([wavbatch, noisybatch], 2)
            dummy = discriminator(self, dummy_joint, reuse=False)

        G, z = self.generator(noisybatch,
                              is_ref=False,
                              spk=None,
                              do_prelu=do_prelu)
        self.Gs.append(G)
        self.zs.append(z)

        # pair the clean/fake wave with the noisy input along channels
        D_rl_joint = tf.concat([wavbatch, noisybatch], 2)
        D_fk_joint = tf.concat([G, noisybatch], 2)
        # build rl discriminator
        d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
        # build fk G discriminator
        d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

        # make disc variables summaries
        self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
        self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)

        self.rl_audio_summ = audio_summary('real_audio', wavbatch)
        self.real_w_summ = histogram_summary('real_wav', wavbatch)
        self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
        self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
        self.gen_audio_summ = audio_summary('G_audio', G)
        self.gen_summ = histogram_summary('G_wav', G)

        if gpu_idx == 0:
            self.g_losses = []
            self.g_l1_losses = []
            self.g_adv_losses = []
            self.d_rl_losses = []
            self.d_fk_losses = []
            self.d_losses = []

        # LSGAN objectives: real logits -> 1 and fake -> 0 for D;
        # fake -> 1 for G's adversarial term.
        d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
        d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
        g_adv_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 1.))

        d_loss = d_rl_loss + d_fk_loss

        # Add the L1 loss to G.
        # BUGFIX: tf.sub was removed in TF 1.0; use tf.subtract.
        g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.subtract(G,
                                                                       wavbatch)))

        g_loss = g_adv_loss + g_l1_loss

        self.g_l1_losses.append(g_l1_loss)
        self.g_adv_losses.append(g_adv_loss)
        self.g_losses.append(g_loss)
        self.d_rl_losses.append(d_rl_loss)
        self.d_fk_losses.append(d_fk_loss)
        self.d_losses.append(d_loss)

        self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
        self.d_fk_loss_sum = scalar_summary("d_fk_loss", d_fk_loss)
        self.g_loss_sum = scalar_summary("g_loss", g_loss)
        self.g_loss_l1_sum = scalar_summary("g_l1_loss", g_l1_loss)
        self.g_loss_adv_sum = scalar_summary("g_adv_loss", g_adv_loss)
        self.d_loss_sum = scalar_summary("d_loss", d_loss)

        if gpu_idx == 0:
            self.get_vars()
Example #6
0
    def build_model_single_gpu(self, gpu_idx):
        """Wire one GPU tower of the multi-depth SEGAN graph.

        Tower 0 additionally creates the input pipeline, the reference
        generator pass, a dummy discriminator (to create the variables that
        later calls reuse) and the shared per-depth accumulator lists.
        """
        if gpu_idx == 0:
            # Input-pipeline nodes are created once, on the first tower.
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_wav, self.get_noisy = read_and_decode(
                filename_queue, self.canvas_size, self.preemph)
        # Draw a shuffled (clean, noisy) batch from the pipeline.
        wavbatch, \
        noisybatch = tf.train.shuffle_batch([self.get_wav,
                                             self.get_noisy],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='wav_and_noisy')
        if gpu_idx == 0:
            # One accumulator per repetition depth for G outputs and zs.
            self.Gs = [[] for _ in range(self.depth)]
            self.zs = [[] for _ in range(self.depth)]
            self.gtruth_wavs = []
            self.gtruth_noisy = []
        self.gtruth_wavs.append(wavbatch)
        self.gtruth_noisy.append(noisybatch)

        # Give both waves a trailing channels axis for D and G.
        wavbatch = tf.expand_dims(wavbatch, -1)
        noisybatch = tf.expand_dims(noisybatch, -1)
        # Generator non-linearity: leaky relu unless prelu was requested.
        do_prelu = (self.g_nl == 'prelu')
        if gpu_idx == 0:
            ref_Gs = self.generator(noisybatch,
                                    is_ref=True,
                                    spk=None,
                                    do_prelu=do_prelu)
            print('num of G returned: ', len(ref_Gs))
            self.reference_G = ref_Gs[0]  # wave returned by the generator
            self.ref_z = ref_Gs[1]  # latent z returned by the generator
            if do_prelu:
                self.ref_alpha = ref_Gs[2]
                # One histogram summary per prelu alpha, per depth.
                self.alpha_summ = [
                    [histogram_summary('alpha_{}_{}'.format(i, m), alpha)
                     for m, alpha in enumerate(self.ref_alpha[i])]
                    for i in range(self.depth)
                ]
            # A dummy discriminator pass creates the variables so that all
            # later calls (and the other devices) can reuse them; channels
            # are merged so the pair forms one real batch.
            dummy_joint = tf.concat([wavbatch, noisybatch], 2)
            dummy = discriminator(self, dummy_joint, reuse=False)

        Gs, zs = self.generator(noisybatch,
                                is_ref=False,
                                spk=None,
                                do_prelu=do_prelu)
        for i in range(self.depth):
            self.Gs[i].append(Gs[i])
            self.zs[i].append(zs[i])

        # Pair each wave with its noisy counterpart along channels.
        D_rl_joint = tf.concat([wavbatch, noisybatch], 2)  # real
        D_fk_joint = [tf.concat([Gs[i], noisybatch], 2)
                      for i in range(self.depth)]
        # Discriminator on the real pair, then on each fake pair.
        d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
        d_fk_logits = [discriminator(self, joint, reuse=True)
                       for joint in D_fk_joint]

        if gpu_idx == 0:
            self.g_losses = []
            self.g_l1_losses = [[] for _ in range(self.depth)]
            self.g_adv_losses = []
            self.d_rl_losses = []
            self.d_fk_losses = [[] for _ in range(self.depth)]
            self.d_losses = []

        # LSGAN objectives: real logits -> 1 and fake -> 0 for D;
        # fake -> 1 for G's adversarial terms.
        d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
        d_fk_loss = [tf.reduce_mean(tf.squared_difference(logits, 0.))
                     for logits in d_fk_logits]
        g_adv_loss = 0.
        for logits in d_fk_logits:
            g_adv_loss += tf.reduce_mean(
                tf.squared_difference(logits, 1.))
        # Average the adversarial terms over the repetition depth.
        g_adv_loss /= self.depth

        d_loss = d_rl_loss
        for fk_loss in d_fk_loss:
            # Each fake term contributes averaged over depth.
            d_loss += fk_loss / self.depth

        # Per-depth weighted L1 reconstruction terms for G.
        g_l1_loss = [self.l1_lambda * self.weights[i] *
                     tf.reduce_mean(tf.abs(tf.subtract(Gs[i], wavbatch)))
                     for i in range(self.depth)]

        g_loss = g_adv_loss
        for l1_term in g_l1_loss:
            g_loss += l1_term

        # Stash this tower's losses in the shared accumulators.
        for i in range(self.depth):
            self.g_l1_losses[i].append(g_l1_loss[i])
            self.d_fk_losses[i].append(d_fk_loss[i])
        self.g_adv_losses.append(g_adv_loss)
        self.g_losses.append(g_loss)
        self.d_rl_losses.append(d_rl_loss)
        self.d_losses.append(d_loss)

        if gpu_idx == 0:
            self.get_vars()
Example #7
0
File: model.py  Project: sy2358/SAGAN
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower: input pipeline (tower 0), real/fake
        discriminators over (nn, ref) pairs, and LSGAN discriminator losses
        with their summaries. No generator is built in this variant."""
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_nn,\
                self.get_ref = read_and_decode(filename_queue,
                                                                  self.canvas_size,
                                                                  self.preemph)

        # load the data to input pipeline
        nnbatch, refbatch\
                   = tf.train.shuffle_batch([self.get_nn,
                                             self.get_ref],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='nn_and_ref')
        if gpu_idx == 0:
            # per-tower accumulators, created once on the first tower
            self.gtruth_nn = []
            self.gtruth_ref = []

        self.gtruth_nn.append(nnbatch)
        self.gtruth_ref.append(refbatch)

        # add channels dimension to manipulate in D and G
        nnbatch = tf.expand_dims(nnbatch, -1)
        refbatch = tf.expand_dims(refbatch, -1)

        # by default leaky relu is used
        do_prelu = False
        if self.g_nl == 'prelu':
            do_prelu = True
        if gpu_idx == 0:
            #self.sample_wavs = tf.placeholder(tf.float32, [self.batch_size,
            #                                               self.canvas_size],
            #                                  name='sample_wavs')
            # make a dummy copy of discriminator to have variables and then
            # be able to set up the variable reuse for all other devices
            # merge along channels and this would be a real batch
            dummy_joint = tf.concat([nnbatch, refbatch], 2)
            dummy = discriminator(self, dummy_joint, reuse=False)

        # add new dimension to merge with other pairs
        D_rl_joint = tf.concat([nnbatch, refbatch], 2)
        # NOTE(review): `G` is not defined anywhere in this method — this
        # line raises NameError at graph-build time. Presumably a generator
        # output was meant here (a generator call appears to have been
        # removed); confirm against the original project before running.
        D_fk_joint = tf.concat([nnbatch, G], 2)

        # build rl discriminator
        d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
        # build fk G discriminator
        d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

        # tf.Print passes the tensor through while logging mean logits and
        # the fraction classified correctly for real (>0.5) and fake (<0.5)
        d_rl_logits = tf.Print(d_rl_logits, [
            tf.reduce_mean(d_rl_logits),
            tf.reduce_mean(tf.cast(tf.greater(d_rl_logits, 0.5), tf.float32)),
            tf.reduce_mean(d_fk_logits),
            tf.reduce_mean(tf.cast(tf.less(d_fk_logits, 0.5), tf.float32))
        ], 'D_rl/D_fk (avg,#ratio correct) = ')

        self.d_rl_logits = d_rl_logits
        self.d_fk_logits = d_fk_logits

        # make disc variables summaries
        self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
        self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)
        #self.d_nfk_sum = histogram_summary("d_noisyfake", d_nfk_logits)

        self.noisy_audio_summ = audio_summary('nn_audio', nnbatch)
        self.noisy_w_summ = histogram_summary('nn_wav', nnbatch)

        if gpu_idx == 0:
            self.d_rl_losses = []
            self.d_fk_losses = []
            #self.d_nfk_losses = []
            self.d_losses = []

        # LSGAN objectives: real logits pushed to 1, fake to 0
        d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
        d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))

        d_loss = d_rl_loss + d_fk_loss

        self.d_rl_losses.append(d_rl_loss)
        self.d_fk_losses.append(d_fk_loss)
        #self.d_nfk_losses.append(d_nfk_loss)
        self.d_losses.append(d_loss)

        self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
        self.d_fk_loss_sum = scalar_summary("d_fk_loss", d_fk_loss)
        self.d_loss_sum = scalar_summary("d_loss", d_loss)
        #self.d_nfk_loss_sum = scalar_summary("d_nfk_loss",
        #                                     d_nfk_loss)

        if gpu_idx == 0:
            self.get_vars()
示例#8
0
文件: model.py 项目: cc-cherie/segan
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower of the SEGAN graph: input pipeline, generator,
        discriminator and LSGAN + L1 losses.

        On gpu_idx == 0 this additionally creates the shared input-pipeline
        readers, the reference generator (used for sampling/eval), a dummy
        discriminator pass that instantiates the D variables so later towers
        can reuse them, and the per-tower loss accumulator lists.

        Args:
            gpu_idx: integer index of the tower being built; towers > 0 reuse
                the variables created by tower 0.

        Side effects: appends this tower's tensors/losses to self.Gs, self.zs,
        self.gtruth_wavs, self.gtruth_noisy and the self.*_losses lists, and
        stores the summary ops on self.
        """
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_wav, self.get_noisy = read_and_decode(filename_queue,
                                                           self.canvas_size,
                                                           self.preemph)
        # load the data to input pipeline
        wavbatch, \
        noisybatch = tf.train.shuffle_batch([self.get_wav,
                                             self.get_noisy],
                                             batch_size=self.batch_size,
                                             num_threads=2,
                                             capacity=1000 + 3 * self.batch_size,
                                             min_after_dequeue=1000,
                                             name='wav_and_noisy')
        if gpu_idx == 0:
            self.Gs = []
            self.zs = []
            self.gtruth_wavs = []
            self.gtruth_noisy = []

        self.gtruth_wavs.append(wavbatch)
        self.gtruth_noisy.append(noisybatch)

        # add channels dimension to manipulate in D and G
        wavbatch = tf.expand_dims(wavbatch, -1)
        noisybatch = tf.expand_dims(noisybatch, -1)
        # by default leaky relu is used; PReLU only when requested via g_nl
        do_prelu = (self.g_nl == 'prelu')
        if gpu_idx == 0:
            ref_Gs = self.generator(noisybatch, is_ref=True,
                                    spk=None,
                                    do_prelu=do_prelu)
            print('num of G returned: ', len(ref_Gs))
            self.reference_G = ref_Gs[0]
            self.ref_z = ref_Gs[1]
            if do_prelu:
                # the remaining returned tensors are the PReLU alpha params
                self.ref_alpha = ref_Gs[2:]
                self.alpha_summ = []
                for m, ref_alpha in enumerate(self.ref_alpha):
                    # add a summary per alpha
                    self.alpha_summ.append(
                        histogram_summary('alpha_{}'.format(m), ref_alpha))
            # make a dummy copy of discriminator to have variables and then
            # be able to set up the variable reuse for all other devices
            # merge along channels and this would be a real batch
            # NOTE: tf.concat takes (values, axis) since TF 1.0; the old
            # tf.concat(2, [...]) order fails on modern TF.
            dummy_joint = tf.concat([wavbatch, noisybatch], 2)
            dummy = discriminator(self, dummy_joint,
                                  reuse=False)

        G, z = self.generator(noisybatch, is_ref=False, spk=None,
                              do_prelu=do_prelu)
        self.Gs.append(G)
        self.zs.append(z)

        # merge (signal, condition) pairs along the channel axis
        D_rl_joint = tf.concat([wavbatch, noisybatch], 2)
        D_fk_joint = tf.concat([G, noisybatch], 2)
        # build rl discriminator
        d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
        # build fk G discriminator
        d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

        # make disc variables summaries
        self.d_rl_sum = histogram_summary("d_real", d_rl_logits)
        self.d_fk_sum = histogram_summary("d_fake", d_fk_logits)
        #self.d_nfk_sum = histogram_summary("d_noisyfake", d_nfk_logits)

        self.rl_audio_summ = audio_summary('real_audio', wavbatch)
        self.real_w_summ = histogram_summary('real_wav', wavbatch)
        self.noisy_audio_summ = audio_summary('noisy_audio', noisybatch)
        self.noisy_w_summ = histogram_summary('noisy_wav', noisybatch)
        self.gen_audio_summ = audio_summary('G_audio', G)
        self.gen_summ = histogram_summary('G_wav', G)

        if gpu_idx == 0:
            self.g_losses = []
            self.g_l1_losses = []
            self.g_adv_losses = []
            self.d_rl_losses = []
            self.d_fk_losses = []
            #self.d_nfk_losses = []
            self.d_losses = []

        # LSGAN objectives: D pushes real -> 1, fake -> 0; G pushes fake -> 1
        d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
        d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
        #d_nfk_loss = tf.reduce_mean(tf.squared_difference(d_nfk_logits, 0.))
        g_adv_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 1.))

        d_loss = d_rl_loss + d_fk_loss

        # Add the L1 reconstruction loss to G (tf.sub was removed in TF 1.0;
        # tf.subtract is the supported name)
        g_l1_loss = self.l1_lambda * tf.reduce_mean(
            tf.abs(tf.subtract(G, wavbatch)))

        g_loss = g_adv_loss + g_l1_loss

        self.g_l1_losses.append(g_l1_loss)
        self.g_adv_losses.append(g_adv_loss)
        self.g_losses.append(g_loss)
        self.d_rl_losses.append(d_rl_loss)
        self.d_fk_losses.append(d_fk_loss)
        #self.d_nfk_losses.append(d_nfk_loss)
        self.d_losses.append(d_loss)

        self.d_rl_loss_sum = scalar_summary("d_rl_loss", d_rl_loss)
        self.d_fk_loss_sum = scalar_summary("d_fk_loss",
                                            d_fk_loss)
        #self.d_nfk_loss_sum = scalar_summary("d_nfk_loss",
        #                                     d_nfk_loss)
        self.g_loss_sum = scalar_summary("g_loss", g_loss)
        self.g_loss_l1_sum = scalar_summary("g_l1_loss", g_l1_loss)
        self.g_loss_adv_sum = scalar_summary("g_adv_loss", g_adv_loss)
        self.d_loss_sum = scalar_summary("d_loss", d_loss)

        if gpu_idx == 0:
            self.get_vars()
示例#9
0
    def build_model_single_gpu(self, gpu_idx):
        """Build one GPU tower of the GAN graph for the seizure/non-seizure
        variant: input pipeline, generator, discriminator and losses.

        On gpu_idx == 0 this also creates the shared TFRecord readers, the
        reference generator, a dummy discriminator pass that instantiates the
        D variables for later reuse, and the loss accumulator lists.

        Args:
            gpu_idx: integer index of the tower; towers > 0 reuse the
                variables created by tower 0.

        Side effects: appends this tower's tensors/losses to self.Gs, self.zs,
        self.gtruth_seiz, self.gtruth_nonseiz and the self.*_losses lists.
        """
        if gpu_idx == 0:
            # create the nodes to load for input pipeline
            filename_queue = tf.train.string_input_producer([self.e2e_dataset])
            self.get_seiz, self.get_nonseiz = read_and_decode(
                filename_queue, self.canvas_size)
        # load the data to input pipeline
        seiz_batch, nonseiz_batch = tf.train.shuffle_batch(
            [self.get_seiz, self.get_nonseiz],
            batch_size=self.batch_size,
            num_threads=2,
            capacity=1000 + 3 * self.batch_size,
            min_after_dequeue=1000,
            name='seiz_and_nonseiz')
        if gpu_idx == 0:
            self.Gs = []
            self.zs = []
            self.gtruth_seiz = []
            self.gtruth_nonseiz = []

        self.gtruth_seiz.append(seiz_batch)
        self.gtruth_nonseiz.append(nonseiz_batch)

        # add channels dimension to manipulate in D and G
        seiz_batch = tf.expand_dims(seiz_batch, -1)
        nonseiz_batch = tf.expand_dims(nonseiz_batch, -1)
        if gpu_idx == 0:
            ref_Gs = self.generator(nonseiz_batch, is_ref=True)
            print('num of G returned: ', len(ref_Gs))
            self.reference_G = ref_Gs[0]
            self.ref_z = ref_Gs[1]

            # make a dummy copy of discriminator to create the variables
            # NOTE: tf.concat takes (values, axis) since TF 1.0; the old
            # tf.concat(2, [...]) order fails on modern TF.
            dummy_joint = tf.concat([seiz_batch, nonseiz_batch], 2)
            dummy = discriminator(self, dummy_joint, reuse=False)
        # build generator
        G, z = self.generator(nonseiz_batch, is_ref=False)
        self.Gs.append(G)
        self.zs.append(z)

        # merge (signal, condition) pairs along the channel axis
        D_rl_joint = tf.concat([seiz_batch, nonseiz_batch], 2)
        D_fk_joint = tf.concat([G, nonseiz_batch], 2)
        # build discriminator
        d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
        d_fk_logits = discriminator(self, D_fk_joint, reuse=True)

        if gpu_idx == 0:
            self.g_losses = []
            self.g_l1_losses = []
            self.g_adv_losses = []
            self.d_rl_losses = []
            self.d_fk_losses = []
            self.d_losses = []

        ### Discriminator loss (LSGAN: real -> 1, fake -> 0) ###
        d_rl_loss = tf.reduce_mean(tf.squared_difference(d_rl_logits, 1.))
        d_fk_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 0.))
        g_adv_loss = tf.reduce_mean(tf.squared_difference(d_fk_logits, 1.))
        d_loss = d_rl_loss + d_fk_loss
        ### Generator loss: adversarial + L1 reconstruction ###
        # tf.sub was removed in TF 1.0; tf.subtract is the supported name
        g_l1_loss = self.l1_lambda * tf.reduce_mean(
            tf.abs(tf.subtract(G, seiz_batch)))
        g_loss = g_adv_loss + g_l1_loss

        self.g_l1_losses.append(g_l1_loss)
        self.g_adv_losses.append(g_adv_loss)
        self.g_losses.append(g_loss)
        self.d_rl_losses.append(d_rl_loss)
        self.d_fk_losses.append(d_fk_loss)
        self.d_losses.append(d_loss)

        if gpu_idx == 0:
            self.get_vars()