Example No. 1
def initialize_networks(args, device):
    # network
    En_A = networks.encoder(in_nc=args.in_ngc,
                            nf=args.ngf,
                            img_size=args.img_size).to(device)
    En_B = networks.encoder(in_nc=args.in_ngc,
                            nf=args.ngf,
                            img_size=args.img_size).to(device)
    De_A = networks.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
    De_B = networks.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
    Disc_A = networks.discriminator(in_nc=args.in_ndc,
                                    out_nc=args.out_ndc,
                                    nf=args.ndf,
                                    img_size=args.img_size).to(device)
    Disc_B = networks.discriminator(in_nc=args.in_ndc,
                                    out_nc=args.out_ndc,
                                    nf=args.ndf,
                                    img_size=args.img_size).to(device)

    print('---------- Networks initialized -------------')
    utils.print_network(En_A)
    utils.print_network(En_B)
    utils.print_network(De_A)
    utils.print_network(De_B)
    utils.print_network(Disc_A)
    utils.print_network(Disc_B)
    print('-----------------------------------------------')

    all_networks = [En_A, En_B, De_A, De_B, Disc_A, Disc_B]
    return all_networks
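A minimal usage sketch for the function above (the argparse.Namespace and all values are hypothetical; only the field names mirror what initialize_networks reads):

import argparse
import torch

# Hypothetical hyperparameters; only the field names come from the snippet above.
args = argparse.Namespace(in_ngc=3, out_ngc=3, in_ndc=3, out_ndc=1,
                          ngf=64, ndf=32, img_size=256)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
En_A, En_B, De_A, De_B, Disc_A, Disc_B = initialize_networks(args, device)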
Example No. 2
def convert_decoder_to_pb(X):

    features_to_use = 'Relu_{}_1'.format(X)
    pb_file_path = 'inference/decoder_{}.pb'.format(X)

    graph = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = GPU_TO_USE

    with graph.as_default():

        features = tf.placeholder(dtype=tf.float32,
                                  name='features',
                                  shape=[None, None, None, NUM_FEATURES[X]])
        restored_images = tf.identity(decoder(features, features_to_use),
                                      'restored_images')

        saver = tf.train.Saver()
        with tf.Session(graph=graph, config=config) as sess:
            saver.restore(sess, CHECKPOINT[X])

            # output ops
            keep_nodes = ['restored_images']

            input_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), output_node_names=keep_nodes)
            output_graph_def = tf.graph_util.remove_training_nodes(
                input_graph_def, protected_nodes=keep_nodes)

            with tf.gfile.GFile(pb_file_path, 'wb') as f:
                f.write(output_graph_def.SerializeToString())
            print('%d ops in the final graph.' % len(output_graph_def.node))
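For reference, a frozen graph exported this way can be loaded back with the standard TF1 API; a sketch, assuming the tensor names ('features', 'restored_images') defined above:

import tensorflow as tf

def load_frozen_decoder(pb_file_path):
    # Parse the serialized GraphDef written by convert_decoder_to_pb().
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_file_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    # Import it into a fresh graph and look up the input/output tensors by name.
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    features = graph.get_tensor_by_name('features:0')
    restored_images = graph.get_tensor_by_name('restored_images:0')
    return graph, features, restored_images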
Example No. 3
def train_autoencoder(X_dir, Y_dir, batch_size, dim, X_channels, Y_channels,
                      log_dir, shuffle, **kwargs):
    # Dataset
    pairs_filename = load_dataset(X_dir, Y_dir)
    partition = partition_dataset(pairs_filename)
    # Generators
    training_generator = DataGenerator(partition['train'], batch_size, dim,
                                       X_channels, Y_channels, shuffle)
    validation_generator = DataGenerator(partition['validation'], batch_size,
                                         dim, X_channels, Y_channels, shuffle)
    # Design model
    input_img = Input(shape=(*dim, X_channels))
    encoder_img = encoder(n_features=8)
    decoder_lbl = decoder(n_output_features=Y_channels, n_features=8)
    latent_img = encoder_img(input_img)
    latent_lbl = latent_img  # TODO Put res_net here for image to label translation
    restored_lbl = decoder_lbl(latent_lbl)
    img2lbl = Model(input_img, restored_lbl)
    img2lbl.compile(optimizer='adadelta', loss='mean_squared_error')
    # Print summary
    img2lbl.summary()
    print('Model contains a total of %d trainable layers.\n' %
          len(img2lbl.trainable_weights))
    # Train model
    tbi_callback = TensorBoardImage(log_dir=log_dir,
                                    validation_data=validation_generator)
    tb_callback = TensorBoard(log_dir=log_dir)
    img2lbl.fit_generator(generator=training_generator,
                          validation_data=validation_generator,
                          epochs=50,
                          callbacks=[tb_callback, tbi_callback],
                          use_multiprocessing=True,
                          workers=2)
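A possible invocation of the training routine above (all paths and sizes are placeholders, not values from the original project):

train_autoencoder(X_dir='data/images', Y_dir='data/labels',
                  batch_size=16, dim=(128, 128),
                  X_channels=3, Y_channels=1,
                  log_dir='logs/img2lbl', shuffle=True)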
Example No. 4
def generator(speaker_embedding,
              inputs,
              is_training=True,
              scope_name='generator',
              reuse=None):
    '''Generate features.

    Args:
      speaker_embedding: A `Tensor` of type `float32` containing speaker information. [N, E]
      inputs: A `Tensor` of type `float32` containing speech features.
      is_training: Boolean, whether to train or inference.
      scope_name: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns:
      A decoded `Tensor` for the target speaker.
      The VAE mu vector.
      The VAE log_var vector.
    '''
    with tf.variable_scope(scope_name, reuse=reuse):
        sample, mu, log_var = encoder(inputs,
                                      is_training=is_training,
                                      scope='vae_encoder')  # [N, T, E]
        #speaker_embedding = tf.expand_dims(speaker_embedding, axis=1) # [N, 1, E]
        speaker_embedding = tf.tile(speaker_embedding,
                                    [1, tf.shape(sample)[1], 1])  # [N, T, E]
        encoded = tf.concat((speaker_embedding, sample),
                            axis=-1)  # [N, T, E+G]
        outputs = decoder(encoded,
                          is_training=is_training,
                          scope='vae_decoder')
        return outputs, mu, log_var  # [N, T, C]
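A hedged usage sketch (shapes follow the docstring; the feature sizes are placeholders). Note that with the expand_dims line commented out, tf.tile receives three multiples, so the embedding must already be rank-3, i.e. [N, 1, E], when passed in:

speaker_embedding = tf.placeholder(tf.float32, [None, 1, 128])  # [N, 1, E], placeholder size
mels = tf.placeholder(tf.float32, [None, None, 80])             # [N, T, C], placeholder size
outputs, mu, log_var = generator(speaker_embedding, mels, is_training=True)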
Example No. 5
    def sample_x_from_prior(self, noise):

        sample_x, _ = decoder(self.opts,
                              input=noise,
                              output_dim=self.output_dim,
                              scope='decoder',
                              reuse=True,
                              is_training=False)
        return sample_x
Example No. 6
def generator(inputs, is_training=True, scope_name='generator', reuse=None):
    with tf.variable_scope(scope_name, reuse=reuse):
        sample, mu, log_var = encoder(inputs,
                                      is_training=is_training,
                                      scope='vae_encoder')  # [N, T, E]
        # speaker_embedding = tf.tile(speaker_embedding, [1, tf.shape(sample)[1], 1]) # [N, T, E]
        # tf.tile() expands a tensor by repeating it along each dimension the given number of times;
        # encoded = tf.concat((speaker_embedding, sample), axis=-1) # [N, T, E+G]
        outputs = decoder(sample, is_training=is_training, scope='vae_decoder')
        return outputs, mu, log_var  # [N, T, C]
Example No. 7
    def forward_pass(self, inputs, is_training, reuse=False):

        enc_z, enc_mean, enc_Sigma = encoder(self.opts,
                                             input=inputs,
                                             output_dim=2 * self.opts['zdim'],
                                             scope='encoder',
                                             reuse=reuse,
                                             is_training=is_training)

        dec_x, dec_mean = decoder(self.opts,
                                  input=enc_z,
                                  output_dim=self.output_dim,
                                  scope='decoder',
                                  reuse=reuse,
                                  is_training=is_training)
        return enc_z, enc_mean, enc_Sigma, dec_x, dec_mean
Example No. 8
    def forward_pass(self, inputs, is_training, reuse=False):
        """Performs a full pass over the model.

        inputs:                                 [batch,imgdim]
        return:
        enc_cat_logits:                         [batch,K]
        enc_z/enc_gauss_mean/enc_gauss_Sigma:   [batch,K,zdim]
        dec_mean, dec_Sigma:                    [batch,K,imgdim]

        """
        # Encode
        enc_cat_logits, enc_gauss_mean, enc_gauss_Sigma = encoder(
            self.opts,
            input=inputs,
            cat_output_dim=self.opts['nmixtures'],
            gaus_output_dim=2 * self.opts['zdim'],
            scope='encoder',
            reuse=reuse,
            is_training=is_training)
        enc_gauss_mean = tf.reshape(
            enc_gauss_mean, [-1, self.opts['nmixtures'], self.opts['zdim']])
        enc_gauss_Sigma = tf.reshape(
            enc_gauss_Sigma, [-1, self.opts['nmixtures'], self.opts['zdim']])
        enc_z = sample_all_gmm(self.opts, enc_gauss_mean,
                               enc_gauss_Sigma)  #[batch,nmixtures,zdim]
        enc_z_flat = tf.reshape(enc_z, [-1, self.opts['zdim']])
        # Decode
        dec_mean, dec_Sigma = decoder(self.opts,
                                      input=enc_z_flat,
                                      output_dim=self.output_dim,
                                      scope='decoder',
                                      reuse=reuse,
                                      is_training=is_training)
        outshape = [
            -1, self.opts['nmixtures'],
            np.prod(datashapes[self.opts['dataset']])
        ]
        dec_mean = tf.reshape(dec_mean, outshape)
        dec_Sigma = tf.reshape(dec_Sigma, outshape)

        return enc_cat_logits, enc_z, enc_gauss_mean, enc_gauss_Sigma, dec_mean, dec_Sigma
Example No. 9
    def __init__(self, options):
        super(ArtGAN, self).__init__()
        # build model
        self.encoder = encoder(options)
        self.decoder = decoder(options)
        self.discriminator = discriminator(options)
        self.discriminator_weight = {
            "pred_1": 1.,
            "pred_2": 1.,
            "pred_4": 1.,
            "pred_6": 1.,
            "pred_7": 1.
        }
        self.loss = nn.BCEWithLogitsLoss(reduction='mean')
        self.mse = nn.MSELoss(reduction='mean')
        self.abs = nn.L1Loss(reduction='mean')

        # Setup the optimizers
        dis_params = list(self.discriminator.parameters())
        gen_params = list(self.encoder.parameters()) + list(
            self.decoder.parameters())
        self.dis_opt = torch.optim.Adam(
            [p for p in dis_params if p.requires_grad],
            lr=options.lr,
            betas=(0.5, 0.999),
            weight_decay=0.0001,
            amsgrad=True)
        self.gen_opt = torch.optim.Adam(
            [p for p in gen_params if p.requires_grad],
            lr=options.lr,
            betas=(0.5, 0.999),
            weight_decay=0.0001,
            amsgrad=True)
        self.dis_scheduler = get_scheduler(self.dis_opt, options)
        self.gen_scheduler = get_scheduler(self.gen_opt, options)

        # Network weight initialization
        self.apply(weights_init(options.init))
        self.discriminator.apply(weights_init('gaussian'))
        self.gener_loss = torch.tensor(0.)
        self.discr_loss = torch.tensor(0.)
Example No. 10
    def fnet(self, mel, is_training=True, reuse=None):

        prenet_out = prenet(mel,
                            num_units=[hp.hidden_units, hp.hidden_units // 2],
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            reuse=reuse)  # (N, T, E/2)
        # CBHG1: mel-scale
        out, _ = cbhg(prenet_out,
                      hp.num_banks,
                      hp.hidden_units // 2,
                      hp.num_highway_blocks,
                      hp.norm_type,
                      is_training,
                      scope="fnet_cbhg",
                      reuse=reuse)

        out, _, _, _, _, _ = networks.decoder(self.x_mel,
                                              out,
                                              training=is_training)
        mid = out
        # Final linear projection
        logits = tf.layers.dense(out,
                                 hp.len_chinese_ppgs,
                                 trainable=is_training,
                                 reuse=reuse)  # (N, T, V)
        ppgs = tf.nn.softmax(logits / hp.t, name='ppgs')  # (N, T, V)
        preds = tf.to_int32(tf.argmax(logits, axis=-1))  # (N, T)

        decoded = tf.transpose(logits, perm=[1, 0, 2])
        sequence_len = tf.reduce_sum(tf.cast(
            tf.not_equal(tf.reduce_sum(mel, reduction_indices=2), 0.),
            tf.int32),
                                     reduction_indices=1)
        decoded, _ = tf.nn.ctc_beam_search_decoder(decoded,
                                                   sequence_len,
                                                   merge_repeated=False)
        decoded = tf.sparse_to_dense(decoded[0].indices,
                                     decoded[0].dense_shape, decoded[0].values)

        return mid, logits, ppgs, preds, decoded
Example No. 11
    def sample_x_from_prior(self, noise):
        """
        The sample is taken to be the mean parameters of the decoder.
        For WAE this corresponds to a deterministic decoder; for VAE there is
        a discrepancy between the decoder distribution and the samples, since
        we take the mean parameter as the sample from the model.

        noise:      [batch,K,zdim]
        return:
        sample_x:   [batch,K,imgdim]
        """
        sample_x, _ = decoder(self.opts,
                               input=noise,
                               output_dim=self.output_dim,
                               scope='decoder',
                               reuse=True,
                               is_training=False)
        output_shape = [
            -1,
        ] + datashapes[self.opts['dataset']]

        return tf.reshape(sample_x, output_shape)
Example No. 12
    def __init__(self, config=None, training=True):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, T_x), int32
            ## y1: Reduced melspectrogram. (N, T_y//r, n_mels*r) float32
            ## y2: Reduced dones. (N, T_y//r,) int32
            ## z: Magnitude. (N, T_y, n_fft//2+1) float32
            if training:
                self.origx, self.x, self.y1, self.y2, self.y3, self.num_batch = get_batch(
                    config)
                #self.origx, self.x, self.y1, self.y3, self.num_batch = get_batch(config)
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers,
                                                             hp.batch_size),
                                                      dtype=tf.int32)

            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(1, hp.T_x))
                self.y1 = tf.placeholder(tf.float32,
                                         shape=(1, hp.T_y // hp.r,
                                                hp.n_mels * hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32,
                                                             shape=(
                                                                 hp.dec_layers,
                                                                 1,
                                                             ))

            # Get decoder inputs: feed last frames only (N, Ty//r, n_mels)
            self.decoder_input = tf.concat((tf.zeros_like(
                self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.keys, self.vals = encoder(self.x,
                                               training=training)  # (N, Tx, e)

            with tf.variable_scope("decoder"):
                #self.mel_logits, self.decoder_output, self.alignments_li, self.max_attentions_li \
                self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li \
                    = decoder(self.decoder_input,
                             self.keys,
                             self.vals,
                             self.prev_max_attentions_li,
                             training=training)
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

            with tf.variable_scope("converter"):
                # Restore shape
                self.converter_input = tf.reshape(
                    self.decoder_output, (-1, hp.T_y, hp.embed_size // hp.r))
                self.converter_input = fc_block(
                    self.converter_input,
                    hp.converter_channels,
                    activation_fn=tf.nn.relu,
                    training=training)  # (N, Ty, v)

                # Converter
                #self.mag_logits = converter(self.converter_input, training=training)
                # self.converter_input = tf.reshape(self.mel_output, (-1, hp.T_y, hp.n_mels))
                self.mag_logits = converter(self.converter_input,
                                            training=training)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            if training:
                # Loss
                self.loss1 = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
                self.loss2 = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.done_output, labels=self.y2))
                self.loss3 = tf.reduce_mean(tf.abs(self.mag_output - self.y3))
                self.loss = self.loss1 + self.loss2 + self.loss3
                #self.loss = self.loss1 + self.loss3

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = grad if grad is None else tf.clip_by_value(
                        grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = grad if grad is None else tf.clip_by_norm(
                        grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))

                self.train_op = self.optimizer.apply_gradients(
                    self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.histogram('mel_output', self.mel_output)
                tf.summary.histogram('mel_actual', self.y1)
                tf.summary.histogram('done_output', self.done_output)
                tf.summary.histogram('done_actual', self.y2)
                tf.summary.histogram('mag_output', self.mag_output)
                tf.summary.histogram('mag_actual', self.y3)

                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('loss1', self.loss1)
                tf.summary.scalar('loss2', self.loss2)
                tf.summary.scalar('loss3', self.loss3)

                self.merged = tf.summary.merge_all()
Example No. 13
def build_enc_dec_connection(observation, constants):
    mean, logstd = encoder(observation, constants)
    eps = tf.random_normal(tf.shape(mean))
    non_sampled_z = mean + tf.exp(logstd) * eps
    dec_out = decoder(non_sampled_z)
    return (mean, logstd), dec_out
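The (mean, logstd) pair produced by this reparameterization is usually combined with a reconstruction term and the closed-form Gaussian KL; a minimal sketch, not taken from this repository (the squared-error reconstruction is an assumption about what decoder() emits):

import tensorflow as tf

def vae_loss(observation, mean, logstd, dec_out):
    # Reconstruction term; the appropriate likelihood depends on the decoder output.
    recon = tf.reduce_sum(tf.square(observation - dec_out), axis=-1)
    # KL(q(z|x) || N(0, I)) for a diagonal Gaussian with std = exp(logstd).
    kl = -0.5 * tf.reduce_sum(
        1. + 2. * logstd - tf.square(mean) - tf.exp(2. * logstd), axis=-1)
    return tf.reduce_mean(recon + kl)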
Example No. 14
    def __init__(self, mode="train"):
        '''
        Args:
          mode: Either "train" or "eval".
        '''
        # Set flag
        training = True if mode=="train" else False

        # Graph
        # Data Feeding
        ## x: Quantized wav. (B, T, 1) int32
        ## wavs: Raw wav. (B, length) float32
        ## speakers: Speaker ids. (B,). [0, 108]. int32.
        if mode=="train":
            self.x, self.wavs, self.speaker_ids, self.num_batch = get_batch()
            self.y = self.x
        else:  # test
            self.x = tf.placeholder(tf.int32, shape=(2, 63488, 1))
            self.y = tf.placeholder(tf.int32, shape=(2, 63488, 1))
            self.speaker_ids = tf.placeholder(tf.int32, shape=(2,))

        # inputs:
        self.encoder_inputs = tf.to_float(self.x)
        self.decoder_inputs = tf.to_float(self.y)
        self.decoder_inputs = tf.concat((tf.zeros_like(self.decoder_inputs[:, :1, :]), self.decoder_inputs[:, :-1, :]), 1)

        # speaker embedding
        self.speakers = tf.one_hot(self.speaker_ids, len(hp.speakers)) # (B, len(speakers))

        # encoder
        self.z_e = encoder(self.encoder_inputs) # (B, T', D)

        # vq
        self.z_q = vq(self.z_e) # (B, T', D)

        # decoder: y -> reconstructed logits.
        self.y_logits = decoder(self.decoder_inputs, self.speakers, self.z_q) # (B, T, Q)
        self.y_hat = tf.argmax(self.y_logits, -1) # (B, T)

        # monitor
        self.sample0 = tf.py_func(mu_law_decode, [self.y_hat[0]], tf.float32)
        self.sample1 = tf.py_func(mu_law_decode, [self.y_hat[1]], tf.float32)

        # speech samples
        # tf.summary.audio('{}/original1'.format(mode), self.wavs[:1], hp.sr, 1)
        # tf.summary.audio('{}/original2'.format(mode), self.wavs[1:], hp.sr, 1)
        tf.summary.audio('{}/sample0'.format(mode), tf.expand_dims(self.sample0, 0), hp.sr, 1)
        tf.summary.audio('{}/sample1'.format(mode), tf.expand_dims(self.sample1, 0), hp.sr, 1)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        if training:
            self.dec_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.y_logits, labels=tf.squeeze(self.y)))
            self.vq_loss = tf.reduce_mean(tf.squared_difference(tf.stop_gradient(self.z_e), self.z_q))
            self.enc_loss = hp.beta * tf.reduce_mean(tf.squared_difference(self.z_e, tf.stop_gradient(self.z_q)))

            # decoder grads
            decoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "decoder")
            decoder_grads = tf.gradients(self.dec_loss, decoder_vars)
            decoder_grads_vars = list(zip(decoder_grads, decoder_vars))

            # embedding variables grads
            embed_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "vq")
            embed_grads = tf.gradients(self.dec_loss + self.vq_loss, embed_vars)
            embed_grads_vars = list(zip(embed_grads, embed_vars))

            # encoder grads
            encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
            transferred_grads = tf.gradients(self.dec_loss, self.z_q)
            encoder_grads = [tf.gradients(self.z_e, var, transferred_grads)[0] + tf.gradients(self.enc_loss, var)[0]
                                 for var in encoder_vars]
            encoder_grads_vars = list(zip(encoder_grads, encoder_vars))

            # total grads
            self.grads_vars = decoder_grads_vars + embed_grads_vars + encoder_grads_vars

            # Training Scheme
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            # Summary
            tf.summary.scalar('train/dec_loss', self.dec_loss)
            tf.summary.scalar('train/vq_loss', self.vq_loss)
            tf.summary.scalar('train/enc_loss', self.enc_loss)

            # tf.summary.scalar("lr", self.lr)

            # gradient clipping
            self.clipped = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grads_vars]

            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)

        # Summary
        self.merged = tf.summary.merge_all()
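For comparison, the straight-through gradient that this snippet routes manually through tf.gradients is often written as a single stop_gradient identity (a sketch, not from this repository; z_e and z_q stand for the encoder output and quantized codes above):

# Straight-through estimator: the forward pass uses z_q, while the backward
# pass copies the decoder gradient to z_e unchanged.
z_q_st = z_e + tf.stop_gradient(z_q - z_e)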
Example No. 15
    def __init__(self, mode="train"):
        """
        Initialize the class based on the given mode.
        :param mode: the mode to build the model in ("train", "eval", or "synthesize")
        """
        print("Loading your model...")

        # Initialize values used in class
        self.mode = mode
        self.global_step = None
        self.mel_loss = None
        self.mag_loss = None
        self.learning_rate = None
        self.optimizer = None
        self.merged = None
        self.gradients = None
        self.clipped = None
        self.gvs = None
        self.opt_train = None

        # If is_training
        if mode == "train":
            self.is_training = True
        else:
            self.is_training = False

        print("Loading inputs...")
        # Load inputs
        if self.is_training:
            self.txt, self.mels, self.mags, self.file_names, self.num_batch = get_batch(
            )
        elif mode == "synthesize":
            self.txt = tf.placeholder(tf.int32, shape=(None, None))
            self.mels = tf.placeholder(tf.float32,
                                       shape=(None, None,
                                              N_MELS * REDUCTION_FACTOR))
        else:  # eval
            self.txt = tf.placeholder(tf.int32, shape=(None, None))
            self.mels = tf.placeholder(tf.float32,
                                       shape=(None, None,
                                              N_MELS * REDUCTION_FACTOR))
            self.mags = tf.placeholder(tf.float32,
                                       shape=(None, None, 1 + N_FFT // 2))
            self.file_names = tf.placeholder(tf.string, shape=(None, ))

        # decoder inputs
        self.decoder_inputs = tf.concat(
            (tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1)
        self.decoder_inputs = self.decoder_inputs[:, :, -N_MELS:]

        # Networks
        with tf.variable_scope("Networks"):
            print("Loading the encoder...")
            # encoder
            self.memory = encoder(self.txt, is_training=self.is_training)

            print("Loading the decoder...")
            # decoder
            self.mel_hat, self.alignments = decoder(
                self.decoder_inputs, self.memory, is_training=self.is_training)

            print("Loading the post CBHG module...")
            # CBHG Module
            self.mags_hat = cbhg_helper(self.mel_hat,
                                        N_MELS,
                                        is_training=self.is_training,
                                        post=True)

        print("Audio out")
        # audio
        self.audio_out = tf.py_func(spectrogram2wav, [self.mags_hat[0]],
                                    tf.float32)

        # Training and evaluation
        if mode in ("train", "eval"):
            print("Generating Loss...")
            # Loss
            self.loss = self.get_loss()

            print("Getting the optimizer ready...")
            # Training Scheme
            self.optimize()

            print("Setting up your summary...")
            self.summarize()
Example No. 16
    def __init__(self, training=True):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, T_x), int32
            ## y1: Reduced melspectrogram. (N, T_y//r, n_mels*r) float32
            ## y2: Reduced dones. (N, T_y//r,) int32
            ## z: Magnitude. (N, T_y, n_fft//2+1) float32
            if training:
                self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
                self.prev_max_attentions = tf.constant([0] * hp.batch_size)
            else:  # Evaluation
                self.x = tf.placeholder(tf.int32,
                                        shape=(hp.batch_size, hp.T_x))
                self.y1 = tf.placeholder(tf.float32,
                                         shape=(hp.batch_size, hp.T_y // hp.r,
                                                hp.n_mels * hp.r))
                self.prev_max_attentions = tf.placeholder(
                    tf.int32, shape=(hp.batch_size, ))

            # Get decoder inputs: feed last frames only (N, T_y//r, n_mels)
            self.decoder_input = tf.concat((tf.zeros_like(
                self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("net"):
                # Encoder. keys: (N, T_x, e), vals: (N, T_x, e)
                self.keys, self.vals, self.masks = encoder(self.x,
                                                           training=training,
                                                           scope="encoder")

                # Decoder. mel_output: (N, T_y/r, n_mels*r), done_output: (N, T_y/r, 2),
                # decoder_output: (N, T_y/r, e), alignments: (N, T_y, T_x)
                self.mel_output, self.done_output, self.decoder_output, self.alignments, self.max_attentions = decoder(
                    self.decoder_input,
                    self.keys,
                    self.vals,
                    self.masks,
                    self.prev_max_attentions,
                    training=training,
                    scope="decoder",
                    reuse=None)
                # Restore shape. converter_input: (N, T_y, e/r)
                self.converter_input = tf.reshape(self.decoder_output,
                                                  (hp.batch_size, hp.T_y, -1))
                self.converter_input = normalize(self.converter_input,
                                                 type=hp.norm_type,
                                                 training=training,
                                                 activation_fn=tf.nn.relu)

                # Converter. mag_output: (N, T_y, 1+n_fft//2)
                self.mag_output = converter(self.converter_input,
                                            training=training,
                                            scope="converter")
            if training:
                # Loss
                self.loss1_mae = tf.reduce_mean(
                    tf.abs(self.mel_output - self.y1))
                self.loss1_ce = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.done_output, labels=self.y2))
                self.loss2 = tf.reduce_mean(tf.abs(self.mag_output - self.z))
                self.loss = self.loss1_mae + self.loss1_ce + self.loss2

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val,
                                            hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))
                self.train_op = self.optimizer.apply_gradients(
                    self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('loss1_mae', self.loss1_mae)
                tf.summary.scalar('loss1_ce', self.loss1_ce)
                tf.summary.scalar('loss2', self.loss2)

                self.merged = tf.summary.merge_all()
Example No. 17
])
train_loader_A = utils.data_load(os.path.join('data', args.dataset), 'trainA', transform, args.batch_size, shuffle=True, drop_last=True)
train_loader_B = utils.data_load(os.path.join('data', args.dataset), 'trainB', transform, args.batch_size, shuffle=True, drop_last=True)
test_loader_A = utils.data_load(os.path.join('data', args.dataset), 'testA', transform, 1, shuffle=True, drop_last=True)
test_loader_B = utils.data_load(os.path.join('data', args.dataset), 'testB', transform, 1, shuffle=True, drop_last=True)

print('------------ Datasets -------------')
print('TrainA:', len(train_loader_A))
print('TrainB:', len(train_loader_B))
print('TestA:', len(test_loader_A))
print('TestB:', len(test_loader_B))
print('-------------- End ----------------')
# network
En_A = networks.encoder(in_nc=args.in_ngc, nf=args.ngf, img_size=args.img_size).to(device)
En_B = networks.encoder(in_nc=args.in_ngc, nf=args.ngf, img_size=args.img_size).to(device)
De_A = networks.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
De_B = networks.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
Disc_A = networks.discriminator(in_nc=args.in_ndc, out_nc=args.out_ndc, nf=args.ndf, img_size=args.img_size).to(device)
Disc_B = networks.discriminator(in_nc=args.in_ndc, out_nc=args.out_ndc, nf=args.ndf, img_size=args.img_size).to(device)
En_A.train()
En_B.train()
De_A.train()
De_B.train()
Disc_A.train()
Disc_B.train()
print('---------- Networks initialized -------------')
utils.print_network(En_A)
utils.print_network(En_B)
utils.print_network(De_A)
utils.print_network(De_B)
utils.print_network(Disc_A)
Example No. 18
    def __init__(self, training=True):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, Tx), int32
            ## y1: Reduced melspectrogram. (N, Ty//r, n_mels*r) float32
            ## y2: Reduced dones. (N, Ty//r,) int32
            ## z: Magnitude. (N, Ty, n_fft//2+1) float32
            if training:
                self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size), dtype=tf.int32)
            else: # Inference
                self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
                self.y1 = tf.placeholder(tf.float32, shape=(hp.batch_size, hp.Ty//hp.r, hp.n_mels*hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32, shape=(hp.dec_layers, hp.batch_size,))

            # Get decoder inputs: feed last frames only (N, Ty//r, n_mels)
            self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.keys, self.vals = encoder(self.x, training=training) # (N, Tx, e)

            with tf.variable_scope("decoder"):
                # mel_logits: (N, Ty/r, n_mels*r)
                # done_output: (N, Ty/r, 2),
                # decoder_output: (N, Ty/r, e)
                # alignments_li: dec_layers*(Tx, Ty/r)
                # max_attentions_li: dec_layers*(N, T_y/r)
                self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li \
                    = decoder(self.decoder_input,
                             self.keys,
                             self.vals,
                             self.prev_max_attentions_li,
                             training=training)
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

            with tf.variable_scope("converter"):
                # Restore shape
                self.converter_input = tf.reshape(self.decoder_output, (-1, hp.Ty, hp.embed_size//hp.r))
                self.converter_input = fc_block(self.converter_input,
                                                hp.converter_channels,
                                                activation_fn=tf.nn.relu,
                                                training=training) # (N, Ty, v)

                # Converter
                self.mag_logits = converter(self.converter_input, training=training) # (N, Ty, 1+n_fft//2)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            if training:
                # Loss
                self.loss_mels = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
                self.loss_dones = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output, labels=self.y2))
                self.loss_mags = tf.reduce_mean(tf.abs(self.mag_output - self.z))
                self.loss = self.loss_mels + self.loss_dones + self.loss_mags

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))
                self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)
                   
                # Summary
                tf.summary.scalar('Train_Loss/LOSS', self.loss)
                tf.summary.scalar('Train_Loss/mels', self.loss_mels)
                tf.summary.scalar('Train_Loss/dones', self.loss_dones)
                tf.summary.scalar('Train_Loss/mags', self.loss_mags)

                self.merged = tf.summary.merge_all()
Example No. 19
    def __init__(self, training=True):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, Tx), int32
            ## y1: Melspectrogram. (N, Ty, n_mels) float32
            ## y2: Dones. (N, Ty) int32
            ## z: Magnitude. (N, Ty, n_fft//2+1) float32
            if training:
                self.x, self.y1, self.y2, self.z = get_batch()
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers,
                                                             hp.batch_size),
                                                      dtype=tf.int32)
            else:  # Inference
                self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
                self.y1 = tf.placeholder(tf.float32,
                                         shape=(hp.batch_size, hp.Ty // hp.r,
                                                hp.n_mels * hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32,
                                                             shape=(
                                                                 hp.dec_layers,
                                                                 hp.batch_size,
                                                             ))

            # Get decoder inputs: feed last frames only (N, Ty, n_mels)
            self.decoder_input = tf.concat((tf.zeros_like(
                self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.keys, self.vals = encoder(self.x,
                                               training=training)  # (N, Tx, e)

            with tf.variable_scope("decoder"):
                # mel_logits: (N, Ty, n_mels)
                # done_output: (N, Ty, 2),
                # decoder_output: (N, Ty, e)
                # alignments_li: dec_layers*(Tx, Ty)
                # max_attentions_li: dec_layers*(N, T_y)
                self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li = decoder(
                    self.decoder_input,
                    self.keys,
                    self.vals,
                    self.prev_max_attentions_li,
                    training=training)
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

            with tf.variable_scope("converter"):
                # Restore shape
                self.converter_input = tf.reshape(self.decoder_output,
                                                  (-1, hp.Ty, hp.embed_size))
                self.converter_input = fc_block(
                    self.converter_input,
                    hp.converter_channels,
                    activation_fn=tf.nn.relu,
                    training=training)  # (N, Ty, v)

                # Converter
                self.mag_logits = converter(
                    self.converter_input,
                    training=training)  # (N, Ty, 1+n_fft//2)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            if training:
                # Loss
                self.loss_mels = tf.reduce_mean(
                    tf.abs(self.mel_output - self.y1))
                self.loss_dones = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.done_output, labels=self.y2))
                self.loss_mags = tf.reduce_mean(
                    tf.abs(self.mag_output - self.z))
                self.loss = self.loss_mels + self.loss_dones + self.loss_mags

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val,
                                            hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))
                self.train_op = self.optimizer.apply_gradients(
                    self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.scalar('Train_Loss/LOSS', self.loss)
                tf.summary.scalar('Train_Loss/mels', self.loss_mels)
                tf.summary.scalar('Train_Loss/dones', self.loss_dones)
                tf.summary.scalar('Train_Loss/mags', self.loss_mags)

                self.merged = tf.summary.merge_all()
Example No. 20
    def create_networks(self):

        # Placeholders
        self.state = tf.placeholder(tf.float32, [None, self.l_state], 'state')
        self.obs = tf.placeholder(tf.float32, [None, self.l_obs], 'obs')
        self.z = tf.placeholder(tf.float32, [None, self.l_z], 'z')

        # Decoder p(z|tau)
        if self.obs_truncate_length:
            self.traj = tf.placeholder(dtype=tf.float32,
                                       shape=[
                                           None, self.traj_length_downsampled,
                                           self.obs_truncate_length
                                       ])
        else:
            self.traj = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.traj_length_downsampled, self.l_obs])
        with tf.variable_scope("Decoder"):
            self.decoder_out, self.decoder_probs = networks.decoder(
                self.traj, self.traj_length_downsampled,
                self.nn['n_h_decoder'], self.l_z)

        # Low-level policy
        if self.low_level_alg == 'reinforce' or self.low_level_alg == 'iac':
            self.epsilon = tf.placeholder(tf.float32, None, 'epsilon')
            with tf.variable_scope("Policy_main"):
                probs = networks.actor(self.obs, self.z, self.nn['n_h1_low'],
                                       self.nn['n_h2_low'], self.l_action)
            self.probs = (1 - self.epsilon) * probs + self.epsilon / float(
                self.l_action)
            self.action_samples = tf.multinomial(tf.log(self.probs), 1)

        if self.low_level_alg == 'iac':
            with tf.variable_scope("V_main"):
                self.V = networks.critic(self.obs, self.z, self.nn['n_h1_low'],
                                         self.nn['n_h2_low'])
            with tf.variable_scope("V_target"):
                self.V_target = networks.critic(self.obs, self.z,
                                                self.nn['n_h1_low'],
                                                self.nn['n_h2_low'])

        # Low-level Q-functions
        if self.low_level_alg == 'iql':
            with tf.variable_scope("Qlow_main"):
                self.Q_low = networks.Q_low(self.obs, self.z,
                                            self.nn['n_h1_low'],
                                            self.nn['n_h2_low'], self.l_action)
            with tf.variable_scope("Qlow_target"):
                self.Q_low_target = networks.Q_low(self.obs, self.z,
                                                   self.nn['n_h1_low'],
                                                   self.nn['n_h2_low'],
                                                   self.l_action)
            self.argmax_Q_low = tf.argmax(self.Q_low, axis=1)
            self.actions_low_1hot = tf.placeholder(tf.float32,
                                                   [None, self.l_action],
                                                   'actions_low_1hot')

        # High-level QMIX
        # Individual agent networks
        # output dimension is [time * n_agents, q-values]
        with tf.variable_scope("Agent_main"):
            self.agent_qs = networks.Qmix_single(self.obs, self.nn['n_h1'],
                                                 self.nn['n_h2'], self.l_z)
        with tf.variable_scope("Agent_target"):
            self.agent_qs_target = networks.Qmix_single(
                self.obs, self.nn['n_h1'], self.nn['n_h2'], self.l_z)

        self.argmax_Q = tf.argmax(self.agent_qs, axis=1)
        self.argmax_Q_target = tf.argmax(self.agent_qs_target, axis=1)

        # To extract Q-value from agent_qs and agent_qs_target
        # [batch*n_agents, N_roles]
        self.actions_1hot = tf.placeholder(tf.float32, [None, self.l_z],
                                           'actions_1hot')
        # [batch*n_agents, 1]
        self.q_selected = tf.reduce_sum(tf.multiply(self.agent_qs,
                                                    self.actions_1hot),
                                        axis=1)
        # [batch, n_agents]
        self.mixer_q_input = tf.reshape(self.q_selected, [-1, self.n_agents])

        self.q_target_selected = tf.reduce_sum(tf.multiply(
            self.agent_qs_target, self.actions_1hot),
                                               axis=1)
        self.mixer_target_q_input = tf.reshape(self.q_target_selected,
                                               [-1, self.n_agents])

        # Mixing network
        with tf.variable_scope("Mixer_main"):
            self.mixer = networks.Qmix_mixer(self.mixer_q_input, self.state,
                                             self.l_state, self.n_agents,
                                             self.nn['n_h_mixer'])
        with tf.variable_scope("Mixer_target"):
            self.mixer_target = networks.Qmix_mixer(self.mixer_target_q_input,
                                                    self.state, self.l_state,
                                                    self.n_agents,
                                                    self.nn['n_h_mixer'])
Example No. 21
def model_fn(features, labels, mode, params, config):
    """
    Creates the computational TensorFlow graph.
    The function follows the signature required by tf.estimator.
    """
    images = features
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # build the main graph
    feature_to_use = params['feature_to_use']  # Relu_X_1
    encoding = encoder(images)[feature_to_use]
    restored_images = decoder(encoding, feature_to_use)
    encoding_of_restored_images = encoder(restored_images)[feature_to_use]

    # use a pretrained backbone network
    if is_training:
        with tf.name_scope('init_from_checkpoint'):
            tf.train.init_from_checkpoint(params['pretrained_checkpoint'],
                                          {'vgg_19/': 'encoder/'})

    assert mode != tf.estimator.ModeKeys.PREDICT

    # add L2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()

    batch_size = tf.to_float(tf.shape(images)[0])
    normalizer = 255.0 * batch_size
    reconstruction_loss = tf.nn.l2_loss(images - restored_images) / normalizer
    features_loss = tf.nn.l2_loss(encoding -
                                  encoding_of_restored_images) / normalizer

    tf.losses.add_loss(reconstruction_loss)
    tf.losses.add_loss(params['lambda'] * features_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)
    tf.summary.scalar('reconstruction_loss', reconstruction_loss)
    tf.summary.scalar('features_loss', features_loss)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    if mode == tf.estimator.ModeKeys.EVAL:

        eval_metric_ops = {
            'val_reconstruction_loss': tf.metrics.mean(reconstruction_loss),
            'val_features_loss': tf.metrics.mean(features_loss)
        }

        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.polynomial_decay(
            params['initial_learning_rate'],
            global_step,
            params['num_steps'],
            params['end_learning_rate'],
            power=1.0  # linear decay
        )
        tf.summary.scalar('learning_rate', learning_rate)

    with tf.variable_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate,
                                           beta1=0.9,
                                           beta2=0.999)
        train_op = optimizer.minimize(total_loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
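A sketch of how a model_fn like this is typically wired into an Estimator; the input functions, hyperparameter values, and model_dir below are placeholders, not taken from the original project:

params = {
    'feature_to_use': 'Relu_4_1',
    'pretrained_checkpoint': 'pretrained/vgg_19.ckpt',
    'weight_decay': 1e-4,
    'lambda': 1.0,
    'initial_learning_rate': 1e-4,
    'end_learning_rate': 1e-6,
    'num_steps': 100000,
}
estimator = tf.estimator.Estimator(model_fn=model_fn, params=params,
                                   model_dir='models/decoder')
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,  # hypothetical input_fn
                                    max_steps=params['num_steps'])
eval_spec = tf.estimator.EvalSpec(input_fn=val_input_fn)      # hypothetical input_fn
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)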
Example No. 22
    def __init__(self, config=None, training=True, train_form='Both'):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()
        self.graph = tf.Graph()
        with self.graph.as_default():
            if training:
                self.origx, self.x, self.y1, self.y2, self.y3, self.num_batch = get_batch(
                    config, train_form)
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers,
                                                             self.num_batch),
                                                      dtype=tf.int32)

            else:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(1, hp.T_x))
                self.y1 = tf.placeholder(tf.float32,
                                         shape=(1, hp.T_y // hp.r,
                                                hp.n_mels * hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32,
                                                             shape=(
                                                                 hp.dec_layers,
                                                                 1,
                                                             ))

            # Get decoder inputs: feed last frames only
            if train_form != 'Converter':
                self.decoder_input = tf.concat(
                    (tf.zeros_like(self.y1[:, :1, -hp.n_mels:]),
                     self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            if train_form != 'Converter':
                with tf.variable_scope("encoder"):
                    self.encoded = encoder(self.x, training=training)

                with tf.variable_scope("decoder"):
                    self.mel_logits, self.done_output, self.max_attentions_li = decoder(
                        self.decoder_input,
                        self.encoded,
                        self.prev_max_attentions_li,
                        training=training)
                    #self.mel_output = self.mel_logits
                    self.mel_output = tf.nn.sigmoid(self.mel_logits)

            if train_form == 'Both':
                with tf.variable_scope("converter"):
                    #self.converter_input = tf.reshape(self.mel_output, (-1, hp.T_y, hp.n_mels))
                    self.converter_input = self.mel_output
                    self.mag_logits = converter(self.converter_input,
                                                training=training)
                    self.mag_output = tf.nn.sigmoid(self.mag_logits)
            elif train_form == 'Converter':
                with tf.variable_scope("converter"):
                    #self.converter_input = tf.reshape(self.mel_output, (-1, hp.T_y, hp.n_mels))
                    self.converter_input = self.y1
                    self.mag_logits = converter(self.converter_input,
                                                training=training)
                    self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            if training:
                # Loss
                if train_form != 'Converter':
                    self.loss1 = tf.reduce_mean(
                        tf.abs(self.mel_output - self.y1))
                    if hp.include_dones:
                        self.loss2 = tf.reduce_mean(
                            tf.nn.sparse_softmax_cross_entropy_with_logits(
                                logits=self.done_output, labels=self.y2))
                if train_form != 'Encoder':
                    self.loss3 = tf.reduce_mean(
                        tf.abs(self.mag_output - self.y3))

                if train_form == 'Both':
                    if hp.include_dones:
                        self.loss = self.loss1 + self.loss2 + self.loss3
                    else:
                        self.loss = self.loss1 + self.loss3
                elif train_form == 'Encoder':
                    if hp.include_dones:
                        self.loss = self.loss1 + self.loss2
                    else:
                        self.loss = self.loss1
                else:
                    self.loss = self.loss3

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = grad if grad is None else tf.clip_by_value(
                        grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = grad if grad is None else tf.clip_by_norm(
                        grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))

                self.train_op = self.optimizer.apply_gradients(
                    self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.scalar('loss', self.loss)

                if train_form != 'Converter':
                    tf.summary.histogram('mel_output', self.mel_output)
                    tf.summary.histogram('mel_actual', self.y1)
                    tf.summary.scalar('loss1', self.loss1)
                    if hp.include_dones:
                        tf.summary.histogram('done_output', self.done_output)
                        tf.summary.histogram('done_actual', self.y2)
                        tf.summary.scalar('loss2', self.loss2)
                if train_form != 'Encoder':
                    tf.summary.histogram('mag_output', self.mag_output)
                    tf.summary.histogram('mag_actual', self.y3)
                    tf.summary.scalar('loss3', self.loss3)

                self.merged = tf.summary.merge_all()
Example No. 23
        z_encoder_sketchy = encoder(in_dim=params.x_dim,
                                    z_dim=params.glove_dim)
        cuda(z_encoder_sketchy)
        z_encoder_sketchy = train_z_encoder(
            encoder_model=z_encoder_sketchy,
            feature_dict=features_sketchy_dict,
            dump_location=params.path_z_encoder_sketchy)

    else:
        z_encoder_sketchy = torch.load(params.path_z_encoder_sketchy)
        cuda(z_encoder_sketchy)

    if (not os.path.isfile(params.path_s_encoder_sketchy)):
        s_encoder_sketchy = encoder(in_dim=params.x_dim,
                                    z_dim=params.glove_dim)
        decoder_sketchy = decoder(params.glove_dim)
        adv_sketchy = adv_classifier(feat_dim=params.glove_dim,
                                     num_classes=params.num_class)
        cuda(s_encoder_sketchy)
        cuda(adv_sketchy)
        cuda(decoder_sketchy)
        s_encoder_sketchy = train_s_encoder(
            z_encoder=z_encoder_sketchy,
            s_encoder=s_encoder_sketchy,
            decoder=decoder_sketchy,
            adv_classifier=adv_sketchy,
            feature_dict=features_sketchy_dict,
            dump_location=params.path_s_encoder_sketchy)

    else:
        s_encoder_sketchy = torch.load(params.path_s_encoder_sketchy)