Example #1
def initialize_models(args, device):
    # network
    En_A = models.encoder(in_nc=args.in_ngc, nf=args.ngf, img_size=args.img_size).to(device)
    En_B = models.encoder(in_nc=args.in_ngc, nf=args.ngf, img_size=args.img_size).to(device)
    De_A = models.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
    De_B = models.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)
    Disc_A = models.discriminator(in_nc=args.in_ndc, out_nc=args.out_ndc, nf=args.ndf, img_size=args.img_size).to(device)
    Disc_B = models.discriminator(in_nc=args.in_ndc, out_nc=args.out_ndc, nf=args.ndf, img_size=args.img_size).to(device)

    print('---------- models initialized -------------')
    utils.print_network(En_A)
    utils.print_network(En_B)
    utils.print_network(De_A)
    utils.print_network(De_B)
    utils.print_network(Disc_A)
    utils.print_network(Disc_B)
    print('-----------------------------------------------')

    # Parallelize code
    En_A = nn.DataParallel(En_A)
    En_B = nn.DataParallel(En_B)
    De_A = nn.DataParallel(De_A)
    De_B = nn.DataParallel(De_B)
    Disc_A = nn.DataParallel(Disc_A)
    Disc_B = nn.DataParallel(Disc_B)

    all_models = [En_A, En_B, De_A, De_B, Disc_A, Disc_B]
    return all_models
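A minimal usage sketch, assuming an argparse-style args object whose attribute names match the keyword arguments above (the concrete values here are illustrative only):

import argparse
import torch

# Hypothetical flag values; the real ones come from the project's argparse setup.
args = argparse.Namespace(in_ngc=3, out_ngc=3, in_ndc=3, out_ndc=1,
                          ngf=64, ndf=32, img_size=256)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
En_A, En_B, De_A, De_B, Disc_A, Disc_B = initialize_models(args, device)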
Example #2
def translate(input_sentence):

    sentence = preprocess_sentence(input_sentence)
    inputs = [src_LI.word2idx[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                           maxlen=50,
                                                           padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''
    print(inputs.shape)
    state = [[tf.zeros((1, 16)) for i in range(2)],
             [tf.zeros((1, 16)) for i in range(2)]]
    encoder_out, encoder_state = encoder(inputs, state)
    print(encoder_out.shape)

    decoder_state = encoder_state[0]
    decoder_input = tf.expand_dims([tgt_LI.word2idx['<start>']], 0)

    for t in range(50):
        predictions, decoder_state, _ = decoder(decoder_input, decoder_state,
                                                encoder_out)

        predicted_id = tf.argmax(predictions[0]).numpy()

        result += tgt_LI.idx2word[predicted_id] + ' '

        if tgt_LI.idx2word[predicted_id] == '<end>':
            return result, sentence

        # the predicted ID is fed back into the model
        decoder_input = tf.expand_dims([predicted_id], 0)

    return result, sentence
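The snippet depends on a preprocess_sentence helper that is not shown. A plausible sketch in the spirit of the standard TensorFlow NMT tutorial (an assumption, not necessarily the project's implementation):

import re

def preprocess_sentence(sentence):
    # Lowercase, isolate punctuation, and collapse repeated whitespace.
    sentence = sentence.lower().strip()
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = re.sub(r"\s+", " ", sentence).strip()
    # Wrap with the start/end tokens the decoding loop expects.
    return '<start> ' + sentence + ' <end>'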
Example #3
    def __init__(self, encoded_size, x_dim, y_dim):
        super(NP, self).__init__()
        self.encoded_size = encoded_size
        self._encoder = encoder(encoded_size, x_dim, y_dim)
        self._rz = r_to_z(encoded_size)
        self._decoder = decoder(encoded_size, x_dim, y_dim)
        self.tanh = nn.Tanh()
Example #4
def build_network(input_sequences,
                  initial_state=None,
                  initialize_to_zero=True):
    input_sequences_rs = tf.expand_dims(input_sequences, axis=-1)

    # encoder network
    encoder_channels = [32, 16]
    encoding_channels = encoder_channels[-1]
    with tf.variable_scope("encoder"):
        all_encoder_states, final_encoder_state = encoder(
            inputs=input_sequences_rs,
            channels=encoder_channels,
            initial_state=initial_state,
            initialize_to_zero=initialize_to_zero)
        encoder_saver = tf.compat.v1.train.Saver()
        print('v1 train saver worked')
    # additional output network
    predictions_flat = tf.reshape(all_encoder_states,
                                  shape=(-1, image_height, image_width,
                                         encoding_channels))
    predictions_flat = output_layers(predictions_flat)

    predictions = tf.reshape(predictions_flat, tf.shape(input_sequences))
    print('build network worked')

    return predictions_flat, predictions, final_encoder_state, encoder_saver
Example #5
    def regularization_loss(self):
        opts = self.opts

        self.random_z_recon, _ = encoder(opts,
                                         inputs=self.pseudo_G_z,
                                         reuse=True,
                                         is_training=self.is_training)
        loss_1 = tf.reduce_mean(
            tf.sqrt(
                tf.reduce_sum(tf.square(self.random_z_recon - self.random_z),
                              axis=[1])))
        self.G_new_z, _ = decoder(opts,
                                  noise=self.random_z,
                                  reuse=True,
                                  is_training=self.is_training)

        diff = (tf.reshape(self.G_new_z, [-1, np.prod(self.opts['datashape'])]) -
                tf.reshape(self.pseudo_G_z, [-1, np.prod(self.opts['datashape'])]))
        loss_2 = tf.reduce_mean(
            tf.sqrt(tf.reduce_sum(tf.square(diff), axis=[1])))

        loss = loss_1 + loss_2

        return loss
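For intuition, loss_1 and loss_2 are each the batch mean of per-sample Euclidean (L2) distances between flattened tensors. A NumPy sketch of the same computation, outside the TF graph:

import numpy as np

def mean_l2_distance(a, b):
    # Matches tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(a - b), axis=[1]))).
    return np.mean(np.sqrt(np.sum(np.square(a - b), axis=1)))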
Example #6
def make_model():
    # Making all the modules of the model architecture
    i_s = 336
    encoder_inp_shape = (i_s,i_s,3)
    enc = encoder(encoder_inp_shape)
    hg_inp_shape_1 = (i_s // 4, i_s // 4, 512)
    hg1 = hourglass(hg_inp_shape_1)
    hg_inp_shape_2 = (i_s // 4, i_s // 4, 256)
    hg2 = hourglass(hg_inp_shape_2)
    decoder_inp_shape = (i_s // 4, i_s // 4, 256)
    dec = decoder(decoder_inp_shape)
    proSR = net2(encoder_inp_shape)

    # Making the graph by connecting all the modules of the model architecture.
    # Each of these modules can be seen as a layer now.
    input_tensor_1 = Input(encoder_inp_shape)
    input_tensor_2 = Input(encoder_inp_shape)
    part1 = enc(input_tensor_1)
    part2 = hg1(part1)
    part3 = hg2(part2)
    part4 = dec(part3)
    part5 = proSR(input_tensor_2)
    output = Add()([part4, part5])
    model = Model([input_tensor_1, input_tensor_2], output)
    model.compile(loss=root_mean_sq_GxGy, optimizer = RMSprop())

    with open('hourglass_sr_t1_t2.txt', 'w') as f:
        with redirect_stdout(f):
            model.summary()

    return model
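A minimal smoke-test sketch for the compiled model, using random dummy inputs of the (336, 336, 3) shape declared above (illustrative only):

import numpy as np

model = make_model()
x1 = np.random.rand(1, 336, 336, 3).astype('float32')  # encoder branch input
x2 = np.random.rand(1, 336, 336, 3).astype('float32')  # proSR branch input
pred = model.predict([x1, x2])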
Example #7
def train_step(img_tensor, target, tokenizer):
    loss = 0
    # initializing the hidden state for each batch
    # because the captions are not related from image to image
    hidden = decoder.reset_state(batch_size=target.shape[0])
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']] *
                               target.shape[0], 1)

    with tf.GradientTape() as tape:
        features = encoder(img_tensor)

        for i in range(1, target.shape[1]):
            # passing the features through the decoder
            predictions, hidden, _ = decoder(dec_input, features, hidden)

            loss += loss_function(target[:, i], predictions)

            # using teacher forcing
            dec_input = tf.expand_dims(target[:, i], 1)

    total_loss = (loss / int(target.shape[1]))

    trainable_variables = encoder.trainable_variables + decoder.trainable_variables

    gradients = tape.gradient(loss, trainable_variables)

    optimizer.apply_gradients(zip(gradients, trainable_variables))

    return loss, total_loss
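The loss_function used by this training step is not shown; a sketch of the masked sparse cross-entropy commonly paired with such a loop (assumed, with padding id 0 masked out):

import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # Zero out the loss at padded positions (token id 0) before averaging.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    loss_ *= tf.cast(mask, dtype=loss_.dtype)
    return tf.reduce_mean(loss_)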
Example #8
def add_aefixedpoint_cost(opts, wae_model):

    w_aefixedpoint = tf.placeholder(tf.float32, name='w_aefixedpoint')
    wae_model.w_aefixedpoint = w_aefixedpoint

    gen_images = wae_model.decoded
    gen_images.set_shape([opts['batch_size']] + wae_model.data_shape)
    tmp = encoder(opts,
                  reuse=True,
                  inputs=gen_images,
                  is_training=wae_model.is_training)
    tmp_sg = encoder(opts,
                     reuse=True,
                     inputs=tf.stop_gradient(gen_images),
                     is_training=wae_model.is_training)
    encoded_gen_images = tmp[0]
    encoded_gen_images_sg = tmp_sg[0]
    if opts['e_noise'] == 'gaussian':
        # Encoder outputs means and variances of Gaussian
        # Encoding into means
        encoded_gen_images = encoded_gen_images[0]
        encoded_gen_images_sg = encoded_gen_images_sg[0]
    autoencoded_gen_images, _ = decoder(opts,
                                        reuse=True,
                                        noise=encoded_gen_images,
                                        is_training=wae_model.is_training)
    autoencoded_gen_images_sg, _ = decoder(opts,
                                           reuse=True,
                                           noise=encoded_gen_images_sg,
                                           is_training=wae_model.is_training)
    a = wae.WAE.reconstruction_loss(opts, gen_images, autoencoded_gen_images)
    b = tf.stop_gradient(a)
    c = wae_model.reconstruction_loss(tf.stop_gradient(gen_images),
                                      autoencoded_gen_images_sg)
    extra_cost = b + a - c
    # Check gradients
    # encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
    # wae_model.grad_extra = tf.gradients(ys=extra_cost, xs=encoder_vars)
    # for idx, el in enumerate(wae_model.grad_extra):
    #    print encoder_vars[idx].name, el

    wae_model.wae_objective += wae_model.w_aefixedpoint * extra_cost
Example #9
    def _setup_encoder(self, X, X_seq_len, dropout):
        """
        Sets up the encoder.
        
        Args:
            X: input sequence
            X_seq_len: inputs' sequence lengths
            dropout: dropout (1-keep_prob) to apply to encoder cell

        Returns (see the Encoder class for more details): 
            outputs: encoder outputs
            output_state: final state of the encoder
        """
        with tf.variable_scope("encoder"):
            encoder = Encoder(self.config)
            outputs, output_state = encoder(X, X_seq_len, dropout)

        return outputs, output_state
Example #10
    def __init__(self, args):
        super(VAE,self).__init__()

        self.device = args.device
        self.latent_size = args.latent_size
        self.hidden_size = args.hidden_size

        self.batch_size = args.batch_size
        self.input_dim = args.input_dim
        self.warmup = args.warmup

        self.encoder = models.encoder(self.input_dim, self.hidden_size, self.latent_size, self.device)

        self.decoder = models.decoder(self.latent_size, self.input_dim, self.device)

        self.reconstruction_criterion = nn.MSELoss(reduction='mean').to(self.device)
        self.reparameterize_with_noise = False

        self.reconstruction = args.recons
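reparameterize_with_noise suggests the encoder's mean/log-variance outputs are sampled only when noise is enabled. A hypothetical sketch of the matching method (the project's own may differ):

import torch

def reparameterize(self, mu, logvar):
    # z = mu + sigma * eps with eps ~ N(0, I) when sampling is enabled.
    if self.reparameterize_with_noise:
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    # Deterministic encoding otherwise.
    return mu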
Example #11
    def _setup_rnn(self, X, X_seq_len, dropout, vocab_size):
        config = self.config
        params = encoder_params_helper(config.rnn_num_layers,
                                       config.rnn_unit_type,
                                       config.rnn_type,
                                       config.rnn_num_units,
                                       config.rnn_num_residual_layers,
                                       config.verbose)

        with tf.variable_scope("rnn"):
            encoder = Encoder(params)
            outputs, output_state = encoder(X, X_seq_len, dropout)

            output_layer = tf.layers.Dense(vocab_size+1)
            outputs = output_layer(outputs)

            if config.verbose:
                variable_summaries(output_layer.trainable_weights[0], 'output_layer_weights')
                variable_summaries(output_layer.trainable_weights[1], 'output_layer_biases')
                variable_summaries(outputs, 'linear_projections')

        return outputs
Example #12
    def deblur_defmo(I, B, bbox_tight, nsplits, radius, obj_dim):
        bbox = extend_bbox(bbox_tight.copy(), 4 * np.max(radius),
                           g_resolution_y / g_resolution_x, I.shape)
        im_crop = crop_resize(I, bbox, (g_resolution_x, g_resolution_y))
        bgr_crop = crop_resize(B, bbox, (g_resolution_x, g_resolution_y))
        preprocess = get_transform()
        input_batch = torch.cat((preprocess(im_crop), preprocess(bgr_crop)),
                                0).to(device).unsqueeze(0).float()
        with torch.no_grad():
            latent = encoder(input_batch)
            times = torch.linspace(0, 1, nsplits * multi_f + 1).to(device)
            renders = rendering(latent, times[None])
            renders = renders[:, :-1].reshape(
                1, nsplits, multi_f, 4, g_resolution_y,
                g_resolution_x).mean(2)  # add small motion blur

        renders_rgba = renders[0].data.cpu().detach().numpy().transpose(
            2, 3, 1, 0)
        est_hs_crop = rgba2hs(renders_rgba, bgr_crop)
        est_hs = rev_crop_resize(est_hs_crop, bbox, I)
        est_traj = renders2traj(renders, device)[0].T.cpu()
        est_traj = rev_crop_resize_traj(est_traj, bbox,
                                        (g_resolution_x, g_resolution_y))
        return est_hs, est_traj
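rgba2hs composites the predicted RGBA renders over the background crop; a hedged sketch of the standard alpha blending such a helper performs, assuming renders of shape (H, W, 4, n) and a background of shape (H, W, 3) as the transpose above implies (the project's actual helper may differ):

import numpy as np

def rgba2hs(rgba, bgr):
    # out = rgb * alpha + background * (1 - alpha), per temporal slice.
    return rgba[:, :, :3, :] * rgba[:, :, 3:, :] + bgr[:, :, :, None] * (1 - rgba[:, :, 3:, :])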
Example #13
    out = tf.transpose(model.output, perm=[0, 2, 1, 3])
    out = tf.keras.layers.Reshape([-1, out.shape[-1] * out.shape[-2]])(out)

    if config.n_layers > 0:
        if config.mode == 'GRU':
            out = tf.keras.layers.Dense(config.n_dim)(out)
            for i in range(config.n_layers):
                # out = transformer_layer(config.n_dim, config.n_heads)(out)
                out = tf.keras.layers.Bidirectional(
                    tf.keras.layers.GRU(config.n_dim, return_sequences=True),
                    backward_layer=tf.keras.layers.GRU(config.n_dim,
                                                       return_sequences=True,
                                                       go_backwards=True))(out)
        elif config.mode == 'transformer':
            out = tf.keras.layers.Dense(config.n_dim)(out)
            out = encoder(config.n_layers, config.n_dim, config.n_heads)(out)

            out = tf.keras.layers.Flatten()(out)
            out = tf.keras.layers.ReLU()(out)
        else:
            for i in range(config.n_layers):
                # out = tf.keras.layers.Dropout(0.1)(out)
                out = tf.keras.layers.Dense(config.n_dim)(out)
                out = tf.keras.layers.Activation('sigmoid')(out) * out

    out = tf.keras.layers.Dense(config.n_classes, activation='relu')(out)
    model = tf.keras.models.Model(inputs=model.input, outputs=out)

    specs = None
    for name in config.name.split(','):
        NAME = name if name.endswith('.h5') else name + '.h5'
Example #14
def run(args):
    print("Creating Langauge Indices for source and target...")
    src_LI = LanguageIndex(language="source")
    tgt_LI = LanguageIndex(language="target")

    src_LI.add(read_file(args.src_path))
    tgt_LI.add(read_file(args.tgt_path))
    print("Created Langauge Indices.")

    print("Instantiating DataLoader object...")
    Data = DataLoader(args.src_path, args.tgt_path, src_LI, tgt_LI, args.batch_size)
    print("Loaded data.")

    print("Creating an encoder object...")
    encoder = BiLSTMEncoder(
        vocab_size = len(src_LI.word2idx),
        embedding_dim=128,
        encoder_size=16,
        batch_size=args.batch_size
    )
    print("Created an encoder object.")

    print("Creating a decoder object...")
    decoder = Decoder(
        vocab_size = len(tgt_LI.word2idx),
        embedding_dim=128,
        decoder_size=16,
        batch_size=args.batch_size
    )
    print("Created a decoder object.")

    # create optimizer
    if args.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer()
    
    # function to calculate loss
    def loss_function(real, pred):
        mask = 1 - np.equal(real, 0)
        loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask
        return tf.reduce_mean(loss_)

    # create a checkpoint object for saving the model
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

    for epoch in range(args.epochs):
        start = time.time()
        # initialize the hidden states of the encoder
        encoder_state = encoder.initialize_hidden()
        
        total_loss = 0
        num_batch = 0
        
        for (batch, (input_seq, target_seq)) in enumerate(Data.data):
            num_batch += 1
            loss = 0
            with tf.GradientTape() as tape:
                encoder_output, encoder_state = encoder(input_seq, encoder_state)
                # initialize decoder hidden state
                decoder_state = encoder_state[0]
                decoder_input = tf.expand_dims([tgt_LI.word2idx['<start>']] * args.batch_size, 1)       
                
                # Teacher forcing - feeding the target as the next input
                for t in range(1, target_seq.shape[1]):
                    # passing encoder_outputs to the decoder
                    predictions, decoder_state, _ = decoder(decoder_input, decoder_state, encoder_output)
                    
                    loss += loss_function(target_seq[:, t], predictions)
                    
                    # using teacher forcing
                    decoder_input = tf.expand_dims(target_seq[:, t], 1)
            
            batch_loss = (loss / int(target_seq.shape[1]))
            # write batch_loss to the tensorboard logs
            with writer.as_default(), tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar('TrainingLoss', batch_loss.numpy())
            total_loss += batch_loss
            
            variables = encoder.variables + decoder.variables
            gradients = tape.gradient(loss, variables)
            
            optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)
            
            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                            batch,
                                                            batch_loss.numpy()))
            if batch % 2000 == 0:
                checkpoint.save(file_prefix = checkpoint_prefix)
        # saving (checkpoint) the model every 1 epoch
        if (epoch + 1) % 1 == 0:
            checkpoint.save(file_prefix = checkpoint_prefix)
        
        print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                            total_loss / num_batch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
Example #15
import argparse
import os
import time

import numpy as np
import tensorflow as tf
tf.enable_eager_execution()

# import modules for encoder and decoder
from models.encoder import *
from models.decoder import *

print("Creating an encoder object...")
encoder = BiLSTMEncoder(vocab_size=1024,
                        embedding_dim=128,
                        encoder_size=16,
                        batch_size=4)
print("Created an encoder object.")

encoder_hidden = [[tf.zeros((1, 16)), tf.zeros((1, 16))],
                  [tf.zeros((1, 16)), tf.zeros((1, 16))]]
# print(encoder_hidden.shape)

inputs = tf.convert_to_tensor([2, 3, 4])
print(inputs.shape)
inputs = tf.reshape(inputs, (1, -1))

outputs, encoder_hidden = encoder(inputs, encoder_hidden)

print(type(outputs))
print(outputs.shape)
Example #16
    def __init__(self, opts, tag):
        tf.reset_default_graph()
        logging.error('Building the Tensorflow Graph')
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.Session(config=config)
        self.opts = opts

        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        self.add_inputs_placeholders()

        self.add_training_placeholders()
        sample_size = tf.shape(self.sample_points)[0]

        enc_mean, enc_sigmas = encoder(opts,
                                       inputs=self.sample_points,
                                       is_training=self.is_training,
                                       y=self.labels)

        enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
        self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas

        eps = tf.random_normal((sample_size, opts['zdim']),
                               0.,
                               1.,
                               dtype=tf.float32)
        self.encoded = self.enc_mean + tf.multiply(
            eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
        # self.encoded = self.enc_mean + tf.multiply(
        #     eps, tf.exp(self.enc_sigmas / 2.))

        (self.reconstructed, self.reconstructed_logits), self.probs1 = \
            decoder(opts, noise=self.encoded,
                    is_training=self.is_training)
        self.correct_sum = tf.reduce_sum(
            tf.cast(tf.equal(tf.argmax(self.probs1, axis=1), self.labels),
                    tf.float32))
        # Decode the content of sample_noise
        (self.decoded,
         self.decoded_logits), _ = decoder(opts,
                                           reuse=True,
                                           noise=self.sample_noise,
                                           is_training=self.is_training)
        # -- Objectives, losses, penalties
        self.loss_cls = self.cls_loss(self.labels, self.probs1)
        self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
        self.loss_recon = self.reconstruction_loss(self.opts,
                                                   self.sample_points,
                                                   self.reconstructed)
        self.mixup_loss = self.MIXUP_loss(opts, self.encoded, self.labels)
        self.gmmpara_init()
        self.loss_mixture = self.mixture_loss(self.encoded)

        self.objective = self.loss_recon + opts[
            'lambda_cls'] * self.loss_cls + opts['lambda_mixture'] * tf.cast(
                self.loss_mixture, dtype=tf.float32)
        self.objective_pre = self.loss_recon + opts[
            'lambda'] * self.loss_mmd + self.loss_cls

        self.result_logger = ResultLogger(tag, opts['work_dir'], verbose=True)
        self.tag = tag

        logpxy = []
        dimY = opts['n_classes']
        N = sample_size
        S = opts['sampling_size']
        x_rep = tf.tile(self.sample_points, [S, 1, 1, 1])
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in range(dimY):
                y = tf.fill((N, ), i)
                mu, log_sig = encoder(opts,
                                      inputs=self.sample_points,
                                      reuse=True,
                                      is_training=False,
                                      y=y)
                mu = tf.tile(mu, [S, 1])
                log_sig = tf.tile(log_sig, [S, 1])
                y = tf.tile(y, [S])
                eps2 = tf.random_normal((N * S, opts['zdim']),
                                        0.,
                                        1.,
                                        dtype=tf.float32)
                z = mu + tf.multiply(eps2, tf.sqrt(1e-8 + tf.exp(log_sig)))
                (mu_x, _), logit_y = decoder(opts,
                                             reuse=True,
                                             noise=z,
                                             is_training=False)
                logp = -tf.reduce_sum((x_rep - mu_x)**2, axis=[1, 2, 3])
                log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=y, logits=logit_y)
                posterior = tf.log(
                    self.theta_p) - 0.5 * tf.log(2 * math.pi * self.lambda_p)
                self.u_p_1 = tf.expand_dims(self.u_p, 2)
                z_m = tf.expand_dims(tf.transpose(z), 1)
                aa = tf.square(z_m - self.u_p_1)
                self.lambda_p_1 = tf.expand_dims(self.lambda_p, 2)
                bb = aa / 2 * self.lambda_p_1
                posterior = tf.expand_dims(posterior, 2) - bb
                posterior_sum = tf.reduce_sum(tf.reduce_sum(posterior, axis=0),
                                              axis=0)
                bound = 0.5 * logp + opts['lambda_cls'] * log_pyz + opts[
                    'lambda_mixture'] * posterior_sum
                bound = tf.reshape(bound, [S, N])
                bound = self.logsumexp(bound) - tf.log(float(S))
                logpxy.append(tf.expand_dims(bound, 1))
            logpxy = tf.concat(logpxy, 1)
        y_pred = tf.nn.softmax(logpxy)

        self.eval_probs = y_pred
        self.test_a = 0.5 * logp
        self.test_b = log_pyz
        self.test_c = posterior_sum

        if opts['e_pretrain']:
            self.loss_pretrain = self.pretrain_loss()
        else:
            self.loss_pretrain = None

        self.add_optimizers()
        self.add_savers()
Example #17
trainloader = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=args.batch_size,
                                          num_workers=12)
testloader = torch.utils.data.DataLoader(test_dataset,
                                         batch_size=args.batch_size,
                                         num_workers=12)

num_layers = args.x

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

if args.encoder:
    encoder = models.encoder(x=num_layers,
                             pretrained_path=args.encoder).to(device)
else:
    encoder = models.encoder(x=num_layers).to(device)

if args.decoder:
    decoder = models.decoder(x=num_layers,
                             pretrained_path=args.decoder).to(device)
else:
    decoder = models.decoder(x=num_layers).to(device)

encoder.train(True)
decoder.train(True)

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(list(decoder.parameters()) + list(encoder.parameters()),
                       lr=args.learn_rate)  # .to(device)
Example #18
def improved_sampling(opts):
    NUM_ROWS = 10
    NUM_COLS = 10
    NUM_GD_STEPS = 100000

    num_z = NUM_ROWS * NUM_COLS
    checkpoint = opts['checkpoint']
    with tf.Session() as sess:
        with sess.graph.as_default():

            z = tf.get_variable(
                "latent_codes", [num_z, opts['zdim']],
                tf.float32, tf.random_normal_initializer(stddev=1.))
            is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
            gen, _ = decoder(opts, z, is_training=is_training_ph)
            data_shape = datashapes[opts['dataset']]
            gen.set_shape([num_z] + data_shape)
            e_gen, _ = encoder(opts, gen, is_training=is_training_ph)
            if opts['e_noise'] == 'gaussian':
                e_gen = e_gen[0]
            ae_gen = decoder(opts, e_gen, reuse=True, is_training=is_training_ph)
            loss = wae.WAE.reconstruction_loss(opts, gen, ae_gen)
            # optim = tf.train.AdamOptimizer(0.001, 0.9)
            optim = tf.train.AdamOptimizer(0.01, 0.9)
            optim = optim.minimize(loss, var_list=[z])

            # Now restoring weights from the checkpoint
            # We need to restore all variables except for newly created ones
            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            enc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='encoder')
            dec_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='generator')
            new_vars = [v for v in all_vars if \
                        v not in enc_vars and v not in dec_vars]
            vars_to_restore = enc_vars + dec_vars
            saver = tf.train.Saver(vars_to_restore)
            saver.restore(sess, checkpoint)
            logging.error('Restored.')

            init = tf.variables_initializer(new_vars)
            for iteration in range(1):
                pic_id = 0
                loss_prev = 1e10
                init.run()
                for step in range(NUM_GD_STEPS):
                    if (step < 100) or (step >= 100 and step % 100 == 0):
                        # will save all 100 first steps and then every 100 steps
                        pics = gen.eval(feed_dict={is_training_ph: False})
                        codes = z.eval()
                        pic_path = os.path.join(opts['work_dir'],
                                                'pic%03d' % pic_id)
                        code_path = os.path.join(opts['work_dir'],
                                                 'code%03d' % pic_id)
                        np.save(pic_path, pics)
                        np.save(code_path, codes)
                        pic_id += 1
                    # Make a gradient step
                    sess.run(optim, feed_dict={is_training_ph: False})
                    if step % 10 == 0:
                        loss_cur = loss.eval(feed_dict={is_training_ph: False})
                        rel_imp = abs(loss_cur - loss_prev) / abs(loss_prev)
                        logging.error('step %d, loss=%f, rel_imp=%f' % (step, loss_cur, rel_imp))
                        # if rel_imp < 1e-2:
                        #     break
                        loss_prev = loss_cur
Example #19
def main(_):
    with tf.Session(config=config) as sess:
        train_x = []

        print("Start reading", train_num, "of training files ...")
        a = datetime.now().replace(microsecond=0)
        while True:
            try:
                imgs = sess.run(features)
                train_x.append(imgs)
            except tf.errors.OutOfRangeError:
                break
        b = datetime.now().replace(microsecond=0)
        print("Complete reading training files.")
        print("Time cost:", b - a)

        train_x = np.array(train_x)

        regularizer = tf.contrib.layers.l2_regularizer(scale=weight_decay)

        mean, var = encoder(X, latent_size, regularizer, is_training)
        std = tf.sqrt(var, name='z_std')
        epsilon = tf.random_normal(tf.shape(var), name='random_prob')
        sample_z = mean + epsilon * std
        sample_z = tf.identity(sample_z, name='input_z')
        decoded_x = decoder(sample_z, regularizer, is_training)

        # Add training ops into graph.
        with tf.variable_scope('train'):
            img_loss = tf.reduce_sum(tf.squared_difference(decoded_x, X),
                                     axis=[1, 2, 3])
            #img_loss = tf.reduce_sum(tf.losses.log_loss(X, decoded_x, reduction=tf.losses.Reduction.NONE), axis=[1, 2, 3])
            latent_loss = 0.5 * tf.reduce_sum(
                var + tf.square(mean) - 1 - tf.log(var), 1)
            loss = tf.reduce_mean(img_loss + latent_loss, name='loss_op')
            loss += tf.losses.get_regularization_loss()

            global_step = tf.Variable(0,
                                      name='global_step',
                                      trainable=False,
                                      collections=[
                                          tf.GraphKeys.GLOBAL_VARIABLES,
                                          tf.GraphKeys.GLOBAL_STEP
                                      ])

            optimizer = tf.train.AdamOptimizer(
                learning_rate=init_learning_rate)
            train_op = optimizer.minimize(loss,
                                          global_step=global_step,
                                          name='train_op')

        sess.run(tf.global_variables_initializer())

        global_step_tensor = sess.graph.get_tensor_by_name(
            'train/global_step:0')
        train_op = sess.graph.get_operation_by_name('train/train_op')
        loss_tensor = sess.graph.get_tensor_by_name('train/loss_op:0')
        z_tensor = sess.graph.get_tensor_by_name('input_z:0')
        prob_tensor = sess.graph.get_tensor_by_name('random_prob:0')

        # Start training
        print('Start training ...')
        a = datetime.now().replace(microsecond=0)
        loss_history = []
        for i in range(epochs):
            total_loss = 0
            np.random.shuffle(train_x)
            for j in range(steps_per_epoch):
                pos = j * batch_size
                nums = min(train_num, pos + batch_size) - pos
                _, loss_value = sess.run(
                    [train_op, loss_tensor],
                    feed_dict={X: train_x[pos:pos + nums]})
                total_loss += loss_value * nums

            total_loss /= train_num
            print("Iter: {}, Global step: {}, loss: {:.4f}".format(
                i + 1, global_step_tensor.eval(), total_loss))
            loss_history.append(total_loss)

        b = datetime.now().replace(microsecond=0)

        print("Time cost:", b - a)

        plt.plot(loss_history, label='training loss')
        plt.xlabel("epochs")
        plt.ylabel("Totla loss")
        plt.title("Training curve")
        plt.savefig("batch_" + str(batch_size) + "_latent_" +
                    str(latent_size) + "_training_curve.png",
                    dpi=100)
        plt.gcf().clear()

        # plot some images
        decoded_img, rand_prob = sess.run([decoded_x, prob_tensor],
                                          feed_dict={
                                              X: train_x[:64],
                                              is_training: False
                                          })
        test_img = train_x[:64].reshape([8, 8, 96, 96, 3])
        test_img = np.column_stack(test_img)
        test_img = np.column_stack(test_img)
        decoded_img = np.column_stack(decoded_img.reshape([8, 8, 96, 96, 3]))
        decoded_img = np.column_stack(decoded_img)

        fig, ax = plt.subplots(1, 2)
        ax[0].imshow(test_img)
        ax[0].set_title("Before encode")
        ax[1].imshow(decoded_img)
        ax[1].set_title("After decode")
        fig.suptitle("batch size: " + str(batch_size) + ", latent size: " +
                     str(latent_size))
        plt.savefig("batch_" + str(batch_size) + "_latent_" +
                    str(latent_size) + "_images_comparison.png",
                    dpi=150)
        plt.gcf().clear()

        generate_img = sess.run(decoded_x,
                                feed_dict={
                                    z_tensor: rand_prob,
                                    is_training: False
                                })
        generate_img = np.column_stack(generate_img.reshape([8, 8, 96, 96, 3]))
        generate_img = np.column_stack(generate_img)
        plt.imshow(generate_img)
        plt.title("Random images")
        plt.savefig("batch_" + str(batch_size) + "_latent_" +
                    str(latent_size) + "_generation.png",
                    dpi=100)
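For reference, the latent_loss above is the closed-form KL divergence between the encoder's diagonal Gaussian N(mean, var) and the standard normal prior; a NumPy sketch of the same per-sample term:

import numpy as np

def kl_to_standard_normal(mean, var):
    # 0.5 * sum(var + mean^2 - 1 - log(var)), i.e. KL(N(mean, var) || N(0, I)).
    return 0.5 * np.sum(var + np.square(mean) - 1.0 - np.log(var), axis=1)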
Example #20
    def __init__(self, opts):

        logging.error('Building the Tensorflow Graph')

        self.sess = tf.Session()
        self.opts = opts

        # -- Some of the parameters for future use

        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        # -- Placeholders

        self.add_model_placeholders()
        self.add_training_placeholders()
        sample_size = tf.shape(self.sample_points)[0]

        # -- Transformation ops

        # Encode the content of sample_points placeholder
        if not opts['e_is_random']:
            self.encoded = encoder(opts,
                                   inputs=self.sample_points,
                                   is_training=self.is_training)
        else:
            enc_mean, enc_sigmas = encoder(opts,
                                           inputs=self.sample_points,
                                           is_training=self.is_training)
            if opts['verbose']:
                # Debug the largest and smallest log variances
                enc_sigmas = tf.Print(
                    enc_sigmas,
                    [tf.nn.top_k(tf.reshape(enc_sigmas, [-1]), 1).values[0]],
                    'Maximal log sigmas:')
                enc_sigmas = tf.Print(
                    enc_sigmas,
                    [-tf.nn.top_k(tf.reshape(-enc_sigmas, [-1]), 1).values[0]],
                    'Minimal log sigmas:')

            eps = tf.random_normal((sample_size, opts['zdim']),
                                   0.,
                                   1.,
                                   dtype=tf.float32)
            self.encoded = enc_mean + tf.multiply(
                eps, tf.sqrt(1e-8 + tf.exp(enc_sigmas)))

        # Decode the points encoded above (i.e. reconstruct)
        self.reconstructed, self.reconstructed_logits = \
                decoder(opts, noise=self.encoded,
                        is_training=self.is_training)
        # Decode the content of sample_noise
        self.decoded, self.decoded_logits = \
                decoder(opts, reuse=True, noise=self.sample_noise,
                        is_training=self.is_training)

        # -- Objectives, losses, penalties

        self.penalty, self.loss_gan = self.matching_penalty()
        self.loss_reconstruct = self.reconstruction_loss()
        self.wae_objective = self.loss_reconstruct + \
                         opts['lambda'] * self.penalty

        if opts['e_pretrain']:
            self.loss_pretrain = self.pretrain_loss()
        else:
            self.loss_pretrain = None

        self.add_least_gaussian2d_ops()

        # -- Optimizers, savers, etc

        self.add_optimizers()
        self.add_savers()
        self.init = tf.global_variables_initializer()
Example #21
import torch
from torch import nn, optim
from torch.autograd import Variable
import itertools
from models import encoder, decoder, discriminator, loss_functions
import helpers

import matplotlib.pyplot as plt
import numpy as np
from torch.nn import functional as F

#%% setting up parameters

batch_size, dim = 750, 2

Enc = encoder(dim=dim, k=2, batch_size=batch_size)
Dec = decoder(dim=dim, k=2, batch_size=batch_size)
Disc = discriminator(dim=dim, k=2, batch_size=batch_size)
losses_ = loss_functions()
dataHandler = helpers.data_and_plotting(batch_size,
                                        encoder=Enc,
                                        decoder=Dec,
                                        discriminator=Disc,
                                        mixture=False,
                                        semi_circle=True)

#%% setting up the optimizers

#generator optimizer
optimizerE = optim.Adam(itertools.chain(Enc.parameters(), Dec.parameters()),
                        lr=5e-4)

#discriminator optimizer
optimizerD = optim.Adam(itertools.chain(Disc.parameters()), lr=5e-4)
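The two optimizers imply the usual adversarial-autoencoder schedule: alternate a discriminator update (prior samples vs. encoded codes) with an encoder/decoder update (reconstruction plus fooling the discriminator). A hedged sketch of one iteration, assuming Disc scores latent codes and outputs probabilities of shape (batch_size, 1); the real loop and losses live elsewhere in the project:

real_x = torch.randn(batch_size, dim)  # stand-in for one data batch
ones = torch.ones(batch_size, 1)
zeros = torch.zeros(batch_size, 1)

# Discriminator step: real prior samples vs. detached encoder codes.
optimizerD.zero_grad()
z = Enc(real_x)
d_loss = F.binary_cross_entropy(Disc(torch.randn_like(z)), ones) + \
         F.binary_cross_entropy(Disc(z.detach()), zeros)
d_loss.backward()
optimizerD.step()

# Encoder/decoder step: reconstruct and try to fool the discriminator.
optimizerE.zero_grad()
z = Enc(real_x)
g_loss = F.mse_loss(Dec(z), real_x) + F.binary_cross_entropy(Disc(z), ones)
g_loss.backward()
optimizerE.step()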
Example #22
    def __init__(self, opts, train_size=0):

        logging.error('Building the Tensorflow Graph')

        self.sess = tf.Session()
        self.opts = opts
        self.train_size = train_size

        # =====================================================================
        # -- Some of the parameters for future use
        # =====================================================================
        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        # =====================================================================
        # -- Placeholders
        # =====================================================================
        self.add_inputs_placeholders()
        self.add_training_placeholders()
        sample_size = tf.shape(self.sample_points)[0]  # batch_size

        # =====================================================================
        # -- Transformation ops
        # =====================================================================

        # ================================================
        # Encode the content of sample_points placeholder
        # ================================================
        res = encoder(opts,
                      inputs=self.sample_points,
                      is_training=self.is_training)

        # ================================================
        # the encoder outputs depend on the e_noise hyperparameter;
        # here they are assigned to class attributes according to the type of noise computation the encoder performs
        # ================================================
        if opts['e_noise'] in ('deterministic', 'implicit', 'add_noise'):
            self.enc_mean, self.enc_sigmas = None, None
            if opts['e_noise'] == 'implicit':
                self.encoded, self.encoder_A = res
            else:
                self.encoded, _ = res
        elif opts['e_noise'] == 'gaussian':
            # Encoder outputs means and variances of Gaussian
            enc_mean, enc_sigmas = res[0]
            enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
            self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas
            if opts['verbose']:
                self.add_sigmas_debug()
            eps = tf.random_normal((sample_size, opts['zdim']),
                                   0.,
                                   1.,
                                   dtype=tf.float32)
            self.encoded = self.enc_mean + tf.multiply(
                eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
            # self.encoded = self.enc_mean + tf.multiply(eps, tf.exp(self.enc_sigmas / 2.))

        # ================================================
        # Decode the points encoded above (i.e. reconstruct)
        # ================================================
        self.reconstructed, self.reconstructed_logits = decoder(
            opts, noise=self.encoded, is_training=self.is_training)

        # ================================================
        # Decode the content of sample_noise
        # ================================================
        self.decoded, self.decoded_logits = decoder(
            opts,
            reuse=True,
            noise=self.sample_noise,
            is_training=self.is_training)

        # ================================================
        # -- Objectives, losses, penalties
        # ================================================
        self.penalty, self.loss_gan = self.matching_penalty()
        self.loss_reconstruct = self.reconstruction_loss(
            self.opts, self.sample_points, self.reconstructed)
        self.wae_objective = self.loss_reconstruct + self.wae_lambda * self.penalty
        # Extra costs if any
        if 'w_aef' in opts and opts['w_aef'] > 0:
            improved_wae.add_aefixedpoint_cost(opts, self)

        # ================================================
        # ================================================
        self.blurriness = self.compute_blurriness()

        # ================================================
        # ================================================
        if opts['e_pretrain']:
            self.loss_pretrain = self.pretrain_loss()
        else:
            self.loss_pretrain = None

        # ================================================
        # ================================================
        self.add_least_gaussian2d_ops()

        # ================================================
        # -- Optimizers, savers, etc
        # ================================================
        self.add_optimizers()
        self.add_savers()
        self.init = tf.global_variables_initializer()
Example #23
    sentence_str=''

    for index in output:
        word=index2word[index]

        if word=='EOS':
            break
        elif word!='PAD':
            sentence_str+=word

    return sentence_str
        

if __name__=='__main__':
    dataset=datasets.Cornell
    encoder=models.encoder(dataset.num_word,512,2,0.1)
    decoder=models.decoder(dataset.num_word,512,2,'dot',0.1)
    utils.load_model(encoder,os.path.join('./Model',str(config.MODEL)),'encoder.pth')
    utils.load_model(decoder,os.path.join('./Model',str(config.MODEL)),'decoder.pth')

    bot=GreedySearchBot(encoder,decoder)
    index2word=dataset.index2word
    word2index=dataset.word2index
    max_len=10

    while(True):
        input_sentence=input('>>> ')

        if input_sentence=='q':
            break
        else:
Example #24
File: wae.py Project: kingofoz/wae
    def __init__(self, opts):

        logging.error('Building the Tensorflow Graph')

        self.sess = tf.Session()
        self.opts = opts

        # -- Some of the parameters for future use

        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        # -- Placeholders

        self.add_model_placeholders()
        self.add_training_placeholders()
        sample_size = tf.shape(self.sample_points)[0]

        # -- Transformation ops

        # Encode the content of sample_points placeholder
        if opts['e_noise'] in ('deterministic', 'implicit', 'add_noise'):
            self.enc_mean, self.enc_sigmas = None, None
            res = encoder(opts,
                          inputs=self.sample_points,
                          is_training=self.is_training)
            if opts['e_noise'] == 'implicit':
                self.encoded, self.encoder_A = res
            else:
                self.encoded = res
        elif opts['e_noise'] == 'gaussian':
            # Encoder outputs means and variances of Gaussian
            enc_mean, enc_sigmas = encoder(opts,
                                           inputs=self.sample_points,
                                           is_training=self.is_training)
            enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
            self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas
            if opts['verbose']:
                self.add_sigmas_debug()

            eps = tf.random_normal((sample_size, opts['zdim']),
                                   0.,
                                   1.,
                                   dtype=tf.float32)
            self.encoded = self.enc_mean + tf.multiply(
                eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
            # self.encoded = self.enc_mean + tf.multiply(
            #     eps, tf.exp(self.enc_sigmas / 2.))

        # Decode the points encoded above (i.e. reconstruct)
        self.reconstructed, self.reconstructed_logits = \
                decoder(opts, noise=self.encoded,
                        is_training=self.is_training)

        # Decode the content of sample_noise
        self.decoded, self.decoded_logits = \
                decoder(opts, reuse=True, noise=self.sample_noise,
                        is_training=self.is_training)

        # -- Objectives, losses, penalties

        self.penalty, self.loss_gan = self.matching_penalty()
        self.loss_reconstruct = self.reconstruction_loss()
        self.wae_objective = self.loss_reconstruct + \
                         self.wae_lambda * self.penalty

        if opts['e_pretrain']:
            self.loss_pretrain = self.pretrain_loss()
        else:
            self.loss_pretrain = None

        self.add_least_gaussian2d_ops()

        # -- Optimizers, savers, etc

        self.add_optimizers()
        self.add_savers()
        self.init = tf.global_variables_initializer()
Example #25
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    encoder = EncoderCNN()
    rendering = RenderingCNN()
    loss_function = FMOLoss()

    if g_finetune:
        g_load_temp_folder = '/home.stud/rozumden/tmp/PyTorch/20200918_2239_consfm2'
        encoder.load_state_dict(torch.load(os.path.join(g_load_temp_folder, 'encoder.pt')))
        rendering.load_state_dict(torch.load(os.path.join(g_load_temp_folder, 'rendering.pt')))

    encoder = nn.DataParallel(encoder).to(device)
    rendering = nn.DataParallel(rendering).to(device)
    loss_function = nn.DataParallel(loss_function).to(device)

    if not os.path.exists(g_temp_folder):
        os.makedirs(g_temp_folder)

    log_path = os.path.join(g_temp_folder,'training')
    if not os.path.exists(log_path):
        os.makedirs(log_path)

    encoder_params = sum(p.numel() for p in encoder.parameters())
    rendering_params = sum(p.numel() for p in rendering.parameters())
    encoder_grad = sum(int(p.requires_grad) for p in encoder.parameters())
    encoder_p = sum(1 for p in encoder.parameters())
    print('Encoder params {:.2f}M, rendering params {:.2f}M'.format(encoder_params/1e6, rendering_params/1e6))
    
    training_set = ShapeBlurDataset(dataset_folder=g_dataset_folder, render_objs = g_render_objs, number_per_category=g_number_per_category,do_augment=True,use_latent_learning=g_use_latent_learning)
    training_generator = torch.utils.data.DataLoader(training_set, batch_size=g_batch_size,shuffle=True,num_workers=g_num_workers,drop_last=True)
    val_set = ShapeBlurDataset(dataset_folder=g_validation_folder, render_objs = g_render_objs_val, number_per_category=g_number_per_category_val,do_augment=True,use_latent_learning=False)
    val_generator = torch.utils.data.DataLoader(val_set, batch_size=g_batch_size,shuffle=True,num_workers=g_num_workers,drop_last=True)

    vis_train_batch, _ = get_training_sample(["can"],min_obj=5,max_obj=5,dataset_folder=g_dataset_folder)
    vis_train_batch = vis_train_batch.unsqueeze(0).to(device)
    vis_val_batch, _ = get_training_sample(["can"],min_obj=4,max_obj=4,dataset_folder=g_validation_folder)
    vis_val_batch = vis_val_batch.unsqueeze(0).to(device)

    all_parameters = list(encoder.parameters()) + list(rendering.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=g_lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.5)
    writer = SummaryWriter(log_path)

    train_losses = []
    val_losses = []
    best_val_loss = 100.0
    for epoch in range(g_epochs):
        encoder.train()
        rendering.train()

        t0 = time.time()
        supervised_loss = []
        model_losses = []
        sharp_losses = []
        timecons_losses = []
        latent_losses = []
        joint_losses = []
        for it, (input_batch, times, hs_frames, times_left) in enumerate(training_generator):
            input_batch, times, hs_frames, times_left = input_batch.to(device), times.to(device), hs_frames.to(device), times_left.to(device)

            renders = []

            if g_use_latent_learning:
                latent = encoder(input_batch[:,:6])
                latent2 = encoder(input_batch[:,6:])
            else:
                latent = encoder(input_batch)
                latent2 = []
            
            renders = rendering(latent, torch.cat((times,times_left),1))
            
            sloss, mloss, shloss, tloss, lloss, jloss = loss_function(renders, hs_frames, input_batch[:,:6], (latent,latent2))

            supervised_loss.append(sloss.mean().item())
            model_losses.append(mloss.mean().item())
            sharp_losses.append(shloss.mean().item())
            timecons_losses.append(tloss.mean().item())
            latent_losses.append(lloss.mean().item())

            jloss = jloss.mean()
            joint_losses.append(jloss.item())    
            if it % 50 == 0:
                print("Epoch {:4d}, it {:4d}".format(epoch+1, it), end =" ")
                if g_use_supervised:
                    print(", loss {:.3f}".format(np.mean(supervised_loss)), end =" ")
                if g_use_selfsupervised_model:
                    print(", model {:.3f}".format(np.mean(model_losses)), end =" ")
                if g_use_selfsupervised_sharp_mask:
                    print(", sharp {:.3f}".format(np.mean(sharp_losses)), end =" ")
                if g_use_selfsupervised_timeconsistency:
                    print(", time {:.3f}".format(np.mean(timecons_losses)), end =" ")
                if g_use_latent_learning:
                    print(", latent {:.3f}".format(np.mean(latent_losses)), end =" ")

                print(", joint {:.3f}".format(np.mean(joint_losses)))
            
            optimizer.zero_grad()
            jloss.backward()
            optimizer.step()
        train_losses.append(np.mean(supervised_loss))

        with torch.no_grad():
            encoder.eval()
            rendering.eval()
            
            running_losses_min = []
            running_losses_max = []
            for it, (input_batch, times, hs_frames, _) in enumerate(val_generator):
                input_batch, times, hs_frames = input_batch.to(device), times.to(device), hs_frames.to(device)
                latent = encoder(input_batch)
                renders = rendering(latent, times)[:,:,:4]

                val_loss1 = fmo_loss(renders, hs_frames)
                val_loss2 = fmo_loss(renders, torch.flip(hs_frames,[1]))
                losses = torch.cat((val_loss1.unsqueeze(0),val_loss2.unsqueeze(0)),0)
                min_loss,_ = losses.min(0)
                max_loss,_ = losses.max(0)
                running_losses_min.append(min_loss.mean().item())
                running_losses_max.append(max_loss.mean().item())
            print("Epoch {:4d}, val it {:4d}, loss {}".format(epoch+1, it, np.mean(running_losses_min)))
            val_losses.append(np.mean(running_losses_min))
            if val_losses[-1] < best_val_loss and epoch >= 0:
                torch.save(encoder.module.state_dict(), os.path.join(g_temp_folder, 'encoder_best.pt'))
                torch.save(rendering.module.state_dict(), os.path.join(g_temp_folder, 'rendering_best.pt'))
                best_val_loss = val_losses[-1]
                print('    Saving best validation loss model!  ')
            
            writer.add_scalar('Loss/train_supervised', train_losses[-1], epoch+1)
            writer.add_scalar('Loss/train_joint', np.mean(joint_losses), epoch+1)
            if g_use_selfsupervised_model:
                writer.add_scalar('Loss/train_selfsupervised_model', np.mean(model_losses), epoch+1)
            if g_use_selfsupervised_sharp_mask:
                writer.add_scalar('Loss/train_selfsupervised_sharpness', np.mean(sharp_losses), epoch+1)
            if g_use_selfsupervised_timeconsistency:
                writer.add_scalar('Loss/train_selfsupervised_timeconsistency', np.mean(timecons_losses), epoch+1)
            if g_use_latent_learning:
                writer.add_scalar('Loss/train_selfsupervised_latent', np.mean(latent_losses), epoch+1)
            writer.add_scalar('Loss/val_min', val_losses[-1], epoch+1)
            writer.add_scalar('Loss/val_max', np.mean(running_losses_max), epoch+1)
            writer.add_scalar('LR/value', optimizer.param_groups[0]['lr'], epoch+1)
            writer.add_images('Vis Train Batch', get_images(encoder, rendering, device, vis_train_batch)[0], global_step=epoch+1)
            writer.add_images('Vis Val Batch', get_images(encoder, rendering, device, vis_val_batch)[0], global_step=epoch+1)
            
            concat = torch.cat((renders[:,0],renders[:,-1],hs_frames[:,0],hs_frames[:,-1]),2)
            writer.add_images('Val Batch', concat[:,3:]*(concat[:,:3]-1)+1, global_step=epoch+1)
            
        time_elapsed = (time.time() - t0)/60
        print('Epoch {:4d} took {:.2f} minutes, lr = {}, av train loss {:.5f}, val loss min {:.5f} max {:.5f}'.format(epoch+1, time_elapsed, optimizer.param_groups[0]['lr'], train_losses[-1], val_losses[-1], np.mean(running_losses_max)))
        scheduler.step()
        
    # pdb.set_trace()
    torch.cuda.empty_cache()
    torch.save(encoder.module.state_dict(), os.path.join(g_temp_folder, 'encoder.pt'))
    torch.save(rendering.module.state_dict(), os.path.join(g_temp_folder, 'rendering.pt'))
    writer.close()
Example #26
                         shuffle=shuffle_train,
                         num_workers=num_workers)

################################################################################

loss_list = []
loss_values = []
avg_loss_values = []
total_step = len(data_loader)

################################################################################

print("Pushing the model to GPU ...\n")

init = initializer().to(device)
encoder = encoder().to(device)
mean_encoder = mean_encoder().to(device)
decoder = decoder().to(device)
clstm = ConvLSTMCell(input_size=(8, 14),
                     input_dim=512,
                     hidden_dim=512,
                     kernel_size=(3, 3),
                     bias=True).to(device)
reduction = nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1).to(device)

criterion = nn.BCELoss()
params = list(init.parameters()) + list(encoder.parameters()) + list(
    decoder.parameters()) + list(clstm.parameters()) + list(
        reduction.parameters())
optimizer = torch.optim.Adam(params, lr=lr)
Example #27
def improved_sampling(opts):
    MAX_GD_STEPS = 200
    LOSS_EVERY_STEPS = 50
    DEBUG = False
    NUM_POINTS = 10000
    BATCH_SIZE = 100

    checkpoint = opts['checkpoint']

    # Creating a dummy file for later FID evaluations
    dummy_path = os.path.join(opts['work_dir'], 'checkpoints', 'dummy.meta')
    with open(dummy_path, 'w') as f:
        f.write('dummy string')

    with tf.Session() as sess:
        with sess.graph.as_default():

            # Creating the graph

            if opts['pz'] in ('normal', 'sphere'):
                codes = tf.get_variable(
                    "latent_codes", [BATCH_SIZE, opts['zdim']], tf.float32,
                    tf.random_normal_initializer(stddev=1.))
                if opts['pz'] == 'sphere':
                    # normalize each code over the latent dimension (axis=1);
                    # normalizing over axis=0 would mix codes across the batch
                    # instead of projecting each one onto the sphere
                    z = codes / (tf.norm(codes, axis=1, keepdims=True) + 1e-8)
                else:
                    z = codes
            elif opts['pz'] == 'uniform':
                codes = tf.get_variable(
                    "latent_codes", [BATCH_SIZE, opts['zdim']], tf.float32,
                    tf.random_uniform_initializer(minval=-1., maxval=1.))
                z = codes  # without this, z would be undefined below
            z = opts['pz_scale'] * z
            is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
            gen, _ = decoder(opts, z, is_training=is_training_ph)
            data_shape = datashapes[opts['dataset']]
            gen.set_shape([BATCH_SIZE] + data_shape)
            e_gen, _ = encoder(opts, gen, is_training=is_training_ph)
            if opts['e_noise'] == 'gaussian':
                e_gen = e_gen[0]
            ae_gen, _ = decoder(opts,
                                e_gen,
                                reuse=True,
                                is_training=is_training_ph)
            # Cool hack: normalizing by the picture contrast,
            # otherwise SGD manages to decrease the loss by reducing
            # the contrast
            loss = wae.WAE.reconstruction_loss(opts, contrast_norm(gen),
                                               contrast_norm(ae_gen))
            optim = tf.train.AdamOptimizer(opts['lr'], 0.9)
            optim = optim.minimize(loss, var_list=[codes])

            # Now restoring encoder and decoder from the checkpoint

            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            enc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='encoder')
            dec_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='generator')
            new_vars = [v for v in all_vars if \
                        v not in enc_vars and v not in dec_vars]
            vars_to_restore = enc_vars + dec_vars
            saver = tf.train.Saver(vars_to_restore)
            saver.restore(sess, checkpoint)
            logging.error('Restored.')

            init = tf.variables_initializer(new_vars)

            # Finally, start generating the samples

            res_samples = []
            res_codes = []

            for ibatch in range(NUM_POINTS // BATCH_SIZE):

                logging.error('Batch %d of %d' %
                              (ibatch + 1, NUM_POINTS // BATCH_SIZE))
                loss_prev = 1e10
                init.run()
                for step in range(MAX_GD_STEPS):

                    # Make a gradient step
                    sess.run(optim, feed_dict={is_training_ph: False})

                    if step == 0 or step % LOSS_EVERY_STEPS == LOSS_EVERY_STEPS - 1:
                        loss_cur, pics, codes = sess.run(
                            [loss, gen, z], feed_dict={is_training_ph: False})
                        if DEBUG:
                            if opts['input_normalize_sym']:
                                pics = (pics + 1.) / 2.
                            pic_path = os.path.join(
                                opts['work_dir'], 'checkpoints',
                                'dummy.samples100_%05d' % step)
                            code_path = os.path.join(opts['work_dir'],
                                                     'checkpoints',
                                                     'code%05d' % step)
                            np.save(pic_path, pics)
                            np.save(code_path, codes)
                        rel_imp = abs(loss_cur - loss_prev) / abs(loss_prev)
                        logging.error('-- step %d, loss=%f, rel_imp=%f' %
                                      (step, loss_cur, rel_imp))
                        if step > 0 and rel_imp < 0.1:
                            break
                        loss_prev = loss_cur

                res_samples.append(pics)
                res_codes.append(codes)

            samples = np.array(res_samples)
            samples = np.vstack(samples)
            codes = np.array(res_codes)
            codes = np.vstack(codes)
            pic_path = os.path.join(opts['work_dir'], 'checkpoints',
                                    'dummy.samples%d' % (NUM_POINTS))
            code_path = os.path.join(opts['work_dir'], 'checkpoints',
                                     'codes%d' % (NUM_POINTS))
            np.save(pic_path, samples)
            np.save(code_path, codes)
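
The contrast_norm helper referenced above is not part of this excerpt. A minimal sketch of what it might do, assuming it simply standardizes each image so the reconstruction loss cannot be lowered by globally dimming the output:

# hedged sketch of contrast_norm: per-image standardization
def contrast_norm(images, eps=1e-8):
    # images: [batch, height, width, channels]
    mean = tf.reduce_mean(images, axis=[1, 2, 3], keepdims=True)
    centered = images - mean
    std = tf.sqrt(tf.reduce_mean(tf.square(centered), axis=[1, 2, 3], keepdims=True))
    return centered / (std + eps)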
Example no. 28
0
    def __init__(self, opts, tag):
        tf.reset_default_graph()
        logging.error('Building the Tensorflow Graph')
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.Session(config=config)
        self.opts = opts

        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        self.add_inputs_placeholders()

        self.add_training_placeholders()
        sample_size = tf.shape(self.sample_points)[0]

        enc_mean, enc_sigmas = encoder(opts,
                                       inputs=self.sample_points,
                                       is_training=self.is_training,
                                       y=self.labels)

        enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
        self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas

        eps = tf.random_normal((sample_size, opts['zdim']),
                               0.,
                               1.,
                               dtype=tf.float32)
        self.encoded = self.enc_mean + tf.multiply(
            eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
        # self.encoded = self.enc_mean + tf.multiply(
        #     eps, tf.exp(self.enc_sigmas / 2.))

        (self.reconstructed, self.reconstructed_logits), self.probs1 = \
            decoder(opts, noise=self.encoded,
                    is_training=self.is_training)
        self.correct_sum = tf.reduce_sum(
            tf.cast(tf.equal(tf.argmax(self.probs1, axis=1), self.labels),
                    tf.float32))
        (self.decoded,
         self.decoded_logits), _ = decoder(opts,
                                           reuse=True,
                                           noise=self.sample_noise,
                                           is_training=self.is_training)

        self.loss_cls = self.cls_loss(self.labels, self.probs1)
        self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
        self.loss_recon = self.reconstruction_loss(self.opts,
                                                   self.sample_points,
                                                   self.reconstructed)
        self.objective = self.loss_recon + opts[
            'lambda'] * self.loss_mmd + self.loss_cls

        self.tag = tag

        # Class-conditional importance-sampling bound: for each class y,
        # draw S latent samples per input, score them with the decoder,
        # and estimate log p(x, y) via log-sum-exp over the S samples.
        logpxy = []
        dimY = opts['n_classes']
        N = sample_size
        S = opts['sampling_size']
        x_rep = tf.tile(self.sample_points, [S, 1, 1, 1])
        for i in range(dimY):
            y = tf.fill((N * S, ), i)
            mu, log_sig = encoder(opts,
                                  inputs=x_rep,
                                  reuse=True,
                                  is_training=False,
                                  y=y)
            eps2 = tf.random_normal((N * S, opts['zdim']),
                                    0.,
                                    1.,
                                    dtype=tf.float32)
            z = mu + tf.multiply(eps2, tf.sqrt(1e-8 + tf.exp(log_sig)))
            z_sample = tf.random_normal((tf.shape(z)[0], opts['zdim']),
                                        0.,
                                        1.,
                                        dtype=tf.float32)

            (mu_x, _), logit_y = decoder(opts,
                                         reuse=True,
                                         noise=z,
                                         is_training=False)
            logp = -tf.reduce_sum((x_rep - mu_x)**2, axis=[1, 2, 3])
            log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y, logits=logit_y)
            mmd_loss = self.mmd_penalty(z_sample, z)
            bound = 0.5 * logp + log_pyz + opts['lambda'] * mmd_loss
            bound = tf.reshape(bound, [S, N])
            bound = self.logsumexp(bound) - tf.log(float(S))
            logpxy.append(tf.expand_dims(bound, 1))
        logpxy = tf.concat(logpxy, 1)
        y_pred = tf.nn.softmax(logpxy)
        self.eval_probs = y_pred

        if opts['e_pretrain']:
            self.loss_pretrain = self.pretrain_loss()
        else:
            self.loss_pretrain = None

        self.add_optimizers()
        self.add_savers()
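
The logsumexp helper used above is not shown in this excerpt. A minimal sketch, assuming it reduces over the sampling axis (the bound has shape [S, N]) in a numerically stable way:

# hedged sketch: stable log-sum-exp over the sampling axis, [S, N] -> [N]
def logsumexp(self, x):
    m = tf.reduce_max(x, axis=0)
    return m + tf.log(tf.reduce_sum(tf.exp(x - m), axis=0))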
Example no. 29
0
opt.seed = 0
print("Random Seed: ", opt.seed)
random.seed(opt.seed)
torch.manual_seed(opt.seed)

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )
colorMapFile = './colormap.mat'
colormap = loadmat(colorMapFile)['cmap']
colormap = torch.from_numpy(colormap).cuda()

####################################
# Initialize Network
encoder = models.encoder(isAddCostVolume=opt.isAddCostVolume)
for param in encoder.parameters():
    param.requires_grad = False
encoder.load_state_dict(
    torch.load('{0}/encoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1),
               map_location={'cuda:0': 'cuda:{0}'.format(opt.gpuId)}))

decoder = models.decoder(isAddVisualHull=opt.isAddVisualHull)
for param in decoder.parameters():
    param.requires_grad = False
decoder.load_state_dict(
    torch.load('{0}/decoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1),
               map_location={'cuda:0': 'cuda:{0}'.format(opt.gpuId)}))

normalFeature = models.normalFeature()
for param in normalFeature.parameters():
    param.requires_grad = False
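
A side note on the map_location arguments above: the dict form remaps tensors saved from 'cuda:0' onto the chosen GPU. torch.load also accepts a plain device string, which reads a bit more directly (sketch using the same checkpoint path as above):

state = torch.load('{0}/encoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1),
                   map_location='cuda:{0}'.format(opt.gpuId))
encoder.load_state_dict(state)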
Example no. 30
0
def build_network(input_sequences,
                  output_sequences,
                  initial_state=None,
                  initialize_to_zero=True):
    batch_size = tf.shape(input_sequences)[1]
    input_sequences_rs = tf.expand_dims(input_sequences, axis=-1)
    num_prediction_steps = sequence_length - tf.shape(input_sequences)[0]

    # encoder network
    encoder_channels = [32, 16]
    encoding_channels = encoder_channels[-1]
    with tf.variable_scope("encoder"):
        all_encoder_states, final_encoder_state = encoder(
            inputs=input_sequences_rs,
            channels=encoder_channels,
            initial_state=initial_state,
            initialize_to_zero=initialize_to_zero)
        encoder_saver = tf.train.Saver()

    # decoder network
    # Uses a tf.while_loop to store the predictions in a tf.TensorArray.
    # The decoder state is initialized to the final values of the encoder
    # state, and the final output of the encoder is used as input to the decoder.
    decoder_channels = encoder_channels[::-1]
    with tf.variable_scope("decoder") as scope:
        decoder_lstm_cells = [
            tf.contrib.rnn.Conv2DLSTMCell(
                input_shape=[image_height, image_width, encoding_channels],
                kernel_shape=[3, 3],
                output_channels=num_channels)
            for num_channels in decoder_channels
        ]

        decoder_lstm = tf.contrib.rnn.MultiRNNCell(decoder_lstm_cells)

        # array to store outputs
        init_prediction_sequence = tf.TensorArray(
            tf.float32, size=tf.shape(output_sequences)[0])

        # use the last encoder state to initialize the first decoder state
        init_decoder_state = final_encoder_state[::-1]

        def condition(step, _, __, ___):
            return tf.less(step, num_prediction_steps)

        def body(step, state, current_input, prediction_sequence):
            # one decoder step; the new prediction is fed back in as the
            # next input (autoregressive rollout)
            decoder_output, new_state = decoder_lstm(current_input, state)
            new_prediction = output_layers(decoder_output)

            # ensure variables in output layers are reused
            scope.reuse_variables()

            return (tf.add(step, 1), new_state, new_prediction,
                    prediction_sequence.write(step, new_prediction))

        init = (tf.constant(0, name="i"), init_decoder_state,
                input_sequences_rs[-1, :, :, :, :], init_prediction_sequence)

        i, final_decoder_state, _, predictions_ta = tf.while_loop(
            condition, body, init)
        predictions_flat = predictions_ta.concat()

        # predictions: [time, batch, height, width]
        # contains the sequence predicted to follow the encoder input
        predictions = tf.reshape(predictions_flat,
                                 (-1, batch_size, image_height, image_width))

    return predictions, predictions_flat, final_encoder_state, encoder_saver
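
A hedged usage sketch of build_network; the globals sequence_length, image_height, and image_width, plus the encoder and output_layers helpers, are assumed to be defined elsewhere in the script. Sequences are time-major, so the placeholders are [time, batch, height, width]:

# hedged sketch: wire up placeholders and build the graph
input_ph = tf.placeholder(tf.float32,
                          [None, None, image_height, image_width],
                          name="input_sequences")
output_ph = tf.placeholder(tf.float32,
                           [None, None, image_height, image_width],
                           name="output_sequences")
predictions, predictions_flat, final_state, encoder_saver = \
    build_network(input_ph, output_ph)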