Exemplo n.º 1
0
    def __init__(self, hidden_size, batch_size, learning_rate):
        """Build a convolutional VAE graph over flattened 28x28 inputs.

        Args:
            hidden_size: dimensionality of the latent code.
            batch_size: number of images drawn when sampling from the prior.
            learning_rate: Adam learning rate for the combined loss.
        """
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=tf.nn.elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope('model'):
                # The encoder emits [mu, log(sigma^2)] concatenated on axis 1.
                code = encoder(self.input_tensor, hidden_size * 2)
                mu = code[:, :hidden_size]
                sigma = tf.sqrt(tf.exp(code[:, hidden_size:]))

                # Reparameterization trick: z = mu + eps * sigma.
                noise = tf.random_normal([tf.shape(mu)[0], hidden_size])
                output_tensor = decoder(mu + noise * sigma)

            # Reuse the decoder weights to draw samples from the prior.
            with tf.variable_scope('model', reuse=True):
                self.sampled_tensor = decoder(
                    tf.random_normal([batch_size, hidden_size]))

        vae_loss = self.__get_vae_cost(mu, sigma)
        rec_loss = self.__get_reconstruction_cost(output_tensor,
                                                  self.input_tensor)

        self.train = layers.optimize_loss(vae_loss + rec_loss,
                                          get_or_create_global_step(),
                                          learning_rate=learning_rate,
                                          optimizer='Adam',
                                          update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
    def __init__(self, hidden_size, batch_size, learning_rate, generate_size):
        """Build a GAN over flattened 28x28 inputs plus an extra sampling op.

        Args:
            hidden_size: dimensionality of the generator's latent input.
            batch_size: batch size of the training-time generator samples.
            learning_rate: generator learning rate (the discriminator is
                trained at learning_rate / 10).
            generate_size: batch size of the auxiliary generation op.
        """
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=concat_elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope("model"):
                d_on_real = discriminator(self.input_tensor)  # positive examples
                # Every variable created so far belongs to the discriminator.
                num_d_params = len(tf.trainable_variables())
                fake = decoder(tf.random_normal([batch_size, hidden_size]))
                self.sampled_tensor = fake

            with tf.variable_scope("model", reuse=True):
                d_on_fake = discriminator(fake)  # generated examples

            with tf.variable_scope("model", reuse=True):
                # Second sampler that shares the generator weights.
                self.sampled_tensor_gener = decoder(
                    tf.random_normal([generate_size, hidden_size]))

        D_loss = self.__get_discrinator_loss(d_on_real, d_on_fake)
        G_loss = self.__get_generator_loss(d_on_fake)

        all_params = tf.trainable_variables()
        global_step = tf.contrib.framework.get_or_create_global_step()
        self.train_discrimator = layers.optimize_loss(
            D_loss,
            global_step,
            learning_rate / 10,
            'Adam',
            variables=all_params[:num_d_params],
            update_ops=[])
        self.train_generator = layers.optimize_loss(
            G_loss,
            global_step,
            learning_rate,
            'Adam',
            variables=all_params[num_d_params:],
            update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Exemplo n.º 3
0
    def __init__(self, hidden_size, batch_size, learning_rate, size):
        """Build a convolutional VAE for images of shape ``size``.

        Args:
            hidden_size: dimensionality of the latent code.
            batch_size: fixed batch size of the input placeholder.
            learning_rate: Adam learning rate.
            size: (height, width, channels) of the input images.
        """
        img_size = size[0] * size[1] * size[2]
        self.input_tensor = tf.placeholder(tf.float32, [batch_size, img_size])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=tf.nn.elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope("model"):
                # Encoder emits [mu, log(sigma^2)] concatenated on axis 1.
                code = encoder(self.input_tensor, hidden_size * 2, size)
                mu = code[:, :hidden_size]
                sigma = tf.sqrt(tf.exp(code[:, hidden_size:]))

                # Reparameterized latent sample feeds the decoder.
                noise = tf.random_normal([tf.shape(mu)[0], hidden_size])
                output_tensor = decoder(mu + noise * sigma, img_size)

            with tf.variable_scope("model", reuse=True):
                # Prior samples share the decoder weights; expose the
                # reconstruction as an attribute as well.
                self.sampled_tensor = decoder(
                    tf.random_normal([batch_size, hidden_size]), img_size)
                self.recons_tensor = output_tensor

        vae_loss = self.__get_vae_cost(mu, sigma)
        rec_loss = self.__get_reconstruction_cost(
            output_tensor,
            self.input_tensor)  # output_tensor: y  input_tensor: x

        self.train = layers.optimize_loss(
            vae_loss + rec_loss,
            tf.contrib.framework.get_or_create_global_step(),
            learning_rate=learning_rate,
            optimizer='Adam',
            update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Exemplo n.º 4
0
    def __init__(self, hidden_size, batch_size, learning_rate):
        """Build a GAN over flattened 28x28 images whose batch-norm layers
        are switchable between train/inference via ``is_training``.

        Args:
            hidden_size: dimensionality of the generator's latent input.
            batch_size: batch size used when sampling the generator.
            learning_rate: generator learning rate; the discriminator is
                trained with learning_rate / 10.
        """
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
        # Defaults to True so training runs need not feed it explicitly.
        self.is_training = tf.placeholder_with_default(True, [])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=concat_elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={
                           'scale': True,
                           'is_training': self.is_training
                       }):
            with tf.variable_scope("model"):
                D1 = discriminator(self.input_tensor)  # positive examples
                # Variables created so far all belong to the discriminator.
                D_params_num = len(tf.trainable_variables())
                G = decoder(tf.random_normal([batch_size, hidden_size]))
                self.sampled_tensor = G

            with tf.variable_scope("model", reuse=True):
                D2 = discriminator(G)  # generated examples

        D_loss = self.__get_discrinator_loss(D1, D2)
        G_loss = self.__get_generator_loss(D2)

        params = tf.trainable_variables()
        D_params = params[:D_params_num]
        G_params = params[D_params_num:]
        # Split the batch-norm moving-average updates between the two
        # training ops by op-name prefix.
        # NOTE(review): this assumes ops created under the reused scope get
        # the uniquified name prefix 'model_1/' -- confirm against the
        # actual graph, as the split silently misroutes updates otherwise.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        g_update_ops = [
            op for op in update_ops if op.name.startswith('model_1/')
        ]
        d_update_ops = [op for op in update_ops if op not in g_update_ops]
        #    train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
        # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
        global_step = tf.contrib.framework.get_or_create_global_step()
        # Run the matching batch-norm updates alongside each optimizer step.
        with tf.control_dependencies(d_update_ops):
            self.train_discrimator = layers.optimize_loss(D_loss,
                                                          global_step,
                                                          learning_rate / 10,
                                                          'Adam',
                                                          variables=D_params,
                                                          update_ops=[])
        with tf.control_dependencies(g_update_ops):
            self.train_generator = layers.optimize_loss(G_loss,
                                                        global_step,
                                                        learning_rate,
                                                        'Adam',
                                                        variables=G_params,
                                                        update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Exemplo n.º 5
0
    def __init__(self, model, channel_num, batch_size, seq_len, learning_rate,
                 ws, wg, wt, phase, sum_dir):
        """Build a (recurrent) denoising autoencoder over frame sequences.

        Args:
            model: 'RAE' builds a ConvLSTM recurrent encoder, 'AE' a plain
                feed-forward encoder.
            channel_num: channel count of the noisy input frames.
            batch_size: fixed batch size of every placeholder.
            seq_len: number of frames per input sequence.
            learning_rate: Adam learning rate.
            ws, wg, wt: weights of the spatial, gradient and (RAE only)
                temporal L1 losses.
            phase: 'train'/'test' use 128x128 frames, anything else 416x800.
            sum_dir: output directory for the TensorBoard summary writer.
        """
        # Frame geometry depends on the phase; computed once and reused for
        # both the placeholders and the ConvLSTM cell shapes (the original
        # duplicated the literals in two places).
        if phase == 'train' or phase == 'test':
            inpH, inpW = 128, 128
        else:
            inpH, inpW = 416, 800
        self.inputNoiseList = [
            tf.placeholder(tf.float32, [batch_size, inpH, inpW, channel_num])
            for _ in range(seq_len)
        ]
        self.inputCleanList = [
            tf.placeholder(tf.float32, [batch_size, inpH, inpW, 3])
            for _ in range(seq_len)
        ]

        with arg_scope(
            [layers.conv2d],
                activation_fn=tf.nn.leaky_relu,
                #normalizer_fn=layers.batch_norm,
                normalizer_params={'scale': True},
                padding='SAME'):
            with tf.variable_scope("model"):  # Full VAEGAN structure
                if model == 'RAE':
                    with tf.name_scope("initalize_RNN_cell"):
                        # BUG FIX: cell shapes must be ints; "/" yields
                        # floats under Python 3, so "//" is required for the
                        # downsampled shapes (all divisions are exact for
                        # 128x128 and 416x800 inputs).
                        cell1 = rnn.ConvLSTMCell(2, [inpH, inpW, 32],
                                                 32, [3, 3],
                                                 name='rnn1')
                        cell2 = rnn.ConvLSTMCell(2, [inpH // 2, inpW // 2, 43],
                                                 43, [3, 3],
                                                 name='rnn2')
                        cell3 = rnn.ConvLSTMCell(2, [inpH // 4, inpW // 4, 57],
                                                 57, [3, 3],
                                                 name='rnn3')
                        cell4 = rnn.ConvLSTMCell(2, [inpH // 8, inpW // 8, 76],
                                                 76, [3, 3],
                                                 name='rnn4')
                        cell5 = rnn.ConvLSTMCell(2,
                                                 [inpH // 16, inpW // 16, 101],
                                                 101, [3, 3],
                                                 name='rnn5')
                        cell6 = rnn.ConvLSTMCell(2,
                                                 [inpH // 32, inpW // 32, 101],
                                                 101, [3, 3],
                                                 name='rnn6')

                    # Recurrent encoder over the whole noisy sequence.
                    l1, l2, l3, l4, l5, out = encoderRNN(
                        self.inputNoiseList, batch_size, cell1, cell2, cell3,
                        cell4, cell5, cell6, (inpH, inpW), reuse_vars=False)
                elif model == "AE":
                    l1, l2, l3, l4, l5, out = encoder(self.inputNoiseList,
                                                      batch_size,
                                                      reuse_vars=False)
                # Variables created so far belong to the encoder.
                Enc_params_num = len(tf.trainable_variables())

                # Decoder / Generator (skip connections l1..l5 plus bottleneck).
                self.denoised_imgList = decoder(l1,
                                                l2,
                                                l3,
                                                l4,
                                                l5,
                                                out, (inpH, inpW),
                                                reuse_vars=False)
                Enc_n_Dec_params_num = len(tf.trainable_variables())

        self.params = tf.trainable_variables()
        self.Enc_params = self.params[:Enc_params_num]
        self.Dec_params = self.params[Enc_params_num:Enc_n_Dec_params_num]
        print(len(self.params))
        for var in self.params:
            print(var.name)

        # Per-frame L1 reconstruction loss and its gradient-domain variant.
        self.Spatial_loss = self.__get_L1_loss(self.denoised_imgList,
                                               self.inputCleanList)
        Spatial_loss_sum = tf.summary.scalar('Spatial_loss', self.Spatial_loss)
        self.Gradient_loss = self.__get_grad_L1_loss(self.denoised_imgList,
                                                     self.inputCleanList)
        Gradient_loss_sum = tf.summary.scalar('Gradient_loss',
                                              self.Gradient_loss)
        if model == 'RAE':
            # The recurrent model additionally penalizes temporal flicker.
            self.Temporal_loss = self.__get_tem_L1_loss(
                self.denoised_imgList, self.inputCleanList)
            Temporal_loss_sum = tf.summary.scalar('Temporal_loss',
                                                  self.Temporal_loss)
            # merge summary for Tensorboard
            self.detached_loss_summary_merged = tf.summary.merge(
                [Spatial_loss_sum, Gradient_loss_sum, Temporal_loss_sum])
            # loss function
            total_loss = (ws * self.Spatial_loss + wg * self.Gradient_loss +
                          wt * self.Temporal_loss)
        elif model == 'AE':
            self.detached_loss_summary_merged = tf.summary.merge(
                [Spatial_loss_sum, Gradient_loss_sum])
            # loss function
            total_loss = ws * self.Spatial_loss + wg * self.Gradient_loss

        self.train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                            beta1=0.9,
                                            beta2=0.99,
                                            epsilon=1e-08,
                                            name='Adam').minimize(
                                                total_loss,
                                                var_list=self.params)

        # Grow GPU memory on demand instead of grabbing it all up front.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
        self.train_writer = tf.summary.FileWriter(sum_dir, self.sess.graph)
Exemplo n.º 6
0
        pattern = re.compile("\d+_(.+)\.png")

        while True:
            file = choice(files)

            mat = cv.imread(file.path, cv.IMREAD_GRAYSCALE)
            image = np.array(Image.open(file.path), dtype=np.uint8).reshape(
                (128, 64, 1)) / 255

            label = pattern.search(file.name).group(1)
            predictions = model.predict(np.array([image]))

            cv.imshow('Input', mat)
            print('Ground truth vs prediction')
            print(label)
            print(decoder(predictions[0]))

            if cv.waitKey() == 27:
                break
    else:
        if os.path.isfile(arguments.image):
            image_pil = ImageOps.grayscale(
                Image.open(arguments.image).resize((128, 64)))
            image = np.array(image_pil, dtype=np.uint8).reshape(
                (128, 64, 1)) / 255

            predictions = model.predict(np.array([image]))

            print(decoder(predictions[0]))
        else:
            print(f'Input image was not found at {arguments.image}')
Exemplo n.º 7
0
# Placeholders: raw inputs plus per-sample distance/flag weighting terms.
input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
xs2 = tf.placeholder(tf.float32, [None, 28 * 28])
dis = tf.placeholder(tf.float32, [1, None])
flag = tf.placeholder(tf.float32, [1, None])

with tf.variable_scope("model") as scope:
    encoded = encoder(input_tensor, hidden_size * 2)

    mean = encoded[:, :hidden_size]
    # sqrt(square(x)) == |x|: keeps the scale non-negative.
    stddev = tf.sqrt(tf.square(encoded[:, hidden_size:]))

    epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
    input_sample = mean + epsilon * stddev

    output_tensor = decoder(input_sample)

# BUG FIX: the original re-entered scope "model" here WITHOUT reuse=True,
# so the second encoder/decoder call would try to re-create variables that
# already exist and raise a ValueError in TF1. The second pass must share
# the first pass's weights.
with tf.variable_scope("model", reuse=True) as scope:
    encoded1 = encoder(xs2, hidden_size * 2)

    mean1 = encoded1[:, :hidden_size]
    stddev1 = tf.sqrt(tf.square(encoded1[:, hidden_size:]))

    epsilon1 = tf.random_normal([tf.shape(mean1)[0], hidden_size])
    input_sample1 = mean1 + epsilon1 * stddev1

    output_tensor1 = decoder(input_sample1)

with tf.variable_scope("model", reuse=True) as scope:
    encoded = encoder(input_tensor, hidden_size * 2)
Exemplo n.º 8
0
        transforms.Lambda(cv_resize),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    device = 'cpu'
    train_dataset = yoloDataset(list_file='2007_val.txt',
                                train=False,
                                transform=transform,
                                device=device,
                                little_train=True,
                                S=14)
    train_loader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)
    train_iter = iter(train_loader)
    for i in range(1):
        img, target = next(train_iter)
    print(img.shape, target.shape)
    boxes, clss, confs = decoder(target)
    print(boxes, clss, confs)

    mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
    un_normal_trans = transforms.Normalize((-mean / std).tolist(),
                                           (1.0 / std).tolist())
    img = un_normal_trans(img.squeeze(0))
    draw_debug_rect(img.permute(1, 2, 0), boxes)
    for i in range(14):
        for j in range(14):
            print(target[:, i:i + 1, j:j + 1, :])
Exemplo n.º 9
0
    def __init__(self,
                 hidden_size,
                 batch_size,
                 learning_rate,
                 alpha,
                 beta,
                 gamma,
                 sum_dir,
                 attri_num,
                 add_gan=1,
                 GAN_model="V",
                 similarity_layer=4):
        """Build a VAE-GAN over 64x64x3 images (legacy TF summary API).

        Args:
            hidden_size: dimensionality of the latent code.
            batch_size: fixed batch size of the input placeholder.
            learning_rate: base RMSProp learning rate (discriminator is
                scaled by ``alpha``).
            alpha: discriminator learning-rate multiplier.
            beta: weight of the reconstruction loss in the encoder loss.
            gamma: weight of the reconstruction loss in the decoder loss.
            sum_dir: directory for the TensorBoard summary writer.
            attri_num: size of the (currently unused) label placeholder.
            add_gan: 1 to attach the discriminator/GAN losses, else plain VAE.
            GAN_model: discriminator variant flag passed through to
                ``discriminator``.
            similarity_layer: discriminator layer whose activations define
                the feature-space reconstruction loss.
        """
        print("\nInitializing model with following parameters:")
        print("batch_size:", batch_size, " learning_rate:", learning_rate,
              " alpha:", alpha, " beta:", beta, " gamma:", gamma)
        print("GAN_model:", GAN_model, " similarity_layer:", similarity_layer,
              "\n")

        self.input_tensor = tf.placeholder(tf.float32, [batch_size, 64, 64, 3])
        #self.input_label  = tf.placeholder(tf.int, [batch_size, attri_num])
        # Latent-space offset added to the mean for attribute manipulation.
        self.visual_attri = tf.placeholder(tf.float32, [hidden_size])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=tf.nn.relu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True},
                       padding='SAME'):
            with tf.variable_scope("model") as scope:  #Full VAEGAN structure
                # Encoder
                ENC = encoder(self.input_tensor, hidden_size * 2)
                # Variables created so far belong to the encoder.
                Enc_params_num = len(tf.trainable_variables())

                # Add noise (reparameterization: z = mean + eps * stddev).
                # NOTE: tf.split(1, 2, ...) is the legacy pre-1.0 argument
                # order (split_dim first).
                self.mean, stddev = tf.split(1, 2, ENC)
                stddev = tf.sqrt(tf.exp(stddev))
                epsilon = tf.random_normal(
                    [tf.shape(self.mean)[0], hidden_size])
                ENC_w_noise = self.mean + epsilon * stddev

                # Decoder / Generator
                self.DEC_of_ENC = decoder(ENC_w_noise)
                Enc_n_Dec_params_num = len(tf.trainable_variables())

                # Discriminator (only built when GAN training is enabled).
                if add_gan == 1:
                    DIS_of_DEC_of_ENC = discriminator(self.DEC_of_ENC,
                                                      GAN_model)
                    Gen_dis_sum = tf.scalar_summary(
                        'Gen_dis_mean', tf.reduce_mean(DIS_of_DEC_of_ENC))

            with tf.variable_scope(
                    "model", reuse=True) as scope:  #Computation for Recon_Loss
                if add_gan == 1:
                    # Feature-space similarity at ``similarity_layer``.
                    Real_Similarity = discriminator(self.input_tensor,
                                                    GAN_model,
                                                    extract=similarity_layer)
                    Gen_Similarity = discriminator(
                        self.DEC_of_ENC, GAN_model, extract=similarity_layer
                    )  #+ tf.random_normal([batch_size, 8, 8, 256])

            with tf.variable_scope(
                    "model", reuse=True) as scope:  #Computation for GAN_Loss
                if add_gan == 1:
                    Real_in_Dis = discriminator(self.input_tensor, GAN_model)
                    Real_dis_sum = tf.scalar_summary(
                        'Real_dis_mean', tf.reduce_mean(Real_in_Dis))
                    Prior_in_Dis = discriminator(
                        decoder(tf.random_normal([batch_size, hidden_size])),
                        GAN_model)
                    Prior_dis_sum = tf.scalar_summary(
                        'Prior_dis_mean', tf.reduce_mean(Prior_in_Dis))

            with tf.variable_scope(
                    "model", reuse=True) as scope:  #Sample from latent space
                self.sampled_tensor = decoder(
                    tf.random_normal([batch_size, hidden_size]))

            with tf.variable_scope(
                    "model", reuse=True) as scope:  #Add visual attributes
                #expand_mean = tf.expand_dims(self.mean, -1)
                print("shape of mean:", np.shape(self.mean),
                      " shape of visual attri:", np.shape(self.visual_attri))
                # Broadcast the attribute vector across the batch dimension.
                add_attri = self.mean + np.ones(
                    [batch_size, 1]
                ) * self.visual_attri  #[batch size, hidden size] (broadcasting)
                print("shape of add attri:", tf.shape(add_attri))
                self.with_attri_tensor = decoder(add_attri)

        # Partition trainable variables by creation order:
        # encoder | decoder | (discriminator).
        self.params = tf.trainable_variables()
        self.Enc_params = self.params[:Enc_params_num]
        '''
        print ('Encoder Param:')
        for var in Enc_params:
            print (var.name)
        '''
        self.Dec_params = self.params[Enc_params_num:Enc_n_Dec_params_num]
        '''
        print ('Decoder Param:')
        for var in Dec_params:
            print (var.name)
        '''
        if add_gan == 1:
            self.Dis_params = self.params[Enc_n_Dec_params_num:]
        '''
        print ('Discriminator Param:')
        for var in Dis_params:
            print (var.name)
        '''
        self.Prior_loss = self.__get_prior_loss(self.mean, stddev)
        Prior_loss_sum = tf.scalar_summary('Prior_loss', self.Prior_loss)
        if add_gan == 1:
            # With GAN: reconstruct in discriminator feature space.
            self.Recon_loss = self.__get_reconstruction_loss(
                Gen_Similarity, Real_Similarity)
            Recon_loss_sum = tf.scalar_summary('Recon_loss', self.Recon_loss)
            self.GAN_loss = self.__get_GAN_loss(Real_in_Dis, Prior_in_Dis,
                                                DIS_of_DEC_of_ENC, GAN_model)
            GAN_loss_sum = tf.scalar_summary('GAN_loss', self.GAN_loss)
        else:
            # Without GAN: plain pixel-space reconstruction.
            self.Recon_loss = self.__get_reconstruction_loss(
                self.DEC_of_ENC, self.input_tensor)
            Recon_loss_sum = tf.scalar_summary('Recon_loss', self.Recon_loss)

        # merge  summary for Tensorboard
        if add_gan == 1:
            self.detached_loss_summary_merged = tf.merge_summary([
                Prior_loss_sum, Recon_loss_sum, GAN_loss_sum, Real_dis_sum,
                Prior_dis_sum, Gen_dis_sum
            ])
            #self.dis_mean_value_summary_merged         =  tf.merge_summary([Real_dis_sum,Prior_dis_sum,Gen_dis_sum])
        else:
            self.detached_loss_summary_merged = tf.merge_summary(
                [Prior_loss_sum, Recon_loss_sum])

        # Three-player losses: the discriminator maximizes GAN_loss, hence
        # the -1 factor on its objective.
        if add_gan == 1:
            enc_loss = self.Prior_loss + beta * self.Recon_loss
            dec_loss = gamma * self.Recon_loss + self.GAN_loss
            dis_loss = (-1) * self.GAN_loss
        else:
            total_loss = self.Prior_loss + beta * self.Recon_loss

        #self.combined_loss_summary_merged          =  tf.merge_summary([self.prior_loss_sum,self.recon_loss_sum,self.GAN_loss_sum])
        if add_gan == 1:
            self.train_enc = layers.optimize_loss(enc_loss, tf.contrib.framework.get_or_create_global_step(\
                ), learning_rate=learning_rate, variables = self.Enc_params, optimizer='RMSProp', update_ops=[])

            self.train_dec = layers.optimize_loss(dec_loss, tf.contrib.framework.get_or_create_global_step(\
                ), learning_rate=learning_rate, variables = self.Dec_params, optimizer='RMSProp', update_ops=[])

            self.train_dis = layers.optimize_loss(dis_loss, tf.contrib.framework.get_or_create_global_step(\
                ), learning_rate=learning_rate * alpha, variables = self.Dis_params, optimizer='RMSProp', update_ops=[])
        else:
            self.train     = layers.optimize_loss(total_loss, tf.contrib.framework.get_or_create_global_step(\
                ), learning_rate=learning_rate, variables = self.params, optimizer='RMSProp', update_ops=[])

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(tf.initialize_all_variables())

        self.train_writer = tf.train.SummaryWriter(sum_dir + '/train',
                                                   self.sess.graph)
Exemplo n.º 10
0
    transform = transforms.Compose([
        transforms.Lambda(cv_resize),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    S = 7
    train_dataset = yoloDataset(list_file='datasets/2012_seg.txt',train=False,transform = transform, test_mode=True, with_mask=True, S=S, device='cuda:0')
    train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=0)
    train_iter = iter(train_loader)
    # print(next(train_iter))
    for i in range(200):
        img, target, mask_label = next(train_iter)
        # mask_img = mask_img.squeeze(0).cpu().numpy()
        # print('mask shape is :', mask_img.shape)
        # print(img.shape, target.shape)
        boxes, clss, confs = decoder(target, grid_num=S, gt=True)
        # print(boxes, clss, confs)
        print('~'*50 + '\n\n\n')

        mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
        std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
        un_normal_trans = transforms.Normalize((-mean / std).tolist(), (1.0 / std).tolist())
        img = un_normal_trans(img.squeeze(0))
        img = draw_debug_rect(img.permute(1, 2 ,0), boxes, clss, confs)
        img = draw_classify_confidence_map(img, target, S, Color)
        cv2.imshow('img', img)
        # print(mask_label[0, 10:100, 10:100])
        
        mask_img = mask_label_2_img(mask_label)
        print(mask_label.shape, mask_label.dtype)
        pred_mask = pred_mask_label_2_img(mask_label[0])
Exemplo n.º 11
0
    def __init__(self, hidden_size, batch_size, learning_rate):
        """Build a VAE/GAN hybrid over flattened 28x28 images.

        The encoder/decoder form a VAE whose reconstruction ``G`` is scored
        by a discriminator; a second input batch (``xs2``) is encoded with
        shared weights and its mean feeds the generator loss.

        Args:
            hidden_size: dimensionality of the latent code.
            batch_size: unused in the live code (only in a commented-out
                prior-sampling line).
            learning_rate: generator learning rate; the discriminator is
                trained with learning_rate / 10.
        """
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
        self.xs2 = tf.placeholder(tf.float32, [None, 28 * 28])
        # Per-sample distance/flag terms, presumably consumed by the loss
        # helpers -- TODO confirm against their definitions.
        self.dis = tf.placeholder(tf.float32, [1, None])
        self.flag = tf.placeholder(tf.float32, [1, None])

        with arg_scope([layers.conv2d, layers.conv2d_transpose],
                       activation_fn=concat_elu,
                       normalizer_fn=layers.batch_norm,
                       normalizer_params={'scale': True}):
            with tf.variable_scope("model"):
                D1 = discriminator(self.input_tensor)  # positive examples
                # Variables created so far belong to the discriminator.
                D_params_num = len(tf.trainable_variables())
                encoded = encoder(self.input_tensor, hidden_size * 2)

                mean = encoded[:, :hidden_size]
                # sqrt(square(x)) == |x|; keeps the scale non-negative.
                stddev = tf.sqrt(tf.square(encoded[:, hidden_size:]))

                epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
                input_sample = mean + epsilon * stddev
                # G = decoder(tf.random_normal([batch_size, hidden_size]))
                # Marks the boundary between encoder and decoder variables.
                G_params_num = len(tf.trainable_variables())
                G = decoder(input_sample)
                self.sampled_tensor = G

            with tf.variable_scope("model", reuse=True):
                D2 = discriminator(G)  # generated examples
                # Second batch encoded with the shared encoder weights.
                encoded1 = encoder(self.xs2, hidden_size * 2)

                mean1 = encoded1[:, :hidden_size]
                stddev1 = tf.sqrt(tf.square(encoded1[:, hidden_size:]))

                epsilon1 = tf.random_normal([tf.shape(mean1)[0], hidden_size])
                input_sample1 = mean1 + epsilon1 * stddev1

                output_tensor1 = decoder(input_sample1)

        D_loss = self.__get_discrinator_loss(D1, D2)
        G_loss = self.__get_generator_loss(D2, mean, stddev, mean1)

        params = tf.trainable_variables()
        D_params = params[:D_params_num]
        # NOTE(review): encoder variables (params[D_params_num:G_params_num])
        # are trained by neither optimizer even though G_loss depends on
        # mean/stddev -- confirm this is intentional.
        G_params = params[G_params_num:]
        #    train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
        # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
        global_step = tf.contrib.framework.get_or_create_global_step()
        self.train_discrimator = layers.optimize_loss(D_loss,
                                                      global_step,
                                                      learning_rate / 10,
                                                      'Adam',
                                                      variables=D_params,
                                                      update_ops=[])
        self.train_generator = layers.optimize_loss(G_loss,
                                                    global_step,
                                                    learning_rate,
                                                    'Adam',
                                                    variables=G_params,
                                                    update_ops=[])

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Exemplo n.º 12
0
    def __init__(self, input_size, hidden_size, batch_size, learning_rate,
                 log_dir):
        """Build the training graph for an adversarial state-space autoencoder.

        The input holds three concatenated vectors [s_{t-1}, s_t, o_t], each
        of width `input_size`.  Shared-weight encoders map the two state
        frames (and a separate encoder maps the observation) to
        `hidden_size`-dim latents; decoders reconstruct each segment.  Two
        discriminators push the latent transition/emission residuals towards
        learned Gaussian distributions, and a likelihood term is tracked for
        monitoring.

        Args:
            input_size: width of each of the three input segments.
            hidden_size: dimensionality of each latent representation.
            batch_size: number of Gaussian samples drawn per training step.
            learning_rate: base learning rate; each sub-objective scales it.
            log_dir: directory for TensorBoard summaries.
        """
        # Three frames concatenated along the feature axis: [s_{t-1}, s_t, o_t].
        self.input_tensor = tf.placeholder(tf.float32, [None, 3 * input_size])
        # Feed-in point for decoding an externally supplied latent state s_t.
        self.s_t_p_placeholder = tf.placeholder(tf.float32,
                                                [None, hidden_size])
        # NOTE(review): the triple-quoted block below is dead code kept for
        # reference -- it loaded ground-truth Gaussian parameters from
        # 'params.txt'.  It is a bare string expression and never executes.
        '''
        ##################################
        with open('params.txt') as f:
            first = f.readline()
            first = first.strip('\n')
            temp = first.split(' ')
            o_p_dim = int(temp[3]);
            s_p_dim = int(temp[4]);
            ln = f.readline()
            for i in range(s_p_dim):
                temp = f.readline()
            self.sig_2_init = np.zeros((s_p_dim, s_p_dim), np.float32)
            for i in range(s_p_dim):
                temp = f.readline().strip('\n').split(' ')
                for j in range(s_p_dim):
                    self.sig_2_init[i,j] = float(temp[j])
            
            eig_val , eig_vec = np.linalg.eig(self.sig_2_init)
            cf = np.sqrt(np.repeat(eig_val,s_p_dim).reshape(s_p_dim,s_p_dim).transpose())
            self.r_2_init = np.multiply(cf,eig_vec)
            
            self.sig_3_init = np.zeros((o_p_dim, o_p_dim), np.float32)
            for i in range(o_p_dim):
                temp = f.readline().strip('\n').split(' ')
                for j in range(o_p_dim):
                    self.sig_3_init[i,j] = float(temp[j])
            
            eig_val , eig_vec = np.linalg.eig(self.sig_3_init)
            cf = np.sqrt(np.repeat(eig_val,o_p_dim).reshape(o_p_dim,o_p_dim).transpose())
            self.r_3_init = np.multiply(cf,eig_vec)
            
            self.a_2_init = np.zeros((s_p_dim, s_p_dim), np.float32)
            for i in range(s_p_dim):
                temp = f.readline().strip('\n').split(' ')
                for j in range(s_p_dim):
                    self.a_2_init[i,j] = float(temp[j])
            
            self.a_3_init = np.zeros((s_p_dim, o_p_dim), np.float32)
            for i in range(s_p_dim):
                temp = f.readline().strip('\n').split(' ')
                for j in range(o_p_dim):
                    self.a_3_init[i,j] = float(temp[j])     
        ###################################
        '''

        with arg_scope([layers.fully_connected], activation_fn=tf.nn.relu):
            with tf.variable_scope("encoder"):
                # s_{t-1} and s_t share encoder weights via scope reuse;
                # o_t gets its own encoder.
                with tf.variable_scope("encoder_s_t"):
                    self.s_t_minus_1_p = encoder(self.input_tensor[:, :input_size],\
                        hidden_size)
                with tf.variable_scope("encoder_s_t", reuse=True):
                    self.s_t_p = encoder(self.input_tensor[:, input_size:2 * input_size],\
                        hidden_size)
                with tf.variable_scope("encoder_o_t"):
                    self.o_t_p = encoder(self.input_tensor[:, 2 * input_size:],\
                        hidden_size)

            with tf.variable_scope("decoder"):
                # The s_t decoder is reused for s_{t-1}, s_t, and the
                # externally supplied placeholder latent.
                with tf.variable_scope("decoder_s_t"):
                    self.output_s_t_minus_1 = decoder(self.s_t_minus_1_p,
                                                      input_size)
                with tf.variable_scope("decoder_s_t", reuse=True):
                    self.output_s_t = decoder(self.s_t_p, input_size)
                with tf.variable_scope("decoder_s_t", reuse=True):
                    self.s_t_decoded = decoder(self.s_t_p_placeholder,
                                               input_size)
                with tf.variable_scope("decoder_o_t"):
                    self.output_o_t = decoder(self.o_t_p, input_size)
            # Reassemble the three reconstructions in the input's layout.
            self.output_tensor = tf.concat(
                [self.output_s_t_minus_1, self.output_s_t, self.output_o_t],
                axis=1)

            # Gaussian transition/emission parameters; alternative
            # parameterisations are kept commented out for experimentation.
            #self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._MLE_Gaussian_params()
            self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._simple_Gaussian_params(
            )
            #self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._simple_Gaussian_plus_offset_params()
            # Cholesky factors of the covariances (used by the TriL variants
            # commented out below and exposed as trainable params).
            self.r_2 = tf.cholesky(self.sigma_2)
            self.r_3 = tf.cholesky(self.sigma_3)

            #define reconstruction loss
            reconstruction_loss = tf.reduce_mean(tf.norm(self.output_tensor - \
                self.input_tensor, axis=1))

            # define classification loss
            # y_1: residual of the latent transition, s_t - A2 * s_{t-1}.
            y_1 = self.s_t_p - tf.matmul(self.s_t_minus_1_p, self.a_2)
            # NOTE(review): MultivariateNormalFull is a deprecated
            # tf.contrib.distributions API (TF 1.x era).
            mvn_1 = tf.contrib.distributions.MultivariateNormalFull(
                self.b_2, self.sigma_2)
            #mvn_1 = tf.contrib.distributions.MultivariateNormalTrill(self.b_2, scale_tril=self.r_2)
            pos_samples_1 = mvn_1.sample(batch_size)

            # y_2: residual of the emission, o_t - A3 * s_t.
            y_2 = self.o_t_p - tf.matmul(self.s_t_p, self.a_3)
            #mvn_2 = tf.contrib.distributions.MultivariateNormalTriL(self.b_3, scale_tril=self.r_3)
            mvn_2 = tf.contrib.distributions.MultivariateNormalFull(
                self.b_3, self.sigma_3)
            pos_samples_2 = mvn_2.sample(batch_size)

            # Two discriminators: d1 separates transition residuals from true
            # Gaussian samples; d2 does the same for emission residuals.
            with tf.variable_scope('discriminator'):
                with tf.variable_scope('d1'):
                    pos_samples_1_pred = discriminator(pos_samples_1)
                with tf.variable_scope('d1', reuse=True):
                    neg_samples_1_pred = discriminator(y_1)
                with tf.variable_scope('d2'):
                    pos_samples_2_pred = discriminator(pos_samples_2)
                with tf.variable_scope('d2', reuse=True):
                    neg_samples_2_pred = discriminator(y_2)
            classification_loss_1 = compute_classification_loss(
                pos_samples_1_pred, neg_samples_1_pred)
            classification_loss_2 = compute_classification_loss(
                pos_samples_2_pred, neg_samples_2_pred)
            classification_loss = classification_loss_1 + classification_loss_2

            # define s_t likelihood
            s_diff = self.s_t_p - tf.matmul(self.s_t_minus_1_p, self.a_2)
            s_t_likelihood = tf.reduce_sum(mvn_1.log_prob(s_diff))

            # define o_t likelihood
            o_diff = self.o_t_p - tf.matmul(self.s_t_p, self.a_3)
            o_t_likelihood = tf.reduce_sum(mvn_2.log_prob(o_diff))

            # Total log-likelihood (monitoring only; not directly optimized
            # by the training ops defined below).
            self.likelihood = s_t_likelihood + o_t_likelihood

            # add summary ops
            tf.summary.scalar('likelihood', self.likelihood)
            tf.summary.scalar('s_t_likelihood', s_t_likelihood)
            tf.summary.scalar('o_t_likelihood', o_t_likelihood)
            tf.summary.scalar('classification_loss', classification_loss)
            tf.summary.scalar('classification_loss_1', classification_loss_1)
            tf.summary.scalar('classification_loss_2', classification_loss_2)
            tf.summary.scalar('reconstruction_loss', reconstruction_loss)

            # define references to params
            # Variables are partitioned by their top-level scope names so
            # each optimizer only updates its own subnetwork.
            encoder_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
            decoder_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
            autoencoder_params = encoder_params + decoder_params
            gaussian_params = [self.a_2, self.a_3, self.r_2, self.r_3]
            discriminator_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, \
                scope='discriminator')

            global_step = tf.contrib.framework.get_or_create_global_step()
            # define training steps
            self.learn_rate = self._get_learn_rate(global_step, learning_rate)

            # update autoencoder params to minimise reconstruction loss
            self.train_autoencoder = layers.optimize_loss(reconstruction_loss, \
                    global_step, self.learn_rate * 0.1, optimizer=lambda lr: \
                    tf.train.AdamOptimizer(lr), variables=\
                    #tf.train.MomentumOptimizer(lr, 0.9), variables=\
                    autoencoder_params, update_ops=[])

            # update discriminator
            self.train_discriminator = layers.optimize_loss(classification_loss, \
                    global_step, self.learn_rate * 10, optimizer=lambda lr: \
                    tf.train.MomentumOptimizer(lr, 0.1), variables=\
                    #tf.train.AdamOptimizer(lr), variables=\
                    discriminator_params, update_ops=[])

            # update encoder params to fool the discriminator
            self.train_encoder = layers.optimize_loss(-classification_loss, \
                    global_step, self.learn_rate , optimizer=lambda lr: \
                    #tf.train.MomentumOptimizer(lr, 0.9), variables=\
                    tf.train.AdamOptimizer(lr), variables=\
                    encoder_params, update_ops=[])

            # Session, merged summaries, and a writer bound to log_dir.
            self.sess = tf.Session()
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(log_dir, \
                self.sess.graph)
            self.sess.run(tf.global_variables_initializer())
    def __init__(self, hidden_size, batch_size, learning_rate, log_dir):
        """Build a denoising adversarial autoencoder for 28x28 images.

        The encoder maps noise-corrupted inputs to a `hidden_size`-dim latent
        code; the decoder reconstructs the clean input; a discriminator pushes
        the latent distribution towards N(0, 5^2 I) (the adversarial prior).

        Args:
            hidden_size: dimensionality of the latent representation.
            batch_size: number of prior samples drawn per training step.
            learning_rate: base learning rate (divided by 10 for the
                autoencoder and encoder objectives).
            log_dir: directory for TensorBoard summaries.
        """
        self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
        # Corrupt the input so the autoencoder learns a denoising mapping.
        input_with_noise = gaussian_noise_layer(self.input_tensor, 0.3)

        with arg_scope([layers.fully_connected], activation_fn=tf.nn.relu):
            with tf.variable_scope("encoder"):
                self.latent_representation = encoder(input_with_noise,
                        hidden_size)
                # Variable counts are recorded so tf.trainable_variables()
                # can be sliced into per-subnetwork lists below; variable
                # creation order therefore matters.
                encoder_params_num = len(tf.trainable_variables())
            with tf.variable_scope('encoder', reuse=True):
                # Latent code of the clean input (shares encoder weights).
                self.true_latent_representation = encoder(self.input_tensor,
                        hidden_size)
            with tf.variable_scope('decoder'):
                self.recons = decoder(self.latent_representation)
                autoencoder_params_num = len(tf.trainable_variables())
            with tf.variable_scope('decoder', reuse=True):
                # Images decoded from prior samples, for visual inspection.
                self.sampled_imgs = decoder(tf.random_normal([batch_size,
                        hidden_size]))

            # Adversarial prior: samples from N(0, 5^2 I) are "positive",
            # encoder outputs are "negative".
            pos_samples = tf.random_normal([batch_size, hidden_size],
                stddev=5.)
            neg_samples = self.latent_representation
            with tf.variable_scope('discriminator'):
                pos_samples_pred = discriminator(pos_samples)
            with tf.variable_scope('discriminator', reuse=True):
                neg_samples_pred = discriminator(neg_samples)
            #define losses
            reconstruction_loss = tf.reduce_mean(tf.square(self.recons -
                    self.input_tensor)) #* 28 * 28 scale recons loss
            classification_loss = tf.losses.sigmoid_cross_entropy(\
                    tf.ones(tf.shape(pos_samples_pred)), pos_samples_pred) +\
                    tf.losses.sigmoid_cross_entropy(tf.zeros(
                    tf.shape(neg_samples_pred)), neg_samples_pred)
            tf.summary.scalar('reconstruction_loss', reconstruction_loss)
            tf.summary.scalar('classification_loss', classification_loss)
            # define references to params
            params = tf.trainable_variables()
            encoder_params = params[:encoder_params_num]
            decoder_params = params[encoder_params_num:autoencoder_params_num]
            autoencoder_params = encoder_params + decoder_params
            discriminator_params = params[autoencoder_params_num:]
            # record true positive rate and true negative rate
            correct_pred_pos = tf.equal(tf.cast(pos_samples_pred > 0, tf.float32),
                tf.ones(tf.shape(pos_samples_pred)))
            self.true_pos_rate = tf.reduce_mean(tf.cast(correct_pred_pos,
                tf.float32))
            # BUG FIX: the ones tensor must match neg_samples_pred's shape
            # (the original used tf.shape(pos_samples_pred)), so the true
            # negative rate stays correct if the two batches differ in size.
            correct_pred_neg = tf.equal(tf.cast(neg_samples_pred < 0, tf.float32),
                tf.ones(tf.shape(neg_samples_pred)))
            self.true_neg_rate = tf.reduce_mean(tf.cast(correct_pred_neg,
                tf.float32))
            tf.summary.scalar('true_pos_rate', self.true_pos_rate)
            tf.summary.scalar('true_neg_rate', self.true_neg_rate)
            global_step = tf.contrib.framework.get_or_create_global_step()
            self.learn_rate = self._get_learn_rate(global_step, learning_rate)
            # Three optimizers over disjoint variable sets: the autoencoder
            # minimises reconstruction loss, the discriminator minimises the
            # classification loss, and the encoder minimises its negation
            # (i.e. tries to fool the discriminator).
            self.train_autoencoder = layers.optimize_loss(reconstruction_loss,
                    global_step, self.learn_rate / 10, optimizer=lambda lr: \
                    tf.train.MomentumOptimizer(lr, momentum=0.9), variables=
                    autoencoder_params, update_ops=[])
            self.train_discriminator = layers.optimize_loss(classification_loss,
                    global_step, self.learn_rate, optimizer=lambda lr: \
                    tf.train.MomentumOptimizer(lr, momentum=0.1), variables=
                    discriminator_params, update_ops=[])
            self.train_encoder = layers.optimize_loss(-classification_loss,
                    global_step, self.learn_rate / 10, optimizer=lambda lr: \
                    tf.train.MomentumOptimizer(lr, momentum=0.1), variables=
                    encoder_params, update_ops=[])
            # Session, merged summaries, and a writer bound to log_dir.
            self.sess = tf.Session()
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(log_dir,
                                      self.sess.graph)
            self.sess.run(tf.global_variables_initializer())
Exemplo n.º 14
0
    def predict(self):
        """Detect and label codes in `self.image` via a sliding-window scan.

        Crops windows at multiple zoom levels, classifies every crop with
        `self.model`, groups overlapping positive detections, votes on a
        per-character label within each group, then draws boxes/labels on the
        image and updates the GUI text.

        Reads: self.image, self.zoom_slider, self.model; calls self.display
        and sets self.prediction_text.  Relies on module constants RATIO,
        SLIDE_STEP, ZOOM_MULT, FINAL_SHAPE.
        """
        # Nothing to do until an image has been loaded.
        if not 'image' in self.__dict__:
            return
        cropped, bboxes, valid, valid_bboxes, groups, labels = [], [], [], [], [], []
        # Grayscale copy for the classifier; the original stays for drawing.
        image = np.array(self.image.convert('L'), np.uint8)
        h, w = image.shape
        # Line thickness for drawing, proportional to image width.
        thickness = int(w * 0.005)
        # Fit the scanning window to the image while keeping aspect RATIO.
        if w / h < RATIO:
            width = w
            height = width / RATIO
        else:
            height = h
            width = height * RATIO
        width, height = int(width), int(height)

        # Slide the fixed-size window over the image at successively larger
        # zoom levels, collecting crops (resized to the model's input shape,
        # scaled to [0, 1]) plus bounding boxes in original-image coordinates.
        zoom = 1
        m_zoom = 2**(self.zoom_slider.get() / 2)
        while zoom <= m_zoom:
            scaled_w, scaled_h = int(w * zoom), int(h * zoom)

            overflow_x, overflow_y = abs(width - scaled_w), abs(height -
                                                                scaled_h)
            # Maps scaled-image coordinates back to the original image.
            coeff = w / scaled_w

            scaled = cv.resize(image, (scaled_w, scaled_h))

            step = int(SLIDE_STEP * scaled_w)
            for i in range(0, overflow_x + step, step):
                for j in range(0, overflow_y + step, step):
                    bboxes.append(((int(i * coeff), int(j * coeff)),
                                   (int(i * coeff + width * coeff),
                                    int(j * coeff + height * coeff))))
                    cropped.append(
                        cv.resize(scaled[j:j + height, i:i + width],
                                  FINAL_SHAPE[:-1]).reshape(FINAL_SHAPE) / 255)

            zoom *= ZOOM_MULT

        # One batched forward pass over every collected crop.
        predictions = self.model.predict(np.array(cropped))

        # Keep only predictions whose decoded code starts with '1'
        # (presumably the "valid detection" flag -- TODO confirm); the label
        # text starts at index 3.
        img = np.array(self.image)
        for i, prediction in enumerate(predictions):
            code = decoder(prediction)
            if code[:1] == '1':
                valid.append(code[3:])
                valid_bboxes.append(bboxes[i])

        # Group detections whose bounding boxes overlap.
        # NOTE(review): a detection overlapping members of two existing
        # groups is appended to both, but the groups themselves are never
        # merged -- confirm this is intended.
        for i, bbox0 in enumerate(valid_bboxes):
            for j, bbox1 in enumerate(valid_bboxes[i + 1:]):
                are_overlapping = max(bbox0[0][0], bbox1[0][0]) < min(
                    bbox0[1][0], bbox1[1][0]) and max(
                        bbox0[0][1], bbox1[0][1]) < min(
                            bbox0[1][1], bbox1[1][1])
                if are_overlapping:
                    appended = False
                    for group in groups:
                        if i in group or j + i + 1 in group:
                            if not i in group:
                                group.append(i)
                            if not j + i + 1 in group:
                                group.append(j + i + 1)
                            appended = True
                    if not appended:
                        groups.append([i, j + i + 1])

        # Detections that overlap nothing form singleton groups.
        for i, bbox in enumerate(valid_bboxes):
            is_in_group = False
            for group in groups:
                if i in group:
                    is_in_group = True
                    break
            if not is_in_group:
                groups.append([i])

        for group in groups:
            top, bottom, left, right, length = 0, 0, 0, 0, len(group)
            # A lone detection is treated as too weak to report.
            if length == 1:
                print('Unsure about group with a weak match: ' +
                      valid[group[0]])
                continue

            # Per-position character votes across the group's detections;
            # the 8 sub-lists imply 8-character labels -- TODO confirm.
            letters, max_probs = [[], [], [], [], [], [], [], []], []
            for index in group:
                # Accumulate box corners; divided by `length` below to get
                # the group's average bounding box.
                left += valid_bboxes[index][0][0]
                top += valid_bboxes[index][0][1]
                right += valid_bboxes[index][1][0]
                bottom += valid_bboxes[index][1][1]
                for i, letter in enumerate(valid[index]):
                    letters[i].append(letter)

            # For each character position keep every character tied for the
            # highest vote count.
            for letter in letters:
                c = Counter(letter)
                max_prob = c.most_common(1)[0][1]
                with_max_prob = []
                for pair in c.most_common():
                    if pair[1] == max_prob:
                        with_max_prob.append(pair[0])
                    elif pair[1] < max_prob:
                        break
                max_probs.append(with_max_prob)

            possible = get_possible_label('', max_probs, [])
            # Too many candidate labels relative to group size: ambiguous --
            # draw a thin red box over the averaged bounds, no label.
            if len(possible) >= length // 2:
                print('Unsure about group with labels: ' + ', '.join(possible))
                img = cv.rectangle(img, (left // length, top // length),
                                   (right // length, bottom // length),
                                   (255, 0, 0),
                                   thickness=thickness // 2)
            else:
                label = '/'.join(possible)
                labels.append(label)
                # Find the largest font scale (1..9) whose rendered text
                # still fits inside the averaged box width.
                text_width = (right - left) // length
                font_size = 1
                is_too_long = False
                for i in range(1, 10):
                    size = cv.getTextSize(label,
                                          cv.FONT_HERSHEY_SIMPLEX,
                                          i,
                                          thickness=thickness)[0]
                    if size[0] < text_width:
                        font_size = i
                        text_height = size[1]
                    else:
                        # Even the smallest scale does not fit.
                        is_too_long = i == 1
                        break

                # Draw the label (skipped when even the smallest font does
                # not fit) and a green box over the averaged bounds.
                img = img if is_too_long else cv.putText(
                    img,
                    label, (left // length + thickness,
                            top // length + text_height + thickness),
                    cv.FONT_HERSHEY_SIMPLEX,
                    font_size, (0, 255, 0),
                    thickness=thickness)
                img = cv.rectangle(img, (left // length, top // length),
                                   (right // length, bottom // length),
                                   (0, 255, 0),
                                   thickness=thickness)

        self.display(Image.fromarray(img))
        self.prediction_text.set('\n'.join(labels))