def __init__(self, hidden_size, batch_size, learning_rate):
    """Build a convolutional VAE graph over flat 28*28 inputs, plus session.

    Args:
        hidden_size: dimensionality of the latent code.
        batch_size: number of prior samples drawn for `sampled_tensor`.
        learning_rate: Adam learning rate for `layers.optimize_loss`.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    # Shared defaults for every conv / transposed-conv layer built below.
    conv_defaults = {
        'activation_fn': tf.nn.elu,
        'normalizer_fn': layers.batch_norm,
        'normalizer_params': {'scale': True},
    }
    with arg_scope([layers.conv2d, layers.conv2d_transpose], **conv_defaults):
        with tf.variable_scope('model'):
            # Encoder emits 2*hidden_size units: first half is the mean,
            # second half is interpreted as log-variance.
            enc_out = encoder(self.input_tensor, hidden_size * 2)
            mu = enc_out[:, :hidden_size]
            log_var = enc_out[:, hidden_size:]
            sigma = tf.sqrt(tf.exp(log_var))
            # Reparameterization trick: z = mu + eps * sigma.
            eps = tf.random_normal([tf.shape(mu)[0], hidden_size])
            z = mu + eps * sigma
            reconstruction = decoder(z)
        # Reuse decoder weights to sample images from the N(0, I) prior.
        with tf.variable_scope('model', reuse=True):
            prior_z = tf.random_normal([batch_size, hidden_size])
            self.sampled_tensor = decoder(prior_z)
    kl_term = self.__get_vae_cost(mu, sigma)
    fit_term = self.__get_reconstruction_cost(reconstruction, self.input_tensor)
    self.train = layers.optimize_loss(kl_term + fit_term,
                                      get_or_create_global_step(),
                                      learning_rate=learning_rate,
                                      optimizer='Adam',
                                      update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, hidden_size, batch_size, learning_rate, generate_size):
    """Build a GAN graph: discriminator on real/generated 28*28 inputs,
    decoder as generator, plus a larger sampling op of `generate_size` images.

    Args:
        hidden_size: latent noise dimensionality fed to the decoder.
        batch_size: generator batch size during training.
        learning_rate: generator LR; discriminator uses learning_rate / 10.
        generate_size: batch size for the standalone sampling tensor.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=concat_elu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'scale': True}):
        with tf.variable_scope("model"):
            D1 = discriminator(self.input_tensor)  # positive examples
            # Bookmark: variables created so far all belong to the
            # discriminator; the slice below relies on this creation order.
            D_params_num = len(tf.trainable_variables())
            G = decoder(tf.random_normal([batch_size, hidden_size]))
            self.sampled_tensor = G
        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
        # Separate sampling path with a different batch size, reusing the
        # same decoder weights.
        with tf.variable_scope("model", reuse=True):
            self.sampled_tensor_gener = decoder(
                tf.random_normal([generate_size, hidden_size]))
    D_loss = self.__get_discrinator_loss(D1, D2)
    G_loss = self.__get_generator_loss(D2)
    # Split trainable variables by the bookmark taken above.
    params = tf.trainable_variables()
    D_params = params[:D_params_num]
    G_params = params[D_params_num:]
    # train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
    # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Discriminator learns 10x slower than the generator.
    self.train_discrimator = layers.optimize_loss(D_loss, global_step,
                                                  learning_rate / 10, 'Adam',
                                                  variables=D_params,
                                                  update_ops=[])
    self.train_generator = layers.optimize_loss(G_loss, global_step,
                                                learning_rate, 'Adam',
                                                variables=G_params,
                                                update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, hidden_size, batch_size, learning_rate, size):
    """Build a VAE graph for images flattened from shape `size`.

    Args:
        hidden_size: latent dimensionality.
        batch_size: fixed batch size of the input placeholder and samples.
        learning_rate: Adam learning rate.
        size: image shape triple; its product is the flattened input width.
    """
    # Flattened pixel count of one image.
    img_size = size[0] * size[1] * size[2]
    self.input_tensor = tf.placeholder(tf.float32, [batch_size, img_size])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=tf.nn.elu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'scale': True}):
        with tf.variable_scope("model"):
            enc_out = encoder(self.input_tensor, hidden_size * 2, size)
            # First half of the encoder output is the mean; second half is
            # treated as log-variance.
            mu = enc_out[:, :hidden_size]
            sigma = tf.sqrt(tf.exp(enc_out[:, hidden_size:]))
            # Reparameterized latent sample.
            noise = tf.random_normal([tf.shape(mu)[0], hidden_size])
            latent = mu + noise * sigma
            recon = decoder(latent, img_size)
        with tf.variable_scope("model", reuse=True):
            self.sampled_tensor = decoder(
                tf.random_normal([batch_size, hidden_size]), img_size)
    # Expose the reconstruction of the current input batch.
    self.recons_tensor = recon
    kl_term = self.__get_vae_cost(mu, sigma)
    fit_term = self.__get_reconstruction_cost(recon, self.input_tensor)
    self.train = layers.optimize_loss(
        kl_term + fit_term,
        tf.contrib.framework.get_or_create_global_step(),
        learning_rate=learning_rate,
        optimizer='Adam',
        update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, hidden_size, batch_size, learning_rate):
    """Build a GAN graph whose batch-norm layers honor an `is_training`
    switch, routing the resulting UPDATE_OPS to the matching optimizer.

    Args:
        hidden_size: latent noise dimensionality.
        batch_size: generator batch size.
        learning_rate: generator LR; discriminator uses learning_rate / 10.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    # Defaults to True so inference can flip it off without feeding.
    self.is_training = tf.placeholder_with_default(True, [])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=concat_elu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={
                       'scale': True,
                       'is_training': self.is_training
                   }):
        with tf.variable_scope("model"):
            D1 = discriminator(self.input_tensor)  # positive examples
            # Bookmark: vars created so far belong to the discriminator.
            D_params_num = len(tf.trainable_variables())
            G = decoder(tf.random_normal([batch_size, hidden_size]))
            self.sampled_tensor = G
        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
    D_loss = self.__get_discrinator_loss(D1, D2)
    G_loss = self.__get_generator_loss(D2)
    params = tf.trainable_variables()
    D_params = params[:D_params_num]
    G_params = params[D_params_num:]
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # NOTE(review): this relies on TF auto-uniquifying the second entry of
    # the "model" name scope to 'model_1/...', so ops created while scoring
    # the generated batch are separable — confirm the scope naming holds.
    g_update_ops = [
        op for op in update_ops if op.name.startswith('model_1/')
    ]
    d_update_ops = [op for op in update_ops if op not in g_update_ops]
    # train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
    # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Batch-norm moving-average updates run before each train op.
    with tf.control_dependencies(d_update_ops):
        self.train_discrimator = layers.optimize_loss(D_loss, global_step,
                                                      learning_rate / 10,
                                                      'Adam',
                                                      variables=D_params,
                                                      update_ops=[])
    with tf.control_dependencies(g_update_ops):
        self.train_generator = layers.optimize_loss(G_loss, global_step,
                                                    learning_rate, 'Adam',
                                                    variables=G_params,
                                                    update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, model, channel_num, batch_size, seq_len, learning_rate, ws, wg, wt, phase, sum_dir):
    """Build the graph for a (recurrent) denoising autoencoder over frame lists.

    Args:
        model: 'RAE' builds a ConvLSTM encoder; 'AE' builds the plain encoder.
        channel_num: channel count of the noisy input frames.
        batch_size: frames per placeholder per step.
        seq_len: number of time steps (one placeholder per step).
        learning_rate: Adam learning rate.
        ws, wg, wt: weights for the spatial, gradient, and temporal L1 losses.
        phase: 'train'/'test' use 128x128 frames; anything else 416x800.
        sum_dir: TensorBoard summary output directory.
    """
    # One placeholder per time step, for noisy inputs and clean targets.
    if phase == 'train' or phase == 'test':
        self.inputNoiseList = [tf.placeholder(tf.float32, [batch_size, 128, 128, channel_num])
                               for _ in range(seq_len)]
        self.inputCleanList = [tf.placeholder(tf.float32, [batch_size, 128, 128, 3])
                               for _ in range(seq_len)]
    else:
        self.inputNoiseList = [tf.placeholder(tf.float32, [batch_size, 416, 800, channel_num])
                               for _ in range(seq_len)]
        self.inputCleanList = [tf.placeholder(tf.float32, [batch_size, 416, 800, 3])
                               for _ in range(seq_len)]
    with arg_scope(
            [layers.conv2d],
            activation_fn=tf.nn.leaky_relu,
            #normalizer_fn=layers.batch_norm,
            normalizer_params={'scale': True},
            padding='SAME'):
        with tf.variable_scope("model") as scope:  #Full VAEGAN structure
            if phase == 'train' or phase == 'test':
                inpH, inpW = 128, 128
            else:
                inpH, inpW = 416, 800
            if model == 'RAE':
                with tf.name_scope("initalize_RNN_cell"):
                    # BUG FIX: use floor division (//) for the downsampled cell
                    # shapes — plain '/' yields floats under Python 3, and
                    # ConvLSTMCell input shapes must be integers. '//' is
                    # identical for these positive ints under Python 2.
                    cell1 = rnn.ConvLSTMCell(2, [inpH, inpW, 32], 32, [3, 3], name='rnn1')
                    cell2 = rnn.ConvLSTMCell(2, [inpH // 2, inpW // 2, 43], 43, [3, 3], name='rnn2')
                    cell3 = rnn.ConvLSTMCell(2, [inpH // 4, inpW // 4, 57], 57, [3, 3], name='rnn3')
                    cell4 = rnn.ConvLSTMCell(2, [inpH // 8, inpW // 8, 76], 76, [3, 3], name='rnn4')
                    cell5 = rnn.ConvLSTMCell(2, [inpH // 16, inpW // 16, 101], 101, [3, 3], name='rnn5')
                    cell6 = rnn.ConvLSTMCell(2, [inpH // 32, inpW // 32, 101], 101, [3, 3], name='rnn6')
                # Encoder
                l1, l2, l3, l4, l5, out = encoderRNN(self.inputNoiseList, batch_size, cell1, cell2, cell3,
                                                     cell4, cell5, cell6, (inpH, inpW), reuse_vars=False)
            elif model == "AE":
                l1, l2, l3, l4, l5, out = encoder(self.inputNoiseList, batch_size, reuse_vars=False)
            # Bookmark: trainable variables created so far are encoder-only;
            # the slices below depend on this creation order.
            Enc_params_num = len(tf.trainable_variables())
            # Decoder / Generator
            self.denoised_imgList = decoder(l1, l2, l3, l4, l5, out, (inpH, inpW), reuse_vars=False)
            Enc_n_Dec_params_num = len(tf.trainable_variables())
    self.params = tf.trainable_variables()
    self.Enc_params = self.params[:Enc_params_num]
    self.Dec_params = self.params[Enc_params_num:Enc_n_Dec_params_num]
    print(len(self.params))
    for var in self.params:
        print(var.name)
    # Per-pixel L1 loss, image-gradient L1 loss, and (RAE only) a temporal
    # consistency L1 loss; helpers are defined elsewhere in the class.
    self.Spatial_loss = self.__get_L1_loss(self.denoised_imgList, self.inputCleanList)
    Spatial_loss_sum = tf.summary.scalar('Spatial_loss', self.Spatial_loss)
    self.Gradient_loss = self.__get_grad_L1_loss(self.denoised_imgList, self.inputCleanList)
    Gradient_loss_sum = tf.summary.scalar('Gradient_loss', self.Gradient_loss)
    if model == 'RAE':
        self.Temporal_loss = self.__get_tem_L1_loss(
            self.denoised_imgList, self.inputCleanList)
        Temporal_loss_sum = tf.summary.scalar('Temporal_loss', self.Temporal_loss)
        # merge summary for Tensorboard
        self.detached_loss_summary_merged = tf.summary.merge(
            [Spatial_loss_sum, Gradient_loss_sum, Temporal_loss_sum])
        # loss function
        total_loss = ws * self.Spatial_loss + wg * self.Gradient_loss + wt * self.Temporal_loss
    elif model == 'AE':
        self.detached_loss_summary_merged = tf.summary.merge(
            [Spatial_loss_sum, Gradient_loss_sum])
        # loss function
        total_loss = ws * self.Spatial_loss + wg * self.Gradient_loss
    # self.train = layers.optimize_loss(total_loss, tf.train.get_or_create_global_step(\
    #     ), learning_rate=learning_rate, variables=self.params, optimizer='RMSProp', update_ops=[])
    self.train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                        beta1=0.9,
                                        beta2=0.99,
                                        epsilon=1e-08,
                                        name='Adam').minimize(
                                            total_loss, var_list=self.params)
    # Grow GPU memory on demand instead of grabbing it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
    #.replace('\\','/')
    self.train_writer = tf.summary.FileWriter(sum_dir, self.sess.graph)
# NOTE(review): fragment — the dedented `else:` below pairs with an `if`
# statement that is outside this view; the indented branch is an interactive
# preview loop over random dataset files, the `else` branch scores one image.
    # NOTE(review): prefer a raw string r"\d+_(.+)\.png" for the regex.
    pattern = re.compile("\d+_(.+)\.png")
    while True:
        file = choice(files)
        mat = cv.imread(file.path, cv.IMREAD_GRAYSCALE)
        # Normalize pixels to [0, 1] and add a channel axis.
        image = np.array(Image.open(file.path), dtype=np.uint8).reshape(
            (128, 64, 1)) / 255
        # Ground-truth label is encoded in the filename after the numeric id.
        label = pattern.search(file.name).group(1)
        predictions = model.predict(np.array([image]))
        cv.imshow('Input', mat)
        print('Ground truth vs prediction')
        print(label)
        print(decoder(predictions[0]))
        # Esc key (27) ends the preview loop.
        if cv.waitKey() == 27:
            break
else:
    if os.path.isfile(arguments.image):
        image_pil = ImageOps.grayscale(
            Image.open(arguments.image).resize((128, 64)))
        image = np.array(image_pil, dtype=np.uint8).reshape(
            (128, 64, 1)) / 255
        predictions = model.predict(np.array([image]))
        print(decoder(predictions[0]))
    else:
        print(f'Input image was not found at {arguments.image}')
# Fragment: builds three encode/sample/decode paths over the "model" scope.
input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
xs2 = tf.placeholder(tf.float32, [None, 28 * 28])
# Auxiliary placeholders; their consumers are outside this view.
dis = tf.placeholder(tf.float32, [1, None])
flag = tf.placeholder(tf.float32, [1, None])
with tf.variable_scope("model") as scope:
    encoded = encoder(input_tensor, hidden_size * 2)
    mean = encoded[:, :hidden_size]
    # sqrt(square(x)) == |x|: stddev is the absolute raw encoder output,
    # not exp(log_var) as in the usual VAE parameterization.
    stddev = tf.sqrt(tf.square(encoded[:, hidden_size:]))
    epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
    input_sample = mean + epsilon * stddev
    output_tensor = decoder(input_sample)
# NOTE(review): this scope lacks reuse=True — in TF1, re-entering "model"
# and calling get_variable on existing names raises; the third scope below
# does set reuse=True. Confirm whether reuse=True was intended here.
with tf.variable_scope("model") as scope:
    encoded1 = encoder(xs2, hidden_size * 2)
    mean1 = encoded1[:, :hidden_size]
    stddev1 = tf.sqrt(tf.square(encoded1[:, hidden_size:]))
    epsilon1 = tf.random_normal([tf.shape(mean1)[0], hidden_size])
    input_sample1 = mean1 + epsilon1 * stddev1
    output_tensor1 = decoder(input_sample1)
with tf.variable_scope("model", reuse=True) as scope:
    encoded = encoder(input_tensor, hidden_size * 2)
    # NOTE(review): fragment — these lines continue a `transforms.Compose([`
    # expression whose opening is outside this view.
    transforms.Lambda(cv_resize),
    transforms.ToTensor(),
    # ImageNet channel mean/std normalization.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
device = 'cpu'
train_dataset = yoloDataset(list_file='2007_val.txt',
                            train=False,
                            transform=transform,
                            device=device,
                            little_train=True,
                            S=14)
train_loader = DataLoader(train_dataset,
                          batch_size=1,
                          shuffle=False,
                          num_workers=0)
train_iter = iter(train_loader)
for i in range(1):
    img, target = next(train_iter)
    print(img.shape, target.shape)
    boxes, clss, confs = decoder(target)
    print(boxes, clss, confs)
    # Invert the Normalize transform so the image can be displayed.
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
    un_normal_trans = transforms.Normalize((-mean / std).tolist(),
                                           (1.0 / std).tolist())
    img = un_normal_trans(img.squeeze(0))
    # CHW -> HWC for drawing.
    draw_debug_rect(img.permute(1, 2, 0), boxes)
    # Dump each cell of the 14x14 target grid.
    for i in range(14):
        for j in range(14):
            print(target[:, i:i + 1, j:j + 1, :])
def __init__(self, hidden_size, batch_size, learning_rate, alpha, beta, gamma, sum_dir, attri_num, add_gan=1, GAN_model="V", similarity_layer=4):
    """Build a VAE-GAN graph over 64x64x3 images (pre-1.0 TF summary APIs).

    Args:
        hidden_size: latent dimensionality.
        batch_size: fixed batch size for all placeholders and samples.
        learning_rate: base RMSProp LR (discriminator uses it times alpha).
        alpha, beta, gamma: loss-mixing weights (see loss section below).
        sum_dir: TensorBoard summary directory.
        attri_num: unused here except by a commented-out placeholder.
        add_gan: 1 enables the discriminator/GAN losses; otherwise plain VAE.
        GAN_model: variant flag forwarded to `discriminator`.
        similarity_layer: discriminator layer used for feature-space recon loss.
    """
    print("\nInitializing model with following parameters:")
    print("batch_size:", batch_size, " learning_rate:", learning_rate,
          " alpha:", alpha, " beta:", beta, " gamma:", gamma)
    print("GAN_model:", GAN_model, " similarity_layer:", similarity_layer,
          "\n")
    self.input_tensor = tf.placeholder(tf.float32, [batch_size, 64, 64, 3])
    #self.input_label = tf.placeholder(tf.int, [batch_size, attri_num])
    # A latent-space direction added to the batch mean (see below).
    self.visual_attri = tf.placeholder(tf.float32, [hidden_size])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=tf.nn.relu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'scale': True},
                   padding='SAME'):
        with tf.variable_scope("model") as scope:  #Full VAEGAN structure
            # Encoder
            ENC = encoder(self.input_tensor, hidden_size * 2)
            # Bookmark: vars so far are encoder-only (creation-order slice).
            Enc_params_num = len(tf.trainable_variables())
            # Add noise
            # Old tf.split signature: (split_dim, num_split, value).
            self.mean, stddev = tf.split(1, 2, ENC)
            stddev = tf.sqrt(tf.exp(stddev))
            epsilon = tf.random_normal(
                [tf.shape(self.mean)[0], hidden_size])
            ENC_w_noise = self.mean + epsilon * stddev
            # Decoder / Generator
            self.DEC_of_ENC = decoder(ENC_w_noise)
            Enc_n_Dec_params_num = len(tf.trainable_variables())
            # Discriminator
            if add_gan == 1:
                DIS_of_DEC_of_ENC = discriminator(self.DEC_of_ENC, GAN_model)
                Gen_dis_sum = tf.scalar_summary(
                    'Gen_dis_mean', tf.reduce_mean(DIS_of_DEC_of_ENC))
        with tf.variable_scope(
                "model", reuse=True) as scope:  #Computation for Recon_Loss
            if add_gan == 1:
                # Feature-space ("similarity") activations from an
                # intermediate discriminator layer.
                Real_Similarity = discriminator(self.input_tensor,
                                                GAN_model,
                                                extract=similarity_layer)
                Gen_Similarity = discriminator(
                    self.DEC_of_ENC, GAN_model, extract=similarity_layer
                )  #+ tf.random_normal([batch_size, 8, 8, 256])
        with tf.variable_scope(
                "model", reuse=True) as scope:  #Computation for GAN_Loss
            if add_gan == 1:
                Real_in_Dis = discriminator(self.input_tensor, GAN_model)
                Real_dis_sum = tf.scalar_summary(
                    'Real_dis_mean', tf.reduce_mean(Real_in_Dis))
                # Discriminator score of images decoded from the prior.
                Prior_in_Dis = discriminator(
                    decoder(tf.random_normal([batch_size, hidden_size])),
                    GAN_model)
                Prior_dis_sum = tf.scalar_summary(
                    'Prior_dis_mean', tf.reduce_mean(Prior_in_Dis))
        with tf.variable_scope(
                "model", reuse=True) as scope:  #Sample from latent space
            self.sampled_tensor = decoder(
                tf.random_normal([batch_size, hidden_size]))
        with tf.variable_scope(
                "model", reuse=True) as scope:  #Add visual attributes
            #expand_mean = tf.expand_dims(self.mean, -1)
            print("shape of mean:", np.shape(self.mean),
                  " shape of visual attri:", np.shape(self.visual_attri))
            # np.ones([batch_size, 1]) * visual_attri broadcasts the
            # attribute vector to every row of the batch mean.
            add_attri = self.mean + np.ones(
                [batch_size, 1]
            ) * self.visual_attri  #[batch size, hidden size] (broadcasting)
            print("shape of add attri:", tf.shape(add_attri))
            self.with_attri_tensor = decoder(add_attri)
    # Slice the variable list by the creation-order bookmarks above.
    self.params = tf.trainable_variables()
    self.Enc_params = self.params[:Enc_params_num]
    '''
    print ('Encoder Param:')
    for var in Enc_params:
        print (var.name)
    '''
    self.Dec_params = self.params[Enc_params_num:Enc_n_Dec_params_num]
    '''
    print ('Decoder Param:')
    for var in Dec_params:
        print (var.name)
    '''
    if add_gan == 1:
        self.Dis_params = self.params[Enc_n_Dec_params_num:]
        '''
        print ('Discriminator Param:')
        for var in Dis_params:
            print (var.name)
        '''
    self.Prior_loss = self.__get_prior_loss(self.mean, stddev)
    Prior_loss_sum = tf.scalar_summary('Prior_loss', self.Prior_loss)
    if add_gan == 1:
        # Reconstruction measured in discriminator feature space.
        self.Recon_loss = self.__get_reconstruction_loss(
            Gen_Similarity, Real_Similarity)
        Recon_loss_sum = tf.scalar_summary('Recon_loss', self.Recon_loss)
        self.GAN_loss = self.__get_GAN_loss(Real_in_Dis, Prior_in_Dis,
                                            DIS_of_DEC_of_ENC, GAN_model)
        GAN_loss_sum = tf.scalar_summary('GAN_loss', self.GAN_loss)
    else:
        # Plain pixel-space reconstruction when the GAN is disabled.
        self.Recon_loss = self.__get_reconstruction_loss(
            self.DEC_of_ENC, self.input_tensor)
        Recon_loss_sum = tf.scalar_summary('Recon_loss', self.Recon_loss)
    # merge summary for Tensorboard
    if add_gan == 1:
        self.detached_loss_summary_merged = tf.merge_summary([
            Prior_loss_sum, Recon_loss_sum, GAN_loss_sum, Real_dis_sum,
            Prior_dis_sum, Gen_dis_sum
        ])
        #self.dis_mean_value_summary_merged = tf.merge_summary([Real_dis_sum,Prior_dis_sum,Gen_dis_sum])
    else:
        self.detached_loss_summary_merged = tf.merge_summary(
            [Prior_loss_sum, Recon_loss_sum])
    # VAE-GAN three-way objective: encoder, decoder, and discriminator each
    # optimize a different mix of the shared loss terms.
    if add_gan == 1:
        enc_loss = self.Prior_loss + beta * self.Recon_loss
        dec_loss = gamma * self.Recon_loss + self.GAN_loss
        dis_loss = (-1) * self.GAN_loss
    else:
        total_loss = self.Prior_loss + beta * self.Recon_loss
    #self.combined_loss_summary_merged = tf.merge_summary([self.prior_loss_sum,self.recon_loss_sum,self.GAN_loss_sum])
    if add_gan == 1:
        self.train_enc = layers.optimize_loss(
            enc_loss,
            tf.contrib.framework.get_or_create_global_step(),
            learning_rate=learning_rate,
            variables=self.Enc_params,
            optimizer='RMSProp',
            update_ops=[])
        self.train_dec = layers.optimize_loss(
            dec_loss,
            tf.contrib.framework.get_or_create_global_step(),
            learning_rate=learning_rate,
            variables=self.Dec_params,
            optimizer='RMSProp',
            update_ops=[])
        self.train_dis = layers.optimize_loss(
            dis_loss,
            tf.contrib.framework.get_or_create_global_step(),
            learning_rate=learning_rate * alpha,
            variables=self.Dis_params,
            optimizer='RMSProp',
            update_ops=[])
    else:
        self.train = layers.optimize_loss(
            total_loss,
            tf.contrib.framework.get_or_create_global_step(),
            learning_rate=learning_rate,
            variables=self.params,
            optimizer='RMSProp',
            update_ops=[])
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.initialize_all_variables())
    self.train_writer = tf.train.SummaryWriter(sum_dir + '/train',
                                               self.sess.graph)
# Debug script: iterate a YOLO segmentation dataset, decode ground-truth
# grid targets, and visualize boxes / confidence maps / masks.
transform = transforms.Compose([
    transforms.Lambda(cv_resize),
    transforms.ToTensor(),
    # ImageNet channel mean/std normalization.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
S = 7  # grid size used by both the dataset and the decoder
train_dataset = yoloDataset(list_file='datasets/2012_seg.txt',
                            train=False,
                            transform=transform,
                            test_mode=True,
                            with_mask=True,
                            S=S,
                            device='cuda:0')
train_loader = DataLoader(train_dataset,
                          batch_size=1,
                          shuffle=False,
                          num_workers=0)
train_iter = iter(train_loader)
# print(next(train_iter))
for i in range(200):
    img, target, mask_label = next(train_iter)
    # mask_img = mask_img.squeeze(0).cpu().numpy()
    # print('mask shape is :', mask_img.shape)
    # print(img.shape, target.shape)
    # gt=True: decode ground-truth targets rather than network output.
    boxes, clss, confs = decoder(target, grid_num=S, gt=True)
    # print(boxes, clss, confs)
    print('~' * 50 + '\n\n\n')
    # Invert the Normalize transform so the image can be displayed.
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
    std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
    un_normal_trans = transforms.Normalize((-mean / std).tolist(),
                                           (1.0 / std).tolist())
    img = un_normal_trans(img.squeeze(0))
    # CHW -> HWC for OpenCV drawing.
    img = draw_debug_rect(img.permute(1, 2, 0), boxes, clss, confs)
    img = draw_classify_confidence_map(img, target, S, Color)
    cv2.imshow('img', img)
    # print(mask_label[0, 10:100, 10:100])
    mask_img = mask_label_2_img(mask_label)
    print(mask_label.shape, mask_label.dtype)
    pred_mask = pred_mask_label_2_img(mask_label[0])
def __init__(self, hidden_size, batch_size, learning_rate):
    """Build a hybrid VAE/GAN graph: the generator decodes encoder samples
    of the real input rather than pure prior noise.

    Args:
        hidden_size: latent dimensionality.
        batch_size: unused directly here except via commented-out sampling.
        learning_rate: generator LR; discriminator uses learning_rate / 10.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    # Second input batch plus auxiliary placeholders consumed by the loss
    # helpers defined elsewhere in the class.
    self.xs2 = tf.placeholder(tf.float32, [None, 28 * 28])
    self.dis = tf.placeholder(tf.float32, [1, None])
    self.flag = tf.placeholder(tf.float32, [1, None])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=concat_elu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'scale': True}):
        with tf.variable_scope("model"):
            D1 = discriminator(self.input_tensor)  # positive examples
            # Bookmark: vars so far belong to the discriminator.
            D_params_num = len(tf.trainable_variables())
            encoded = encoder(self.input_tensor, hidden_size * 2)
            mean = encoded[:, :hidden_size]
            # sqrt(square(x)) == |x|: absolute raw encoder output.
            stddev = tf.sqrt(tf.square(encoded[:, hidden_size:]))
            epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
            input_sample = mean + epsilon * stddev
            # G = decoder(tf.random_normal([batch_size, hidden_size]))
            # Bookmark after the encoder: decoder vars start here.
            G_params_num = len(tf.trainable_variables())
            G = decoder(input_sample)
            self.sampled_tensor = G
        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
            # Second encode/decode pass on xs2 with shared weights.
            encoded1 = encoder(self.xs2, hidden_size * 2)
            mean1 = encoded1[:, :hidden_size]
            stddev1 = tf.sqrt(tf.square(encoded1[:, hidden_size:]))
            epsilon1 = tf.random_normal([tf.shape(mean1)[0], hidden_size])
            input_sample1 = mean1 + epsilon1 * stddev1
            output_tensor1 = decoder(input_sample1)
    D_loss = self.__get_discrinator_loss(D1, D2)
    G_loss = self.__get_generator_loss(D2, mean, stddev, mean1)
    params = tf.trainable_variables()
    D_params = params[:D_params_num]
    # NOTE(review): encoder vars (params[D_params_num:G_params_num]) end up
    # in neither D_params nor G_params, so no optimizer updates them even
    # though G_loss depends on encoder outputs — confirm this is intended.
    G_params = params[G_params_num:]
    # train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
    # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Discriminator learns 10x slower than the generator.
    self.train_discrimator = layers.optimize_loss(D_loss, global_step,
                                                  learning_rate / 10, 'Adam',
                                                  variables=D_params,
                                                  update_ops=[])
    self.train_generator = layers.optimize_loss(G_loss, global_step,
                                                learning_rate, 'Adam',
                                                variables=G_params,
                                                update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, input_size, hidden_size, batch_size, learning_rate, log_dir):
    """Build an adversarial state-space autoencoder: shared encoders for
    s_{t-1}, s_t, o_t; linear-Gaussian transition/observation models; and two
    discriminators that push residuals toward the model distributions.

    Args:
        input_size: width of each of the three concatenated input segments.
        hidden_size: latent dimensionality of each encoded segment.
        batch_size: number of samples drawn from each Gaussian per step.
        learning_rate: base LR, scaled per training op via `_get_learn_rate`.
        log_dir: TensorBoard summary directory.
    """
    # Input holds [s_{t-1}, s_t, o_t] flattened side by side.
    self.input_tensor = tf.placeholder(tf.float32, [None, 3 * input_size])
    self.s_t_p_placeholder = tf.placeholder(tf.float32, [None, hidden_size])
    '''
    ##################################
    with open('params.txt') as f:
        first = f.readline()
        first = first.strip('\n')
        temp = first.split(' ')
        o_p_dim = int(temp[3]); s_p_dim = int(temp[4]);
        ln = f.readline()
        for i in range(s_p_dim):
            temp = f.readline()
        self.sig_2_init = np.zeros((s_p_dim, s_p_dim), np.float32)
        for i in range(s_p_dim):
            temp = f.readline().strip('\n').split(' ')
            for j in range(s_p_dim):
                self.sig_2_init[i,j] = float(temp[j])
        eig_val , eig_vec = np.linalg.eig(self.sig_2_init)
        cf = np.sqrt(np.repeat(eig_val,s_p_dim).reshape(s_p_dim,s_p_dim).transpose())
        self.r_2_init = np.multiply(cf,eig_vec)
        self.sig_3_init = np.zeros((o_p_dim, o_p_dim), np.float32)
        for i in range(o_p_dim):
            temp = f.readline().strip('\n').split(' ')
            for j in range(o_p_dim):
                self.sig_3_init[i,j] = float(temp[j])
        eig_val , eig_vec = np.linalg.eig(self.sig_3_init)
        cf = np.sqrt(np.repeat(eig_val,o_p_dim).reshape(o_p_dim,o_p_dim).transpose())
        self.r_3_init = np.multiply(cf,eig_vec)
        self.a_2_init = np.zeros((s_p_dim, s_p_dim), np.float32)
        for i in range(s_p_dim):
            temp = f.readline().strip('\n').split(' ')
            for j in range(s_p_dim):
                self.a_2_init[i,j] = float(temp[j])
        self.a_3_init = np.zeros((s_p_dim, o_p_dim), np.float32)
        for i in range(s_p_dim):
            temp = f.readline().strip('\n').split(' ')
            for j in range(o_p_dim):
                self.a_3_init[i,j] = float(temp[j])
    ###################################
    '''
    with arg_scope([layers.fully_connected], activation_fn=tf.nn.relu):
        with tf.variable_scope("encoder"):
            # s_{t-1} and s_t share one encoder (reuse=True on second use).
            with tf.variable_scope("encoder_s_t"):
                self.s_t_minus_1_p = encoder(self.input_tensor[:, :input_size],
                                             hidden_size)
            with tf.variable_scope("encoder_s_t", reuse=True):
                self.s_t_p = encoder(
                    self.input_tensor[:, input_size:2 * input_size],
                    hidden_size)
            # o_t gets its own encoder weights.
            with tf.variable_scope("encoder_o_t"):
                self.o_t_p = encoder(self.input_tensor[:, 2 * input_size:],
                                     hidden_size)
        with tf.variable_scope("decoder"):
            with tf.variable_scope("decoder_s_t"):
                self.output_s_t_minus_1 = decoder(self.s_t_minus_1_p, input_size)
            with tf.variable_scope("decoder_s_t", reuse=True):
                self.output_s_t = decoder(self.s_t_p, input_size)
            # Extra decode path for externally supplied latents.
            with tf.variable_scope("decoder_s_t", reuse=True):
                self.s_t_decoded = decoder(self.s_t_p_placeholder, input_size)
            with tf.variable_scope("decoder_o_t"):
                self.output_o_t = decoder(self.o_t_p, input_size)
    self.output_tensor = tf.concat(
        [self.output_s_t_minus_1, self.output_s_t, self.output_o_t], axis=1)
    #self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._MLE_Gaussian_params()
    self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._simple_Gaussian_params(
    )
    #self.a_2, self.b_2, self.sigma_2, self.a_3, self.b_3, self.sigma_3 = self._simple_Gaussian_plus_offset_params()
    # Cholesky factors of the transition/observation covariances.
    self.r_2 = tf.cholesky(self.sigma_2)
    self.r_3 = tf.cholesky(self.sigma_3)
    #define reconstruction loss
    reconstruction_loss = tf.reduce_mean(tf.norm(self.output_tensor -
                                                 self.input_tensor, axis=1))
    # define classification loss
    # Residual of the latent transition s_t - s_{t-1} A_2.
    y_1 = self.s_t_p - tf.matmul(self.s_t_minus_1_p, self.a_2)
    mvn_1 = tf.contrib.distributions.MultivariateNormalFull(
        self.b_2, self.sigma_2)
    #mvn_1 = tf.contrib.distributions.MultivariateNormalTrill(self.b_2, scale_tril=self.r_2)
    pos_samples_1 = mvn_1.sample(batch_size)
    # Residual of the observation model o_t - s_t A_3.
    y_2 = self.o_t_p - tf.matmul(self.s_t_p, self.a_3)
    #mvn_2 = tf.contrib.distributions.MultivariateNormalTriL(self.b_3, scale_tril=self.r_3)
    mvn_2 = tf.contrib.distributions.MultivariateNormalFull(
        self.b_3, self.sigma_3)
    pos_samples_2 = mvn_2.sample(batch_size)
    with tf.variable_scope('discriminator'):
        # d1 separates true Gaussian samples from transition residuals.
        with tf.variable_scope('d1'):
            pos_samples_1_pred = discriminator(pos_samples_1)
        with tf.variable_scope('d1', reuse=True):
            neg_samples_1_pred = discriminator(y_1)
        # d2 does the same for observation residuals.
        with tf.variable_scope('d2'):
            pos_samples_2_pred = discriminator(pos_samples_2)
        with tf.variable_scope('d2', reuse=True):
            neg_samples_2_pred = discriminator(y_2)
    classification_loss_1 = compute_classification_loss(
        pos_samples_1_pred, neg_samples_1_pred)
    classification_loss_2 = compute_classification_loss(
        pos_samples_2_pred, neg_samples_2_pred)
    classification_loss = classification_loss_1 + classification_loss_2
    # define s_t likelihood
    s_diff = self.s_t_p - tf.matmul(self.s_t_minus_1_p, self.a_2)
    s_t_likelihood = tf.reduce_sum(mvn_1.log_prob(s_diff))
    # define o_t likelihood
    o_diff = self.o_t_p - tf.matmul(self.s_t_p, self.a_3)
    o_t_likelihood = tf.reduce_sum(mvn_2.log_prob(o_diff))
    self.likelihood = s_t_likelihood + o_t_likelihood
    # add summary ops
    tf.summary.scalar('likelihood', self.likelihood)
    tf.summary.scalar('s_t_likelihood', s_t_likelihood)
    tf.summary.scalar('o_t_likelihood', o_t_likelihood)
    tf.summary.scalar('classification_loss', classification_loss)
    tf.summary.scalar('classification_loss_1', classification_loss_1)
    tf.summary.scalar('classification_loss_2', classification_loss_2)
    tf.summary.scalar('reconstruction_loss', reconstruction_loss)
    # define references to params
    encoder_params = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
    decoder_params = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    autoencoder_params = encoder_params + decoder_params
    # NOTE: gaussian_params is collected but not passed to any train op here.
    gaussian_params = [self.a_2, self.a_3, self.r_2, self.r_3]
    discriminator_params = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
    global_step = tf.contrib.framework.get_or_create_global_step()
    # define training steps
    self.learn_rate = self._get_learn_rate(global_step, learning_rate)
    # update autoencoder params to minimise reconstruction loss
    #   (alternative kept from original: tf.train.MomentumOptimizer(lr, 0.9))
    self.train_autoencoder = layers.optimize_loss(
        reconstruction_loss, global_step, self.learn_rate * 0.1,
        optimizer=lambda lr: tf.train.AdamOptimizer(lr),
        variables=autoencoder_params, update_ops=[])
    # update discriminator
    #   (alternative kept from original: tf.train.AdamOptimizer(lr))
    self.train_discriminator = layers.optimize_loss(
        classification_loss, global_step, self.learn_rate * 10,
        optimizer=lambda lr: tf.train.MomentumOptimizer(lr, 0.1),
        variables=discriminator_params, update_ops=[])
    # update encoder params to fool the discriminator
    #   (alternative kept from original: tf.train.MomentumOptimizer(lr, 0.9))
    self.train_encoder = layers.optimize_loss(
        -classification_loss, global_step, self.learn_rate,
        optimizer=lambda lr: tf.train.AdamOptimizer(lr),
        variables=encoder_params, update_ops=[])
    self.sess = tf.Session()
    self.merged = tf.summary.merge_all()
    self.train_writer = tf.summary.FileWriter(log_dir, self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
def __init__(self, hidden_size, batch_size, learning_rate, log_dir):
    """Build an adversarial (denoising) autoencoder: the discriminator pushes
    the encoder's latent distribution toward N(0, 5^2 I).

    Args:
        hidden_size: latent dimensionality.
        batch_size: batch size for prior samples and sampled images.
        learning_rate: base LR, scheduled via `_get_learn_rate`.
        log_dir: TensorBoard summary directory.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    # add gaussian noise to the input
    input_with_noise = gaussian_noise_layer(self.input_tensor, 0.3)
    with arg_scope([layers.fully_connected], activation_fn=tf.nn.relu):
        with tf.variable_scope("encoder"):
            self.latent_representation = encoder(input_with_noise, hidden_size)
            # Bookmark: vars so far are encoder-only (creation-order slice).
            encoder_params_num = len(tf.trainable_variables())
        # Clean (noise-free) encoding with shared weights.
        with tf.variable_scope('encoder', reuse=True):
            self.true_latent_representation = encoder(self.input_tensor,
                                                      hidden_size)
        with tf.variable_scope('decoder'):
            self.recons = decoder(self.latent_representation)
            autoencoder_params_num = len(tf.trainable_variables())
        with tf.variable_scope('decoder', reuse=True):
            self.sampled_imgs = decoder(tf.random_normal([batch_size,
                                                          hidden_size]))
    # Positive samples come from the target prior (stddev 5); negatives are
    # the encoder outputs.
    pos_samples = tf.random_normal([batch_size, hidden_size], stddev=5.)
    neg_samples = self.latent_representation
    with tf.variable_scope('discriminator'):
        pos_samples_pred = discriminator(pos_samples)
    with tf.variable_scope('discriminator', reuse=True):
        neg_samples_pred = discriminator(neg_samples)
    #define losses
    reconstruction_loss = tf.reduce_mean(tf.square(self.recons -
                                                   self.input_tensor))  #* 28 * 28 scale recons loss
    classification_loss = tf.losses.sigmoid_cross_entropy(
        tf.ones(tf.shape(pos_samples_pred)), pos_samples_pred) +\
        tf.losses.sigmoid_cross_entropy(tf.zeros(
            tf.shape(neg_samples_pred)), neg_samples_pred)
    tf.summary.scalar('reconstruction_loss', reconstruction_loss)
    tf.summary.scalar('classification_loss', classification_loss)
    # define references to params
    # Slices depend on the creation-order bookmarks taken above.
    params = tf.trainable_variables()
    encoder_params = params[:encoder_params_num]
    decoder_params = params[encoder_params_num:autoencoder_params_num]
    autoencoder_params = encoder_params + decoder_params
    discriminator_params = params[autoencoder_params_num:]
    # record true positive rate and true negative rate
    correct_pred_pos = tf.equal(tf.cast(pos_samples_pred > 0, tf.float32),
                                tf.ones(tf.shape(pos_samples_pred)))
    self.true_pos_rate = tf.reduce_mean(tf.cast(correct_pred_pos, tf.float32))
    correct_pred_neg = tf.equal(tf.cast(neg_samples_pred < 0, tf.float32),
                                tf.ones(tf.shape(pos_samples_pred)))
    self.true_neg_rate = tf.reduce_mean(tf.cast(correct_pred_neg, tf.float32))
    tf.summary.scalar('true_pos_rate', self.true_pos_rate)
    tf.summary.scalar('true_neg_rate', self.true_neg_rate)
    global_step = tf.contrib.framework.get_or_create_global_step()
    self.learn_rate = self._get_learn_rate(global_step, learning_rate)
    # Three-way training: reconstruct, discriminate, and (via the negated
    # classification loss) make the encoder fool the discriminator.
    self.train_autoencoder = layers.optimize_loss(
        reconstruction_loss, global_step, self.learn_rate / 10,
        optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.9),
        variables=autoencoder_params, update_ops=[])
    self.train_discriminator = layers.optimize_loss(
        classification_loss, global_step, self.learn_rate,
        optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.1),
        variables=discriminator_params, update_ops=[])
    self.train_encoder = layers.optimize_loss(
        -classification_loss, global_step, self.learn_rate / 10,
        optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.1),
        variables=encoder_params, update_ops=[])
    self.sess = tf.Session()
    self.merged = tf.summary.merge_all()
    self.train_writer = tf.summary.FileWriter(log_dir, self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
def predict(self):
    """Run a multi-scale sliding-window OCR pass over the loaded image,
    group overlapping detections, vote on a label per group, and draw the
    annotated result in the UI.

    Reads `self.image`, `self.zoom_slider`, `self.model`; writes the
    annotated image via `self.display` and the labels via
    `self.prediction_text`. Returns None; bails out early if no image has
    been loaded yet.
    """
    if not 'image' in self.__dict__:
        return
    cropped, bboxes, valid, valid_bboxes, groups, labels = [], [], [], [], [], []
    # Work on the grayscale pixel array; keep the color original for drawing.
    image = np.array(self.image.convert('L'), np.uint8)
    h, w = image.shape
    thickness = int(w * 0.005)
    # Fit the sliding window to the image while keeping aspect ratio RATIO
    # (module constant — assumed width/height; TODO confirm).
    if w / h < RATIO:
        width = w
        height = width / RATIO
    else:
        height = h
        width = height * RATIO
    width, height = int(width), int(height)
    # Scan at increasing zoom levels up to the slider-controlled maximum.
    zoom = 1
    m_zoom = 2**(self.zoom_slider.get() / 2)
    while zoom <= m_zoom:
        scaled_w, scaled_h = int(w * zoom), int(h * zoom)
        overflow_x, overflow_y = abs(width - scaled_w), abs(height -
                                                            scaled_h)
        # Maps scaled coordinates back to original-image coordinates.
        coeff = w / scaled_w
        scaled = cv.resize(image, (scaled_w, scaled_h))
        step = int(SLIDE_STEP * scaled_w)
        for i in range(0, overflow_x + step, step):
            for j in range(0, overflow_y + step, step):
                # Record the window bbox in original coordinates...
                bboxes.append(((int(i * coeff), int(j * coeff)),
                               (int(i * coeff + width * coeff),
                                int(j * coeff + height * coeff))))
                # ...and the normalized crop the model will score.
                cropped.append(
                    cv.resize(scaled[j:j + height, i:i + width],
                              FINAL_SHAPE[:-1]).reshape(FINAL_SHAPE) / 255)
        zoom *= ZOOM_MULT
    predictions = self.model.predict(np.array(cropped))
    img = np.array(self.image)
    # Keep only windows whose decoded code flags a match (leading '1');
    # the label payload starts at character 3.
    for i, prediction in enumerate(predictions):
        code = decoder(prediction)
        if code[:1] == '1':
            valid.append(code[3:])
            valid_bboxes.append(bboxes[i])
    # Group detections whose boxes overlap (transitively, via shared members).
    for i, bbox0 in enumerate(valid_bboxes):
        for j, bbox1 in enumerate(valid_bboxes[i + 1:]):
            are_overlapping = max(bbox0[0][0], bbox1[0][0]) < min(
                bbox0[1][0], bbox1[1][0]) and max(
                    bbox0[0][1], bbox1[0][1]) < min(bbox0[1][1],
                                                    bbox1[1][1])
            if are_overlapping:
                appended = False
                for group in groups:
                    if i in group or j + i + 1 in group:
                        if not i in group:
                            group.append(i)
                        if not j + i + 1 in group:
                            group.append(j + i + 1)
                        appended = True
                if not appended:
                    groups.append([i, j + i + 1])
    # Any detection not absorbed into a group becomes its own group.
    for i, bbox in enumerate(valid_bboxes):
        is_in_group = False
        for group in groups:
            if i in group:
                is_in_group = True
                break
        if not is_in_group:
            groups.append([i])
    for group in groups:
        top, bottom, left, right, length = 0, 0, 0, 0, len(group)
        # Singleton groups are weak matches: report and skip.
        if length == 1:
            print('Unsure about group with a weak match: ' +
                  valid[group[0]])
            continue
        # Per-character-position vote across the group's labels
        # (labels are assumed to be 8 characters long — TODO confirm).
        letters, max_probs = [[], [], [], [], [], [], [], []], []
        for index in group:
            # Accumulate bbox corners to average them below.
            left += valid_bboxes[index][0][0]
            top += valid_bboxes[index][0][1]
            right += valid_bboxes[index][1][0]
            bottom += valid_bboxes[index][1][1]
            for i, letter in enumerate(valid[index]):
                letters[i].append(letter)
        # For each position keep every character tied for the top count.
        for letter in letters:
            c = Counter(letter)
            max_prob = c.most_common(1)[0][1]
            with_max_prob = []
            for pair in c.most_common():
                if pair[1] == max_prob:
                    with_max_prob.append(pair[0])
                elif pair[1] < max_prob:
                    break
            max_probs.append(with_max_prob)
        possible = get_possible_label('', max_probs, [])
        if len(possible) >= length // 2:
            # Too many candidate labels relative to group size: mark as
            # unsure with a blue box (RGB image array here).
            print('Unsure about group with labels: ' + ', '.join(possible))
            img = cv.rectangle(img, (left // length, top // length),
                               (right // length, bottom // length),
                               (255, 0, 0),
                               thickness=thickness // 2)
        else:
            label = '/'.join(possible)
            labels.append(label)
            # Find the largest font size that fits the averaged box width.
            text_width = (right - left) // length
            font_size = 1
            is_too_long = False
            for i in range(1, 10):
                size = cv.getTextSize(label,
                                      cv.FONT_HERSHEY_SIMPLEX,
                                      i,
                                      thickness=thickness)[0]
                if size[0] < text_width:
                    font_size = i
                    text_height = size[1]
                else:
                    # Even size 1 didn't fit: skip drawing the text.
                    is_too_long = i == 1
                    break
            img = img if is_too_long else cv.putText(
                img, label, (left // length + thickness,
                             top // length + text_height + thickness),
                cv.FONT_HERSHEY_SIMPLEX, font_size, (0, 255, 0),
                thickness=thickness)
            # Averaged (centroid) box for the whole group, in green.
            img = cv.rectangle(img, (left // length, top // length),
                               (right // length, bottom // length),
                               (0, 255, 0),
                               thickness=thickness)
    self.display(Image.fromarray(img))
    self.prediction_text.set('\n'.join(labels))