def task(x, activation='relu', output_dim=256, scope='task_network', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        print('Task Layer1: ' + str(x.get_shape().as_list()))

        block_depth = dense_block_depth

        l = x
        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False, dilation=[1, 1, 1, 1])

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln1')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn1')

        l = act_func(l)

        for i in range(15):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          scope='block1_' + str(i))

        latent = layers.global_avg_pool(l, output_length=output_dim)

        return latent
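# util.swish is referenced above but not defined in this file. A minimal
# sketch, assuming the standard swish activation x * sigmoid(x); the real
# util implementation may differ (e.g. a learnable beta):
def _swish_sketch(x):
    return tf.multiply(x, tf.nn.sigmoid(x))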
def latent_discriminator(input_data, activation='swish', scope='ldiscriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        # Interpret the latent vector as a small 4 x 4 x 8 feature map.
        l = tf.reshape(input_data, shape=[-1, 4, 4, 8])

        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, g_dense_block_depth // 2],
                        stride_dims=[1, 1], non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')
        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')
        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        l = layers.global_avg_pool(l, representation_dim)

        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

        return dc_final_layer, dc_output, tf.sigmoid(dc_output)
def discriminator(input_data, activation='swish', scope='discriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = layers.conv(input_data, scope='conv1', filter_dims=[3, 3, g_dense_block_depth // 2],
                        stride_dims=[1, 1], non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')
        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')
        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        # Deeper variants (block_3 .. block_7 with additional average pooling)
        # were tried and are left disabled here.
        # dc_final_layer = batch_norm_conv(last_dense_layer, b_train=bn_phaze, scope='last_dense_layer')

        l = layers.global_avg_pool(l, representation_dim)

        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

        return dc_final_layer, dc_output, tf.sigmoid(dc_output)
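# add_residual_dense_block is defined elsewhere in this repo; the sketch below
# only illustrates the assumed structure (densely concatenated conv features,
# projected back to the input depth and added as a residual). The underscore
# name and exact layout are hypothetical, not the repo's implementation.
def _residual_dense_block_sketch(x, filter_dims, num_layers, act_func=tf.nn.relu, scope='rdb'):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        features = [x]
        for i in range(num_layers):
            # Each layer sees the concatenation of all previous feature maps.
            l = layers.conv(tf.concat(features, axis=-1), scope='conv_' + str(i),
                            filter_dims=filter_dims, stride_dims=[1, 1], non_linear_fn=act_func)
            features.append(l)
        # 1x1 projection back to the input depth, then the residual sum.
        out = layers.conv(tf.concat(features, axis=-1), scope='proj',
                          filter_dims=[1, 1, x.get_shape().as_list()[-1]],
                          stride_dims=[1, 1], non_linear_fn=None)
        return tf.add(x, out)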
def test(model_path, test_image_dir):
    trX = []
    trY = []

    test_output_dir = 'sr'

    if not os.path.exists(test_output_dir):
        os.mkdir(test_output_dir)

    with tf.device('/device:CPU:0'):
        test_image_dir_list = os.listdir(test_image_dir)

        for idx, labelname in enumerate(test_image_dir_list):
            if os.path.isdir(os.path.join(test_image_dir, labelname).replace("\\", "/")) is False:
                continue

            if os.path.exists(os.path.join(test_output_dir, labelname)) is False:
                os.mkdir(os.path.join(test_output_dir, labelname))

            for filename in os.listdir(os.path.join(test_image_dir, labelname)):
                full_path = os.path.join(test_image_dir, labelname) + '/' + filename
                jpg_img = cv2.imread(full_path)
                img = cv2.cvtColor(jpg_img, cv2.COLOR_BGR2RGB)
                img = (img - 127.5) / 127.5
                trX.append(img)
                trY.append(os.path.join(test_output_dir, labelname))

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=anchor_layer,
                                 activation='lrelu', scope='decoder', bn_phaze=bn_train)

    # Trainable variable lists
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + \
                  tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    generator_vars = encoder_var + decoder_var

    with tf.device('/device:CPU:0'):
        residual_loss = get_residual_loss(Y, X_fake, type='l1', gamma=1.0)

    # training operation
    g_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(residual_loss)

    # Launch the graph in a session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Model loading failed')
            return

        i = 0
        for img in trX:
            # latent, anchor = sess.run([latent_real, anchor_layer], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})
            fake = sess.run([X_fake], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})

            sample = fake[0][0]
            sample = (sample * 127.5) + 127.5
            # print(sample.shape)
            sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
            # sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
            # cv2.imwrite(trY[i] + '/tmp.jpg', sample)
            # sample = cv2.imread(trY[i] + '/tmp.jpg')
            # sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
            cv2.imwrite(trY[i] + '/' + str(i) + '.jpg', sample)
            i = i + 1
def train(model_path):
    trX = []
    trY = []

    dir_list = os.listdir(imgs_dirname)
    dir_list.sort(key=str.lower)

    with tf.device('/device:CPU:0'):
        for idx, labelname in enumerate(dir_list):
            for filename in os.listdir(os.path.join(imgs_dirname, labelname)):
                print(os.path.join(imgs_dirname, labelname) + '/' + filename)
                full_path = os.path.join(imgs_dirname, labelname) + '/' + filename
                jpg_img = cv2.imread(full_path)
                img = cv2.cvtColor(jpg_img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, dsize=(96, 96), interpolation=cv2.INTER_AREA)

                # Degrade the target through a 1/4-scale JPEG round trip to
                # build the low-resolution training input.
                sample = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
                cv2.imwrite('tmp.jpg', sample)
                sample = cv2.imread('tmp.jpg')
                sample = cv2.cvtColor(sample, cv2.COLOR_BGR2RGB)
                sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
                sample = (sample - 127.5) / 127.5
                trX.append(sample)

                img = (img - 127.5) / 127.5
                trY.append(img)

        trX, trY = shuffle(trX, trY)

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)
        trY = trY.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=anchor_layer,
                                 activation='lrelu', scope='decoder', bn_phaze=bn_train)

    # Trainable variable lists
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + \
                  tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    generator_vars = encoder_var + decoder_var

    with tf.device('/device:CPU:0'):
        residual_loss = get_residual_loss(Y, X_fake, type='l1', gamma=1.0)

    # training operation
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(residual_loss)

    # Launch the graph in a session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Start New Training. Wait ...')

        num_itr = 0
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size, len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY = shuffle(trX, trY)

            for start, end in training_batch:
                _, r, fake = sess.run([g_optimizer, residual_loss, X_fake],
                                      feed_dict={X: trX[start:end], Y: trY[start:end],
                                                 bn_train: True, keep_prob: 0.5})

                sample = fake[0]
                sample = (sample * 127.5) + 127.5
                sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                # sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
                # cv2.imwrite('training_sr/tmp.jpg', sample)
                # sample = cv2.imread('training_sr/tmp.jpg')
                # sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
                cv2.imwrite('training_sr/sample' + str(num_itr) + '.jpg', sample)

                # sample = trX[start]
                # sample = (sample * 127.5) + 127.5
                # sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                # cv2.imwrite('training_sr/sample' + str(num_itr) + '_1.jpg', sample)

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    print('epoch #' + str(i) + ', itr #' + str(num_itr))
                    print(' - residual loss: ' + str(r))

            try:
                saver.save(sess, model_path)
            except:
                print('Save failed')
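# get_residual_loss is defined elsewhere in this repo. A minimal sketch of the
# type='l1' reconstruction loss used above, with gamma as a scale factor; the
# underscore name is hypothetical:
def _residual_loss_sketch(y, y_hat, type='l1', gamma=1.0):
    if type == 'l1':
        loss = tf.reduce_mean(tf.abs(y - y_hat))
    else:  # 'l2'
        loss = tf.reduce_mean(tf.square(y - y_hat))
    return gamma * loss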
def encoder(x, activation='relu', scope='encoder_network', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        # [192 x 192]
        block_depth = dense_block_depth // 4

        l = layers.conv(x, scope='conv1', filter_dims=[5, 5, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False, dilation=[1, 1, 1, 1])

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln0')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn0')

        l = act_func(l)

        for i in range(4):
            l = layers.add_residual_dense_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                                act_func=act_func, norm=norm, b_train=b_train,
                                                scope='dense_block_1_' + str(i))

        # [64 x 64]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr1', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln1')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn1')

        l = act_func(l)

        print('Encoder Block 1: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          scope='res_block_1_' + str(i))

        # [32 x 32]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr2', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln2')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn2')

        l = act_func(l)

        print('Encoder Block 2: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          scope='res_block_2_' + str(i))

        # [16 x 16]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr3', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln3')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn3')

        l = act_func(l)

        print('Encoder Block 3: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          scope='res_block_3_' + str(i))

        # [8 x 8]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr4', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln4')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn4')

        l = act_func(l)

        print('Encoder Block 4: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          use_dilation=True, scope='res_block_4_' + str(i))

        # [4 x 4]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr5', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        print('Encoder Block 5: ' + str(l.get_shape().as_list()))

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln5')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn5')

        l = act_func(l)

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                          act_func=act_func, norm=norm, b_train=b_train,
                                          use_dilation=True, scope='res_block_5_' + str(i))

        last_layer = l

        context = layers.global_avg_pool(last_layer, output_length=representation_dim, use_bias=True, scope='gp')
        print('Encoder GP Dims: ' + str(context.get_shape().as_list()))

        context = tf.reshape(context, [batch_size, num_context_patches, num_context_patches, -1])
        print('Context Dims: ' + str(context.get_shape().as_list()))

        return context
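# The final reshape above only works if representation_dim spreads evenly over
# the num_context_patches x num_context_patches grid. A sanity-check sketch
# (illustrative only; the real constants are module globals):
def _context_channels_sketch(representation_dim, num_context_patches):
    assert representation_dim % (num_context_patches ** 2) == 0, \
        'representation_dim must be divisible by num_context_patches ** 2'
    return representation_dim // (num_context_patches ** 2)  # channels per context cell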
def train(model_path):
    trX = []
    trY = []

    dir_list = os.listdir(imgs_dirname)
    dir_list.sort(key=str.lower)

    one_hot_length = len(os.listdir(imgs_dirname))

    with tf.device('/device:CPU:0'):
        for idx, labelname in enumerate(dir_list):
            imgs_list = load_images_from_folder(os.path.join(imgs_dirname, labelname), use_augmentation=False)
            imgs_list = shuffle(imgs_list)

            label = np.zeros(one_hot_length)
            label[idx] += 1

            print('label:', labelname, label)

            for idx2, img in enumerate(imgs_list):
                trY.append(label)
                '''
                if idx2 < len(imgs_list) * 0.2:
                    # SpecAugment
                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    # cv2.imwrite(labelname + str(idx2) + '.jpg', img)
                '''
                trX.append(img)

        trX, trY = shuffle(trX, trY)

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)

    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    latent_fake = cnn_representation

    with tf.device('/device:GPU:1'):
        latent_real = make_multi_modal_noise(num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=None, activation='lrelu',
                                 scope='decoder', bn_phaze=bn_train)

    p_feature, p_logit, p_prob = discriminator(latent_real, activation='lrelu', scope='discriminator',
                                               bn_phaze=bn_train)
    n_feature, n_logit, n_prob = discriminator(latent_fake, activation='lrelu', scope='discriminator',
                                               bn_phaze=bn_train)

    # Trainable variable lists
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + \
                  tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    generator_vars = encoder_var + decoder_var
    gan_g_vars = encoder_var

    with tf.device('/device:GPU:1'):
        residual_loss = get_residual_loss(X, X_fake, type='l1', gamma=1.0)
        feature_matching_loss = get_feature_matching_loss(p_feature, n_feature, type='l1', gamma=1.0)

        # Generator loss (cross-entropy variant kept below for reference)
        gan_g_loss = -tf.reduce_mean(n_prob)
        # gan_g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=n_logit, labels=tf.ones_like(n_logit)))

        # discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='wgan', gamma=1.0)
        discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='hinge', gamma=1.0)

    # training operation
    d_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(discriminator_loss, var_list=d_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(residual_loss)
    gan_g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(gan_g_loss, var_list=gan_g_vars)
    f_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(feature_matching_loss,
                                                                                 var_list=gan_g_vars)

    # Launch the graph in a session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Start New Training. Wait ...')

        num_itr = 0
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size, len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY = shuffle(trX, trY)

            for start, end in training_batch:
                with tf.device('/device:CPU:0'):
                    style_trX = shuffle(trX[start:end])
                    # style_trX = trX[start:end]

                anchor, latent = sess.run([anchor_layer, cnn_representation],
                                          feed_dict={X: style_trX, bn_train: True, keep_prob: 0.5})

                _, r, fake = sess.run([g_optimizer, residual_loss, X_fake],
                                      feed_dict={X: trX[start:end], ANCHOR: anchor,
                                                 bn_train: True, keep_prob: 0.5})

                _, d = sess.run([d_optimizer, discriminator_loss],
                                feed_dict={X: trX[start:end], ANCHOR: anchor,
                                           bn_train: True, keep_prob: 0.5})

                # trX[start:end], trY[start:end] = shuffle(trX[start:end], trY[start:end])
                # _, f = sess.run([f_optimizer, feature_matching_loss],
                #                 feed_dict={X: trX[start:end], Y: trY[start:end], ANCHOR: anchor,
                #                            bn_train: True, keep_prob: 0.5})

                _, g = sess.run([gan_g_optimizer, gan_g_loss],
                                feed_dict={X: trX[start:end], bn_train: True, keep_prob: 0.5})

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    print('epoch #' + str(i) + ', itr #' + str(num_itr))
                    print(' - residual loss: ' + str(r))
                    print(' - discriminator loss: ' + str(d))
                    print(' - generator loss: ' + str(g))
                    # print(' - feature matching loss: ' + str(f))

                if num_itr % 100 == 0:
                    sample = fake[0] * 127.5 + 127.5
                    sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                    cv2.imwrite('training_status/sample' + str(num_itr) + '.jpg', sample)

            try:
                saver.save(sess, model_path)
            except:
                print('Save failed')
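# get_discriminator_loss with type='hinge' is assumed to follow the standard
# hinge GAN loss (as used in SAGAN-style models); the repo's implementation
# may differ in sign or reduction. A minimal sketch with hypothetical naming:
def _hinge_d_loss_sketch(p_prob, n_prob, gamma=1.0):
    loss_real = tf.reduce_mean(tf.nn.relu(1.0 - p_prob))  # real scores pushed above +1
    loss_fake = tf.reduce_mean(tf.nn.relu(1.0 + n_prob))  # fake scores pushed below -1
    d_loss = gamma * (loss_real + loss_fake)
    return d_loss, loss_real, loss_fake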
def test(model_path, test_image_dir):
    trX = []
    trY = []

    test_output_dir = 'gan'

    if not os.path.exists(test_output_dir):
        os.mkdir(test_output_dir)

    with tf.device('/device:CPU:0'):
        test_image_dir_list = os.listdir(test_image_dir)

        for idx, labelname in enumerate(test_image_dir_list):
            if os.path.isdir(os.path.join(test_image_dir, labelname).replace("\\", "/")) is False:
                continue

            if os.path.exists(os.path.join(test_output_dir, labelname)) is False:
                os.mkdir(os.path.join(test_output_dir, labelname))

            imgs_list = load_images_from_folder(os.path.join(test_image_dir, labelname), use_augmentation=False)

            for idx2, img in enumerate(imgs_list):
                trY.append(os.path.join(test_output_dir, labelname))
                trX.append(img)

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])

    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    latent_fake = cnn_representation

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        latent_real = make_multi_modal_noise(num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=None, activation='lrelu',
                                 scope='decoder', bn_phaze=bn_train)

    p_feature, p_logit, p_prob = discriminator(latent_real, activation='lrelu', scope='discriminator',
                                               bn_phaze=bn_train)
    n_feature, n_logit, n_prob = discriminator(latent_fake, activation='lrelu', scope='discriminator',
                                               bn_phaze=bn_train)

    # Trainable variable lists
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + \
                  tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    generator_vars = encoder_var + decoder_var
    gan_g_vars = encoder_var

    with tf.device('/device:GPU:1'):
        residual_loss = get_residual_loss(X, X_fake, type='l1', gamma=1.0)
        feature_matching_loss = get_feature_matching_loss(p_feature, n_feature, type='l2', gamma=1.0)

        # Generator loss (cross-entropy variant kept below for reference)
        gan_g_loss = -tf.reduce_mean(n_prob)
        # gan_g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=n_logit, labels=tf.ones_like(n_logit)))

        # discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='wgan', gamma=1.0)
        discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='hinge', gamma=1.0)

    # training operation
    d_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(discriminator_loss, var_list=d_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(residual_loss)
    gan_g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(gan_g_loss, var_list=gan_g_vars)
    f_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(feature_matching_loss,
                                                                                 var_list=gan_g_vars)

    # Launch the graph in a session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Model loading failed')
            return

        i = 0
        for img in trX:
            # latent, anchor = sess.run([latent_real, anchor_layer], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})
            fake = sess.run([X_fake], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})

            sample = fake[0][0] * 127.5 + 127.5
            # print(sample.shape)
            sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
            cv2.imwrite(trY[i] + '/' + str(i) + '.jpg', sample)
            i = i + 1
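# get_feature_matching_loss is assumed to compare batch statistics of the
# discriminator features for real and fake inputs (Salimans et al. style);
# a sketch for the type='l1'/'l2' cases used above, with a hypothetical name:
def _feature_matching_loss_sketch(p_feature, n_feature, type='l1', gamma=1.0):
    diff = tf.reduce_mean(p_feature, axis=0) - tf.reduce_mean(n_feature, axis=0)
    if type == 'l1':
        loss = tf.reduce_mean(tf.abs(diff))
    else:  # 'l2'
        loss = tf.reduce_mean(tf.square(diff))
    return gamma * loss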
def discriminator(x, activation='relu', scope='discriminator_network', norm='layer', b_train=False, use_patch=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        block_depth = dense_block_depth

        bottleneck_width = 8
        if use_patch is True:
            bottleneck_width = 16

        # num_iter = input_width // bottleneck_width
        # num_iter = int(np.sqrt(num_iter))
        num_iter = 3

        print('Discriminator Input: ' + str(x.get_shape().as_list()))

        l = layers.conv(x, scope='conv_init', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)
        l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm_init')
        l = act_func(l)

        for i in range(num_iter):
            print('Discriminator Block ' + str(i) + ': ' + str(l.get_shape().as_list()))

            for j in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                              act_func=act_func, norm=norm, b_train=b_train,
                                              scope='res_block_' + str(i) + '_' + str(j))

            block_depth = block_depth * 2
            l = layers.conv(l, scope='tr' + str(i), filter_dims=[3, 3, block_depth], stride_dims=[2, 2],
                            non_linear_fn=None)
            l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm_' + str(i))
            l = act_func(l)

        if use_patch is True:
            print('Discriminator Patch Block : ' + str(l.get_shape().as_list()))

            for i in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                              act_func=act_func, norm=norm, b_train=b_train,
                                              scope='patch_block_' + str(i))

            last_layer = l

            feature = layers.global_avg_pool(last_layer, output_length=representation_dim // 8,
                                             use_bias=False, scope='gp')
            print('Discriminator GP Dims: ' + str(feature.get_shape().as_list()))

            logit = layers.conv(last_layer, scope='conv_pred', filter_dims=[3, 3, 1], stride_dims=[1, 1],
                                non_linear_fn=None, bias=False)
            print('Discriminator Logit Dims: ' + str(logit.get_shape().as_list()))
        else:
            # print('Discriminator Attention Block : ' + str(l.get_shape().as_list()))
            # l = layers.self_attention(l, block_depth, act_func=act_func)

            for i in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                              act_func=act_func, norm=norm, b_train=b_train,
                                              scope='at_block_' + str(i))

            last_layer = l

            feature = layers.global_avg_pool(last_layer, output_length=representation_dim // 8,
                                             use_bias=False, scope='gp')
            print('Discriminator GP Dims: ' + str(feature.get_shape().as_list()))

            logit = layers.fc(feature, 1, non_linear_fn=None, scope='flat')

        return feature, logit
def test(model_path):
    threshold = 0.9

    print('Serving Mode, threshold: ' + str(threshold))

    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
    Y = tf.placeholder(tf.float32, [None, num_class_per_group])
    TripletX = tf.placeholder(tf.float32, [None, representation_dim])
    bn_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    # Network setup
    cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim)
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    # scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
    # representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

    # Residual
    representation = tf.add(cnn_representation, TripletX)

    prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)

    center_loss = get_center_loss(representation, tf.argmax(Y, 1))
    update_center = update_centers(representation, tf.argmax(Y, 1), CENTER_LOSS_ALPHA)
    entropy_loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction, label_smoothing=0.1))
    total_loss = entropy_loss + center_loss * LAMBDA

    train_op = tf.train.AdamOptimizer(0.003).minimize(total_loss)

    predict_op = tf.argmax(tf.nn.softmax(prediction), 1)
    confidence_op = tf.nn.softmax(prediction)

    # Launch the graph in a session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Model load failed: ' + model_path)
            return

        fileDir = os.path.dirname(os.path.realpath(__file__))

        # Modify baseDir to your environment
        inputDir = fileDir + '/input'

        label_list = [d for d in os.listdir(inputDir + '/user') if os.path.isdir(inputDir + '/user/' + d)]
        label_list.sort(key=str.lower)
        print(label_list)

        # Global label list
        group_label_list = os.listdir(imgs_dirname)

        redis_ready = False

        clf_directory = os.path.dirname(os.path.realpath(__file__)) + '/svm/group/'
        clf_files = os.listdir(clf_directory)
        clf_list = [pickle.load(open(clf_directory + pkl_file, 'rb')) for pkl_file in clf_files]

        try:
            rds = redis.StrictRedis(host=REDIS_SERVER, port=REDIS_PORT, db=0)
            p = rds.pubsub()
            p.subscribe(redis_channel)
            redis_ready = True
            print('Connected to Message Queue')
        except:
            redis_ready = False
            print('Failed to connect to Message Queue')

        sock_ready = False

        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((HOST, PORT))
            sock_ready = True
            print('Connected to Edge Camera')
        except:
            print('Failed to connect to Edge Camera')
            sock_ready = False

        if redis_ready is False:
            print('REDIS not ready.')
            return

        cur_target_frame = -1
        next_target_frame = 1

        dirname = inputDir + '/../Unknown'

        if not os.path.exists(dirname):
            os.mkdir(dirname)

        for item in p.listen():
            data = item

            if data is not None:
                data = data.get('data')

                if data != 1:
                    temp = array.array('B', data)
                    ar = np.array(temp, dtype=np.uint8)

                    # First byte is the frame id, then four 32-bit bounding-box
                    # coordinates, then the JPEG payload.
                    left = int_from_bytes(ar[4], ar[3], ar[2], ar[1])
                    right = int_from_bytes(ar[8], ar[7], ar[6], ar[5])
                    top = int_from_bytes(ar[12], ar[11], ar[10], ar[9])
                    bottom = int_from_bytes(ar[16], ar[15], ar[14], ar[13])

                    recv_frame = ar[0]

                    ar = ar[17:]

                    frame_str = rds.get(frame_db)

                    if cur_target_frame == -1:
                        cur_target_frame = recv_frame

                    next_target_frame = int(frame_str)

                    if recv_frame == cur_target_frame or recv_frame == next_target_frame:
                        fileName = '/tmp/input' + redis_channel + '.jpg'
                        jpgFile = open(fileName, "wb")
                        jpgFile.write(ar)
                        jpgFile.close()

                        confidence = 0.97
                        person = 'Unknown'

                        # print('Get triplet representation')
                        tpReps, img = get_triplet_representation_align_image(fileName)

                        if not tpReps:
                            print('Not a valid face.')
                        else:
                            # print('Run prediction..')
                            use_softmax = False

                            pred_id, confidence, rep = sess.run(
                                [predict_op, confidence_op, representation],
                                feed_dict={X: img, TripletX: tpReps, bn_train: False, keep_prob: 1.0})

                            if use_softmax is True:
                                # print('# Prediction: ' + str(pred_id))
                                person = group_label_list[pred_id[0]]
                                confidence = confidence[0][pred_id[0]]
                                print('# Person: ' + person + ', Confidence: ' + str(confidence))
                            else:
                                confidences = []
                                labels = []

                                for (le, clf) in clf_list:
                                    pred = clf.predict_proba(rep).ravel()
                                    maxI = np.argmax(pred)
                                    person = le.inverse_transform([maxI])
                                    confidence = pred[maxI]
                                    confidences.append(confidence)
                                    labels.append(person[0])

                                print('#################################')
                                print(labels)
                                print(confidences)

                                effective_labels = []
                                effective_confidences = []

                                for i in range(len(labels)):
                                    if labels[i] != 'Unknown':
                                        effective_labels.append(labels[i])
                                        effective_confidences.append(confidences[i])

                                if len(effective_labels) == 0:
                                    person = 'Unknown'
                                    confidence = 0.99
                                else:
                                    confidence = max(effective_confidences)
                                    maxI = effective_confidences.index(confidence)
                                    person = effective_labels[maxI]

                                    if len(effective_labels) > 1:
                                        effective_confidences.sort(reverse=True)

                                        # Reject ambiguous matches across group classifiers.
                                        if effective_confidences[0] - effective_confidences[1] < 0.5:
                                            person = 'Unknown'
                                            confidence = 0.99

                                print('\nPerson: ' + person + ', Confidence: ' + str(confidence * 100) + '%')

                                if confidence < 0.9:
                                    save_unknown_user(fileName, dirname, 'Unknown', confidence)
                                elif confidence < 0.97:
                                    save_unknown_user(fileName, dirname, person, confidence)

                                if confidence < threshold:
                                    person = 'Unknown'

                            if sock_ready is True:
                                if person != 'Unknown' and person != 'Nobody':
                                    b_array = bytes()
                                    floatList = [left, right, top, bottom, confidence, label_list.index(person)]
                                    b_array = b_array.join(struct.pack('f', val) for val in floatList)
                                    sock.send(b_array)
                    else:
                        cur_target_frame = next_target_frame
                else:
                    rds.set(frame_db, '1')
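# int_from_bytes is defined elsewhere; judging by the call sites above it
# assembles a 32-bit integer from four unsigned bytes, most significant byte
# first. A sketch (the byte order is an assumption, as is the name):
def _int_from_bytes_sketch(b3, b2, b1, b0):
    return (int(b3) << 24) | (int(b2) << 16) | (int(b1) << 8) | int(b0)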
def train(model_path):
    trX = []
    trY = []
    trXT = []

    teX = []
    teY = []
    teXT = []

    for idx, labelname in enumerate(os.listdir(imgs_dirname)):
        print('label:', idx, labelname)

        imgs_list = load_images_from_folder(os.path.join(imgs_dirname, labelname))
        imgs_list = shuffle(imgs_list)

        for idx2, img in enumerate(imgs_list):
            label = np.zeros(len(os.listdir(imgs_dirname)))
            label[idx] += 1

            if idx2 < len(imgs_list) * 0.8:
                trY.append(label)
                trXT.append(get_triplet_representation(img))

                if idx2 < len(imgs_list) * 0.7:
                    # SpecAugment: mask one random stripe (max 10% width)
                    # along each spatial axis.
                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    # cv2.imwrite(labelname + str(idx2) + '.jpg', img)

                trX.append(img)
            else:
                teY.append(label)
                teXT.append(get_triplet_representation(img))
                teX.append(img)

    trX, trY, trXT = shuffle(trX, trY, trXT)

    trX = np.array(trX)
    trY = np.array(trY)
    trXT = np.array(trXT)
    teX = np.array(teX)
    teY = np.array(teY)
    teXT = np.array(teXT)

    trX = trX.reshape(-1, input_height, input_width, num_channel)
    teX = teX.reshape(-1, input_height, input_width, num_channel)

    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
    Y = tf.placeholder(tf.float32, [None, num_class_per_group])
    TripletX = tf.placeholder(tf.float32, [None, representation_dim])
    bn_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    # Network setup
    cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, g_fc_layer3_dim)
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    # scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
    # representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

    # Residual
    representation = tf.add(cnn_representation, TripletX)

    # L2 Softmax: scale the normalized embedding by the alpha lower bound
    # log(p * (C - 2) / (1 - p)) with p = 0.9.
    representation = tf.nn.l2_normalize(representation, axis=1)
    alpha = tf.log((0.9 * (num_class_per_group - 2)) / (1 - 0.9))
    representation = tf.multiply(alpha, representation)

    prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)

    center_loss = get_center_loss(representation, tf.argmax(Y, 1))
    update_center = update_centers(representation, tf.argmax(Y, 1), CENTER_LOSS_ALPHA)
    entropy_loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction, label_smoothing=0.1))
    # entropy_loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction))

    # L1 penalty on the bottleneck (transition) layers' BN gamma only
    tr_val_list = [var for var in tf.trainable_variables() if 'transition' in var.name]
    t_val_list = [tf.reduce_sum(tf.abs(var)) for var in tr_val_list if 'gamma' in var.name]
    L1_penalty = tf.reduce_sum(t_val_list)
    scale = 1e-7

    total_loss = entropy_loss + center_loss * LAMBDA + scale * L1_penalty
    # total_loss = entropy_loss + center_loss * LAMBDA

    global_step = tf.Variable(0, trainable=False)
    learning_rate = 0.05
    decayed_lr = tf.train.exponential_decay(learning_rate, global_step, 10000, 0.95, staircase=True)
    train_op = tf.train.AdamOptimizer(decayed_lr).minimize(total_loss, global_step=global_step)
    # train_op = tf.train.AdamOptimizer(3e-4).minimize(total_loss)

    predict_op = tf.argmax(tf.nn.softmax(prediction), 1)

    # Launch the graph in a session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except:
            print('Start New Training. Wait ...')

        num_itr = 0
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size, len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY, trXT = shuffle(trX, trY, trXT)

            for start, end in training_batch:
                _, c, center, _, l1_penalty = sess.run(
                    [train_op, entropy_loss, center_loss, update_center, L1_penalty],
                    feed_dict={X: trX[start:end], Y: trY[start:end], TripletX: trXT[start:end],
                               bn_train: True, keep_prob: 0.5})

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    try:
                        print('entropy loss: ' + str(c))
                        print('center loss: ' + str(center))
                        print('l1 penalty: ' + str(l1_penalty))
                        saver.save(sess, model_path)
                    except:
                        print('Save failed')

            test_indices = np.arange(len(teX))  # Get a test batch
            np.random.shuffle(test_indices)
            test_indices = test_indices[0:test_size]

            print('# Test Set #')
            print(np.argmax(teY[test_indices], axis=1))

            print('# Prediction #')
            print(sess.run(predict_op,
                           feed_dict={X: teX[test_indices], Y: teY[test_indices],
                                      TripletX: teXT[test_indices], bn_train: False, keep_prob: 1.0}))

            precision = np.mean(np.argmax(teY[test_indices], axis=1) ==
                                sess.run(predict_op,
                                         feed_dict={X: teX[test_indices], Y: teY[test_indices],
                                                    TripletX: teXT[test_indices],
                                                    bn_train: False, keep_prob: 1.0}))
            print('epoch ' + str(i) + ', precision: ' + str(100 * precision) + ' %')

            if precision > 0.99:
                break
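# get_center_loss / update_centers are assumed to follow the center-loss
# formulation of Wen et al. (ECCV 2016): pull each embedding toward its class
# center and move the centers toward the batch means at rate alpha. Sketches
# against the 'centers' variable created above (hypothetical helper names):
def _center_loss_sketch(features, labels):
    with tf.variable_scope('center', reuse=True):
        centers = tf.get_variable('centers')
    centers_batch = tf.gather(centers, labels)
    return tf.reduce_mean(tf.reduce_sum(tf.square(features - centers_batch), axis=1))

def _update_centers_sketch(features, labels, alpha):
    with tf.variable_scope('center', reuse=True):
        centers = tf.get_variable('centers')
    centers_batch = tf.gather(centers, labels)
    diff = alpha * (centers_batch - features)
    return tf.scatter_sub(centers, labels, diff)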
data_dir = args.data

if not os.path.exists(data_dir):
    print('No data.')
else:
    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
    TripletX = tf.placeholder(tf.float32, [None, representation_dim])
    bn_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    # Network setup
    cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, g_fc_layer3_dim)
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    fc_representation = cnn_representation

    # scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
    # representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

    # Residual
    representation = tf.add(cnn_representation, TripletX)

    prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)
def encoder(x, activation='relu', scope='encoder', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        num_encoder_feature_blocks = 5
        num_encoder_bt_blocks = 4

        print('Encoder Input: ' + str(x.get_shape().as_list()))

        block_depth = unit_block_depth

        l = layers.conv(x, scope='conv0', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)
        l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm0')
        l = act_func(l)

        print('Encoder Block: ' + str(l.get_shape().as_list()))

        for i in range(num_encoder_bt_blocks):
            l = layers.add_se_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                             act_func=act_func, norm=norm, b_train=b_train,
                                             scope='res_block_' + str(i))

            print('Encoder Bottleneck Block ' + str(i) + ': ' + str(l.get_shape().as_list()))

            block_depth = block_depth * 2
            l = layers.conv(l, scope='tr_' + str(i), filter_dims=[3, 3, block_depth], stride_dims=[2, 2],
                            non_linear_fn=None)
            l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm_' + str(i))
            l = act_func(l)

        last_layer = l

        latent = layers.global_avg_pool(last_layer, output_length=representation_dim, use_bias=False, scope='gp')
        categories = layers.fc(latent, num_class)

        print('Encoder Latent Dims: ' + str(latent.get_shape().as_list()))

        return latent, categories