def valid_parse(ldr_dir, hdr_dir):
    """Build the validation-input ops for one LDR/HDR image pair.

    Reads the raw bytes of both files, decodes them through the
    ``read_valid_images`` py_func (uint8 LDR, float32 HDR), normalizes the
    HDR image via ``norm_img``, and converts the LDR image to float32.

    Returns:
        Tuple ``(ldr_img, hdr_img, Hth)``: float LDR tensor, normalized
        HDR tensor, and the value ``norm_img`` returns alongside it
        (presumably a normalization threshold -- TODO confirm).
    """
    raw_ldr = tf.read_file(ldr_dir)
    raw_hdr = tf.read_file(hdr_dir)
    # Decode both images in a single py_func call.
    decoded_ldr, decoded_hdr = tf.py_func(read_valid_images,
                                          [raw_ldr, raw_hdr],
                                          [tf.uint8, tf.float32])
    # norm_img returns a 3-tuple; only the normalized image and the
    # third element are used here.
    _, normalized_hdr, Hth = norm_img(decoded_hdr)
    float_ldr = tf.image.convert_image_dtype(decoded_ldr, tf.float32)
    return float_ldr, normalized_hdr, Hth
def train(batch_size, epochs, dataset, log_dir):
    """Train a noise-to-face GAN with least-squares losses and noisy labels.

    Builds the generator/discriminator graph, then feeds mini-batches of
    face images plus uniform noise vectors through placeholders.  Real/fake
    target labels are drawn from soft ranges ([0.7, 1.2] vs [0.0, 0.3]) and
    are swapped with probability 0.1 (label noise).  Summaries go to
    ``log_dir/train``; checkpoints to ``args.checkpoint_dir`` every 3000
    iterations.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64
    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    # 64-dimensional latent noise vector for the generator.
    z = tf.placeholder('float32', [None, 64], name='t_noise_generator')
    # Per-sample soft targets for the LSGAN-style discriminator/generator losses.
    y_gan_real = tf.placeholder('float32', [None, 1], name='t_labels_real')
    y_gan_fake = tf.placeholder('float32', [None, 1], name='t_labels_fake')
    y_generator = tf.placeholder('float32', [None, 1], name='t_labels_generator')
    tf.summary.image('input_image', images)
    images_normalized = norm_img(images)  # Normalization
    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(z=z, reuse=False)
    tf.summary.image('generated_normalized_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Fake and real batches are concatenated so the discriminator runs once.
    net_d, logits = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0), reuse=False)
    # First half of the concatenated batch is fake, second half is real.
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    # ###========================== DEFINE TRAIN OPS ==========================###
    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(1e-4, trainable=False)
    # Least-squares GAN losses against the soft labels fed at run time.
    d_loss_real = tf.reduce_mean(tf.square(net_d_real - y_gan_real),
                                 name='d_loss_real')
    d_loss_fake = tf.reduce_mean(tf.square(net_d_false - y_gan_fake),
                                 name='d_loss_fake')
    d_loss = d_loss_real + d_loss_fake
    g_loss = tf.reduce_mean(tf.square(net_d_false - y_generator),
                            name='g_loss_gan')
    # NOTE(review): global_step is created but never wired to an optimizer.
    g_optim = tf.train.AdamOptimizer(lr).minimize(g_loss, var_list=g_vars)
    d_optim = tf.train.AdamOptimizer(lr).minimize(d_loss, var_list=d_vars)
    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('g_loss', g_loss)
    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1)
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)
        # items_audio is unused in this trainer.
        items_faces, items_audio = dataset.get_items()
        total = 0  # global iteration counter across epochs (summary x-axis)
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): when len(items_faces) is not a multiple of
                # batch_size, the final while-iteration's slice is short and
                # rows [count:] of this np.empty buffer are fed UNinitialized.
                # The `rest` block below then re-processes that tail correctly.
                input_images = np.empty([batch_size, 64, 64, 3])
                count = 0
                for face in items_faces[iteration * batch_size:iteration *
                                        batch_size + batch_size]:
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    count += 1
                input_z = np.random.uniform(-1., 1, size=[batch_size, 64])
                # With probability 0.9 use correct (soft) labels, else swap
                # them -- label noise to regularize the discriminator.
                if np.random.uniform() > 0.1:
                    # give correct classifications
                    labels_real = np.random.uniform(size=[batch_size, 1],
                                                    low=0.7, high=1.2)
                    labels_fake = np.random.uniform(size=[batch_size, 1],
                                                    low=0.0, high=0.3)
                else:
                    # give wrong classifications (noisy labels)
                    labels_fake = np.random.uniform(size=[batch_size, 1],
                                                    low=0.7, high=1.2)
                    labels_real = np.random.uniform(size=[batch_size, 1],
                                                    low=0.0, high=0.3)
                labels_generator = np.random.uniform(size=[batch_size, 1],
                                                     low=0.7, high=1.2)
                # ##========================= train SRGAN =========================###
                summary_str, gLoss, dLoss, _, _ = sess.run(
                    [summary, g_loss, d_loss, g_optim, d_optim],
                    feed_dict={
                        images: input_images,
                        z: input_z,
                        y_gan_real: labels_real,
                        y_gan_fake: labels_fake,
                        y_generator: labels_generator
                    })
                summary_writer.add_summary(summary_str, total)
                print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f."
                      % (j, iteration, gLoss, dLoss))
                # ##========================= save checkpoint =========================###
                if iteration % 3000 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # Handle the leftover tail of the epoch in one smaller batch.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                for face in items_faces[len(items_faces) - rest:]:
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    count += 1
                input_z = np.random.uniform(-1., 1, size=[rest, 64])
                if np.random.uniform() > 0.1:
                    # give correct classifications
                    labels_real = np.random.uniform(size=[rest, 1],
                                                    low=0.7, high=1.2)
                    labels_fake = np.random.uniform(size=[rest, 1],
                                                    low=0.0, high=0.3)
                else:
                    # give wrong classifications (noisy labels)
                    labels_fake = np.random.uniform(size=[rest, 1],
                                                    low=0.7, high=1.2)
                    labels_real = np.random.uniform(size=[rest, 1],
                                                    low=0.0, high=0.3)
                labels_generator = np.random.uniform(size=[rest, 1],
                                                     low=0.7, high=1.2)
                # ##========================= train SRGAN =========================###
                summary_str, gLoss, dLoss, _, _ = sess.run(
                    [summary, g_loss, d_loss, g_optim, d_optim],
                    feed_dict={
                        images: input_images,
                        z: input_z,
                        y_gan_real: labels_real,
                        y_gan_fake: labels_fake,
                        y_generator: labels_generator
                    })
                print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f."
                      % (j, iteration, gLoss, dLoss))
                # NOTE(review): tail summary is logged at step `iteration`,
                # not `total` -- inconsistent x-axis with the loop above.
                summary_writer.add_summary(summary_str, iteration)
def train(batch_size, epochs, dataset, log_dir):
    """Train an audio-to-face BEGAN (placeholder-fed variant).

    The generator maps a 35x12 MFCC patch to a 64x64x3 face; the
    discriminator is an autoencoder whose reconstruction errors drive the
    BEGAN losses with the proportional k_t control.  An extra feature-space
    term (|d_z_real - d_z_fake|) is added to the generator loss.  Summaries
    go to ``log_dir/train``; checkpoints every 3630 iterations.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64
    # MFCC patch dimensions: 35 frames x 12 coefficients.
    audio_width = 12
    audio_height = 35
    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    audio = tf.placeholder('float32', [None, audio_height, audio_width, 1],
                           name='t_audio_input_generator')
    tf.summary.image('input_image', images)
    images_normalized = norm_img(images)  # Normalization
    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(input_audio=audio, reuse=False)
    tf.summary.image('norm_generated_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # One discriminator pass over the concatenated fake+real batch.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0), reuse=False)
    # First half fake, second half real (matches the concat order above).
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    # Discriminator bottleneck (embedding) split the same way.
    d_z_false, d_z_real = tf.split(d_z.outputs, num_or_size_splits=2, axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))
    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization
    # ###========================== DEFINE TRAIN OPS ==========================###
    # BEGAN hyper-parameters: k_t learning rate and diversity ratio.
    lambda_k = 0.001
    gamma = 0.7
    k_t = tf.Variable(0., trainable=False, name='k_t')
    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00008, trainable=False)
        decay_rate = 0.5
        decay_steps = 116722
        learning_rate = tf.train.inverse_time_decay(lr,
                                                    decay_rate=decay_rate,
                                                    decay_steps=decay_steps,
                                                    global_step=global_step)
    # BEGAN losses: L1 autoencoder reconstruction errors.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake
    g_loss_discriminativefeatures = tf.reduce_mean(tf.abs(d_z_real - d_z_false))
    g_loss = tf.reduce_mean(
        tf.abs(ae_gen - output_gen)) + 10e-2 * g_loss_discriminativefeatures
    # NOTE(review): both optimizers pass global_step, so it advances by 2
    # per training step -- the decay schedule runs twice as fast as the
    # step count suggests.  Confirm whether that is intended.
    g_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        d_loss, var_list=d_vars, global_step=global_step)
    balance = gamma * d_loss_real - g_loss
    # k_t update only fires after both optimizers have run.
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))
    # BEGAN convergence measure.
    m_global = d_loss_real + tf.abs(balance)
    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('g_loss_discriminativefeatures',
                      g_loss_discriminativefeatures)
    tf.summary.scalar('k_t', k_t)
    tf.summary.scalar('learning_rate', learning_rate)
    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1,
                               var_list=tf.trainable_variables())
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess)
        # Coordinate the different workers for the input data pipeline
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(coord=coord)
        items_faces, items_audio = dataset.get_items()
        total = 0  # global step counter across epochs (summary x-axis)
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): for a short final slice, rows [count:] of
                # these np.empty buffers are fed UNinitialized; the `rest`
                # block below re-runs that tail with a correctly sized batch.
                input_images = np.empty([batch_size, 64, 64, 3])
                audio_MFCC = np.empty([batch_size, 35, 12, 1])
                count = 0
                for face, input_audio in zip(
                        items_faces[iteration * batch_size:iteration *
                                    batch_size + batch_size],
                        items_audio[iteration * batch_size:iteration *
                                    batch_size + batch_size]):
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    # Each audio item is a .npy file holding a 35x12 MFCC
                    # matrix; add the trailing channel axis.
                    input_audio = np.load(input_audio)
                    input_audio = np.asarray(input_audio, dtype=float)
                    audio_MFCC[count] = input_audio[:, :, np.newaxis]
                    count += 1
                # ##========================= train SRGAN =========================###
                # k_update triggers g_optim/d_optim through its control deps.
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        audio: audio_MFCC
                    })
                print("Epoch: %2d Iteration: %2d kt: %.8f Mglobal: %.8f."
                      % (j, iteration, kt, mGlobal))
                summary_writer.add_summary(summary_str, total)
                # summary_writer.flush()
                # ##========================= save checkpoint =========================###
                if iteration % 3630 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # Leftover tail of the epoch, processed as one smaller batch.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                audio_MFCC = np.empty([rest, 35, 12, 1])
                for face, input_audio in zip(
                        items_faces[len(items_faces) - rest:],
                        items_audio[len(items_faces) - rest:]):
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    input_audio = np.load(input_audio)
                    input_audio = np.asarray(input_audio, dtype=float)
                    audio_MFCC[count] = input_audio[:, :, np.newaxis]
                    count += 1
                # ##========================= train SRGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        audio: audio_MFCC
                    })
                print("Iteration: %2d kt: %.8f Mglobal: %.8f."
                      % (iteration, kt, mGlobal))
                # NOTE(review): logged at `iteration` rather than `total` --
                # inconsistent with the main loop's summary x-axis.
                summary_writer.add_summary(summary_str, iteration)
def __init__(self, model, image, image_name=None, max_tr_steps=50000,
             load_path=''):
    '''Build the encoder graph that searches for a latent code reproducing
    a target image with a pretrained generator.

    image is assumed to be a path to a precropped 64x64x3 uint8 image.

    Creates trainable latent variables (one per causal-controller node plus
    a 'gen' noise variable), reconstructs the generator forward pass with
    reuse=True, and sets up an L1 loss in the discriminator's autoencoded
    feature space, plus summaries and a saver for the encode variables.

    NOTE(review): this module is Python 2 -- see the bare `print` statement
    below.  NOTE(review): self.summ_col is read near the end but never
    assigned in this method; presumably set elsewhere -- verify.
    '''
    #Some hardcoded defaults here
    self.log_step = 500
    self.lr = 0.0005
    self.max_tr_steps = max_tr_steps
    self.model = model
    self.load_path = load_path
    # Default the run name to the image filename with dots replaced.
    self.image_name = image_name or os.path.basename(image).replace(
        '.', '_')
    self.encode_dir = make_encode_dir(model, self.image_name)
    self.model_dir = self.encode_dir  #different from self.model.model_dir
    self.save_dir = os.path.join(self.model_dir, 'save')
    self.sess = self.model.sess  #session should already be in progress
    # Data layout depends on the wrapped model type.
    if model.model_type == 'dcgan':
        self.data_format = 'NHWC'  #Don't change
    elif model.model_type == 'began':
        self.data_format = model.data_format  #'NCHW' if gpu
    else:
        raise Exception('Should not happen. model_type=', model.model_type)
    #Notation:
    #self.uint_x/G ; 3D [0,255]
    #self.x/G ; 4D [-1,1]
    self.uint_x = read_prepared_uint8_image(image)  #x is [0,255]
    print('Read image shape', self.uint_x.shape)
    # Add a batch axis (bs=1) and normalize to the model's input range.
    self.x = norm_img(np.expand_dims(self.uint_x, 0),
                      self.data_format)  #bs=1
    #self.x=norm_img(tf.expand_dims(self.uint_x,0),self.data_format)#bs=1
    print('Shape after norm:', self.x.get_shape().as_list())
    ##All variables created under encoder have uniform init
    vs = tf.variable_scope('encoder',
                           initializer=tf.random_uniform_initializer(
                               minval=-1., maxval=1.),
                           dtype=tf.float32)
    with vs as scope:
        #avoid creating adams params
        optimizer = tf.train.GradientDescentOptimizer
        #optimizer = tf.train.AdamOptimizer
        self.g_optimizer = optimizer(self.lr)
        # One trainable latent variable per causal-controller node,
        # shaped like that node's z, plus one for the generator noise.
        encode_var = {
            n.name: var_like_z(n.z, n.name)
            for n in model.cc.nodes
        }
        encode_var['gen'] = var_like_z(model.z_gen, 'gen')
        # NOTE(review): Python 2 print statement -- SyntaxError if this
        # file is ever run under Python 3; change to print(...).
        print 'encode variables created'
        self.train_var = tf.contrib.framework.get_variables(scope)
        # Step counter lives inside the scope but after train_var capture,
        # so it is saved/restored yet not optimized.
        self.step = tf.Variable(0, name='step')
        self.var = tf.contrib.framework.get_variables(scope)
        #all encode vars created by now
        self.saver = tf.train.Saver(var_list=self.var)
    print('Summaries will be written to ', self.model_dir)
    self.summary_writer = tf.summary.FileWriter(self.model_dir)
    #load or initialize enmodel variables
    self.init()
    if model.model_type == 'dcgan':
        # Rebuild the causal controller over our encode variables,
        # reusing the pretrained weights.
        self.cc = CausalController(graph=model.graph,
                                   input_dict=encode_var,
                                   reuse=True)
        self.fake_labels_logits = tf.concat(self.cc.list_label_logits(), -1)
        self.z_fake_labels = self.fake_labels_logits
        #self.z_gen = noise_like_z( self.model.z_gen,'en_z_gen')
        self.z_gen = encode_var['gen']
        self.z = tf.concat([self.z_gen, self.z_fake_labels],
                           axis=1,
                           name='z')
        self.G = model.generator(self.z, bs=1, reuse=True)
    elif model.model_type == 'began':
        with tf.variable_scope('tower'):
            #reproduce variable scope
            self.cc = CausalController(graph=model.graph,
                                       input_dict=encode_var,
                                       reuse=True)
        self.fake_labels = tf.concat(self.cc.list_labels(), -1)
        self.fake_labels_logits = tf.concat(
            self.cc.list_label_logits(), -1)
        #self.z_gen = noise_like_z( self.model.z_gen,'en_z_gen')
        self.z_gen = encode_var['gen']
        # Note the concat order differs from the dcgan branch
        # (labels first here).
        self.z = tf.concat([self.fake_labels, self.z_gen],
                           axis=-1,
                           name='z')
        self.G, _ = GeneratorCNN(self.z,
                                 model.conv_hidden_num,
                                 model.channel,
                                 model.repeat_num,
                                 model.data_format,
                                 reuse=True)
        # Autoencode the reconstruction and embed both G and the target x.
        d_out, self.D_zG, self.D_var = DiscriminatorCNN(
            self.G, model.channel, model.z_num, model.repeat_num,
            model.conv_hidden_num, model.data_format, reuse=True)
        _, self.D_zX, _ = DiscriminatorCNN(self.x, model.channel,
                                           model.z_num, model.repeat_num,
                                           model.conv_hidden_num,
                                           model.data_format,
                                           reuse=True)
        self.norm_AE_G = d_out
        #AE_G, AE_x = tf.split(d_out, 2)
        self.AE_G = denorm_img(self.norm_AE_G, model.data_format)
        self.aeg_sum = tf.summary.image('encoder/AE_G', self.AE_G)
    # Scalar summary of each causal node's mean label value.
    node_summaries = []
    for node in self.cc.nodes:
        with tf.name_scope(node.name):
            ave_label = tf.reduce_mean(node.label)
            node_summaries.append(tf.summary.scalar('ave', ave_label))
    #unclear how scope with adam param works
    #with tf.variable_scope('encoderGD') as scope:
    #use L1 loss
    #self.g_loss_image = tf.reduce_mean(tf.abs(self.x - self.G))
    #use L2 loss
    #self.g_loss_image = tf.reduce_mean(tf.square(self.x - self.G))
    #use autoencoder reconstruction loss
    #3.1.1 series
    #self.g_loss_image = tf.reduce_mean(tf.abs(self.x - self.norm_AE_G))
    #use L1 in autoencoded space# 3.2
    # NOTE(review): D_zX/D_zG only exist in the 'began' branch; the dcgan
    # branch would fail here -- confirm dcgan is still supported.
    self.g_loss_image = tf.reduce_mean(tf.abs(self.D_zX - self.D_zG))
    g_loss_sum=tf.summary.scalar( 'encoder/g_loss_image',\
        self.g_loss_image,self.summ_col)
    self.g_loss = self.g_loss_image
    # Only the encode variables are optimized; model weights stay frozen.
    self.train_op = self.g_optimizer.minimize(self.g_loss,
                                              var_list=self.train_var,
                                              global_step=self.step)
    self.uint_G = tf.squeeze(denorm_img(self.G,
                                        self.data_format))  #3D[0,255]
    # Side-by-side target vs reconstruction image summary.
    gimg_sum=tf.summary.image( 'encoder/Reconstruct',tf.stack([self.uint_x,self.uint_G]),\
        max_outputs=2,collections=self.summ_col)
    #self.summary_op=tf.summary.merge_all(self.summ_col)
    #self.summary_op=tf.summary.merge_all(self.summ_col)
    if model.model_type == 'dcgan':
        self.summary_op = tf.summary.merge([g_loss_sum, gimg_sum] +
                                           node_summaries)
    elif model.model_type == 'began':
        self.summary_op = tf.summary.merge(
            [g_loss_sum, gimg_sum, self.aeg_sum] + node_summaries)
def train(batch_size, epochs, dataset, log_dir):
    """Train a deblurring BEGAN: generator maps blurry faces to sharp faces.

    The generator input is a (pre-computed) blurry version of each face; the
    discriminator is an autoencoder driving BEGAN losses with k_t control.
    The generator additionally gets a small MSE term against the sharp
    target.  Summaries go to ``log_dir/train``; checkpoints every 3000
    iterations to ``args.checkpoint_dir``.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64
    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_input')
    generator_input = tf.placeholder('float32',
                                     [None, image_height, image_width, 3],
                                     name='t_input_generator')
    tf.summary.image('input_image', images)
    tf.summary.image('generator_input', generator_input)
    images_normalized = norm_img(images)  # Normalization
    generator_input_normalized = norm_img(generator_input)
    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(gen_in=generator_input_normalized, reuse=False)
    tf.summary.image('norm_generated_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # One discriminator pass over the concatenated fake+real batch.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0), reuse=False)
    # First half fake, second half real (matches the concat order above).
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))
    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization
    # ###========================== DEFINE TRAIN OPS ==========================###
    # BEGAN hyper-parameters: k_t learning rate and diversity ratio.
    lambda_k = 0.001
    gamma = 0.7
    k_t = tf.Variable(0., trainable=False, name='k_t')
    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00004, trainable=False)
    # BEGAN losses: L1 autoencoder reconstruction errors.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake
    # Small pixel-space MSE keeps the deblurred output close to the target.
    g_loss_MSE = 1e-2 * tf.losses.mean_squared_error(output_gen, images)
    g_loss_adv = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    g_loss = g_loss_adv + g_loss_MSE
    # NOTE(review): both optimizers increment global_step (2 per training
    # step); nothing else reads it here, so it is only cosmetic.
    g_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        d_loss, var_list=d_vars, global_step=global_step)
    # Balance uses only the adversarial part of the generator loss.
    balance = gamma * d_loss_real - g_loss_adv
    # k_t update only fires after both optimizers have run.
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))
    # BEGAN convergence measure.
    m_global = d_loss_real + tf.abs(balance)
    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('g_loss', g_loss)
    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('k_t', k_t)
    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1)
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)
        items_faces, items_faces_blurry = dataset.get_items_blurry()
        total = 0  # global step counter across epochs (summary x-axis)
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): for a short final slice, rows [count:] of
                # these np.empty buffers are fed UNinitialized; the `rest`
                # block below re-runs that tail with a correctly sized batch.
                input_images = np.empty([batch_size, 64, 64, 3])
                input_images_blurry = np.empty([batch_size, 64, 64, 3])
                count = 0
                for face, face_blurry in zip(
                        items_faces[iteration * batch_size:iteration *
                                    batch_size + batch_size],
                        items_faces_blurry[iteration * batch_size:iteration *
                                           batch_size + batch_size]):
                    # Normal images
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    # Blurry images
                    # input_blurry_0 = gaussian_filter(input_image[:, :, 0], sigma=2)
                    # input_blurry_1 = gaussian_filter(input_image[:, :, 1], sigma=2)
                    # input_blurry_2 = gaussian_filter(input_image[:, :, 2], sigma=2)
                    # input_images_blurry[count, :, :, 0] = input_blurry_0
                    # input_images_blurry[count, :, :, 1] = input_blurry_1
                    # input_images_blurry[count, :, :, 2] = input_blurry_2
                    input_image = Image.open(face_blurry)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images_blurry[count] = input_image
                    count += 1
                # ##========================= train BEGAN =========================###
                # k_update triggers g_optim/d_optim through its control deps.
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        generator_input: input_images_blurry
                    })
                summary_writer.add_summary(summary_str, total)
                # Throttle console output to every 16th iteration.
                if iteration % 16 == 0 and iteration > 0:
                    print("Epoch: %2d Iteration: %2d kt: %.8f Mglobal: %.8f."
                          % (j, iteration, kt, mGlobal))
                # ##========================= save checkpoint =========================###
                if iteration % 3000 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # Leftover tail of the epoch, processed as one smaller batch.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                input_images_blurry = np.empty([rest, 64, 64, 3])
                for face, face_blurry in zip(
                        items_faces[len(items_faces) - rest:],
                        items_faces_blurry[len(items_faces_blurry) - rest:]):
                    # Normal images
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    # Blurry images
                    input_image = Image.open(face_blurry)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images_blurry[count] = input_image
                    count += 1
                # ##========================= train BEGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        generator_input: input_images_blurry
                    })
                print("Iteration: %2d kt: %.8f Mglobal: %.8f."
                      % (iteration, kt, mGlobal))
                # NOTE(review): logged at `iteration` rather than `total` --
                # inconsistent with the main loop's summary x-axis.
                summary_writer.add_summary(summary_str, iteration)
# Train a LinearSVC digit classifier on HOG features and persist it to disk.
#
# Usage: -d <dataset file> -m <output model path>
try:
    # FIX: joblib is a standalone package; sklearn.externals.joblib was
    # deprecated in scikit-learn 0.21 and removed in 0.23.
    import joblib as jb
except ImportError:
    # Fallback keeps very old scikit-learn environments working.
    from sklearn.externals import joblib as jb
from sklearn.svm import LinearSVC
import utils
import argparse

argp = argparse.ArgumentParser()
argp.add_argument("-d", "--dataset", required=True, help="dataset file")
argp.add_argument("-m",
                  "--model",
                  required=True,
                  help="Path model being saved")
args = vars(argp.parse_args())

# Load the digit images and their labels from the dataset file.
Numbers, Labels = utils.getting_data(args["dataset"])
data = []
for image in Numbers:
    # Normalize to 20px and center by mass, then extract HOG features --
    # the recognizer must apply the identical preprocessing at predict time.
    image = utils.norm_img(image, 20)
    image = utils.mass_center(image, (20, 20))
    dat = utils.hog(image,
                    orientations=18,
                    pixelsPerCell=(5, 5),
                    cellsPerBlock=(1, 1),
                    normalize=True)
    data.append(dat)

# Fixed random_state keeps training reproducible.
model = LinearSVC(random_state=40)
model.fit(data, Labels)
jb.dump(model, args["model"])
# Detect digit candidates: edge map of the blurred image, then external
# contours.  (img_blur/img_gray/model/ms/utils come from earlier in the
# script, outside this chunk.)
img_corner = cv2.Canny(img_blur, 30, 150)
# NOTE(review): cv2.findContours returns 2 values on OpenCV 2.x/4.x but 3
# values on OpenCV 3.x -- this 2-tuple unpacking is version-dependent; verify
# the pinned OpenCV version.
(contours, _) = cv2.findContours(img_corner.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
# Sort contours left-to-right by bounding-box x coordinate.
contours = sorted([(cur, cv2.boundingRect(cur)[0]) for cur in contours],
                  key=lambda x: x[1])
for (cur, _) in contours:
    (x, y, width, height) = cv2.boundingRect(cur)
    # Skip contours too small to be a digit.
    if width >= 8 and height >= 19:
        # Crop the candidate region from the grayscale image.
        tmp = img_gray[y:y + height, x:x + width]
        cut_img = tmp.copy()
        # Otsu threshold, then invert so strokes become white on black.
        h_th = ms.thresholding.otsu(tmp)
        cut_img[cut_img > h_th] = 255
        cut_img = cv2.bitwise_not(cut_img)
        # Same preprocessing chain used when training the classifier:
        # normalize to 20px, center by mass, extract HOG features.
        cut_img = utils.norm_img(cut_img, 20)
        cut_img = utils.mass_center(cut_img, (20, 20))
        dat = utils.hog(cut_img,
                        orientations=18,
                        pixelsPerCell=(5, 5),
                        cellsPerBlock=(1, 1),
                        normalize=True)
        # Classify and annotate the original image.
        Number = model.predict(dat.reshape(1, -1))[0]
        cv2.rectangle(img, (x, y), (x + width, y + height), (0, 255, 0), 1)
        cv2.putText(img, str(Number), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_DUPLEX, 1.1, (0, 255, 0), 2)
# Display the annotated result until a key is pressed.
cv2.imshow("img", img)
cv2.waitKey(0)
def train(batch_size, epochs, dataset, log_dir):
    """Train the audio-to-image BEGAN fed by the queue-based input pipeline.

    Args:
        batch_size: number of (image, audio) pairs per step.
        epochs: number of dataset passes; the pipeline raises
            OutOfRangeError when exhausted, which ends training.
        dataset: object exposing input_pipeline(batch_size, num_epochs)
            yielding (images, audio) tensor batches.
        log_dir: directory under which 'train' summaries are written.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # ##========================== DEFINE PIPELINE ============================###
    images, audio = dataset.input_pipeline(batch_size=batch_size,
                                           num_epochs=epochs)
    tf.summary.image('input_image', images)
    tf.summary.image('audio_images', audio)
    images_normalized = norm_img(images)  # Normalization
    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(gen_input=audio, batch_size=batch_size, reuse=False)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Fake and real batches are concatenated so the discriminator runs once;
    # the split below mirrors that order (fake first, real second).
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                               batch_size=batch_size,
                               reuse=False)
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))
    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization
    # ###========================== DEFINE TRAIN OPS ==========================###
    # BEGAN hyper-parameters: k_t learning rate and diversity ratio.
    lambda_k = 0.001
    gamma = 0.5
    k_t = tf.Variable(0., trainable=False, name='k_t')
    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(1e-4, trainable=False)
        decay_rate = 0.5
        decay_steps = 116722
        learning_rate = tf.train.inverse_time_decay(lr,
                                                    global_step=global_step,
                                                    decay_rate=decay_rate,
                                                    decay_steps=decay_steps)
    # BEGAN losses: L1 autoencoder reconstruction errors.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake
    g_loss = tf.reduce_mean(tf.abs(ae_gen - output_gen)) + \
        10e-3 * tf.reduce_mean(tf.losses.mean_squared_error(images, ae_gen))
    # FIX: global_step was never passed to an optimizer, so it stayed at 0
    # and inverse_time_decay never actually decayed the learning rate.
    # Only ONE optimizer increments it, so global_step == training steps.
    g_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        d_loss, var_list=d_vars)
    balance = gamma * d_loss_real - g_loss
    # k_t update only fires after both optimizers have run.
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))
    # BEGAN convergence measure.
    m_global = d_loss_real + tf.abs(balance)
    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('k_t', k_t)
    # FIX: was 'learning rate'; summary names may not contain spaces (TF
    # sanitizes them with a warning) and the other trainers in this project
    # use 'learning_rate'.
    tf.summary.scalar('learning_rate', learning_rate)
    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        # Coordinate the different workers for the input data pipeline
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            iteration = 0
            while not coord.should_stop():
                iteration += 1
                # ##========================= train BEGAN =========================###
                # FIX: fetch the summary in the SAME sess.run as the training
                # step.  The previous separate sess.run(summary) dequeued a
                # fresh pipeline batch and ran an extra forward pass, so the
                # logged summaries never described the batch actually trained
                # on.  (k_update already triggers g_optim/d_optim via its
                # control dependencies, so they need not be fetched.)
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary])
                print("kt: %.8f Mglobal: %.8f" % (kt, mGlobal))
                summary_writer.add_summary(summary_str, iteration)
                summary_writer.flush()
        # ##========================= evaluate data =========================###
        except tf.errors.OutOfRangeError:
            # Pipeline exhausted after `epochs` passes.
            print('Done -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
def train(batch_size, epochs, dataset):
    """Generate sample images from the trained noise-to-face BEGAN.

    Despite the name, no optimizer step is ever executed: the function
    rebuilds the full training graph (so a checkpoint restores cleanly),
    optionally restores from ``args.checkpoint_dir``, and writes 10 batches
    of generated images to ``test_image_<i>.png``.

    Args:
        batch_size: images generated per batch.
        epochs: unused here; kept so the signature matches the other
            train() variants.
        dataset: object exposing get_items() -> (face paths, audio paths).
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64
    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    z = tf.placeholder('float32', [None, 64], name='t_noise_generator')
    images_normalized = norm_img(images)  # Normalization
    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(z=z, reuse=False)
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0), reuse=False)
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization
    # ###========================== DEFINE TRAIN OPS ==========================###
    # These ops mirror the training graph for checkpoint compatibility;
    # none of them are run below.
    lambda_k = 0.001
    gamma = 0.7
    k_t = tf.Variable(0., trainable=False, name='k_t')
    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00008, trainable=False)
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake
    g_loss = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    g_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        d_loss, var_list=d_vars, global_step=global_step)
    balance = gamma * d_loss_real - g_loss
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))
    m_global = d_loss_real + tf.abs(balance)
    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)
        items_faces, items_audio = dataset.get_items()
        for iteration in range(0, 10):
            # Real images are loaded and fed for completeness, although the
            # fetched output_gen tensor depends only on the z placeholder.
            input_images = np.empty([batch_size, 64, 64, 3])
            count = 0
            for face in items_faces[iteration * batch_size:iteration *
                                    batch_size + batch_size]:
                input_image = Image.open(face)
                input_image = np.asarray(input_image, dtype=float)
                input_images[count] = input_image
                count += 1
            # FIX: was np.random.uniform(0, 0, ...), which always yields
            # zeros -- np.zeros states the all-zero latent intent directly.
            input_z = np.zeros([batch_size, 64])
            # FIX: converted the Python 2 `print "..."` statement to the
            # parenthesized form, valid under both Python 2 and 3.
            print("Input vector: {}".format(input_z[0, :50]))
            output_image = sess.run(output_gen,
                                    feed_dict={
                                        images: input_images,
                                        z: input_z
                                    })[0]
            ima = Image.fromarray(output_image.astype(np.uint8), 'RGB')
            ima.save("test_image_{}.png".format(iteration))
            # (removed dead code: a no-op `iteration += 1` inside the for
            # loop and an unused `total` counter)