Exemplo n.º 1
0
def valid_parse(ldr_dir, hdr_dir):
    """Load and decode one LDR/HDR validation image pair.

    Args:
        ldr_dir: path tensor (or string) of the LDR image file.
        hdr_dir: path tensor (or string) of the HDR image file.

    Returns:
        Tuple ``(ldr, hdr, Hth)`` where ``ldr`` is the LDR image converted to
        float32, ``hdr`` is the normalized HDR image, and ``Hth`` is the third
        value returned by ``norm_img`` (presumably a normalization threshold —
        verify against ``norm_img``'s definition).
    """
    ldr_bytes = tf.read_file(ldr_dir)
    hdr_bytes = tf.read_file(hdr_dir)
    # Decoding is delegated to the Python-side helper via tf.py_func.
    ldr, hdr = tf.py_func(read_valid_images, [ldr_bytes, hdr_bytes],
                          [tf.uint8, tf.float32])
    _, hdr, Hth = norm_img(hdr)
    ldr = tf.image.convert_image_dtype(ldr, tf.float32)
    return ldr, hdr, Hth
Exemplo n.º 2
0
def train(batch_size, epochs, dataset, log_dir):
    """Train a GAN with squared-error (least-squares) losses on 64x64 images.

    The generator maps a 64-d uniform noise vector to an image; fake and real
    images are scored by the discriminator in one concatenated batch.
    Summaries are written under ``log_dir/train``; checkpoints are saved every
    3000 iterations to ``args.checkpoint_dir``.

    Args:
        batch_size: number of face images per training step.
        epochs: number of passes over the face list.
        dataset: object whose ``get_items()`` returns (face paths, audio
            paths); only the face paths are used here.
        log_dir: root directory for TensorBoard logs.

    Relies on module-level names defined elsewhere in the file: ``generator``,
    ``discriminator``, ``norm_img``, ``denorm_img``, ``restore_model`` and the
    global ``args`` (``resume``, ``checkpoint_dir``).
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64

    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    z = tf.placeholder('float32', [None, 64], name='t_noise_generator')
    # Per-sample soft label targets for the squared-error GAN losses below.
    y_gan_real = tf.placeholder('float32', [None, 1], name='t_labels_real')
    y_gan_fake = tf.placeholder('float32', [None, 1], name='t_labels_fake')
    y_generator = tf.placeholder('float32', [None, 1],
                                 name='t_labels_generator')
    tf.summary.image('input_image', images)
    images_normalized = norm_img(images)  # Normalization

    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(z=z, reuse=False)
    tf.summary.image('generated_normalized_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Fake and real images go through the discriminator in a single
    # concatenated forward pass.
    net_d, logits = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                                  reuse=False)
    # First half of the outputs corresponds to generated images, second half
    # to real ones (order of the concat above).
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)

    # ###========================== DEFINE TRAIN OPS ==========================###

    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(1e-4, trainable=False)

    # Squared-error losses against the soft labels fed at run time.
    d_loss_real = tf.reduce_mean(tf.square(net_d_real - y_gan_real),
                                 name='d_loss_real')
    d_loss_fake = tf.reduce_mean(tf.square(net_d_false - y_gan_fake),
                                 name='d_loss_fake')
    d_loss = d_loss_real + d_loss_fake
    g_loss = tf.reduce_mean(tf.square(net_d_false - y_generator),
                            name='g_loss_gan')
    g_optim = tf.train.AdamOptimizer(lr).minimize(g_loss, var_list=g_vars)
    d_optim = tf.train.AdamOptimizer(lr).minimize(d_loss, var_list=d_vars)

    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('g_loss', g_loss)

    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1)
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)

        items_faces, items_audio = dataset.get_items()
        total = 0  # cross-epoch step counter used for summary logging
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): if the final slice below is shorter than
                # batch_size, the remaining rows of input_images keep
                # np.empty() garbage and are still fed to the network —
                # TODO confirm this is intended.
                input_images = np.empty([batch_size, 64, 64, 3])
                count = 0
                for face in items_faces[iteration *
                                        batch_size:iteration * batch_size +
                                        batch_size]:
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    count += 1
                input_z = np.random.uniform(-1., 1, size=[batch_size, 64])

                # Soft labels: real in [0.7, 1.2), fake in [0.0, 0.3);
                # for ~10% of the batches the labels are swapped.
                if np.random.uniform() > 0.1:
                    # give correct classifications
                    labels_real = np.random.uniform(size=[batch_size, 1],
                                                    low=0.7,
                                                    high=1.2)
                    labels_fake = np.random.uniform(size=[batch_size, 1],
                                                    low=0.0,
                                                    high=0.3)
                else:
                    # give wrong classifications (noisy labels)
                    labels_fake = np.random.uniform(size=[batch_size, 1],
                                                    low=0.7,
                                                    high=1.2)
                    labels_real = np.random.uniform(size=[batch_size, 1],
                                                    low=0.0,
                                                    high=0.3)

                labels_generator = np.random.uniform(size=[batch_size, 1],
                                                     low=0.7,
                                                     high=1.2)

                # ##========================= train SRGAN =========================###
                summary_str, gLoss, dLoss, _, _ = sess.run(
                    [summary, g_loss, d_loss, g_optim, d_optim],
                    feed_dict={
                        images: input_images,
                        z: input_z,
                        y_gan_real: labels_real,
                        y_gan_fake: labels_fake,
                        y_generator: labels_generator
                    })
                summary_writer.add_summary(summary_str, total)
                print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f." %
                      (j, iteration, gLoss, dLoss))

                # ##========================= save checkpoint =========================###
                if iteration % 3000 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # NOTE(review): `rest` spans the entire last batch region, whose
            # items were already consumed by the final loop iteration above —
            # verify the leftover-batch logic is intended.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                for face in items_faces[len(items_faces) - rest:]:
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    count += 1
                input_z = np.random.uniform(-1., 1, size=[rest, 64])
                if np.random.uniform() > 0.1:
                    # give correct classifications
                    labels_real = np.random.uniform(size=[rest, 1],
                                                    low=0.7,
                                                    high=1.2)
                    labels_fake = np.random.uniform(size=[rest, 1],
                                                    low=0.0,
                                                    high=0.3)
                else:
                    # give wrong classifications (noisy labels)
                    labels_fake = np.random.uniform(size=[rest, 1],
                                                    low=0.7,
                                                    high=1.2)
                    labels_real = np.random.uniform(size=[rest, 1],
                                                    low=0.0,
                                                    high=0.3)

                labels_generator = np.random.uniform(size=[rest, 1],
                                                     low=0.7,
                                                     high=1.2)

                # ##========================= train SRGAN =========================###
                summary_str, gLoss, dLoss, _, _ = sess.run(
                    [summary, g_loss, d_loss, g_optim, d_optim],
                    feed_dict={
                        images: input_images,
                        z: input_z,
                        y_gan_real: labels_real,
                        y_gan_fake: labels_fake,
                        y_generator: labels_generator
                    })
                print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f." %
                      (j, iteration, gLoss, dLoss))
                # NOTE(review): summaries here are logged at step `iteration`,
                # while the main loop uses `total` — TODO confirm.
                summary_writer.add_summary(summary_str, iteration)
Exemplo n.º 3
0
def train(batch_size, epochs, dataset, log_dir):
    """Train a BEGAN-style audio-to-face generator on 64x64 images.

    The generator is conditioned on a 35x12 MFCC patch; the discriminator is
    an autoencoder whose L1 reconstruction errors drive the BEGAN losses
    (variables ``k_t``, ``gamma``, ``lambda_k`` and the convergence measure
    ``m_global``).  Summaries are written under ``log_dir/train``; checkpoints
    are saved every 3630 iterations to ``args.checkpoint_dir``.

    Args:
        batch_size: number of (face, MFCC) pairs per training step.
        epochs: number of passes over the dataset.
        dataset: object whose ``get_items()`` returns parallel lists of face
            image paths and MFCC ``.npy`` paths.
        log_dir: root directory for TensorBoard logs.

    Relies on module-level names defined elsewhere in the file: ``generator``,
    ``discriminator``, ``norm_img``, ``denorm_img``, ``restore_model`` and the
    global ``args`` (``resume``, ``checkpoint_dir``).
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64
    audio_width = 12
    audio_height = 35

    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    audio = tf.placeholder('float32', [None, audio_height, audio_width, 1],
                           name='t_audio_input_generator')
    tf.summary.image('input_image', images)
    images_normalized = norm_img(images)  # Normalization

    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(input_audio=audio, reuse=False)
    tf.summary.image('norm_generated_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Fake and real images go through the autoencoding discriminator in a
    # single concatenated forward pass; d_z is its latent code.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                               reuse=False)
    # First half of the outputs corresponds to generated images, second half
    # to real ones (order of the concat above).
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    d_z_false, d_z_real = tf.split(d_z.outputs, num_or_size_splits=2, axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))

    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization

    # ###========================== DEFINE TRAIN OPS ==========================###
    # BEGAN control parameters: k_t balances the two discriminator terms and
    # is updated by k_update below; gamma is the diversity ratio target.
    lambda_k = 0.001
    gamma = 0.7
    k_t = tf.Variable(0., trainable=False, name='k_t')

    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00008, trainable=False)

    decay_rate = 0.5
    decay_steps = 116722
    learning_rate = tf.train.inverse_time_decay(lr,
                                                decay_rate=decay_rate,
                                                decay_steps=decay_steps,
                                                global_step=global_step)

    # L1 autoencoder reconstruction errors on real and generated images.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake

    # Extra generator term: match real/fake discriminator latent codes.
    g_loss_discriminativefeatures = tf.reduce_mean(tf.abs(d_z_real -
                                                          d_z_false))
    g_loss = tf.reduce_mean(
        tf.abs(ae_gen - output_gen)) + 10e-2 * g_loss_discriminativefeatures

    # NOTE(review): global_step is passed to BOTH minimize calls, so it
    # advances twice per training step — this halves the effective horizon of
    # the inverse-time decay above. TODO confirm intended.
    g_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        d_loss, var_list=d_vars, global_step=global_step)

    # Closed-loop update of k_t, clipped to [0, 1], applied after both
    # optimizer steps have run (control_dependencies).
    balance = gamma * d_loss_real - g_loss
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))

    # BEGAN convergence measure.
    m_global = d_loss_real + tf.abs(balance)

    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('g_loss_discriminativefeatures',
                      g_loss_discriminativefeatures)
    tf.summary.scalar('k_t', k_t)
    tf.summary.scalar('learning_rate', learning_rate)

    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1,
                               var_list=tf.trainable_variables())
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess)

        # Coordinate the different workers for the input data pipeline
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(coord=coord)

        items_faces, items_audio = dataset.get_items()
        total = 0  # cross-epoch step counter used for summary logging
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): if the final slices below are shorter than
                # batch_size, the remaining rows of these buffers keep
                # np.empty() garbage and are still fed — TODO confirm.
                input_images = np.empty([batch_size, 64, 64, 3])
                audio_MFCC = np.empty([batch_size, 35, 12, 1])
                count = 0
                for face, input_audio in zip(
                        items_faces[iteration *
                                    batch_size:iteration * batch_size +
                                    batch_size],
                        items_audio[iteration *
                                    batch_size:iteration * batch_size +
                                    batch_size]):
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    input_audio = np.load(input_audio)
                    input_audio = np.asarray(input_audio, dtype=float)
                    # Add a trailing channel axis: (35, 12) -> (35, 12, 1).
                    audio_MFCC[count] = input_audio[:, :, np.newaxis]
                    count += 1
                # ##========================= train SRGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        audio: audio_MFCC
                    })
                print("Epoch: %2d Iteration: %2d kt: %.8f Mglobal: %.8f." %
                      (j, iteration, kt, mGlobal))
                summary_writer.add_summary(summary_str, total)

                # summary_writer.flush()

                # ##========================= save checkpoint =========================###
                if iteration % 3630 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # NOTE(review): `rest` spans the entire last batch region, whose
            # items were already consumed by the final loop iteration above —
            # verify the leftover-batch logic is intended.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                audio_MFCC = np.empty([rest, 35, 12, 1])
                # NOTE(review): the audio slice is offset by len(items_faces),
                # not len(items_audio) — fine only if both lists have equal
                # length; TODO confirm.
                for face, input_audio in zip(
                        items_faces[len(items_faces) - rest:],
                        items_audio[len(items_faces) - rest:]):
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    input_audio = np.load(input_audio)
                    input_audio = np.asarray(input_audio, dtype=float)
                    audio_MFCC[count] = input_audio[:, :, np.newaxis]
                    count += 1
                # ##========================= train SRGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        audio: audio_MFCC
                    })
                print("Iteration: %2d kt: %.8f Mglobal: %.8f." %
                      (iteration, kt, mGlobal))
                # NOTE(review): logged at step `iteration`, unlike the main
                # loop which uses `total` — TODO confirm.
                summary_writer.add_summary(summary_str, iteration)
Exemplo n.º 4
0
    def __init__(self,
                 model,
                 image,
                 image_name=None,
                 max_tr_steps=50000,
                 load_path=''):
        '''
        Build an encoder that optimizes latent variables so that the trained
        generative model (`dcgan` or `began`) reconstructs a given image.

        image is assumed to be a path to a precropped 64x64x3 uint8 image

        model: trained model object exposing at least `sess`, `model_type`,
            `graph`, `cc` (causal controller) and `z_gen`.
        image_name: optional label for output directories; defaults to the
            image's basename with dots replaced by underscores.
        max_tr_steps: maximum number of encoding (gradient) steps.
        load_path: checkpoint path consumed by self.init() (set but used
            elsewhere — confirm against `init`).
        '''

        #Some hardcoded defaults here
        self.log_step = 500
        self.lr = 0.0005
        self.max_tr_steps = max_tr_steps

        self.model = model
        self.load_path = load_path

        self.image_name = image_name or os.path.basename(image).replace(
            '.', '_')
        self.encode_dir = make_encode_dir(model, self.image_name)
        self.model_dir = self.encode_dir  #different from self.model.model_dir
        self.save_dir = os.path.join(self.model_dir, 'save')

        self.sess = self.model.sess  #session should already be in progress

        # Data layout depends on the underlying model family.
        if model.model_type == 'dcgan':
            self.data_format = 'NHWC'  #Don't change
        elif model.model_type == 'began':
            self.data_format = model.data_format  #'NCHW' if gpu
        else:
            raise Exception('Should not happen. model_type=', model.model_type)

        #Notation:
        #self.uint_x/G ; 3D [0,255]
        #self.x/G ; 4D [-1,1]
        self.uint_x = read_prepared_uint8_image(image)  #x is [0,255]

        print('Read image shape', self.uint_x.shape)
        self.x = norm_img(np.expand_dims(self.uint_x, 0),
                          self.data_format)  #bs=1
        #self.x=norm_img(tf.expand_dims(self.uint_x,0),self.data_format)#bs=1
        print('Shape after norm:', self.x.get_shape().as_list())

        ##All variables created under encoder have uniform init
        vs = tf.variable_scope('encoder',
                               initializer=tf.random_uniform_initializer(
                                   minval=-1., maxval=1.),
                               dtype=tf.float32)

        with vs as scope:
            #avoid creating adams params
            optimizer = tf.train.GradientDescentOptimizer
            #optimizer = tf.train.AdamOptimizer
            self.g_optimizer = optimizer(self.lr)

            # One trainable latent per causal-controller node, plus one for
            # the generator's noise input.
            encode_var = {
                n.name: var_like_z(n.z, n.name)
                for n in model.cc.nodes
            }
            encode_var['gen'] = var_like_z(model.z_gen, 'gen')
            # NOTE(review): Python 2 print statement below — confirm the
            # target Python version (other snippets use print() calls).
            print 'encode variables created'
            self.train_var = tf.contrib.framework.get_variables(scope)
            self.step = tf.Variable(0, name='step')
            self.var = tf.contrib.framework.get_variables(scope)

        #all encode vars created by now
        self.saver = tf.train.Saver(var_list=self.var)
        print('Summaries will be written to ', self.model_dir)
        self.summary_writer = tf.summary.FileWriter(self.model_dir)

        #load or initialize enmodel variables
        self.init()

        # Rebuild the generator graph (reuse=True) with the trainable latent
        # variables substituted for the usual noise inputs.
        if model.model_type == 'dcgan':
            self.cc = CausalController(graph=model.graph,
                                       input_dict=encode_var,
                                       reuse=True)
            self.fake_labels_logits = tf.concat(self.cc.list_label_logits(),
                                                -1)
            self.z_fake_labels = self.fake_labels_logits
            #self.z_gen = noise_like_z( self.model.z_gen,'en_z_gen')
            self.z_gen = encode_var['gen']
            self.z = tf.concat([self.z_gen, self.z_fake_labels],
                               axis=1,
                               name='z')

            self.G = model.generator(self.z, bs=1, reuse=True)

        elif model.model_type == 'began':
            with tf.variable_scope('tower'):  #reproduce variable scope
                self.cc = CausalController(graph=model.graph,
                                           input_dict=encode_var,
                                           reuse=True)

                self.fake_labels = tf.concat(self.cc.list_labels(), -1)
                self.fake_labels_logits = tf.concat(
                    self.cc.list_label_logits(), -1)
                #self.z_gen = noise_like_z( self.model.z_gen,'en_z_gen')
                self.z_gen = encode_var['gen']
                self.z = tf.concat([self.fake_labels, self.z_gen],
                                   axis=-1,
                                   name='z')

                self.G, _ = GeneratorCNN(self.z,
                                         model.conv_hidden_num,
                                         model.channel,
                                         model.repeat_num,
                                         model.data_format,
                                         reuse=True)

                # Autoencode the generated image; D_zG is its latent code.
                d_out, self.D_zG, self.D_var = DiscriminatorCNN(
                    self.G,
                    model.channel,
                    model.z_num,
                    model.repeat_num,
                    model.conv_hidden_num,
                    model.data_format,
                    reuse=True)

                # Latent code of the target image for the loss below.
                _, self.D_zX, _ = DiscriminatorCNN(self.x,
                                                   model.channel,
                                                   model.z_num,
                                                   model.repeat_num,
                                                   model.conv_hidden_num,
                                                   model.data_format,
                                                   reuse=True)
                self.norm_AE_G = d_out

                #AE_G, AE_x = tf.split(d_out, 2)
                self.AE_G = denorm_img(self.norm_AE_G, model.data_format)
            self.aeg_sum = tf.summary.image('encoder/AE_G', self.AE_G)

        # One scalar summary (mean label value) per causal node.
        node_summaries = []
        for node in self.cc.nodes:
            with tf.name_scope(node.name):
                ave_label = tf.reduce_mean(node.label)
                node_summaries.append(tf.summary.scalar('ave', ave_label))

        #unclear how scope with adam param works
        #with tf.variable_scope('encoderGD') as scope:

        #use L1 loss
        #self.g_loss_image = tf.reduce_mean(tf.abs(self.x - self.G))

        #use L2 loss
        #self.g_loss_image = tf.reduce_mean(tf.square(self.x - self.G))

        #use autoencoder reconstruction loss  #3.1.1 series
        #self.g_loss_image = tf.reduce_mean(tf.abs(self.x - self.norm_AE_G))

        #use L1 in autoencoded space# 3.2
        self.g_loss_image = tf.reduce_mean(tf.abs(self.D_zX - self.D_zG))

        # NOTE(review): self.summ_col is read here and below but never
        # assigned in this method — presumably set elsewhere; verify.
        g_loss_sum=tf.summary.scalar( 'encoder/g_loss_image',\
                          self.g_loss_image,self.summ_col)

        self.g_loss = self.g_loss_image
        self.train_op = self.g_optimizer.minimize(self.g_loss,
                                                  var_list=self.train_var,
                                                  global_step=self.step)

        self.uint_G = tf.squeeze(denorm_img(self.G,
                                            self.data_format))  #3D[0,255]
        gimg_sum=tf.summary.image( 'encoder/Reconstruct',tf.stack([self.uint_x,self.uint_G]),\
                max_outputs=2,collections=self.summ_col)

        #self.summary_op=tf.summary.merge_all(self.summ_col)
        #self.summary_op=tf.summary.merge_all(self.summ_col)

        if model.model_type == 'dcgan':
            self.summary_op = tf.summary.merge([g_loss_sum, gimg_sum] +
                                               node_summaries)
        elif model.model_type == 'began':
            self.summary_op = tf.summary.merge(
                [g_loss_sum, gimg_sum, self.aeg_sum] + node_summaries)
Exemplo n.º 5
0
def train(batch_size, epochs, dataset, log_dir):
    """Train a BEGAN-style deblurring generator on 64x64 face images.

    The generator receives a blurry face and is trained to reproduce the
    sharp one; the discriminator is an autoencoder whose L1 reconstruction
    errors drive the BEGAN losses (``k_t``, ``gamma``, ``lambda_k``,
    ``m_global``).  An extra MSE term ties the generated image to the sharp
    target.  Summaries go under ``log_dir/train``; checkpoints are saved
    every 3000 iterations to ``args.checkpoint_dir``.

    Args:
        batch_size: number of (sharp, blurry) pairs per training step.
        epochs: number of passes over the dataset.
        dataset: object whose ``get_items_blurry()`` returns parallel lists
            of sharp and blurry face image paths.
        log_dir: root directory for TensorBoard logs.

    Relies on module-level names defined elsewhere in the file: ``generator``,
    ``discriminator``, ``norm_img``, ``denorm_img``, ``restore_model`` and the
    global ``args`` (``resume``, ``checkpoint_dir``).
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64

    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_input')
    generator_input = tf.placeholder('float32',
                                     [None, image_height, image_width, 3],
                                     name='t_input_generator')
    tf.summary.image('input_image', images)
    tf.summary.image('generator_input', generator_input)
    images_normalized = norm_img(images)  # Normalization
    generator_input_normalized = norm_img(generator_input)

    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(gen_in=generator_input_normalized, reuse=False)
    tf.summary.image('norm_generated_image', net_gen.outputs)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Fake and real images go through the autoencoding discriminator in a
    # single concatenated forward pass.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                               reuse=False)
    # First half of the outputs corresponds to generated images, second half
    # to real ones (order of the concat above).
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))

    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization

    # ###========================== DEFINE TRAIN OPS ==========================###
    # BEGAN control parameters: k_t balances the two discriminator terms and
    # is updated by k_update below; gamma is the diversity ratio target.
    lambda_k = 0.001
    gamma = 0.7
    k_t = tf.Variable(0., trainable=False, name='k_t')

    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00004, trainable=False)

    # L1 autoencoder reconstruction errors on real and generated images.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake

    # Generator loss: adversarial term plus a down-weighted (1e-2) MSE term
    # pulling the generated image towards the sharp target.
    g_loss_MSE = 1e-2 * tf.losses.mean_squared_error(output_gen, images)
    g_loss_adv = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    g_loss = g_loss_adv + g_loss_MSE

    # NOTE(review): global_step is passed to BOTH minimize calls, so it
    # advances twice per training step — TODO confirm intended.
    g_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        d_loss, var_list=d_vars, global_step=global_step)

    # Closed-loop update of k_t, clipped to [0, 1], applied after both
    # optimizer steps have run (control_dependencies).
    balance = gamma * d_loss_real - g_loss_adv
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))

    # BEGAN convergence measure.
    m_global = d_loss_real + tf.abs(balance)

    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('g_loss', g_loss)
    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('k_t', k_t)

    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=1)
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)

        items_faces, items_faces_blurry = dataset.get_items_blurry()
        total = 0  # cross-epoch step counter used for summary logging
        for j in range(0, epochs):
            iteration = 0
            while iteration * batch_size < len(items_faces):
                # NOTE(review): if the final slices below are shorter than
                # batch_size, the remaining rows of these buffers keep
                # np.empty() garbage and are still fed — TODO confirm.
                input_images = np.empty([batch_size, 64, 64, 3])
                input_images_blurry = np.empty([batch_size, 64, 64, 3])
                count = 0
                for face, face_blurry in zip(
                        items_faces[iteration *
                                    batch_size:iteration * batch_size +
                                    batch_size],
                        items_faces_blurry[iteration *
                                           batch_size:iteration * batch_size +
                                           batch_size]):
                    # Normal images
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    # Blurry images
                    # input_blurry_0 = gaussian_filter(input_image[:, :, 0], sigma=2)
                    # input_blurry_1 = gaussian_filter(input_image[:, :, 1], sigma=2)
                    # input_blurry_2 = gaussian_filter(input_image[:, :, 2], sigma=2)
                    # input_images_blurry[count, :, :, 0] = input_blurry_0
                    # input_images_blurry[count, :, :, 1] = input_blurry_1
                    # input_images_blurry[count, :, :, 2] = input_blurry_2

                    input_image = Image.open(face_blurry)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images_blurry[count] = input_image
                    count += 1

                # ##========================= train BEGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        generator_input: input_images_blurry
                    })
                summary_writer.add_summary(summary_str, total)
                # Console progress only every 16 iterations.
                if iteration % 16 == 0 and iteration > 0:
                    print("Epoch: %2d Iteration: %2d kt: %.8f Mglobal: %.8f." %
                          (j, iteration, kt, mGlobal))

                # ##========================= save checkpoint =========================###
                if iteration % 3000 == 0 and iteration > 0:
                    tf.logging.info('Saving checkpoint')
                    saver.save(sess,
                               args.checkpoint_dir + "/checkpoint",
                               global_step=iteration,
                               write_meta_graph=False)
                iteration += 1
                total += 1
            # NOTE(review): `rest` spans the entire last batch region, whose
            # items were already consumed by the final loop iteration above —
            # verify the leftover-batch logic is intended.
            rest = len(items_faces) - ((iteration - 1) * batch_size)
            if rest > 0:
                count = 0
                input_images = np.empty([rest, 64, 64, 3])
                input_images_blurry = np.empty([rest, 64, 64, 3])
                for face, face_blurry in zip(
                        items_faces[len(items_faces) - rest:],
                        items_faces_blurry[len(items_faces_blurry) - rest:]):
                    # Normal images
                    input_image = Image.open(face)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images[count] = input_image
                    # Blurry images
                    input_image = Image.open(face_blurry)
                    input_image = np.asarray(input_image, dtype=float)
                    input_images_blurry[count] = input_image
                    count += 1
                # ##========================= train BEGAN =========================###
                kt, mGlobal, summary_str = sess.run(
                    [k_update, m_global, summary],
                    feed_dict={
                        images: input_images,
                        generator_input: input_images_blurry
                    })
                print("Iteration: %2d kt: %.8f Mglobal: %.8f." %
                      (iteration, kt, mGlobal))
                # NOTE(review): logged at step `iteration`, unlike the main
                # loop which uses `total` — TODO confirm.
                summary_writer.add_summary(summary_str, iteration)
Exemplo n.º 6
0
# Train a LinearSVC digit classifier on HOG features extracted from the
# dataset images and persist the fitted model to disk.
# FIX: ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
# removed in 0.23; prefer the standalone ``joblib`` package and fall back to
# the vendored copy on old installations so behavior is unchanged there.
try:
    import joblib as jb
except ImportError:  # scikit-learn < 0.23 without standalone joblib
    from sklearn.externals import joblib as jb
from sklearn.svm import LinearSVC
import utils
import argparse

argp = argparse.ArgumentParser()
argp.add_argument("-d", "--dataset", required=True, help="dataset file")
argp.add_argument("-m",
                  "--model",
                  required=True,
                  help="Path model being saved")
args = vars(argp.parse_args())

# Load raw digit images and their labels from the dataset file.
Numbers, Labels = utils.getting_data(args["dataset"])
data = []

# Preprocess each digit the same way inference does: size-normalize,
# center by mass, then describe with HOG (parameters must match at
# prediction time).
for image in Numbers:
    image = utils.norm_img(image, 20)
    image = utils.mass_center(image, (20, 20))
    dat = utils.hog(image,
                    orientations=18,
                    pixelsPerCell=(5, 5),
                    cellsPerBlock=(1, 1),
                    normalize=True)
    data.append(dat)

# Fixed random_state keeps training reproducible across runs.
model = LinearSVC(random_state=40)
model.fit(data, Labels)
jb.dump(model, args["model"])
Exemplo n.º 7
0
# Locate digit candidates in the preprocessed image and classify each one
# with the trained HOG + LinearSVC model, drawing the predictions.
# NOTE(review): `img`, `img_blur`, `img_gray`, `model`, `utils` and `ms`
# are defined earlier in this script (outside this excerpt); `ms` is
# presumably mahotas (provides `thresholding.otsu`) -- confirm.
img_corner = cv2.Canny(img_blur, 30, 150)

# NOTE(review): cv2.findContours returns 2 values on OpenCV 2.x/4.x but 3
# on 3.x -- this unpacking assumes the 2-tuple return.
(contours, _) = cv2.findContours(img_corner.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
# Sort contours left-to-right by the x coordinate of their bounding box.
contours = sorted([(cur, cv2.boundingRect(cur)[0]) for cur in contours],
                  key=lambda x: x[1])

for (cur, _) in contours:
    (x, y, width, height) = cv2.boundingRect(cur)

    # Skip contours too small to plausibly be digits.
    if width >= 8 and height >= 19:
        tmp = img_gray[y:y + height, x:x + width]
        cut_img = tmp.copy()
        # Binarize with Otsu's threshold, then invert so digits are white
        # on black (matching the training data convention).
        h_th = ms.thresholding.otsu(tmp)
        cut_img[cut_img > h_th] = 255
        cut_img = cv2.bitwise_not(cut_img)
        # Same preprocessing used at training time -- presumably
        # size-normalization to 20px and mass-centering on a 20x20
        # canvas; confirm against utils.
        cut_img = utils.norm_img(cut_img, 20)
        cut_img = utils.mass_center(cut_img, (20, 20))

        # HOG parameters must match those used when training the model.
        dat = utils.hog(cut_img,
                        orientations=18,
                        pixelsPerCell=(5, 5),
                        cellsPerBlock=(1, 1),
                        normalize=True)
        Number = model.predict(dat.reshape(1, -1))[0]

        # Draw the bounding box and predicted digit; block until a key
        # press before moving to the next candidate.
        cv2.rectangle(img, (x, y), (x + width, y + height), (0, 255, 0), 1)
        cv2.putText(img, str(Number), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_DUPLEX, 1.1, (0, 255, 0), 2)
        cv2.imshow("img", img)
        cv2.waitKey(0)
Exemplo n.º 8
0
def train(batch_size, epochs, dataset, log_dir):
    """Train a BEGAN-style audio-to-image GAN with a queue input pipeline.

    Args:
        batch_size: number of examples per training step.
        epochs: number of passes over the data; the input pipeline raises
            ``tf.errors.OutOfRangeError`` when exhausted, ending training.
        dataset: object exposing ``input_pipeline(batch_size, num_epochs)``
            returning ``(images, audio)`` tensor batches.
        log_dir: directory where TensorBoard summaries are written.
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # ##========================== DEFINE PIPELINE ============================###
    images, audio = dataset.input_pipeline(batch_size=batch_size,
                                           num_epochs=epochs)
    tf.summary.image('input_image', images)
    tf.summary.image('audio_images', audio)
    images_normalized = norm_img(images)  # Normalization

    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(gen_input=audio, batch_size=batch_size, reuse=False)
    tf.summary.image('generated_image', denorm_img(net_gen.outputs))
    # Run the discriminator (an autoencoder in BEGAN) once over the
    # concatenated fake+real batch, then split its reconstructions.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                               batch_size=batch_size,
                               reuse=False)
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)
    tf.summary.image('autoencoder_real', denorm_img(net_d_real))
    tf.summary.image('autoencoder_fake', denorm_img(net_d_false))

    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization

    # ###========================== DEFINE TRAIN OPS ==========================###
    lambda_k = 0.001  # gain for the k_t control update
    gamma = 0.5  # balance target between the two reconstruction losses
    k_t = tf.Variable(0., trainable=False, name='k_t')

    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(1e-4, trainable=False)

    decay_rate = 0.5
    decay_steps = 116722
    learning_rate = tf.train.inverse_time_decay(lr,
                                                global_step=global_step,
                                                decay_rate=decay_rate,
                                                decay_steps=decay_steps)

    # L1 autoencoder reconstruction losses on real and generated images.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))

    d_loss = d_loss_real - k_t * d_loss_fake
    g_loss = tf.reduce_mean(tf.abs(ae_gen - output_gen)) + \
             10e-3 * tf.reduce_mean(tf.losses.mean_squared_error(images, ae_gen))
    # FIX: pass global_step to one minimize() call so it advances once per
    # joint step; previously it was never incremented, so the inverse-time
    # learning-rate decay above never took effect.
    g_optim = tf.train.AdamOptimizer(learning_rate).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate).minimize(d_loss,
                                                             var_list=d_vars)

    # k_t control update: nudges k_t to keep the losses near the gamma
    # balance, clipped to [0, 1]. Running k_update also runs both optims
    # via the control dependency.
    balance = gamma * d_loss_real - g_loss
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))

    # Convergence measure: low when reconstruction is good AND balanced.
    m_global = d_loss_real + tf.abs(balance)

    tf.summary.scalar('m_global', m_global)
    tf.summary.scalar('k_t', k_t)
    tf.summary.scalar('learning rate', learning_rate)

    summary = tf.summary.merge_all()
    with tf.Session() as sess:
        # Summary writer to save logs
        summary_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                               sess.graph)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Coordinate the different workers for the input data pipeline
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            iteration = 0
            while not coord.should_stop():
                iteration += 1
                # ##========================= train BEGAN =========================###
                # FIX: fetch the summary in the SAME run() call as the train
                # ops; the previous separate sess.run(summary) dequeued (and
                # wasted) an extra pipeline batch and logged a batch that was
                # never trained on.
                kt, mGlobal, _, _, summary_str = sess.run(
                    [k_update, m_global, g_optim, d_optim, summary])
                print("kt: %.8f Mglobal: %.8f" % (kt, mGlobal))
                summary_writer.add_summary(summary_str, iteration)

                summary_writer.flush()

                # ##========================= evaluate data =========================###

        except tf.errors.OutOfRangeError:
            print('Done -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
Exemplo n.º 9
0
def train(batch_size, epochs, dataset):
    # Builds the full BEGAN graph (generator, autoencoder discriminator,
    # losses, optimizers, k_t update), optionally restores a checkpoint,
    # then runs only the generator on a fixed all-zero latent vector and
    # saves 10 sample images.
    # NOTE(review): the loop below never fetches g_optim/d_optim/k_update,
    # so no training step is executed in this visible portion -- confirm
    # that is intended.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    image_width = 64
    image_height = 64

    # ##========================== DEFINE INPUT DATA ============================###
    images = tf.placeholder('float32', [None, image_height, image_width, 3],
                            name='t_image_generator')
    z = tf.placeholder('float32', [None, 64], name='t_noise_generator')
    images_normalized = norm_img(images)  # Normalization

    # ##========================== DEFINE MODEL ============================###
    net_gen = generator(z=z, reuse=False)
    # Autoencode the fake and real batches in one discriminator pass,
    # splitting the reconstructions afterwards.
    net_d, d_z = discriminator(disc_input=tf.concat(
        [net_gen.outputs, images_normalized], axis=0),
                               reuse=False)
    net_d_false, net_d_real = tf.split(net_d.outputs,
                                       num_or_size_splits=2,
                                       axis=0)

    output_gen = denorm_img(net_gen.outputs)  # Denormalization
    ae_gen, ae_real = denorm_img(net_d_false), denorm_img(
        net_d_real)  # Denormalization

    # ###========================== DEFINE TRAIN OPS ==========================###
    lambda_k = 0.001  # gain for the k_t control update
    gamma = 0.7  # balance target between the two reconstruction losses
    k_t = tf.Variable(0., trainable=False, name='k_t')

    g_vars = tl.layers.get_variables_with_name('generator', True, True)
    d_vars = tl.layers.get_variables_with_name('discriminator', True, True)
    with tf.variable_scope('learning_rate'):
        lr = tf.Variable(0.00008, trainable=False)

    # L1 autoencoder reconstruction losses on real and generated images.
    d_loss_real = tf.reduce_mean(tf.abs(ae_real - images))
    d_loss_fake = tf.reduce_mean(tf.abs(ae_gen - output_gen))
    d_loss = d_loss_real - k_t * d_loss_fake

    g_loss = tf.reduce_mean(tf.abs(ae_gen - output_gen))

    # NOTE(review): global_step is passed to BOTH minimize() calls, so it
    # advances twice per joint step -- confirm that is intended.
    g_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_vars, global_step=global_step)
    d_optim = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        d_loss, var_list=d_vars, global_step=global_step)

    # k_t control update, clipped to [0, 1]; running k_update also runs
    # both optimizers via the control dependency.
    balance = gamma * d_loss_real - g_loss
    with tf.control_dependencies([d_optim, g_optim]):
        k_update = tf.assign(k_t,
                             tf.clip_by_value(k_t + lambda_k * balance, 0, 1))

    # BEGAN convergence measure (not fetched in the loop below).
    m_global = d_loss_real + tf.abs(balance)

    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Optionally restore weights; `args` is a module-level CLI
        # namespace defined outside this function.
        if args.resume == "True":
            print("Restoring model from checkpoint")
            restore_model(sess, args.checkpoint_dir)

        items_faces, items_audio = dataset.get_items()
        total = 0
        for iteration in range(0, 10):
            # Load the next batch_size face images from disk.
            input_images = np.empty([batch_size, 64, 64, 3])
            count = 0
            for face in items_faces[iteration *
                                    batch_size:iteration * batch_size +
                                    batch_size]:
                input_image = Image.open(face)
                input_image = np.asarray(input_image, dtype=float)
                input_images[count] = input_image
                count += 1
            # uniform(0, 0) yields an all-zero latent vector, so every
            # iteration feeds the same deterministic z.
            input_z = np.random.uniform(0, 0, size=[batch_size, 64])
            # NOTE(review): Python-2 print statement; the sibling examples
            # in this file use the print() function.
            print "Input vector: {}".format(input_z[0, :50])
            output_image = sess.run(output_gen,
                                    feed_dict={
                                        images: input_images,
                                        z: input_z
                                    })[0]

            ima = Image.fromarray(output_image.astype(np.uint8), 'RGB')
            ima.save("test_image_{}.png".format(iteration))
            # NOTE(review): reassigning the loop variable has no effect on
            # the next `for` iteration, and `total` is unused here.
            iteration += 1
            total += 1