Exemplo n.º 1
0
Arquivo: PI.py Projeto: whgusdn321/PI
class PI(object):
    def __init__(self, d, lr, lambda_pi_usl, use_pi):
        """ flags for each regularizor """
        self.use_pi = use_pi
        """ data and external toolkits """
        self.d = d  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, d, self.encoder)
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_pi_usl = lambda_pi_usl

    def encoder(self, x, is_train=True, do_update_bn=True):
        """ https://arxiv.org/pdf/1610.02242.pdf """

        if is_train:
            h = self.distort(x)
            h = self.ls.get_corrupted(x, 0.15)
        else:
            h = x

        scope = '1'
        h = self.ls.conv2d(scope + '_1', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 128, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '2'
        h = self.ls.conv2d(scope + '_1', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 256, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '3'
        h = self.ls.conv2d(scope + '_1', h, 512, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2',
                           h,
                           256,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = self.ls.conv2d(scope + '_3',
                           h,
                           128,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = tf.reduce_mean(h, reduction_indices=[1,
                                                 2])  # Global average pooling
        h = self.ls.dense(scope, h, self.d.l)

        return h

    def build_graph_train(self, x_l, y_l, x, is_supervised=True):

        o = dict()  # output
        loss = 0

        logit = self.encoder(x)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logit_l = self.encoder(
                x_l, is_train=True,
                do_update_bn=False)  # for pyx and vat loss computation
        """ Classification Loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ PI Model Loss """
        if self.use_pi:
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                _, _, o['Lp'] = self.lf.get_loss_pi(x, logit, is_train=True)
                loss += self.lambda_pi_usl * o['Lp']
        else:
            o['Lp'] = tf.constant(0)
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        #self.op = optimizer.minimize(loss)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)
        self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        logit_l = self.encoder(
            x_l, is_train=False,
            do_update_bn=False)  # for pyx and vat loss computation
        """ classification loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ set losses """
        o['loss'] = loss
        self.o_test = o

    def distort(self, x):

        _d = self.d

        def _distort(a_image):
            """
            bounding_boxes: A Tensor of type float32.
                3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. 
            Bounding boxes are supplied and returned as [y_min, x_min, y_max, x_max]
            """
            # shape: [1, 1, 4]
            bounding_boxes = tf.constant([[[1 / 10, 1 / 10, 9 / 10, 9 / 10]]],
                                         dtype=tf.float32)

            begin, size, _ = tf.image.sample_distorted_bounding_box(
                (_d.h, _d.w, _d.c),
                bounding_boxes,
                min_object_covered=(8.5 / 10.0),
                aspect_ratio_range=[7.0 / 10.0, 10.0 / 7.0])

            a_image = tf.slice(a_image, begin, size)
            """ for the purpose of distorting not use tf.image.resize_image_with_crop_or_pad under """
            a_image = tf.image.resize_images(a_image, [_d.h, _d.w])
            """ due to the size of channel returned from tf.image.resize_images is not being given,
                specify it manually. """
            a_image = tf.reshape(a_image, [_d.h, _d.w, _d.c])
            return a_image

        """ process batch times in parallel """
        return tf.map_fn(_distort, x)
Exemplo n.º 2
0
Arquivo: VAE.py Projeto: geosada/LVAT
class VAE(object):
    def __init__(self, resource):
        """ data and external toolkits """
        self.d = resource.dh  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, self.d, self.encoder)
        """ placeholders defined outside"""
        if c.DO_TRAIN:
            self.lr = resource.ph['lr']

    def encoder(self, h, is_train, y=None):

        if is_train:
            _d = self.d
            #_ = tf.summary.image('image', tf.reshape(h, [-1, _d.h, _d.w, _d.c]), 10)

        scope = 'e_1'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           128,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)

        scope = 'e_2'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           256,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)

        scope = 'e_3'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           512,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        #h = tf.nn.relu(h)
        h = tf.nn.tanh(h)

        # -> (b, 4, 4, 512)

        print('h:', h)
        #h = tf.reshape(h, (c.BATCH_SIZE, -1))
        h = tf.reshape(h, (-1, 4 * 4 * 512))
        print('h:', h)

        #sys.exit('aa')
        h = self.ls.denseV2('top_of_encoder', h, c.Z_SIZE * 2, activation=None)
        print('h:', h)
        return self.ls.vae_sampler_w_feature_slice(h, c.Z_SIZE)

    def decoder(self, h, is_train):

        scope = 'top_of_decoder'
        #h = self.ls.denseV2(scope, h, 128, activation=self.ls.lrelu)
        h = self.ls.denseV2(scope, h, 512, activation=self.ls.lrelu)
        print('h:', scope, h)

        h = tf.reshape(h, (-1, 4, 4, 32))
        print('h:', scope, h)

        scope = 'd_1'
        h = self.ls.deconv2d(scope + '_1', h, 512, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_2'
        h = self.ls.deconv2d(scope + '_2', h, 256, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_3'
        h = self.ls.deconv2d(scope + '_3', h, 128, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_4'
        h = self.ls.conv2d(scope + '_4',
                           h,
                           3,
                           filter_size=(1, 1),
                           strides=(1, 1, 1, 1),
                           padding="VALID",
                           activation=tf.nn.sigmoid)
        print('h:', scope, h)

        return h

    def build_graph_train(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        if c.IS_AUGMENTATION_ENABLED:
            x_l = distorted = self.distort(x_l)

            if c.IS_AUG_NOISE_TRUE:
                x_l = self.ls.get_corrupted(x_l, 0.15)

        z, mu, logsigma = self.encoder(x_l, is_train=True, y=y_l)

        x_reconst = self.decoder(z, is_train=True)
        """ p(x|z) Reconstruction Loss """
        o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'Bernoulli')
        o['x_reconst'] = x_reconst
        o['x'] = x_l
        loss += o['Lr']
        """ VAE KL-Divergence Loss """
        LAMBDA_VAE = 0.1
        o['mu'], o['logsigma'] = mu, logsigma
        # work around. [ToDo] make sure the root cause that makes kl loss inf
        #logsigma = tf.clip_by_norm( logsigma, 10)
        o['Lz'] = self.lf.get_loss_vae(c.Z_SIZE, mu, logsigma, _lambda=0.0)
        loss += LAMBDA_VAE * o['Lz']
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)

        # update ema in batch_normalization
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        z, mu, logsigma = self.encoder(x_l, is_train=False, y=y_l)

        x_reconst = self.decoder(mu, is_train=False)
        o['x_reconst'] = x_reconst
        o['x'] = x_l
        #o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'LeastSquare')
        o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'Bernoulli')
        #o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'DiscretizedLogistic')
        #o['Lr'] = tf.reduce_mean(tf.keras.losses.binary_crossentropy(x_l, x_reconst))
        loss += o['Lr']
        """ set losses """
        o['loss'] = loss
        self.o_test = o

    def distort(self, x):
        """
        maybe helpful http://www.redhub.io/Tensorflow/tensorflow-models/src/master/inception/inception/image_processing.py
        """
        _d = self.d

        def _distort(a_image):
            """
            bounding_boxes: A Tensor of type float32.
                3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. 
            Bounding boxes are supplied and returned as [y_min, x_min, y_max, x_max]
            """
            if c.IS_AUG_TRANS_TRUE:
                a_image = tf.pad(a_image, [[2, 2], [2, 2], [0, 0]])
                a_image = tf.random_crop(a_image, [_d.h, _d.w, _d.c])

            if c.IS_AUG_FLIP_TRUE:
                a_image = tf.image.random_flip_left_right(a_image)

            if c.IS_AUG_ROTATE_TRUE:
                from math import pi
                radian = tf.random_uniform(shape=(), minval=0,
                                           maxval=360) * pi / 180
                a_image = tf.contrib.image.rotate(a_image,
                                                  radian,
                                                  interpolation='BILINEAR')

            if c.IS_AUG_COLOR_TRUE:
                a_image = tf.image.random_brightness(a_image, max_delta=0.2)
                a_image = tf.image.random_contrast(a_image,
                                                   lower=0.2,
                                                   upper=1.8)
                a_image = tf.image.random_hue(a_image, max_delta=0.2)

            if c.IS_AUG_CROP_TRUE:
                # shape: [1, 1, 4]
                bounding_boxes = tf.constant(
                    [[[1 / 10, 1 / 10, 9 / 10, 9 / 10]]], dtype=tf.float32)

                begin, size, _ = tf.image.sample_distorted_bounding_box(
                    (_d.h, _d.w, _d.c),
                    bounding_boxes,
                    min_object_covered=(9.8 / 10.0),
                    aspect_ratio_range=[9.5 / 10.0, 10.0 / 9.5])

                a_image = tf.slice(a_image, begin, size)
                """ for the purpose of distorting not use tf.image.resize_image_with_crop_or_pad under """
                a_image = tf.image.resize_images(a_image, [_d.h, _d.w])
                """ due to the size of channel returned from tf.image.resize_images is not being given,
                    specify it manually. """
                a_image = tf.reshape(a_image, [_d.h, _d.w, _d.c])
            return a_image

        """ process batch times in parallel """
        return tf.map_fn(_distort, x)