Example #1
def task(x, activation='relu', output_dim=256, scope='task_network', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        print('Task Layer1: ' + str(x.get_shape().as_list()))

        block_depth = dense_block_depth
        l = x
        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False, dilation=[1, 1, 1, 1])

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln1')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn1')

        l = act_func(l)

        for i in range(15):
            l = layers.add_residual_block(l,  filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, scope='block1_' + str(i))

        latent = layers.global_avg_pool(l, output_length=output_dim)

    return latent
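
# Hedged usage sketch (added for illustration, not from the original repo):
# assumes TF 1.x plus the repo's `layers`/`util` modules and the global
# `dense_block_depth`; the input shape below is hypothetical.
x_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
task_latent = task(x_in, activation='relu', output_dim=256, norm='layer', b_train=True)
# task_latent: [None, 256] pooled representation.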
Example #2
def latent_discriminator(input_data, activation='swish', scope='ldiscriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = tf.reshape(input_data, shape=[-1, 4, 4, 8])

        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, g_dense_block_depth // 2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
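
# Hedged usage sketch (added): the reshape to [-1, 4, 4, 8] above implies a
# 128-dimensional latent input (4 * 4 * 8 = 128). Placeholder shape assumed.
z = tf.placeholder(tf.float32, [None, 128])
ld_feature, ld_logit, ld_prob = latent_discriminator(z, activation='swish', bn_phaze=True)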
Example #3
def discriminator(input_data, activation='swish', scope='discriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = layers.conv(input_data, scope='conv1', filter_dims=[3, 3, g_dense_block_depth // 2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth // 2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        # Deeper blocks (block_3 .. block_7 with additional pooling) and a final
        # batch-norm layer are disabled in this version.

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
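
# Hedged sketch (added): the training examples below request a 'hinge'
# discriminator loss; a standard hinge formulation over the raw logits
# returned by discriminator() would be the following. Input shapes assumed.
real_in = tf.placeholder(tf.float32, [None, 64, 64, 3])
fake_in = tf.placeholder(tf.float32, [None, 64, 64, 3])
_, p_logit, _ = discriminator(real_in, activation='swish', bn_phaze=True)
_, n_logit, _ = discriminator(fake_in, activation='swish', bn_phaze=True)
d_hinge_loss = (tf.reduce_mean(tf.nn.relu(1.0 - p_logit)) +
                tf.reduce_mean(tf.nn.relu(1.0 + n_logit)))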
Example #4
def test(model_path, test_image_dir):
    trX = []
    trY = []
    test_output_dir = 'sr'

    if not os.path.exists(test_output_dir):
        os.mkdir(test_output_dir)

    with tf.device('/device:CPU:0'):
        test_image_dir_list = os.listdir(test_image_dir)

        for idx, labelname in enumerate(test_image_dir_list):

            if not os.path.isdir(os.path.join(test_image_dir, labelname).replace("\\", "/")):
                continue

            if not os.path.exists(os.path.join(test_output_dir, labelname)):
                os.mkdir(os.path.join(test_output_dir, labelname))

            for filename in os.listdir(os.path.join(test_image_dir, labelname)):
                full_path = os.path.join(test_image_dir, labelname) + '/' + filename
                jpg_img = cv2.imread(full_path)
                img = cv2.cvtColor(jpg_img, cv2.COLOR_BGR2RGB)
                img = (img - 127.5) / 127.5
                trX.append(img)
                trY.append(os.path.join(test_output_dir, labelname))

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=anchor_layer, activation='lrelu',
                                 scope='decoder',
                                 bn_phaze=bn_train)

    # Trainable variable lists
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')

    generator_vars = encoder_var + decoder_var

    with tf.device('/device:CPU:0'):
        residual_loss = get_residual_loss(Y, X_fake, type='l1', gamma=1.0)

    # training operation
    g_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(residual_loss)

    # Launch the graph in a session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Model loading failed')
            return

        i = 0

        for img in trX:
            # latent, anchor = sess.run([latent_real, anchor_layer], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})

            fake = sess.run(
                [X_fake],
                feed_dict={X: [img],
                           bn_train: False,
                           keep_prob: 1.0})

            sample = fake[0][0]
            sample = sample * 127.5 + 127.5
            sample = np.clip(sample, 0, 255).astype(np.uint8)  # cv2.imwrite expects uint8
            sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
            #sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
            #cv2.imwrite(trY[i] + '/tmp.jpg', sample)
            #sample = cv2.imread(trY[i] + '/tmp.jpg')
            #sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
            cv2.imwrite(trY[i] + '/' + str(i) + '.jpg', sample)

            i = i + 1
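
# Note (added): test() above uses module-level tensors X, Y, bn_train and
# keep_prob that this snippet does not define. Based on the placeholder
# definitions in the later examples, they are presumably:
X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
Y = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
bn_train = tf.placeholder(tf.bool)
keep_prob = tf.placeholder(tf.float32)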
Example #5
def train(model_path):
    trX = []
    trY = []

    dir_list = os.listdir(imgs_dirname)
    dir_list.sort(key=str.lower)

    with tf.device('/device:CPU:0'):
        for idx, labelname in enumerate(dir_list):
            for filename in os.listdir(os.path.join(imgs_dirname, labelname)):

                print(os.path.join(imgs_dirname, labelname) + '/' + filename)

                full_path = os.path.join(imgs_dirname, labelname) + '/' + filename
                jpg_img = cv2.imread(full_path)
                img = cv2.cvtColor(jpg_img, cv2.COLOR_BGR2RGB)

                img = cv2.resize(img, dsize=(96, 96), interpolation=cv2.INTER_AREA)

                # Simulate a degraded low-res input: downscale 4x, JPEG round-trip
                # through tmp.jpg, then bicubic upscale back to full size.
                sample = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)

                cv2.imwrite('tmp.jpg', sample)

                sample = cv2.imread('tmp.jpg')
                sample = cv2.cvtColor(sample, cv2.COLOR_BGR2RGB)
                sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)

                sample = (sample - 127.5) / 127.5
                trX.append(sample)

                img = (img - 127.5) / 127.5
                trY.append(img)

        trX, trY = shuffle(trX, trY)

        trX = np.array(trX)
        trY = np.array(trY)

        trX = trX.reshape(-1, input_height, input_width, num_channel)
        trY = trY.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X, activation='lrelu', bn_phaze=bn_train, scope='encoder')
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim, scope='gap')
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        X_fake = decoder_network(latent=cnn_representation, anchor_layer=anchor_layer, activation='lrelu', scope='decoder',
                                 bn_phaze=bn_train)

    # Trainable variable lists
    encoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')

    generator_vars = encoder_var + decoder_var

    with tf.device('/device:CPU:0'):
        residual_loss = get_residual_loss(Y, X_fake, type='l1', gamma=1.0)

    # training operation
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(residual_loss)

    # Launch the graph in a session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Start New Training. Wait ...')

        num_itr = 0
        # list() so the batch ranges survive more than one epoch (zip is a
        # one-shot iterator in Python 3).
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size, len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY = shuffle(trX, trY)

            for start, end in training_batch:

                _, r, fake = sess.run(
                    [g_optimizer, residual_loss, X_fake],
                    feed_dict={X: trX[start:end], Y: trY[start:end],
                               bn_train: True,
                               keep_prob: 0.5})

                sample = fake[0]
                sample = sample * 127.5 + 127.5
                sample = np.clip(sample, 0, 255).astype(np.uint8)  # cv2.imwrite expects uint8
                sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                #sample = cv2.resize(sample, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
                #cv2.imwrite('training_sr/tmp.jpg', sample)
                #sample = cv2.imread('training_sr/tmp.jpg')
                #sample = cv2.resize(sample, dsize=(0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
                cv2.imwrite('training_sr/sample' + str(num_itr) + '.jpg', sample)

                #sample = trX[start]
                #sample = (sample * 127.5) + 127.5
                #sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                #cv2.imwrite('training_sr/sample' + str(num_itr) + '_1.jpg', sample)

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    print('epoch #' + str(i) + ', itr #' + str(num_itr))
                    print('  - residual loss: ' + str(r))

            try:
                saver.save(sess, model_path)
            except Exception:
                print('Save failed')
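
# Hedged helper (added): the per-image preprocessing in train() simulates a
# degraded super-resolution input. The same transform as a standalone
# function, replacing the tmp.jpg file round-trip with an in-memory JPEG
# encode/decode:
def make_degraded_pair(img_bgr):
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, dsize=(96, 96), interpolation=cv2.INTER_AREA)
    low = cv2.resize(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), dsize=(0, 0),
                     fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
    _, buf = cv2.imencode('.jpg', low)  # keep the JPEG compression artifacts
    low = cv2.cvtColor(cv2.imdecode(buf, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    low = cv2.resize(low, dsize=(0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
    return (low - 127.5) / 127.5, (img - 127.5) / 127.5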
Example #6
def encoder(x, activation='relu', scope='encoder_network', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        # [192 x 192]
        block_depth = dense_block_depth // 4

        l = layers.conv(x, scope='conv1', filter_dims=[5, 5, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False, dilation=[1, 1, 1, 1])

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln0')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn0')

        l = act_func(l)

        for i in range(4):
            l = layers.add_residual_dense_block(l, filter_dims=[3, 3, block_depth], num_layers=2,
                                                act_func=act_func, norm=norm, b_train=b_train,
                                                scope='dense_block_1_' + str(i))

        # [64 x 64]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr1', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln1')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn1')

        l = act_func(l)

        print('Encoder Block 1: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, scope='res_block_1_' + str(i))

        # [32 x 32]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr2', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln2')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn2')

        l = act_func(l)

        print('Encoder Block 2: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, scope='res_block_2_' + str(i))

        # [16 x 16]
        block_depth = block_depth * 2

        l = layers.conv(l, scope='tr3', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln3')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn3')

        l = act_func(l)

        print('Encoder Block 3: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, scope='res_block_3_' + str(i))

        # [8 x 8]
        block_depth = block_depth * 2
        l = layers.conv(l, scope='tr4', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln4')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn4')

        l = act_func(l)

        print('Encoder Block 4: ' + str(l.get_shape().as_list()))

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, use_dilation=True, scope='res_block_4_' + str(i))

        # [4 x 4]
        block_depth = block_depth * 2
        l = layers.conv(l, scope='tr5', filter_dims=[3, 3, block_depth], stride_dims=[2, 2], non_linear_fn=None)
        print('Encoder Block 5: ' + str(l.get_shape().as_list()))

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln5')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn5')

        l = act_func(l)

        for i in range(2):
            l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, use_dilation=True, scope='res_block_5_' + str(i))

        last_layer = l

        context = layers.global_avg_pool(last_layer, output_length=representation_dim, use_bias=True, scope='gp')
        print('Encoder GP Dims: ' + str(context.get_shape().as_list()))

        context = tf.reshape(context, [batch_size, num_context_patches, num_context_patches, -1])
        print('Context Dims: ' + str(context.get_shape().as_list()))

    return context
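
# Hedged usage sketch (added): assumes the repo globals batch_size,
# num_context_patches, representation_dim and dense_block_depth; the
# 192 x 192 input size follows the comment at the top of encoder().
enc_in = tf.placeholder(tf.float32, [batch_size, 192, 192, 3])
context = encoder(enc_in, activation='lrelu', norm='layer', b_train=True)
# context: [batch_size, num_context_patches, num_context_patches, -1]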
Example #7
def train(model_path):
    trX = []
    trY = []

    dir_list = os.listdir(imgs_dirname)
    dir_list.sort(key=str.lower)

    one_hot_length = len(os.listdir(imgs_dirname))

    with tf.device('/device:CPU:0'):
        for idx, labelname in enumerate(dir_list):
            imgs_list = load_images_from_folder(os.path.join(
                imgs_dirname, labelname),
                                                use_augmentation=False)
            imgs_list = shuffle(imgs_list)

            label = np.zeros(one_hot_length)
            label[idx] += 1

            print('label:', labelname, label)

            for idx2, img in enumerate(imgs_list):
                trY.append(label)
                '''
                if idx2 < len(imgs_list) * 0.2:
                    # SpecAugment
                    w = np.random.randint(len(img)/10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    w = np.random.randint(len(img)/10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)   
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    #cv2.imwrite(labelname + str(idx2) + '.jpg', img)
                '''
                trX.append(img)

        trX, trY = shuffle(trX, trY)

        trX = np.array(trX)
        trY = np.array(trY)

        trX = trX.reshape(-1, input_height, input_width, num_channel)

    X = tf.placeholder(tf.float32,
                       [None, input_height, input_width, num_channel])

    # Network setup
    cnn_representation, _, anchor_layer = encoder_network(X,
                                                          activation='lrelu',
                                                          bn_phaze=bn_train,
                                                          scope='encoder')
    print('CNN Output Tensor Dimension: ' +
          str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation,
                                                representation_dim,
                                                scope='gap')
    print('CNN Representation Dimension: ' +
          str(cnn_representation.get_shape().as_list()))

    latent_fake = cnn_representation

    with tf.device('/device:GPU:1'):
        latent_real = make_multi_modal_noise(num_mode=8)
        X_fake = decoder_network(latent=cnn_representation,
                                 anchor_layer=None,
                                 activation='lrelu',
                                 scope='decoder',
                                 bn_phaze=bn_train)

        p_feature, p_logit, p_prob = discriminator(latent_real,
                                                   activation='lrelu',
                                                   scope='discriminator',
                                                   bn_phaze=bn_train)
        n_feature, n_logit, n_prob = discriminator(latent_fake,
                                                   activation='lrelu',
                                                   scope='discriminator',
                                                   bn_phaze=bn_train)

    # Trainable variable lists
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='discriminator')
    encoder_var = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='decoder')

    generator_vars = encoder_var + decoder_var
    gan_g_vars = encoder_var

    with tf.device('/device:GPU:1'):
        residual_loss = get_residual_loss(X, X_fake, type='l1', gamma=1.0)

    feature_matching_loss = get_feature_matching_loss(p_feature,
                                                      n_feature,
                                                      type='l1',
                                                      gamma=1.0)

    # Cross Entropy
    gan_g_loss = -tf.reduce_mean(n_prob)
    #gan_g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=n_logit, labels=tf.ones_like(n_logit)))

    #discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='wgan', gamma=1.0)
    discriminator_loss, loss_real, loss_fake = get_discriminator_loss(
        p_prob, n_prob, type='hinge', gamma=1.0)

    # training operation
    d_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(discriminator_loss,
                                                var_list=d_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4,
                                         beta1=0.5).minimize(residual_loss)
    gan_g_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(gan_g_loss,
                                                var_list=gan_g_vars)
    f_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(feature_matching_loss,
                                                var_list=gan_g_vars)

    # Launch the graph in a session

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Start New Training. Wait ...')

        num_itr = 0
        # list() so the batch ranges survive more than one epoch (zip is a
        # one-shot iterator in Python 3).
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size,
                                        len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY = shuffle(trX, trY)

            for start, end in training_batch:
                with tf.device('/device:CPU:0'):
                    style_trX = shuffle(trX[start:end])
                    #style_trX = trX[start:end]
                    anchor, latent = sess.run(
                        [anchor_layer, cnn_representation],
                        feed_dict={
                            X: style_trX,
                            bn_train: True,
                            keep_prob: 0.5
                        })

                _, r, fake = sess.run(
                    [g_optimizer, residual_loss, X_fake],
                    feed_dict={
                        X: trX[start:end],
                        ANCHOR: anchor,
                        bn_train: True,
                        keep_prob: 0.5
                    })

                _, d = sess.run(
                    [d_optimizer, discriminator_loss],
                    feed_dict={
                        X: trX[start:end],
                        ANCHOR: anchor,
                        bn_train: True,
                        keep_prob: 0.5
                    })

                #trX[start:end], trY[start:end] = shuffle(trX[start:end], trY[start:end])

                #_, f = sess.run(
                #    [f_optimizer, feature_matching_loss],
                #    feed_dict={X: trX[start:end], Y: trY[start:end], ANCHOR: anchor,
                #               bn_train: True,
                #               keep_prob: 0.5})

                _, g = sess.run([gan_g_optimizer, gan_g_loss],
                                feed_dict={
                                    X: trX[start:end],
                                    bn_train: True,
                                    keep_prob: 0.5
                                })

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    print('epoch #' + str(i) + ', itr #' + str(num_itr))
                    print('  - residual loss: ' + str(r))
                    print('  - discriminator loss: ' + str(d))
                    print('  - generator loss: ' + str(g))
                    #print('  - feature matching loss: ' + str(f))

                if num_itr % 100 == 0:
                    sample = fake[0] * 127.5 + 127.5
                    sample = np.clip(sample, 0, 255).astype(np.uint8)  # cv2.imwrite expects uint8
                    sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(
                        'training_status/sample' + str(num_itr) + '.jpg',
                        sample)

            try:
                saver.save(sess, model_path)
            except Exception:
                print('Save failed')
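
# Hedged sketch (added): get_feature_matching_loss(..., type='l1', gamma=1.0)
# above presumably reduces to a mean absolute difference between the real and
# fake discriminator features, roughly:
p_feat = tf.placeholder(tf.float32, [None, 128])  # hypothetical feature dims
n_feat = tf.placeholder(tf.float32, [None, 128])
fm_loss = 1.0 * tf.reduce_mean(tf.abs(p_feat - n_feat))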
Example #8
def test(model_path, test_image_dir):
    trX = []
    trY = []
    test_output_dir = 'gan'

    if not os.path.exists(test_output_dir):
        os.mkdir(test_output_dir)

    with tf.device('/device:CPU:0'):
        test_image_dir_list = os.listdir(test_image_dir)

        for idx, labelname in enumerate(test_image_dir_list):

            if not os.path.isdir(
                    os.path.join(test_image_dir, labelname).replace(
                        "\\", "/")):
                continue

            if not os.path.exists(os.path.join(test_output_dir, labelname)):
                os.mkdir(os.path.join(test_output_dir, labelname))

            imgs_list = load_images_from_folder(os.path.join(
                test_image_dir, labelname),
                                                use_augmentation=False)

            for idx2, img in enumerate(imgs_list):
                trY.append(os.path.join(test_output_dir, labelname))
                trX.append(img)

        trX = np.array(trX)
        trY = np.array(trY)
        trX = trX.reshape(-1, input_height, input_width, num_channel)

    # Network setup
    X = tf.placeholder(tf.float32,
                       [None, input_height, input_width, num_channel])

    cnn_representation, _, anchor_layer = encoder_network(X,
                                                          activation='lrelu',
                                                          bn_phaze=bn_train,
                                                          scope='encoder')
    print('CNN Output Tensor Dimension: ' +
          str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation,
                                                representation_dim,
                                                scope='gap')
    print('CNN Representation Dimension: ' +
          str(cnn_representation.get_shape().as_list()))

    latent_fake = cnn_representation

    with tf.device('/device:GPU:1'):
        # decoder_input = make_multi_modal_noise(representation, num_mode=8)
        latent_real = make_multi_modal_noise(num_mode=8)
        X_fake = decoder_network(latent=cnn_representation,
                                 anchor_layer=None,
                                 activation='lrelu',
                                 scope='decoder',
                                 bn_phaze=bn_train)

    p_feature, p_logit, p_prob = discriminator(latent_real,
                                               activation='lrelu',
                                               scope='discriminator',
                                               bn_phaze=bn_train)
    n_feature, n_logit, n_prob = discriminator(latent_fake,
                                               activation='lrelu',
                                               scope='discriminator',
                                               bn_phaze=bn_train)

    # Trainable variable lists
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='discriminator')
    encoder_var = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder') + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='gap')
    decoder_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='decoder')

    generator_vars = encoder_var + decoder_var
    gan_g_vars = encoder_var

    with tf.device('/device:GPU:1'):
        residual_loss = get_residual_loss(X, X_fake, type='l1', gamma=1.0)

    feature_matching_loss = get_feature_matching_loss(p_feature,
                                                      n_feature,
                                                      type='l2',
                                                      gamma=1.0)

    # Cross Entropy
    gan_g_loss = -tf.reduce_mean(n_prob)
    # gan_g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=n_logit, labels=tf.ones_like(n_logit)))

    # discriminator_loss, loss_real, loss_fake = get_discriminator_loss(p_prob, n_prob, type='wgan', gamma=1.0)
    discriminator_loss, loss_real, loss_fake = get_discriminator_loss(
        p_prob, n_prob, type='hinge', gamma=1.0)

    # training operation
    d_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(discriminator_loss,
                                                var_list=d_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate=2e-4,
                                         beta1=0.5).minimize(residual_loss)
    gan_g_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(gan_g_loss,
                                                var_list=gan_g_vars)
    f_optimizer = tf.train.AdamOptimizer(
        learning_rate=2e-4, beta1=0.5).minimize(feature_matching_loss,
                                                var_list=gan_g_vars)

    # Launch the graph in a session
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Model loading failed')
            return

        i = 0

        for img in trX:
            #latent, anchor = sess.run([latent_real, anchor_layer], feed_dict={X: [img], bn_train: False, keep_prob: 1.0})

            fake = sess.run([X_fake],
                            feed_dict={
                                X: [img],
                                bn_train: False,
                                keep_prob: 1.0
                            })

            sample = fake[0][0] * 127.5 + 127.5
            sample = np.clip(sample, 0, 255).astype(np.uint8)  # cv2.imwrite expects uint8
            sample = cv2.cvtColor(sample, cv2.COLOR_RGB2BGR)
            cv2.imwrite(trY[i] + '/' + str(i) + '.jpg', sample)

            i = i + 1
Example #9
def discriminator(x, activation='relu', scope='discriminator_network', norm='layer', b_train=False, use_patch=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        block_depth = dense_block_depth
        bottleneck_width = 8
        if use_patch:
            bottleneck_width = 16
        #num_iter = input_width // bottleneck_width
        #num_iter = int(np.sqrt(num_iter))
        num_iter = 3

        print('Discriminator Input: ' + str(x.get_shape().as_list()))
        l = layers.conv(x, scope='conv_init', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)
        l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm_init')
        l = act_func(l)

        for i in range(num_iter):
            print('Discriminator Block ' + str(i) + ': ' + str(l.get_shape().as_list()))

            for j in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                              norm=norm, b_train=b_train, scope='res_block_' + str(i) + '_' + str(j))
            block_depth = block_depth * 2
            l = layers.conv(l, scope='tr' + str(i), filter_dims=[3, 3, block_depth], stride_dims=[2, 2],
                            non_linear_fn=None)
            l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm_' + str(i))
            l = act_func(l)

        if use_patch:
            print('Discriminator Patch Block : ' + str(l.get_shape().as_list()))

            for i in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                              norm=norm, b_train=b_train, scope='patch_block_' + str(i))

            last_layer = l
            feature = layers.global_avg_pool(last_layer, output_length=representation_dim // 8, use_bias=False,
                                             scope='gp')
            print('Discriminator GP Dims: ' + str(feature.get_shape().as_list()))

            logit = layers.conv(last_layer, scope='conv_pred', filter_dims=[3, 3, 1], stride_dims=[1, 1],
                                non_linear_fn=None, bias=False)
            print('Discriminator Logit Dims: ' + str(logit.get_shape().as_list()))
        else:
            #print('Discriminator Attention Block : ' + str(l.get_shape().as_list()))
            #l = layers.self_attention(l, block_depth, act_func=act_func)
            for i in range(2):
                l = layers.add_residual_block(l, filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                              norm=norm, b_train=b_train, scope='at_block_' + str(i))

            last_layer = l
            feature = layers.global_avg_pool(last_layer, output_length=representation_dim // 8, use_bias=False,
                                             scope='gp')

            print('Discriminator GP Dims: ' + str(feature.get_shape().as_list()))

            logit = layers.fc(feature, 1, non_linear_fn=None, scope='flat')

    return feature, logit
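
# Hedged usage sketch (added): with use_patch=True the logit stays spatial
# (a PatchGAN-style map from conv_pred); with use_patch=False it is a single
# fc output per sample. Input shape assumed.
d_in = tf.placeholder(tf.float32, [None, 64, 64, 3])
d_feature, d_logit = discriminator(d_in, activation='lrelu', norm='layer',
                                   b_train=True, use_patch=True)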
Example #10
def test(model_path):
    threshold = 0.9
    print('Serving Mode, threshold: ' + str(threshold))

    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
    Y = tf.placeholder(tf.float32, [None, num_class_per_group])
    TripletX = tf.placeholder(tf.float32, [None, representation_dim])
    bn_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    # Network setup
    cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, representation_dim)
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    #scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
    #representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

    # Residual
    representation = tf.add(cnn_representation, TripletX)

    prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)

    center_loss = get_center_loss(representation, tf.argmax(Y, 1))
    update_center = update_centers(representation, tf.argmax(Y, 1), CENTER_LOSS_ALPHA)
    entropy_loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction, label_smoothing=0.1))

    total_loss = entropy_loss + center_loss * LAMBDA

    train_op = tf.train.AdamOptimizer(0.003).minimize(total_loss)

    predict_op = tf.argmax(tf.nn.softmax(prediction), 1)
    confidence_op = tf.nn.softmax(prediction)

    # Launch the graph in a session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Model load failed: ' + model_path)
            return

        fileDir = os.path.dirname(os.path.realpath(__file__))
        # Modify baseDir to your environment
        inputDir = fileDir + '/input'
        label_list = [d for d in os.listdir(inputDir + '/user') if os.path.isdir(inputDir + '/user/' + d)]
        label_list.sort(key=str.lower)

        print(label_list)  # Global label list.

        group_label_list = os.listdir(imgs_dirname)

        redis_ready = False

        clf_directory = os.path.dirname(os.path.realpath(__file__)) + '/svm/group/'
        clf_files = os.listdir(clf_directory)
        clf_list = [pickle.load(open(clf_directory + pkl_file, 'rb')) for pkl_file in clf_files]

        try:
            rds = redis.StrictRedis(host=REDIS_SERVER, port=REDIS_PORT, db=0)

            p = rds.pubsub()
            p.subscribe(redis_channel)
            redis_ready = True

            print('Connected to Message Queue')
        except Exception:
            redis_ready = False
            print('Failed to connect to Message Queue')

        sock_ready = False

        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((HOST, PORT))
            sock_ready = True

            print('Connected to Edge Camera')
        except Exception:
            print('Failed to connect to Edge Camera')
            sock_ready = False

        if not redis_ready:
            print('REDIS not ready.')
            return

        cur_target_frame = -1
        next_target_frame = 1

        dirname = inputDir + '/../Unknown'

        if not os.path.exists(dirname):
            os.mkdir(dirname)

        for item in p.listen():
            data = item

            if data is not None:
                data = data.get('data')

                if data != 1:
                    temp = array.array('B', data)
                    ar = np.array(temp, dtype=np.uint8)

                    left = int_from_bytes(ar[4], ar[3], ar[2], ar[1])
                    right = int_from_bytes(ar[8], ar[7], ar[6], ar[5])
                    top = int_from_bytes(ar[12], ar[11], ar[10], ar[9])
                    bottom = int_from_bytes(ar[16], ar[15], ar[14], ar[13])

                    recv_frame = ar[0]

                    ar = ar[17:]

                    frame_str = rds.get(frame_db)

                    if cur_target_frame == -1:
                        cur_target_frame = recv_frame

                    next_target_frame = int(frame_str)

                    if recv_frame == cur_target_frame or recv_frame == next_target_frame:
                        fileName = '/tmp/input' + redis_channel + '.jpg'
                        with open(fileName, "wb") as jpgFile:
                            jpgFile.write(ar)

                        confidence = 0.97
                        person = 'Unknown'

                        #print('Get triplet representation')
                        tpReps, img = get_triplet_representation_align_image(fileName)

                        if not tpReps:
                            print('Not a valid face.')
                        else:
                            #print('Run prediction..')
                            use_softmax = False

                            pred_id, confidence, rep = sess.run([predict_op, confidence_op, representation],
                                                                feed_dict={X: img, TripletX: tpReps, bn_train: False,
                                                                           keep_prob: 1.0})

                            if use_softmax:
                                #print('# Prediction: ' + str(pred_id))
                                person = group_label_list[pred_id[0]]
                                confidence = confidence[0][pred_id[0]]
                                print('# Person: ' + person + ', Confidence: ' + str(confidence))
                            else:
                                confidences = []
                                labels = []

                                for (le, clf) in clf_list:
                                    pred = clf.predict_proba(rep).ravel()
                                    maxI = np.argmax(pred)
                                    person = le.inverse_transform([maxI])
                                    confidence = pred[maxI]
                                    confidences.append(confidence)
                                    labels.append(person[0])

                                print('#################################')
                                print(labels)
                                print(confidences)

                                effective_labels = []
                                effective_confidences = []

                                for i in range(len(labels)):
                                    if labels[i] != 'Unknown':
                                        effective_labels.append(labels[i])
                                        effective_confidences.append(confidences[i])

                                if len(effective_labels) == 0:
                                    person = 'Unknown'
                                    confidence = 0.99
                                else:
                                    confidence = max(effective_confidences)
                                    maxI = effective_confidences.index(confidence)
                                    person = effective_labels[maxI]

                                    if len(effective_labels) > 1:
                                        effective_confidences.sort(reverse=True)

                                        if effective_confidences[0] - effective_confidences[1] < 0.5:
                                            person = 'Unknown'
                                            confidence = 0.99

                                print('\nPerson: ' + person + ', Confidence: ' + str(confidence * 100) + '%')

                                if confidence < 0.9:
                                    save_unknown_user(fileName, dirname, 'Unknown', confidence)
                                elif confidence < 0.97:
                                    save_unknown_user(fileName, dirname, person, confidence)

                                if confidence < threshold:
                                    person = 'Unknown'

                            if sock_ready:
                                if person != 'Unknown' and person != 'Nobody':
                                    b_array = bytes()
                                    floatList = [left, right, top, bottom, confidence, label_list.index(person)]
                                    b_array = b_array.join((struct.pack('f', val) for val in floatList))
                                    sock.send(b_array)
                    else:
                        cur_target_frame = next_target_frame

                else:
                    rds.set(frame_db, '1')
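
# Hedged helper (added): the 17-byte message header parsed above appears to be
# a frame id (byte 0) followed by four box coordinates decoded with the repo's
# int_from_bytes. Assuming that implies little-endian int32 values, the same
# parse with struct would be:
import struct

def parse_header(ar):
    frame_id = ar[0]
    left, right, top, bottom = struct.unpack('<4i', ar[1:17].tobytes())
    return frame_id, left, right, top, bottom, ar[17:]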
Example #11
def train(model_path):
    trX = []
    trY = []
    trXT = []

    teX = []
    teY = []
    teXT = []

    for idx, labelname in enumerate(os.listdir(imgs_dirname)):
        print('label:', idx, labelname)

        imgs_list = load_images_from_folder(os.path.join(imgs_dirname, labelname))
        imgs_list = shuffle(imgs_list)

        for idx2, img in enumerate(imgs_list):
            label = np.zeros(len(os.listdir(imgs_dirname)))
            label[idx] += 1

            if idx2 < len(imgs_list) * 0.8:
                trY.append(label)
                trXT.append(get_triplet_representation(img))

                if idx2 < len(imgs_list) * 0.7:
                    # SpecAugment
                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    w = np.random.randint(len(img) // 10)  # Max 10% width
                    h = np.random.randint(len(img) - w + 1)
                    img[h:h + w] = [[0, 0, 0]]
                    img = np.transpose(img, [1, 0, 2])

                    #cv2.imwrite(labelname + str(idx2) + '.jpg', img)

                trX.append(img)
            else:
                teY.append(label)
                teXT.append(get_triplet_representation(img))
                teX.append(img)

    trX, trY, trXT = shuffle(trX, trY, trXT)

    trX = np.array(trX)
    trY = np.array(trY)
    trXT = np.array(trXT)
    teX = np.array(teX)
    teY = np.array(teY)
    teXT = np.array(teXT)

    trX = trX.reshape(-1, input_height, input_width, num_channel)
    teX = teX.reshape(-1, input_height, input_width, num_channel)

    X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
    Y = tf.placeholder(tf.float32, [None, num_class_per_group])
    TripletX = tf.placeholder(tf.float32, [None, representation_dim])
    bn_train = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    # Network setup
    cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
    print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

    cnn_representation = layers.global_avg_pool(cnn_representation, g_fc_layer3_dim)
    print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

    #scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
    #representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

    # Residual
    representation = tf.add(cnn_representation, TripletX)

    # L2 Softmax
    representation = tf.nn.l2_normalize(representation, axis=1)
    # L2-softmax scale lower bound: log(p * (C - 2) / (1 - p)) with p = 0.9
    alpha = tf.log(0.9 * (num_class_per_group - 2) / (1 - 0.9))

    representation = tf.multiply(alpha, representation)

    prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)

    center_loss = get_center_loss(representation, tf.argmax(Y, 1))
    update_center = update_centers(representation, tf.argmax(Y, 1), CENTER_LOSS_ALPHA)
    entropy_loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction, label_smoothing=0.1))
    #entropy_loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=prediction))

    # L1 BN: only for bottle neck layer's bn gamma
    tr_val_list = [var for var in tf.trainable_variables() if 'transition' in var.name]
    t_val_list = [tf.reduce_sum(tf.abs(var)) for var in tr_val_list if 'gamma' in var.name]
    L1_penalty = tf.reduce_sum(t_val_list)
    scale = 1e-7

    total_loss = entropy_loss + center_loss * LAMBDA + scale * L1_penalty
    #total_loss = entropy_loss + center_loss * LAMBDA

    global_step = tf.Variable(0, trainable=False)
    learning_rate = 0.05
    decayed_lr = tf.train.exponential_decay(learning_rate,
                                            global_step, 10000,
                                            0.95, staircase=True)

    # Pass global_step so the exponential decay schedule actually advances.
    train_op = tf.train.AdamOptimizer(decayed_lr).minimize(total_loss, global_step=global_step)
    #train_op = tf.train.AdamOptimizer(3e-4).minimize(total_loss)

    predict_op = tf.argmax(tf.nn.softmax(prediction), 1)

    # Launch the graph in a session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Model loaded')
        except Exception:
            print('Start New Training. Wait ...')

        num_itr = 0
        # list() so the batch ranges survive more than one epoch (zip is a
        # one-shot iterator in Python 3).
        training_batch = list(zip(range(0, len(trX), batch_size),
                                  range(batch_size, len(trX) + 1, batch_size)))

        for i in range(num_epoch):
            trX, trY, trXT = shuffle(trX, trY, trXT)

            for start, end in training_batch:
                _, c, center, _, l1_penalty = sess.run(
                    [train_op, entropy_loss, center_loss, update_center, L1_penalty],
                    feed_dict={X: trX[start:end], Y: trY[start:end], TripletX: trXT[start:end], bn_train: True,
                               keep_prob: 0.5})

                num_itr = num_itr + 1

                if num_itr % 10 == 0:
                    try:
                        print('entropy loss: ' + str(c))
                        print('center loss: ' + str(center))
                        print('l1 penalty: ' + str(l1_penalty))

                        saver.save(sess, model_path)
                    except Exception:
                        print('Save failed')

            test_indices = np.arange(len(teX))  # Get A Test Batch
            np.random.shuffle(test_indices)
            test_indices = test_indices[0:test_size]

            print('# Test Set #')
            print(np.argmax(teY[test_indices], axis=1))

            preds = sess.run(predict_op,
                             feed_dict={X: teX[test_indices], Y: teY[test_indices],
                                        TripletX: teXT[test_indices], bn_train: False, keep_prob: 1.0})

            print('# Prediction #')
            print(preds)

            precision = np.mean(np.argmax(teY[test_indices], axis=1) == preds)
            print('epoch ' + str(i) + ', precision: ' + str(100 * precision) + ' %')

            if precision > 0.99:
                break
Example #12
        data_dir = args.data

        if not os.path.exists(data_dir):
            print('No data.')
        else:
            X = tf.placeholder(tf.float32, [None, input_height, input_width, num_channel])
            TripletX = tf.placeholder(tf.float32, [None, representation_dim])
            bn_train = tf.placeholder(tf.bool)
            keep_prob = tf.placeholder(tf.float32)

            # Network setup
            cnn_representation, _ = cnn_network(X, bn_phaze=bn_train)
            print('CNN Output Tensor Dimension: ' + str(cnn_representation.get_shape().as_list()))

            cnn_representation = layers.global_avg_pool(cnn_representation, g_fc_layer3_dim)
            print('CNN Representation Dimension: ' + str(cnn_representation.get_shape().as_list()))

            fc_representation = cnn_representation

            #scale_representation = layers.global_avg_pool(scale_representation, representation_dim)
            #representation = tf.add(cnn_representation, tf.multiply(TripletX, scale_representation))

            # Residual
            representation = tf.add(cnn_representation, TripletX)

            prediction = layers.fc(representation, num_class_per_group, scope='g_fc_final')

            with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
                centers = tf.get_variable('centers', [num_class_per_group, g_fc_layer3_dim], dtype=tf.float32,
                                          initializer=tf.constant_initializer(0), trainable=False)
Example #13
def encoder(x,
            activation='relu',
            scope='encoder',
            norm='layer',
            b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        num_encoder_feature_blocks = 5
        num_encoder_bt_blocks = 4

        print('Encoder Input: ' + str(x.get_shape().as_list()))
        block_depth = unit_block_depth

        l = layers.conv(x,
                        scope='conv0',
                        filter_dims=[3, 3, block_depth],
                        stride_dims=[1, 1],
                        non_linear_fn=None,
                        bias=False)
        l = layers.conv_normalize(l, norm=norm, b_train=b_train, scope='norm0')
        l = act_func(l)

        print('Encoder Block: ' + str(l.get_shape().as_list()))

        for i in range(num_encoder_bt_blocks):
            l = layers.add_se_residual_block(l,
                                             filter_dims=[3, 3, block_depth],
                                             num_layers=2,
                                             act_func=act_func,
                                             norm=norm,
                                             b_train=b_train,
                                             scope='res_block_' + str(i))

            print('Encoder Bottleneck Block ' + str(i) + ': ' +
                  str(l.get_shape().as_list()))
            block_depth = block_depth * 2
            l = layers.conv(l,
                            scope='tr_' + str(i),
                            filter_dims=[3, 3, block_depth],
                            stride_dims=[2, 2],
                            non_linear_fn=None)
            l = layers.conv_normalize(l,
                                      norm=norm,
                                      b_train=b_train,
                                      scope='norm_' + str(i))
            l = act_func(l)

        last_layer = l

        latent = layers.global_avg_pool(last_layer,
                                        output_length=representation_dim,
                                        use_bias=False,
                                        scope='gp')
        categories = layers.fc(latent, num_class)

        print('Encoder Latent Dims: ' + str(latent.get_shape().as_list()))

    return latent, categories
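
# Hedged usage sketch (added): assumes the repo globals unit_block_depth,
# representation_dim and num_class; the input size and labels are hypothetical.
enc_x = tf.placeholder(tf.float32, [None, 64, 64, 3])
enc_y = tf.placeholder(tf.float32, [None, num_class])
latent, categories = encoder(enc_x, activation='relu', norm='layer', b_train=True)
cls_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=enc_y, logits=categories))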