Example #1
0
def create_refine_net(blocks, is_training, trainable=True):
    """Refine net: process each pyramid level, upsample to the output
    resolution, concatenate, and predict the refined keypoint heatmaps.

    Args:
        blocks: list of backbone feature maps, shallow to deep.
        is_training: bool controlling batch-norm mode.
        trainable: whether the final conv weights are trainable.

    Returns:
        Heatmap tensor with cfg.nr_skeleton channels at cfg.output_shape.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    bottleneck = resnet_v1.bottleneck

    upsampled = []
    for level, feature in enumerate(blocks):
        fm = feature
        # Deeper levels pass through more bottleneck units (level of them).
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            for unit in range(level):
                fm = bottleneck(
                    fm, 256, 128, stride=1,
                    scope='res{}/refine_conv{}'.format(2 + level,
                                                       unit))  # no projection
        fm = tf.image.resize_bilinear(
            fm, (cfg.output_shape[0], cfg.output_shape[1]),
            name='upsample_conv/res{}'.format(2 + level))
        upsampled.append(fm)

    # Fuse all levels along channels, then predict the heatmaps.
    merged = tf.concat(upsampled, axis=3)
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        merged = bottleneck(merged, 256, 128, stride=1,
                            scope='final_bottleneck')
        res = slim.conv2d(merged,
                          cfg.nr_skeleton, [3, 3],
                          trainable=trainable,
                          weights_initializer=xavier,
                          padding='SAME',
                          activation_fn=None,
                          scope='refine_out')
    return res
Example #2
0
def create_global_net(blocks, is_training, trainable=True):
    """Top-down FPN-style global net.

    Walks the backbone features from deep to shallow, merging each lateral
    1x1 projection with the upsampled feature from the level above, and
    emits a heatmap prediction per level.

    Args:
        blocks: list of backbone feature maps, shallow to deep.
        is_training: bool controlling batch-norm mode.
        trainable: whether conv weights are trainable.

    Returns:
        (global_fms, global_outs): merged feature maps and per-level
        heatmaps resized to cfg.output_shape, both ordered shallow to deep.
    """
    pyramid_fms = []
    pyramid_outs = []
    prev_fm = None
    xavier = tf.contrib.layers.xavier_initializer()

    for idx, block in enumerate(reversed(blocks)):
        level = 5 - idx  # deepest block corresponds to res5
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            lateral = slim.conv2d(block,
                                  256, [1, 1],
                                  trainable=trainable,
                                  weights_initializer=xavier,
                                  padding='SAME',
                                  activation_fn=tf.nn.relu,
                                  scope='lateral/res{}'.format(level))

        if prev_fm is None:
            # Deepest level: nothing above to merge with.
            prev_fm = lateral
        else:
            # Upsample the previous level to this level's spatial size,
            # project, and merge by addition.
            target = tf.shape(lateral)
            up = tf.image.resize_bilinear(
                prev_fm, (target[1], target[2]),
                name='upsample/res{}'.format(level))
            up = slim.conv2d(up,
                             256, [1, 1],
                             trainable=trainable,
                             weights_initializer=xavier,
                             padding='SAME',
                             activation_fn=None,
                             scope='merge/res{}'.format(level))
            prev_fm = up + lateral

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            tmp = slim.conv2d(prev_fm,
                              256, [1, 1],
                              trainable=trainable,
                              weights_initializer=xavier,
                              padding='SAME',
                              activation_fn=tf.nn.relu,
                              scope='tmp/res{}'.format(level))
            out = slim.conv2d(tmp,
                              cfg.nr_skeleton, [3, 3],
                              trainable=trainable,
                              weights_initializer=xavier,
                              padding='SAME',
                              activation_fn=None,
                              scope='pyramid/res{}'.format(level))

        pyramid_fms.append(prev_fm)
        pyramid_outs.append(
            tf.image.resize_bilinear(
                out, (cfg.output_shape[0], cfg.output_shape[1])))

    # Restore shallow-to-deep ordering for the caller.
    pyramid_fms.reverse()
    pyramid_outs.reverse()
    return pyramid_fms, pyramid_outs
    def head_net(self, blocks, is_training, trainable=True):
        """Simple deconvolutional head: three stride-2 transposed convs
        (8x upsampling of the deepest backbone feature) followed by a 1x1
        conv producing cfg.nr_skeleton heatmap channels."""
        normal_init = tf.truncated_normal_initializer(0, 0.01)
        msra_init = tf.contrib.layers.variance_scaling_initializer()
        xavier_init = tf.contrib.layers.xavier_initializer()  # unused, kept for parity

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            out = blocks[-1]
            # Three identical stride-2 deconvolutions: up1 -> up2 -> up3.
            for scope_name in ('up1', 'up2', 'up3'):
                out = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2,
                    trainable=trainable,
                    weights_initializer=normal_init,
                    padding='SAME',
                    activation_fn=tf.nn.relu,
                    scope=scope_name)

            # Linear 1x1 projection to the heatmap channels.
            out = slim.conv2d(
                out, cfg.nr_skeleton, [1, 1],
                trainable=trainable,
                weights_initializer=msra_init,
                padding='SAME',
                normalizer_fn=None,
                activation_fn=None,
                scope='out')

        return out
Example #4
0
    def head_net(self, blocks, is_training, trainable=True):
        """Deconvolutional head with multi-resolution CRF attention.

        Fix: the original body was indented with tabs while the ``def``
        line used spaces, which raises ``TabError`` under Python 3; the
        whole method is re-indented with spaces. Behavior is unchanged
        (debug ``print`` calls included).

        Builds up1/up2/up3 stride-2 deconvolutions; after up1 and up2 a
        CRF attention map is computed and nearest-neighbor-upsampled (4x
        and 2x) to the up3 resolution, a third attention is computed at
        up3 directly, and their sum gates the features before a part
        attention and the final 1x1 heatmap conv. All attention tensors
        are stored on ``self.attentions``.

        Returns:
            Heatmap tensor with cfg.num_kps channels.
        """
        normal_initializer = tf.truncated_normal_initializer(0, 0.01)
        msra_initializer = tf.contrib.layers.variance_scaling_initializer()
        xavier_initializer = tf.contrib.layers.xavier_initializer()  # unused, kept for parity

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):

            out = slim.conv2d_transpose(blocks[-1], 256, [4, 4], stride=2,
                trainable=trainable, weights_initializer=normal_initializer,
                padding='SAME', activation_fn=tf.nn.relu,
                scope='up1')
            print(out.shape)

            # Attention after up1, upsampled 4x to match the up3 resolution.
            att1 = _attention_part_crf(out, 1, 3, 0, False)
            upsample1 = tf.image.resize_nearest_neighbor(
                att1, tf.shape(att1)[1:3] * 4, name='upsampling')
            print(upsample1.shape)

            self.attentions = []
            self.attentions.append(upsample1)

            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                trainable=trainable, weights_initializer=normal_initializer,
                padding='SAME', activation_fn=tf.nn.relu,
                scope='up2')
            print(out.shape)

            # Attention after up2, upsampled 2x to match the up3 resolution.
            att2 = _attention_part_crf(out, 1, 3, 0, False)
            upsample2 = tf.image.resize_nearest_neighbor(
                att2, tf.shape(att2)[1:3] * 2, name='upsampling')
            print(upsample2.shape)
            self.attentions.append(upsample2)

            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                trainable=trainable, weights_initializer=normal_initializer,
                padding='SAME', activation_fn=tf.nn.relu,
                scope='up3')

            print(out.shape)
            # Attention at the up3 resolution itself (no upsampling needed),
            # then aggregate the three maps and gate the features with them.
            upsample3 = _attention_part_crf(out, 1, 3, 0, False)
            aggatt = tf.add_n([upsample1, upsample2, upsample3])
            print(aggatt.shape)

            self.attentions.append(upsample3)
            self.attentions.append(aggatt)

            out = tf.multiply(out, aggatt)

            print("Agg attention shape", aggatt.shape)
            # Part-specific attention on the gated features.
            partatt = _attention_part_crf(out, 1, 3, 1, False)
            print("Final Output shape", partatt.shape)

            self.attentions.append(partatt)
            out = tf.multiply(out, partatt)

            # Linear 1x1 projection to the keypoint heatmaps.
            out = slim.conv2d(out, cfg.num_kps, [1, 1],
                trainable=trainable, weights_initializer=msra_initializer,
                padding='SAME', normalizer_fn=None, activation_fn=None,
                scope='out')
            self.attentions.append(out)
        return out
Example #5
0
    def head_net(self, blocks, is_training, trainable=True, add_paf_output=False, add_nonlocal_block=False):
        """Deconvolutional head with optional non-local block and PAF output.

        Optionally applies a non-local block to the deepest feature, then
        upsamples 8x via three stride-2 deconvolutions. With
        ``add_paf_output`` it returns a (heatmaps, pafs) tuple; otherwise a
        single heatmap tensor, sigmoid-activated when cfg.gauss_integral.
        """
        normal_init = tf.truncated_normal_initializer(0, 0.01)
        msra_init = tf.contrib.layers.variance_scaling_initializer()
        xavier_init = tf.contrib.layers.xavier_initializer()  # unused, kept for parity

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            if add_nonlocal_block:
                out = self.non_local_block(blocks[-1], 256)
            else:
                out = blocks[-1]

            # Three identical stride-2 deconvolutions: up1 -> up2 -> up3.
            for scope_name in ('up1', 'up2', 'up3'):
                out = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2,
                    trainable=trainable,
                    weights_initializer=normal_init,
                    padding='SAME',
                    activation_fn=tf.nn.relu,
                    scope=scope_name)

            if add_paf_output:
                # Separate 1x1 heads for keypoint heatmaps and part-affinity
                # fields (two channels per limb).
                hms_out = slim.conv2d(
                    out, cfg.num_kps, [1, 1],
                    trainable=trainable,
                    weights_initializer=msra_init,
                    padding='SAME', normalizer_fn=None, activation_fn=None,
                    scope='out')
                paf_out = slim.conv2d(
                    out, len(cfg.kps_lines)*2, [1, 1],
                    trainable=trainable,
                    weights_initializer=msra_init,
                    padding='SAME', normalizer_fn=None, activation_fn=None,
                    scope='paf')
                out = (hms_out, paf_out)
            else:
                act = tf.sigmoid if cfg.gauss_integral else None
                out = slim.conv2d(
                    out, cfg.num_kps, [1, 1],
                    trainable=trainable,
                    weights_initializer=msra_init,
                    padding='SAME', normalizer_fn=None, activation_fn=act,
                    scope='out')
        return out
Example #6
0
def create_coarse_net(blocks, is_training, trainable=True):
    """Coarse prediction head over the feature pyramid.

    For pyramid level i, applies i bottleneck units (collecting every
    intermediate feature map in ``coarse_fms``), upsamples the result to
    cfg.output_shape, concatenates all levels along channels, and predicts
    coarse keypoint heatmaps.

    Cleanup: removed a large dead commented-out code block (which also
    contained ``is_trainging`` typos) and the unused locals ``last_fm``
    and a stale ``#coarse_outs.reverse()`` line. Graph behavior is
    unchanged.

    Args:
        blocks: list of backbone feature maps, shallow to deep
            (e.g. shapes like (N, 64, 48, 256) ... (N, 8, 6, 256)).
        is_training: bool controlling batch-norm mode.
        trainable: whether the final conv weights are trainable.

    Returns:
        (coarse_fms, coarse): collected bottleneck features in reversed
        (deep-to-shallow) order, and the coarse heatmap tensor with
        cfg.nr_skeleton channels at cfg.output_shape.
    """
    initializer = tf.contrib.layers.xavier_initializer()
    bottleneck = resnet_v1.bottleneck
    coarse_fms = []
    coarse_outs = []
    for i, block in enumerate(blocks):
        mid_fm = block
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            for j in range(i):
                mid_fm = bottleneck(mid_fm,
                                    256,
                                    128,
                                    stride=1,
                                    scope='res{}/coarse_conv{}'.format(
                                        2 + i, j))
                # NOTE(review): appended inside the inner loop, so every
                # intermediate feature map is collected and level 0
                # contributes none — unlike create_refine_net, which keeps
                # one map per level. Confirm this asymmetry is intended.
                coarse_fms.append(mid_fm)
        mid_fm = tf.image.resize_bilinear(
            mid_fm, (cfg.output_shape[0], cfg.output_shape[1]),
            name='upsample_conv/res{}'.format(2 + i))
        coarse_outs.append(mid_fm)

    # Fuse all upsampled levels, then predict the coarse heatmaps.
    coarse_out = tf.concat(coarse_outs, axis=3)
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        coarse_out = bottleneck(
            coarse_out, 256, 128, stride=1, scope='coarse_final_bottleneck')
        coarse = slim.conv2d(
            coarse_out,
            cfg.nr_skeleton, [3, 3],
            trainable=trainable,
            weights_initializer=initializer,
            padding='SAME',
            activation_fn=None,
            scope='coarse_out')
    # Deep-to-shallow ordering for the caller.
    coarse_fms.reverse()
    return coarse_fms, coarse
Example #7
0
    def head_net(self, blocks, is_training, trainable=True):
        """Deconvolutional head with optional occlusion-aware branching.

        Behavior is selected by cfg.MODEL flags:
        - occluded_detection off: single up1/up2/up3 deconv stack plus a
          1x1 conv; returns one heatmap tensor (cfg.num_kps channels).
        - occluded_detection on, occluded_cross_branch off: up1/up2 are
          shared, then separate up3 deconvs and 1x1 convs per branch;
          returns (out_vis, out_occ).
        - both on: branches split right after up1 (separate up2/up3), and
          each branch's features are concatenated with the other's before
          its final 1x1 conv; with cfg.MODEL.stop_crossbranch_grad the
          borrowed features are wrapped in tf.stop_gradient so each head
          only trains its own trunk. Returns (out_vis, out_occ).

        Branch outputs have cfg.num_kps + cfg.additional_outputs channels.
        """

        normal_initializer = tf.truncated_normal_initializer(0, 0.01)
        msra_initializer = tf.contrib.layers.variance_scaling_initializer()
        # NOTE: xavier_initializer is created but never used below.
        xavier_initializer = tf.contrib.layers.xavier_initializer()

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):

            # First stride-2 deconvolution, shared by all configurations.
            out = slim.conv2d_transpose(blocks[-1],
                                        256, [4, 4],
                                        stride=2,
                                        trainable=trainable,
                                        weights_initializer=normal_initializer,
                                        padding='SAME',
                                        activation_fn=tf.nn.relu,
                                        scope='up1')

            if (not cfg.MODEL.occluded_detection):

                # Plain single-branch head: up2, up3, then 1x1 heatmap conv.
                out = slim.conv2d_transpose(
                    out,
                    256, [4, 4],
                    stride=2,
                    trainable=trainable,
                    weights_initializer=normal_initializer,
                    padding='SAME',
                    activation_fn=tf.nn.relu,
                    scope='up2')

                out = slim.conv2d_transpose(
                    out,
                    256, [4, 4],
                    stride=2,
                    trainable=trainable,
                    weights_initializer=normal_initializer,
                    padding='SAME',
                    activation_fn=tf.nn.relu,
                    scope='up3')

                out = slim.conv2d(out,
                                  cfg.num_kps, [1, 1],
                                  trainable=trainable,
                                  weights_initializer=msra_initializer,
                                  padding='SAME',
                                  normalizer_fn=None,
                                  activation_fn=None,
                                  scope='out')
                return out
            else:
                if (not cfg.MODEL.occluded_cross_branch):
                    # Late split: shared up2, then independent up3/1x1 convs
                    # for the visible and occluded branches.
                    out = slim.conv2d_transpose(
                        out,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up2')

                    out_occ = slim.conv2d_transpose(
                        out,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up3_occ')
                    out_vis = slim.conv2d_transpose(
                        out,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up3_vis')
                    out_vis = slim.conv2d(out_vis,
                                          cfg.num_kps + cfg.additional_outputs,
                                          [1, 1],
                                          trainable=trainable,
                                          weights_initializer=msra_initializer,
                                          padding='SAME',
                                          normalizer_fn=None,
                                          activation_fn=None,
                                          scope='out_vis')
                    out_occ = slim.conv2d(out_occ,
                                          cfg.num_kps + cfg.additional_outputs,
                                          [1, 1],
                                          trainable=trainable,
                                          weights_initializer=msra_initializer,
                                          padding='SAME',
                                          normalizer_fn=None,
                                          activation_fn=None,
                                          scope='out_occ')
                    return out_vis, out_occ
                else:

                    # Early split: independent up2/up3 per branch, then
                    # cross-branch feature sharing before the 1x1 convs.
                    out_vis = slim.conv2d_transpose(
                        out,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up2_vis')

                    out_occ = slim.conv2d_transpose(
                        out,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up2_occ')

                    out_occ = slim.conv2d_transpose(
                        out_occ,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up3_occ')
                    out_vis = slim.conv2d_transpose(
                        out_vis,
                        256, [4, 4],
                        stride=2,
                        trainable=trainable,
                        weights_initializer=normal_initializer,
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        scope='up3_vis')

                    if (cfg.MODEL.stop_crossbranch_grad
                        ):  #stop gradient for cross branching

                        # Each head sees the other branch's features but
                        # cannot backpropagate into them.
                        out_vis_stack = tf.concat(
                            [tf.stop_gradient(out_occ), out_vis], axis=3)
                        out_occ_stack = tf.concat(
                            [out_occ, tf.stop_gradient(out_vis)], axis=3)

                    else:
                        # NOTE(review): both stacks are identical tensors
                        # here (same concat order) — presumably intended,
                        # since only the 1x1 conv weights differ; confirm.
                        out_vis_stack = tf.concat([out_occ, out_vis], axis=3)
                        out_occ_stack = tf.concat([out_occ, out_vis], axis=3)

                    out_vis = slim.conv2d(out_vis_stack,
                                          cfg.num_kps + cfg.additional_outputs,
                                          [1, 1],
                                          trainable=trainable,
                                          weights_initializer=msra_initializer,
                                          padding='SAME',
                                          normalizer_fn=None,
                                          activation_fn=None,
                                          scope='out_vis')
                    out_occ = slim.conv2d(out_occ_stack,
                                          cfg.num_kps + cfg.additional_outputs,
                                          [1, 1],
                                          trainable=trainable,
                                          weights_initializer=msra_initializer,
                                          padding='SAME',
                                          normalizer_fn=None,
                                          activation_fn=None,
                                          scope='out_occ')

                    return out_vis, out_occ