def create_refine_net(blocks, is_training, trainable=True):
    """RefineNet head: deepen each pyramid level with extra bottlenecks,
    upsample every level to the output resolution, concatenate, and predict
    keypoint heatmaps.

    Args:
        blocks: pyramid feature maps, ordered shallow -> deep (res2..res5).
        is_training: flag forwarded to batch-norm via resnet_arg_scope.
        trainable: whether the final conv weights are trainable.

    Returns:
        Heatmap tensor with cfg.nr_skeleton channels at cfg.output_shape.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    bottleneck = resnet_v1.bottleneck

    upsampled = []
    for level, feat in enumerate(blocks):
        fm = feat
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            # Deeper levels pass through more bottlenecks (level i gets i of
            # them), without a projection shortcut.
            for rep in range(level):
                fm = bottleneck(fm, 256, 128, stride=1,
                                scope='res{}/refine_conv{}'.format(2 + level, rep))
        # Bring every level to the common output resolution.
        fm = tf.image.resize_bilinear(
            fm, (cfg.output_shape[0], cfg.output_shape[1]),
            name='upsample_conv/res{}'.format(2 + level))
        upsampled.append(fm)

    fused = tf.concat(upsampled, axis=3)
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        fused = bottleneck(fused, 256, 128, stride=1, scope='final_bottleneck')

    res = slim.conv2d(fused, cfg.nr_skeleton, [3, 3],
                      trainable=trainable, weights_initializer=xavier,
                      padding='SAME', activation_fn=None, scope='refine_out')
    return res
def create_global_net(blocks, is_training, trainable=True):
    """GlobalNet (FPN-style) head: lateral 1x1 convs plus a top-down pathway,
    emitting a per-level heatmap prediction.

    Args:
        blocks: backbone feature maps, ordered shallow -> deep (res2..res5).
        is_training: flag forwarded to batch-norm via resnet_arg_scope.
        trainable: whether the new conv weights are trainable.

    Returns:
        (global_fms, global_outs): merged pyramid features and per-level
        heatmaps resized to cfg.output_shape, both ordered shallow -> deep.
    """
    global_fms = []
    global_outs = []
    carry = None  # running top-down feature; None at the deepest level
    initializer = tf.contrib.layers.xavier_initializer()

    # Walk the pyramid from deep (res5) down to shallow (res2).
    for idx, feat in enumerate(reversed(blocks)):
        level = 5 - idx
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            lateral = slim.conv2d(feat, 256, [1, 1],
                                  trainable=trainable,
                                  weights_initializer=initializer,
                                  padding='SAME', activation_fn=tf.nn.relu,
                                  scope='lateral/res{}'.format(level))
        if carry is None:
            carry = lateral
        else:
            # Upsample the coarser feature to this level's size and merge.
            sz = tf.shape(lateral)
            upsample = tf.image.resize_bilinear(
                carry, (sz[1], sz[2]), name='upsample/res{}'.format(level))
            upsample = slim.conv2d(upsample, 256, [1, 1],
                                   trainable=trainable,
                                   weights_initializer=initializer,
                                   padding='SAME', activation_fn=None,
                                   scope='merge/res{}'.format(level))
            carry = upsample + lateral

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            tmp = slim.conv2d(carry, 256, [1, 1],
                              trainable=trainable,
                              weights_initializer=initializer,
                              padding='SAME', activation_fn=tf.nn.relu,
                              scope='tmp/res{}'.format(level))
            out = slim.conv2d(tmp, cfg.nr_skeleton, [3, 3],
                              trainable=trainable,
                              weights_initializer=initializer,
                              padding='SAME', activation_fn=None,
                              scope='pyramid/res{}'.format(level))
        global_fms.append(carry)
        global_outs.append(
            tf.image.resize_bilinear(
                out, (cfg.output_shape[0], cfg.output_shape[1])))

    # Restore shallow -> deep ordering to match `blocks`.
    global_fms.reverse()
    global_outs.reverse()
    return global_fms, global_outs
def head_net(self, blocks, is_training, trainable=True):
    """Deconvolution head: 8x upsample the deepest backbone feature with
    three stride-2 4x4 transposed convs, then predict keypoint heatmaps.

    Args:
        blocks: backbone feature maps; only the deepest (blocks[-1]) is used.
        is_training: flag forwarded to batch-norm via resnet_arg_scope.
        trainable: whether the head weights are trainable.

    Returns:
        Heatmap tensor with cfg.nr_skeleton channels.
    """
    normal_initializer = tf.truncated_normal_initializer(0, 0.01)
    msra_initializer = tf.contrib.layers.variance_scaling_initializer()
    # NOTE(review): removed an unused xavier_initializer local.
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        out = blocks[-1]
        # Three identical stride-2 deconvs (up1..up3) -> 8x spatial upsample.
        for deconv_scope in ('up1', 'up2', 'up3'):
            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                        trainable=trainable,
                                        weights_initializer=normal_initializer,
                                        padding='SAME',
                                        activation_fn=tf.nn.relu,
                                        scope=deconv_scope)
        # Linear 1x1 conv to per-joint heatmaps; no BN, no activation.
        out = slim.conv2d(out, cfg.nr_skeleton, [1, 1],
                          trainable=trainable,
                          weights_initializer=msra_initializer,
                          padding='SAME', normalizer_fn=None,
                          activation_fn=None, scope='out')
    return out
def head_net(self, blocks, is_training, trainable=True):
    """Deconvolution head with multi-scale attention.

    Upsamples blocks[-1] through three stride-2 deconvs; after each deconv an
    attention map is produced by _attention_part_crf, resized (nearest
    neighbor) so all three maps share the final (up3) resolution, summed, and
    used to gate the features before the heatmap prediction. All intermediate
    attention tensors are stored on self.attentions (reset on every call).
    """
    normal_initializer = tf.truncated_normal_initializer(0, 0.01)
    msra_initializer = tf.contrib.layers.variance_scaling_initializer()
    xavier_initializer = tf.contrib.layers.xavier_initializer()  # NOTE(review): unused
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        out = slim.conv2d_transpose(blocks[-1], 256, [4, 4], stride=2,
                                    trainable=trainable,
                                    weights_initializer=normal_initializer,
                                    padding='SAME', activation_fn=tf.nn.relu,
                                    scope='up1')
        print(out.shape)  # NOTE(review): debug print left in graph construction
        # Attention at up1 scale; args (1,3,0,False) — semantics of
        # _attention_part_crf not visible here, presumably an attention-map
        # generator — TODO confirm against its definition.
        att1 = _attention_part_crf(out,1,3,0,False)
        # up2 and up3 each double the spatial size, so *4 brings this map to
        # the up3 resolution.
        upsample1 = tf.image.resize_nearest_neighbor(att1, tf.shape(att1)[1:3]*4, name = 'upsampling')
        print(upsample1.shape)
        # Reset the attention bookkeeping list each time the head is built.
        self.attentions = []
        self.attentions.append(upsample1)
        out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                    trainable=trainable,
                                    weights_initializer=normal_initializer,
                                    padding='SAME', activation_fn=tf.nn.relu,
                                    scope='up2')
        print(out.shape)
        att2 = _attention_part_crf(out,1,3,0,False)
        # *2 brings the up2-scale map to the up3 resolution.
        upsample2 = tf.image.resize_nearest_neighbor(att2, tf.shape(att2)[1:3]*2, name = 'upsampling')
        print(upsample2.shape)
        self.attentions.append(upsample2)
        out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                    trainable=trainable,
                                    weights_initializer=normal_initializer,
                                    padding='SAME', activation_fn=tf.nn.relu,
                                    scope='up3')
        print(out.shape)
        # Already at final resolution — no resize needed.
        upsample3 = _attention_part_crf(out,1,3,0,False)
        # Aggregate the three scale-aligned attention maps.
        aggatt = tf.add_n([upsample1,upsample2,upsample3])
        print(aggatt.shape)
        self.attentions.append(upsample3)
        self.attentions.append(aggatt)
        # Gate the features with the aggregated attention (presumably
        # broadcast over channels — TODO confirm aggatt channel count).
        out = tf.multiply(out,aggatt)
        print("Agg attention shape",aggatt.shape)
        # Second attention pass with a different 4th arg (1 vs 0) —
        # presumably per-part attention; verify against _attention_part_crf.
        partatt = _attention_part_crf(out,1,3,1,False)
        print("Final Output shape", partatt.shape)
        self.attentions.append(partatt)
        out = tf.multiply(out,partatt)
        # Linear 1x1 conv to per-keypoint heatmaps; no BN, no activation.
        out = slim.conv2d(out, cfg.num_kps, [1, 1],
                          trainable=trainable,
                          weights_initializer=msra_initializer,
                          padding='SAME', normalizer_fn=None,
                          activation_fn=None, scope='out')
        self.attentions.append(out)
    return out
def head_net(self, blocks, is_training, trainable=True, add_paf_output=False, add_nonlocal_block=False):
    """Deconvolution head with optional non-local block and PAF output.

    Args:
        blocks: backbone feature maps; only the deepest (blocks[-1]) is used.
        is_training: flag forwarded to batch-norm via resnet_arg_scope.
        trainable: whether the head weights are trainable.
        add_paf_output: if True, additionally predict part-affinity fields
            and return a (heatmaps, pafs) tuple.
        add_nonlocal_block: if True, apply self.non_local_block to the input
            feature before upsampling.

    Returns:
        Heatmap tensor with cfg.num_kps channels, or the tuple
        (heatmaps, pafs) when add_paf_output is True.
    """
    normal_initializer = tf.truncated_normal_initializer(0, 0.01)
    msra_initializer = tf.contrib.layers.variance_scaling_initializer()
    # NOTE(review): removed an unused xavier_initializer local.
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        if add_nonlocal_block:
            out = self.non_local_block(blocks[-1], 256)
        else:
            out = blocks[-1]
        # Three identical stride-2 deconvs (up1..up3) -> 8x spatial upsample.
        for deconv_scope in ('up1', 'up2', 'up3'):
            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                        trainable=trainable,
                                        weights_initializer=normal_initializer,
                                        padding='SAME',
                                        activation_fn=tf.nn.relu,
                                        scope=deconv_scope)
        if add_paf_output:
            hms_out = slim.conv2d(out, cfg.num_kps, [1, 1],
                                  trainable=trainable,
                                  weights_initializer=msra_initializer,
                                  padding='SAME', normalizer_fn=None,
                                  activation_fn=None, scope='out')
            # Two channels (x, y) per limb in cfg.kps_lines.
            paf_out = slim.conv2d(out, len(cfg.kps_lines)*2, [1, 1],
                                  trainable=trainable,
                                  weights_initializer=msra_initializer,
                                  padding='SAME', normalizer_fn=None,
                                  activation_fn=None, scope='paf')
            out = (hms_out, paf_out)
        else:
            # Sigmoid bounds the heatmap when integral regression is on.
            activation = tf.sigmoid if cfg.gauss_integral else None
            out = slim.conv2d(out, cfg.num_kps, [1, 1],
                              trainable=trainable,
                              weights_initializer=msra_initializer,
                              padding='SAME', normalizer_fn=None,
                              activation_fn=activation, scope='out')
    return out
def create_coarse_net(blocks, is_training, trainable=True):
    """CoarseNet head: deepen each pyramid level with bottlenecks, upsample
    all levels to the output resolution, concatenate, and predict coarse
    keypoint heatmaps.

    Args:
        blocks: pyramid feature maps, ordered shallow -> deep (res2..res5)
            # assumes shapes like [(N,64,48,256),(N,32,24,256),...] — TODO confirm
        is_training: flag forwarded to batch-norm via resnet_arg_scope.
        trainable: whether the final conv weights are trainable.

    Returns:
        (coarse_fms, coarse): per-level features reordered deep -> shallow,
        and the coarse heatmap tensor (cfg.nr_skeleton channels).
    """
    # NOTE(review): removed an unused `last_fm = None` local and a large
    # commented-out FPN block (which also misspelled is_training as
    # is_trainging) — dead code kept in version control, not in source.
    initializer = tf.contrib.layers.xavier_initializer()
    bottleneck = resnet_v1.bottleneck
    coarse_fms = []
    coarse_outs = []
    for i, block in enumerate(blocks):
        mid_fm = block
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            # Deeper levels pass through more bottlenecks (level i gets i).
            for j in range(i):
                mid_fm = bottleneck(mid_fm, 256, 128, stride=1,
                                    scope='res{}/coarse_conv{}'.format(2 + i, j))
        # Keep the pre-resize feature for the caller...
        coarse_fms.append(mid_fm)
        # ...and the resized one for the concatenated prediction.
        mid_fm = tf.image.resize_bilinear(
            mid_fm, (cfg.output_shape[0], cfg.output_shape[1]),
            name='upsample_conv/res{}'.format(2 + i))
        coarse_outs.append(mid_fm)

    coarse_out = tf.concat(coarse_outs, axis=3)
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        coarse_out = bottleneck(coarse_out, 256, 128, stride=1,
                                scope='coarse_final_bottleneck')
    coarse = slim.conv2d(coarse_out, cfg.nr_skeleton, [3, 3],
                         trainable=trainable, weights_initializer=initializer,
                         padding='SAME', activation_fn=None,
                         scope='coarse_out')
    # Callers expect the feature list deep -> shallow.
    coarse_fms.reverse()
    return coarse_fms, coarse
def head_net(self, blocks, is_training, trainable=True):
    """Deconvolution head with optional occlusion-aware dual branches.

    Behavior is selected by cfg.MODEL flags:
      * occluded_detection off: plain up1-up3 deconv stack + single heatmap
        output (returns one tensor).
      * occluded_detection on, occluded_cross_branch off: shared up1/up2,
        branch at up3 into visible/occluded heads (returns a 2-tuple).
      * occluded_detection on, occluded_cross_branch on: branch already at
        up2; each branch's final 1x1 conv sees the concatenation of both
        branches, optionally with gradients stopped across branches
        (returns a 2-tuple).
    """
    normal_initializer = tf.truncated_normal_initializer(0, 0.01)
    msra_initializer = tf.contrib.layers.variance_scaling_initializer()
    xavier_initializer = tf.contrib.layers.xavier_initializer()  # NOTE(review): unused
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        # First 2x upsample, always shared by every configuration.
        out = slim.conv2d_transpose(blocks[-1], 256, [4, 4], stride=2,
                                    trainable=trainable,
                                    weights_initializer=normal_initializer,
                                    padding='SAME', activation_fn=tf.nn.relu,
                                    scope='up1')
        if (not cfg.MODEL.occluded_detection):
            # Single-branch head: up2, up3, then cfg.num_kps heatmaps.
            out = slim.conv2d_transpose(
                out, 256, [4, 4], stride=2, trainable=trainable,
                weights_initializer=normal_initializer, padding='SAME',
                activation_fn=tf.nn.relu, scope='up2')
            out = slim.conv2d_transpose(
                out, 256, [4, 4], stride=2, trainable=trainable,
                weights_initializer=normal_initializer, padding='SAME',
                activation_fn=tf.nn.relu, scope='up3')
            out = slim.conv2d(out, cfg.num_kps, [1, 1],
                              trainable=trainable,
                              weights_initializer=msra_initializer,
                              padding='SAME', normalizer_fn=None,
                              activation_fn=None, scope='out')
            return out
        else:
            if (not cfg.MODEL.occluded_cross_branch):
                # Shared up2; split into occluded/visible only at up3.
                out = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up2')
                out_occ = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up3_occ')
                out_vis = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up3_vis')
                # Each branch predicts num_kps + additional_outputs channels.
                out_vis = slim.conv2d(out_vis,
                                      cfg.num_kps + cfg.additional_outputs,
                                      [1, 1], trainable=trainable,
                                      weights_initializer=msra_initializer,
                                      padding='SAME', normalizer_fn=None,
                                      activation_fn=None, scope='out_vis')
                out_occ = slim.conv2d(out_occ,
                                      cfg.num_kps + cfg.additional_outputs,
                                      [1, 1], trainable=trainable,
                                      weights_initializer=msra_initializer,
                                      padding='SAME', normalizer_fn=None,
                                      activation_fn=None, scope='out_occ')
                return out_vis, out_occ
            else:
                # Cross-branch head: split already at up2.
                out_vis = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up2_vis')
                out_occ = slim.conv2d_transpose(
                    out, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up2_occ')
                out_occ = slim.conv2d_transpose(
                    out_occ, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up3_occ')
                out_vis = slim.conv2d_transpose(
                    out_vis, 256, [4, 4], stride=2, trainable=trainable,
                    weights_initializer=normal_initializer, padding='SAME',
                    activation_fn=tf.nn.relu, scope='up3_vis')
                if (cfg.MODEL.stop_crossbranch_grad
                    ):  #stop gradient for cross branching
                    # Each branch sees the other branch's features but does
                    # not backpropagate into them.
                    out_vis_stack = tf.concat(
                        [tf.stop_gradient(out_occ), out_vis], axis=3)
                    out_occ_stack = tf.concat(
                        [out_occ, tf.stop_gradient(out_vis)], axis=3)
                else:
                    # NOTE(review): without grad-stopping both stacks are
                    # [occ, vis] — the channel order differs from the
                    # vis-stack above ([occ, vis] vs [stop(occ), vis] is the
                    # same order, but the occ-stack keeps occ first too);
                    # confirm this asymmetry is intended.
                    out_vis_stack = tf.concat([out_occ, out_vis], axis=3)
                    out_occ_stack = tf.concat([out_occ, out_vis], axis=3)
                out_vis = slim.conv2d(out_vis_stack,
                                      cfg.num_kps + cfg.additional_outputs,
                                      [1, 1], trainable=trainable,
                                      weights_initializer=msra_initializer,
                                      padding='SAME', normalizer_fn=None,
                                      activation_fn=None, scope='out_vis')
                out_occ = slim.conv2d(out_occ_stack,
                                      cfg.num_kps + cfg.additional_outputs,
                                      [1, 1], trainable=trainable,
                                      weights_initializer=msra_initializer,
                                      padding='SAME', normalizer_fn=None,
                                      activation_fn=None, scope='out_occ')
                return out_vis, out_occ