def build_network(input, trainable):
    """Build a MobileNetV2-style backbone plus a Convolutional Pose Machine head.

    Args:
        input: 4-D image tensor, NHWC (channel concat below uses axis=3).
        trainable: flag forwarded to is_trainable() to toggle training mode.

    Returns:
        (cpm_out, l2s): the final stage's upsampled heatmap tensor, and the
        list of every stage's upsampled heatmap tensors (for per-stage loss).
    """
    is_trainable(trainable)

    # 128*128*5*5*3*16 = 18.75M
    net = convb(input, 5, 5, 16, 2, name="Conv2d_0")
    net = dwconvb(net, 7, 7, 1, name="Conv2d_1_dw")  # 4M
    mv2_branch_0 = convb(net, 1, 1, 16, 1, name="Conv2d_1_sep")

    with tf.variable_scope('MV2'):
        # NOTE(review): inverted_bottleneck specs appear to be
        # (expansion, out_channels, subsample_flag, kernel) — confirm against
        # the helper's definition.
        # 64
        mv2_branch_1 = slim.stack(mv2_branch_0, inverted_bottleneck,
                                  [
                                      (1, 16, 1, 7),
                                      (1, 16, 0, 7),
                                  ], scope="part1")
        # 32
        mv2_branch_2 = slim.stack(mv2_branch_1, inverted_bottleneck,
                                  [
                                      (1, 32, 1, 7),
                                      (1, 32, 0, 7),
                                  ], scope="part2")
        # 16
        mv2_branch_3 = slim.stack(mv2_branch_2, inverted_bottleneck,
                                  [
                                      (1, 48, 1, 7),
                                      (1, 48, 0, 7),
                                  ], scope="part3")
        # 8
        mv2_branch_4 = slim.stack(mv2_branch_3, inverted_bottleneck,
                                  [
                                      (1, 64, 1, 5),
                                      (1, 64, 0, 5),
                                  ], scope="part4")

        # Resize every pyramid level to the mv2_branch_2 resolution and fuse
        # them along the channel axis.
        concat_mv2 = tf.concat(
            [
                max_pool(mv2_branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
                max_pool(mv2_branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
                mv2_branch_2,
                upsample(mv2_branch_3, 2, name="mv2_3_upsample"),
                upsample(mv2_branch_4, 4, name="mv2_4_upsample"),
            ], axis=3)

    with tf.variable_scope("CPM"):
        l2s = []
        prev = None
        for stage_number in range(STAGE_NUM):
            # Every stage after the first also sees the previous stage's
            # (pre-upsample) belief maps.
            if prev is not None:
                inputs = tf.concat([concat_mv2, prev], axis=3)
            else:
                inputs = concat_mv2

            # All stages share the same head configuration.  (The original
            # code had a stage-0 branch that re-assigned these exact same
            # values — removed as dead code.)
            kernel_size = 7
            last_channel_size = 256

            _ = slim.stack(inputs, inverted_bottleneck,
                           [
                               (1, 64, 0, kernel_size),
                               (1, 64, 0, kernel_size),
                               (1, 64, 0, kernel_size),
                           ], scope="stage_%d_mv2" % stage_number)
            _ = convb(_, 1, 1, last_channel_size, 1,
                      "stage_%d_mv1_0" % stage_number, relu=True)
            # NOTE(review): relu=True on the final keypoint projection clamps
            # the heatmaps to be non-negative — confirm this is intended
            # (sibling variants end with a separable_conv head instead).
            _ = convb(_, 1, 1, N_KPOINTS, 1,
                      "stage_%d_mv1_1" % stage_number, relu=True)
            prev = _
            cpm_out = upsample(_, 2, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s
def build_network(input, trainable):
    """Assemble the width-scalable MobilenetV2 backbone and CPM refinement head.

    Args:
        input: 4-D NHWC image tensor (channel concat below uses axis=3).
        trainable: flag handed to is_trainable() to select train/eval mode.

    Returns:
        (cpm_out, l2s): last stage's upsampled heatmaps, plus the list of
        every stage's upsampled heatmaps for intermediate supervision.
    """
    is_trainable(trainable)

    net = convb(input, 3, 3, out_channel_ratio(32), 2, name="Conv2d_0")

    with tf.variable_scope('MobilenetV2'):
        # Bottleneck spec tuples: (expansion, out_channels, subsample, kernel).
        # 128, 112
        mv2_branch_0 = slim.stack(
            net, inverted_bottleneck,
            [(1, out_channel_ratio(16), 0, 3)] * 2,
            scope="MobilenetV2_part_0")

        # 64, 56
        mv2_branch_1 = slim.stack(
            mv2_branch_0, inverted_bottleneck,
            [(up_channel_ratio(6), out_channel_ratio(24), 1, 3)]
            + [(up_channel_ratio(6), out_channel_ratio(24), 0, 3)] * 4,
            scope="MobilenetV2_part_1")

        # 32, 28
        mv2_branch_2 = slim.stack(
            mv2_branch_1, inverted_bottleneck,
            [(up_channel_ratio(6), out_channel_ratio(32), 1, 3)]
            + [(up_channel_ratio(6), out_channel_ratio(32), 0, 3)] * 4,
            scope="MobilenetV2_part_2")

        # 16, 14
        mv2_branch_3 = slim.stack(
            mv2_branch_2, inverted_bottleneck,
            [(up_channel_ratio(6), out_channel_ratio(64), 1, 3)]
            + [(up_channel_ratio(6), out_channel_ratio(64), 0, 3)] * 4,
            scope="MobilenetV2_part_3")

        # 8, 7
        mv2_branch_4 = slim.stack(
            mv2_branch_3, inverted_bottleneck,
            [(up_channel_ratio(6), out_channel_ratio(96), 1, 3)]
            + [(up_channel_ratio(6), out_channel_ratio(96), 0, 3)] * 4,
            scope="MobilenetV2_part_4")

        # Bring all five pyramid levels to mv2_branch_2's resolution and
        # fuse them channel-wise.
        feature_pyramid = tf.concat(
            [
                max_pool(mv2_branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
                max_pool(mv2_branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
                mv2_branch_2,
                upsample(mv2_branch_3, 2, name="mv2_3_upsample"),
                upsample(mv2_branch_4, 4, name="mv2_4_upsample"),
            ],
            axis=3)

    with tf.variable_scope("Convolutional_Pose_Machine"):
        l2s = []
        prev = None
        for stage_number in range(STAGE_NUM):
            # Stages after the first also receive the previous stage's
            # (pre-upsample) belief maps.
            inputs = (feature_pyramid if prev is None
                      else tf.concat([feature_pyramid, prev], axis=3))

            # Stage 0 uses a small kernel with a wide head; later stages
            # use a large receptive field with a narrower head.
            first_stage = stage_number == 0
            kernel_size = 3 if first_stage else 7
            head_channels = 512 if first_stage else 128

            stage_out = slim.stack(
                inputs, inverted_bottleneck,
                [
                    (2, out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                ],
                scope="stage_%d_mv2" % stage_number)

            stage_out = slim.stack(
                stage_out, separable_conv,
                [
                    (out_channel_ratio(head_channels), 1, 1),
                    (N_KPOINTS, 1, 1),
                ],
                scope="stage_%d_mv1" % stage_number)

            prev = stage_out
            cpm_out = upsample(stage_out, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s
def build_network(input, trainable):
    """Build the lightweight dwconv-downsampled backbone with a CPM head.

    Args:
        input: 4-D NHWC image tensor (channel concat below uses axis=3).
        trainable: flag forwarded to is_trainable() to toggle training mode.

    Returns:
        (cpm_out, l2s): the final stage's upsampled heatmap tensor and the
        list of all stages' upsampled heatmaps (for per-stage loss terms).
    """
    is_trainable(trainable)

    net = convb(input, 5, 5, 8, 2, name="Conv2d_0")

    with tf.variable_scope('MV'):
        # Bottleneck spec tuples: (expansion, out_channels, subsample, kernel).
        # Downsampling is done by a strided depthwise conv before each stack.
        # 128, 112
        mv2_branch_0 = dwconvb(net, 5, 5, 1, name="Conv2d_1_dw")
        mv2_branch_0 = slim.stack(mv2_branch_0, inverted_bottleneck,
                                  [
                                      (2, 16, 0, 5),
                                  ], scope="part0")
        # 64, 56
        mv2_branch_1 = dwconvb(mv2_branch_0, 7, 7, 2, name="Conv2d_2_dw")
        mv2_branch_1 = slim.stack(mv2_branch_1, inverted_bottleneck,
                                  [
                                      (2, 24, 0, 7),
                                  ], scope="part1")
        # 32, 28
        mv2_branch_2 = dwconvb(mv2_branch_1, 7, 7, 2, name="Conv2d_3_dw")
        mv2_branch_2 = slim.stack(mv2_branch_2, inverted_bottleneck,
                                  [
                                      (2, 32, 0, 7),
                                  ], scope="part2")
        # 16, 14
        mv2_branch_3 = dwconvb(mv2_branch_2, 7, 7, 2, name="Conv2d_4_dw")
        mv2_branch_3 = slim.stack(mv2_branch_3, inverted_bottleneck,
                                  [
                                      (3, 32, 0, 7),
                                      (3, 32, 0, 7),
                                  ], scope="part3")
        # 8, 7
        mv2_branch_4 = dwconvb(mv2_branch_3, 7, 7, 2, name="Conv2d_5_dw")
        mv2_branch_4 = slim.stack(mv2_branch_4, inverted_bottleneck,
                                  [
                                      (4, 32, 0, 7),
                                      (4, 32, 0, 7),
                                  ], scope="part4")

        # Bring every pyramid level to mv2_branch_2's resolution and fuse
        # them along the channel axis.
        concat_mv2 = tf.concat(
            [
                max_pool(mv2_branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
                max_pool(mv2_branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
                mv2_branch_2,
                upsample(mv2_branch_3, 2, name="mv2_3_upsample"),
                upsample(mv2_branch_4, 4, name="mv2_4_upsample"),
            ], axis=3)

    with tf.variable_scope("CPM"):
        l2s = []
        prev = None
        for stage_number in range(STAGE_NUM):
            # Stages after the first also receive the previous stage's
            # (pre-upsample) belief maps.
            if prev is not None:
                inputs = tf.concat([concat_mv2, prev], axis=3)
            else:
                inputs = concat_mv2

            # Stage 0 uses a cheaper 5x5 trunk with a narrower head;
            # refinement stages widen the kernel and the head.
            last_channel_size = 256
            if stage_number == 0:
                last_channel_size = 192
                _ = slim.stack(inputs, inverted_bottleneck,
                               [
                                   (1, 24, 0, 5),
                                   (1, 24, 0, 5),
                               ], scope="stage_%d_mv2" % stage_number)
            else:
                _ = slim.stack(inputs, inverted_bottleneck,
                               [
                                   (1, 32, 0, 7),
                                   (1, 32, 0, 7),
                               ], scope="stage_%d_mv2" % stage_number)

            _ = slim.stack(_, separable_conv,
                           [
                               (last_channel_size, 1, 1),
                               (N_KPOINTS, 1, 1),
                           ], scope="stage_%d_mv1" % stage_number)
            prev = _
            cpm_out = upsample(_, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s
def build_network(input, trainable):
    """Build the 7x7-kernel backbone variant with a bottleneck-only CPM head.

    Args:
        input: 4-D NHWC image tensor (channel concat below uses axis=3).
        trainable: flag passed to is_trainable() to select train/eval mode.

    Returns:
        (cpm_out, l2s): last stage's upsampled output, plus the list of
        every stage's upsampled outputs for intermediate supervision.
    """
    is_trainable(trainable)

    net = convb(input, 7, 7, 16, 2, name="Conv2d_0")

    with tf.variable_scope('MV'):
        # Bottleneck spec tuples: (expansion, out_channels, subsample, kernel).
        # A strided depthwise conv performs the downsampling before each stack.
        # 128
        branch_0 = dwconvb(net, 7, 7, 1, name="Conv2d_1_dw")
        branch_0 = slim.stack(
            branch_0, inverted_bottleneck,
            [(2, 16, 0, 7), (2, 32, 0, 7)],
            scope="part0")

        # 64
        branch_1 = dwconvb(branch_0, 7, 7, 2, name="Conv2d_2_dw")
        branch_1 = slim.stack(
            branch_1, inverted_bottleneck,
            [(2, 32, 0, 7)] * 2,
            scope="part1")

        # 32
        branch_2 = dwconvb(branch_1, 7, 7, 2, name="Conv2d_3_dw")
        branch_2 = slim.stack(
            branch_2, inverted_bottleneck,
            [(2, 32, 0, 7)] * 3,
            scope="part2")

        # 16
        branch_3 = dwconvb(branch_2, 7, 7, 2, name="Conv2d_4_dw")
        branch_3 = slim.stack(
            branch_3, inverted_bottleneck,
            [(4, 32, 0, 7)] * 4,
            scope="part3")

        # 8
        branch_4 = dwconvb(branch_3, 7, 7, 2, name="Conv2d_5_dw")
        branch_4 = slim.stack(
            branch_4, inverted_bottleneck,
            [(6, 32, 0, 7)] * 4,
            scope="part4")

        # Fuse all pyramid levels at branch_2's resolution, channel-wise.
        fused_features = tf.concat(
            [
                max_pool(branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
                max_pool(branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
                branch_2,
                upsample(branch_3, 2, name="mv2_3_upsample"),
                upsample(branch_4, 4, name="mv2_4_upsample"),
            ],
            axis=3)

    with tf.variable_scope("CPM"):
        l2s = []
        prev = None
        inputs = fused_features
        for stage_number in range(STAGE_NUM):
            # NOTE(review): unlike the sibling variants, `inputs` is NOT
            # re-seeded from the backbone each iteration, so each stage
            # concatenates onto the previous stage's already-concatenated
            # input and the channel count grows cumulatively — confirm this
            # accumulation is intentional.
            if prev is not None:
                inputs = tf.concat([inputs, prev], axis=3)

            # NOTE(review): the final bottleneck emits a hard-coded 21
            # channels where sibling variants use N_KPOINTS — verify.
            stage_out = slim.stack(
                inputs, inverted_bottleneck,
                [(1, 64, 0, 7)] * 3 + [(4, 21, 0, 7)],
                scope="stage_%d_mv2" % stage_number)

            prev = stage_out
            cpm_out = upsample(stage_out, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s