    def build_network(self, input, trainable):
        is_trainable(trainable)

        intermediate_heatmap_layers = []

        tower = layers.Conv2D(out_channel_ratio(64),
                              kernel_size=[3, 3],
                              strides=(2, 2),
                              activation=None,
                              padding='same')(input)
        tower = layers.BatchNormalization()(tower)
        tower = layers.ReLU()(tower)

        tower = inverted_bottleneck(tower, 1, out_channel_ratio(16), 1, 3)

        tower = max_pool(tower, 2, 2, 2, 2, name='max_pool_0')

        net_h_w = int(tower.shape[1])
        # build network recursively
        hg_out = self.hourglass_module(tower, STAGE_NUM,
                                       intermediate_heatmap_layers)

        for index, l2 in enumerate(intermediate_heatmap_layers):
            l2_w_h = int(l2.shape[1])
            if l2_w_h == net_h_w:
                continue
            scale = net_h_w // l2_w_h
            intermediate_heatmap_layers[index] = upsample(
                l2, scale, name="upsample_for_loss_%d" % index)
        merged_layer = tf.keras.layers.Average()(intermediate_heatmap_layers)
        return hg_out, merged_layer

    def hourglass_module(self, inp, stage_nums, intermediate_heatmap_layers):
        if stage_nums > 0:
            down_sample = max_pool(inp,
                                   2,
                                   2,
                                   2,
                                   2,
                                   name="hourglass_downsample_%d" % stage_nums)

            tower = inverted_bottleneck(down_sample, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            block_front = inverted_bottleneck(tower, up_channel_ratio(6),
                                              out_channel_ratio(24), 0, 3)

            stage_nums -= 1
            block_mid = self.hourglass_module(block_front, stage_nums,
                                              intermediate_heatmap_layers)
            block_back = inverted_bottleneck(block_mid,
                                             up_channel_ratio(6),
                                             N_KPOINTS,
                                             0,
                                             3,
                                             scope="hourglass_back_%d" %
                                             stage_nums)

            up_sample = upsample(block_back, 2,
                                 "hourglass_upsample_%d" % stage_nums)

            # jump layer
            tower = inverted_bottleneck(inp, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), 0, 3)
            branch_jump = inverted_bottleneck(tower, up_channel_ratio(6),
                                              N_KPOINTS, 0, 3)

            curr_hg_out = layers.Add()([up_sample, branch_jump])

            # intermediate supervision: collect this stage's heatmap output
            intermediate_heatmap_layers.append(curr_hg_out)

            return curr_hg_out
        else:
            return inverted_bottleneck(inp,
                                       up_channel_ratio(6),
                                       out_channel_ratio(24),
                                       0,
                                       3,
                                       scope="hourglass_mid_%d" % stage_nums)
def hourglass_module(inp, stage_nums):
    if stage_nums > 0:
        down_sample = max_pool(inp,
                               2,
                               2,
                               2,
                               2,
                               name="hourglass_downsample_%d" % stage_nums)

        block_front = slim.stack(
            down_sample,
            inverted_bottleneck, [
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            ],
            scope="hourglass_front_%d" % stage_nums)
        stage_nums -= 1
        block_mid = hourglass_module(block_front, stage_nums)
        block_back = inverted_bottleneck(block_mid,
                                         up_channel_ratio(6),
                                         N_KPOINTS,
                                         0,
                                         3,
                                         scope="hourglass_back_%d" %
                                         stage_nums)

        up_sample = upsample(block_back, 2,
                             "hourglass_upsample_%d" % stage_nums)

        # jump layer
        branch_jump = slim.stack(
            inp,
            inverted_bottleneck, [
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), N_KPOINTS, 0, 3),
            ],
            scope="hourglass_branch_jump_%d" % stage_nums)

        curr_hg_out = tf.add(up_sample,
                             branch_jump,
                             name="hourglass_out_%d" % stage_nums)
        # intermediate supervision: append this stage's heatmaps to the module-level l2s list
        l2s.append(curr_hg_out)

        return curr_hg_out

    return inverted_bottleneck(inp,
                               up_channel_ratio(6),
                               out_channel_ratio(24),
                               0,
                               3,
                               scope="hourglass_mid_%d" % stage_nums)
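This slim variant relies on the same upsample helper to bring the decoded features back to the resolution of each hourglass level. The original helper is not shown; as a rough, assumption-labeled sketch, an integer-factor bilinear resize could look like the TF 2 snippet below (in the TF 1 / slim code above, tf.image.resize_bilinear would play the same role).

import tensorflow as tf

def upsample(inputs, factor, name=None):
    # Resize the feature map by an integer factor (NHWC layout assumed).
    new_size = [int(inputs.shape[1]) * factor, int(inputs.shape[2]) * factor]
    return tf.image.resize(inputs, new_size, method='bilinear', name=name)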
Example #4
def build_network(input, trainable):
    is_trainable(trainable)

    net = convb(input, 3, 3, out_channel_ratio(32), 2, name="Conv2d_0")

    with tf.variable_scope('MobilenetV2'):

        # 128, 112
        mv2_branch_0 = slim.stack(net,
                                  inverted_bottleneck,
                                  [(1, out_channel_ratio(16), 0, 3),
                                   (1, out_channel_ratio(16), 0, 3)],
                                  scope="MobilenetV2_part_0")

        # 64, 56
        mv2_branch_1 = slim.stack(
            mv2_branch_0,
            inverted_bottleneck, [
                (up_channel_ratio(6), out_channel_ratio(24), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            ],
            scope="MobilenetV2_part_1")

        # 32, 28
        mv2_branch_2 = slim.stack(
            mv2_branch_1,
            inverted_bottleneck, [
                (up_channel_ratio(6), out_channel_ratio(32), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
            ],
            scope="MobilenetV2_part_2")

        # 16, 14
        mv2_branch_3 = slim.stack(
            mv2_branch_2,
            inverted_bottleneck, [
                (up_channel_ratio(6), out_channel_ratio(64), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
            ],
            scope="MobilenetV2_part_3")

        # 8, 7
        mv2_branch_4 = slim.stack(
            mv2_branch_3,
            inverted_bottleneck,
            [(up_channel_ratio(6), out_channel_ratio(96), 1, 3),
             (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
             (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
             (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
             (up_channel_ratio(6), out_channel_ratio(96), 0, 3)],
            scope="MobilenetV2_part_4")

        concat_mv2 = tf.concat([
            max_pool(mv2_branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
            max_pool(mv2_branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
            mv2_branch_2,
            upsample(mv2_branch_3, 2, name="mv2_3_upsample"),
            upsample(mv2_branch_4, 4, name="mv2_4_upsample")
        ], axis=3)

    with tf.variable_scope("Convolutional_Pose_Machine"):
        l2s = []
        prev = None
        for stage_number in range(STAGE_NUM):
            if prev is not None:
                inputs = tf.concat([concat_mv2, prev], axis=3)
            else:
                inputs = concat_mv2

            kernel_size = 7
            last_channel_size = 128
            if stage_number == 0:
                kernel_size = 3
                last_channel_size = 512

            _ = slim.stack(
                inputs,
                inverted_bottleneck, [
                    (2, out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                ],
                scope="stage_%d_mv2" % stage_number)

            _ = slim.stack(_,
                           separable_conv,
                           [(out_channel_ratio(last_channel_size), 1, 1),
                            (N_KPOINTS, 1, 1)],
                           scope="stage_%d_mv1" % stage_number)

            prev = _
            cpm_out = upsample(_, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s
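build_network returns the last stage's upsampled heatmaps (cpm_out) together with the per-stage outputs (l2s) collected for intermediate supervision. A hedged usage sketch of how those outputs might feed a training loss is shown below; input_image and gt_heatmaps are hypothetical placeholders, not names from the original code.

cpm_out, l2s = build_network(input_image, trainable=True)

# one L2 term per stage output (intermediate supervision), summed into the total loss
stage_losses = [tf.nn.l2_loss(stage_heatmaps - gt_heatmaps, name="stage_%d_loss" % i)
                for i, stage_heatmaps in enumerate(l2s)]
total_loss = tf.reduce_sum(stage_losses, name="total_loss")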