Example #1
# Third-party imports needed by this snippet. Project-local helpers such as
# `applications`, `replicate_model`, `specify_regularizers`, `summarize_model`,
# `cdist`, and `batch_hard` come from the surrounding project; their import
# paths are not shown here.
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import (Activation, BatchNormalization,
                                     Concatenate, Conv2D, Dense,
                                     GlobalAveragePooling2D, Input, Lambda)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


def init_model(backbone_model_name,
               freeze_backbone_for_N_epochs,
               input_shape,
               region_num,
               attribute_name_to_label_encoder_dict,
               kernel_regularization_factor,
               bias_regularization_factor,
               gamma_regularization_factor,
               beta_regularization_factor,
               use_adaptive_l1_l2_regularizer,
               min_value_in_clipping,
               max_value_in_clipping,
               share_last_block=False):
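    """Build the training and inference models for person re-identification.

    Returns a (training_model, inference_model, preprocess_input) tuple.
    """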
    def _add_objective_module(input_tensor):
        # Apply GlobalAveragePooling2D when the input is a 4D feature map
        if len(K.int_shape(input_tensor)) == 4:
            global_average_pooling_tensor = GlobalAveragePooling2D()(
                input_tensor)
        else:
            global_average_pooling_tensor = input_tensor
        if min_value_in_clipping is not None and max_value_in_clipping is not None:
            global_average_pooling_tensor = Lambda(
                lambda x: K.clip(x,
                                 min_value=min_value_in_clipping,
                                 max_value=max_value_in_clipping))(
                                     global_average_pooling_tensor)

        # https://arxiv.org/abs/1801.07698v1 Section 3.2.2 Output setting
        # https://arxiv.org/abs/1807.11042
        classification_input_tensor = global_average_pooling_tensor
        classification_embedding_tensor = BatchNormalization(
            scale=True, epsilon=2e-5)(classification_input_tensor)

        # Add categorical crossentropy loss
        assert len(attribute_name_to_label_encoder_dict) == 1
        label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        class_num = len(label_encoder.classes_)  # 751 identities for Market1501
        classification_output_tensor = Dense(
            units=class_num,
            use_bias=False,
            kernel_initializer=RandomNormal(
                mean=0.0, stddev=0.001))(classification_embedding_tensor)
        classification_output_tensor = Activation("softmax")(
            classification_output_tensor)

        # Add the miscellaneous output used by the triplet loss
        miscellaneous_input_tensor = global_average_pooling_tensor
        miscellaneous_embedding_tensor = miscellaneous_input_tensor
        miscellaneous_output_tensor = miscellaneous_input_tensor

        return (classification_output_tensor, classification_embedding_tensor,
                miscellaneous_output_tensor, miscellaneous_embedding_tensor)

    def _apply_concatenation(tensor_list):
        if len(tensor_list) == 1:
            return tensor_list[0]
        else:
            return Concatenate()(tensor_list)

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initialization
    classification_output_tensor_list = []
    classification_embedding_tensor_list = []
    miscellaneous_output_tensor_list = []
    miscellaneous_embedding_tensor_list = []

    # Instantiate the early blocks
    model_instantiation = getattr(applications, backbone_model_name, None)
    assert model_instantiation is not None, "Backbone {} is not supported.".format(
        backbone_model_name)
    submodel_list, preprocess_input = model_instantiation(
        input_shape=input_shape)
    vanilla_input_tensor = Input(shape=K.int_shape(submodel_list[0].input)[1:])
    intermediate_output_tensor = vanilla_input_tensor
    for submodel in submodel_list[:-1]:
        if freeze_backbone_for_N_epochs > 0:
            submodel.trainable = False
        intermediate_output_tensor = submodel(intermediate_output_tensor)

    # Instantiate the last blocks
    last_block = submodel_list[-1]
    last_block_for_global_branch_model = replicate_model(
        last_block, name="last_block_for_global_branch")
    if freeze_backbone_for_N_epochs > 0:
        last_block_for_global_branch_model.trainable = False
    if share_last_block:
        last_block_for_regional_branch_model = last_block_for_global_branch_model
    else:
        last_block_for_regional_branch_model = replicate_model(
            last_block, name="last_block_for_regional_branch")
        if freeze_backbone_for_N_epochs > 0:
            last_block_for_regional_branch_model.trainable = False

    # Add the global branch
    (classification_output_tensor, classification_embedding_tensor,
     miscellaneous_output_tensor,
     miscellaneous_embedding_tensor) = _add_objective_module(
         last_block_for_global_branch_model(intermediate_output_tensor))
    classification_output_tensor_list.append(classification_output_tensor)
    classification_embedding_tensor_list.append(
        classification_embedding_tensor)
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)
    miscellaneous_embedding_tensor_list.append(miscellaneous_embedding_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
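        # Split the feature map height-wise into stripes of equal height;
        # the last stripe absorbs any remainder when the height does not
        # divide evenly by region_num.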
        for region_index in range(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            if region_index == region_num - 1:
                end_index = total_height
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the regional branch
            (classification_output_tensor, classification_embedding_tensor,
             miscellaneous_output_tensor,
             miscellaneous_embedding_tensor) = _add_objective_module(
                 sliced_regional_branch_output_tensor)
            classification_output_tensor_list.append(
                classification_output_tensor)
            classification_embedding_tensor_list.append(
                classification_embedding_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)
            miscellaneous_embedding_tensor_list.append(
                miscellaneous_embedding_tensor)

    # Define the merged model
    embedding_tensor_list = [
        _apply_concatenation(miscellaneous_embedding_tensor_list)
    ]
    embedding_size_list = [
        K.int_shape(embedding_tensor)[1]
        for embedding_tensor in embedding_tensor_list
    ]
    merged_embedding_tensor = _apply_concatenation(embedding_tensor_list)
    merged_model = Model(inputs=[vanilla_input_tensor],
                         outputs=classification_output_tensor_list +
                         miscellaneous_output_tensor_list +
                         [merged_embedding_tensor])
    merged_model = specify_regularizers(merged_model,
                                        kernel_regularization_factor,
                                        bias_regularization_factor,
                                        gamma_regularization_factor,
                                        beta_regularization_factor,
                                        use_adaptive_l1_l2_regularizer)

    # Define the models for training/inference
    training_model = Model(inputs=[merged_model.input],
                           outputs=merged_model.output[:-1],
                           name="training_model")
    inference_model = Model(inputs=[merged_model.input],
                            outputs=[merged_model.output[-1]],
                            name="inference_model")
    inference_model.embedding_size_list = embedding_size_list

    # Compile the model
    classification_loss_function_list = [
        "categorical_crossentropy"
    ] * len(classification_output_tensor_list)
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    training_model.compile_kwargs = {
        "optimizer": Adam(),
        "loss": (classification_loss_function_list +
                 miscellaneous_loss_function_list)
    }
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the models
    # summarize_model(training_model)
    # summarize_model(inference_model)

    return training_model, inference_model, preprocess_input
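The snippet above calls `cdist` and `batch_hard` without defining them; Example #2 below reuses the same pair. They correspond to the batch-hard triplet loss of Hermans et al., "In Defense of the Triplet Loss for Person Re-Identification" (arXiv:1703.07737). The following is a minimal sketch under that assumption, with signatures matched to the calls above; the project's actual implementation may differ.

import tensorflow as tf


def cdist(a, b, metric="euclidean"):
    # All pairwise distances between the rows of `a` and the rows of `b`.
    diffs = tf.expand_dims(a, axis=1) - tf.expand_dims(b, axis=0)
    if metric == "sqeuclidean":
        return tf.reduce_sum(tf.square(diffs), axis=-1)
    if metric == "euclidean":
        # The small constant keeps the gradient of sqrt finite at zero.
        return tf.sqrt(tf.reduce_sum(tf.square(diffs), axis=-1) + 1e-12)
    raise NotImplementedError("Unsupported metric: {}".format(metric))


def batch_hard(dists, pids, margin="soft"):
    # For each anchor, take the furthest same-identity sample and the
    # closest different-identity sample, then penalize their gap.
    same_identity_mask = tf.cast(
        tf.equal(tf.expand_dims(pids, axis=1), tf.expand_dims(pids, axis=0)),
        dists.dtype)
    furthest_positive = tf.reduce_max(dists * same_identity_mask, axis=1)
    # Push same-identity entries out of contention before taking the min.
    closest_negative = tf.reduce_min(dists + 1e5 * same_identity_mask, axis=1)
    diff = furthest_positive - closest_negative
    if margin == "soft":
        return tf.math.softplus(diff)  # smooth variant of max(0, diff + m)
    return tf.maximum(diff + float(margin), 0.0)

With the PK-style batches this loss expects (P identities, K images each), `y_true` carries one-hot identity labels, which `tf.argmax(y_true, axis=-1)` converts back into the `pids` vector.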
Example #2
# Third-party imports needed by this snippet. Project-local helpers such as
# `Applications`, `GlobalGeMPooling2D`, `replicate_model`,
# `specify_regularizers`, `specify_trainable`, `summarize_model`, `cdist`,
# and `batch_hard` come from the surrounding project; their import paths are
# not shown here.
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D,
                                     Dense, GlobalAveragePooling2D,
                                     GlobalMaxPooling2D, Input, Lambda)
from tensorflow.keras.losses import categorical_crossentropy, mean_squared_error
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


def init_model(backbone_model_name, freeze_backbone_for_N_epochs, input_shape,
               region_num, attribute_name_to_label_encoder_dict,
               kernel_regularization_factor, bias_regularization_factor,
               gamma_regularization_factor, beta_regularization_factor,
               pooling_mode, min_value, max_value, use_horizontal_flipping):
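    """Build the training and inference models for person re-identification,
    with a configurable global pooling mode and an optional
    horizontal-flipping consistency loss.

    Returns a (training_model, inference_model) tuple.
    """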
    def _add_pooling_module(input_tensor):
        # Add a global pooling layer
        output_tensor = input_tensor
        if len(K.int_shape(output_tensor)) == 4:
            if pooling_mode == "Average":
                output_tensor = GlobalAveragePooling2D()(output_tensor)
            elif pooling_mode == "Max":
                output_tensor = GlobalMaxPooling2D()(output_tensor)
            elif pooling_mode == "GeM":
                output_tensor = GlobalGeMPooling2D()(output_tensor)
            else:
                raise ValueError(
                    "{} is an invalid pooling_mode!".format(pooling_mode))

        # Add the clipping operation
        if min_value is not None and max_value is not None:
            output_tensor = Lambda(lambda x: K.clip(
                x, min_value=min_value, max_value=max_value))(output_tensor)

        return output_tensor

    def _add_classification_module(input_tensor):
        # Add a batch normalization layer
        output_tensor = input_tensor
        output_tensor = BatchNormalization(epsilon=2e-5)(output_tensor)

        # Add a dense layer with softmax activation
        label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        class_num = len(label_encoder.classes_)
        output_tensor = Dense(units=class_num,
                              use_bias=False,
                              kernel_initializer=RandomNormal(
                                  mean=0.0, stddev=0.001))(output_tensor)
        output_tensor = Activation("softmax")(output_tensor)

        return output_tensor

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initialization
    miscellaneous_output_tensor_list = []

    # Instantiate the early blocks
    applications_instance = Applications()
    model_name_to_model_function = applications_instance.get_model_name_to_model_function()
    assert backbone_model_name in model_name_to_model_function, \
        "Backbone {} is not supported.".format(backbone_model_name)
    model_function = model_name_to_model_function[backbone_model_name]
    blocks = applications_instance.get_model_in_blocks(
        model_function=model_function, include_top=False)
    vanilla_input_tensor = Input(shape=input_shape)
    intermediate_output_tensor = vanilla_input_tensor
    for block in blocks[:-1]:
        block = Applications.wrap_block(block, intermediate_output_tensor)
        intermediate_output_tensor = block(intermediate_output_tensor)

    # Instantiate the last blocks
    last_block = Applications.wrap_block(blocks[-1],
                                         intermediate_output_tensor)
    last_block_for_global_branch_model = replicate_model(
        model=last_block, suffix="global_branch")
    last_block_for_regional_branch_model = replicate_model(
        model=last_block, suffix="regional_branch")

    # Add the global branch
    miscellaneous_output_tensor = _add_pooling_module(
        input_tensor=last_block_for_global_branch_model(
            intermediate_output_tensor))
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
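        # Split the feature map height-wise into stripes of equal height;
        # the last stripe absorbs any remainder when the height does not
        # divide evenly by region_num.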
        for region_index in range(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            if region_index == region_num - 1:
                end_index = total_height
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the regional branch
            miscellaneous_output_tensor = _add_pooling_module(
                input_tensor=sliced_regional_branch_output_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)

    # Define the model used in inference
    inference_model = Model(inputs=[vanilla_input_tensor],
                            outputs=miscellaneous_output_tensor_list,
                            name="inference_model")
    specify_regularizers(inference_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in classification
    classification_input_tensor_list = [
        Input(shape=K.int_shape(item)[1:])
        for item in miscellaneous_output_tensor_list
    ]
    classification_output_tensor_list = []
    for classification_input_tensor in classification_input_tensor_list:
        classification_output_tensor = _add_classification_module(
            input_tensor=classification_input_tensor)
        classification_output_tensor_list.append(classification_output_tensor)
    classification_model = Model(inputs=classification_input_tensor_list,
                                 outputs=classification_output_tensor_list,
                                 name="classification_model")
    specify_regularizers(classification_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in training
    expand = lambda x: x if isinstance(x, list) else [x]
    vanilla_input_tensor = Input(shape=K.int_shape(inference_model.input)[1:])
    vanilla_feature_tensor_list = expand(inference_model(vanilla_input_tensor))
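    # Average the features of the original and horizontally flipped inputs;
    # their disagreement is penalized by the flipping loss added below.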
    if use_horizontal_flipping:
        flipped_input_tensor = tf.image.flip_left_right(vanilla_input_tensor)
        flipped_feature_tensor_list = expand(
            inference_model(flipped_input_tensor))
        merged_feature_tensor_list = [
            sum(item_tuple) / 2 for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
    else:
        merged_feature_tensor_list = vanilla_feature_tensor_list
    miscellaneous_output_tensor_list = merged_feature_tensor_list
    classification_output_tensor_list = expand(
        classification_model(merged_feature_tensor_list))
    training_model = Model(inputs=[vanilla_input_tensor],
                           outputs=miscellaneous_output_tensor_list +
                           classification_output_tensor_list,
                           name="training_model")

    # Add the flipping loss
    if use_horizontal_flipping:
        flipping_loss_list = [
            K.mean(mean_squared_error(*item_tuple)) for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
        flipping_loss = sum(flipping_loss_list)
        training_model.add_metric(flipping_loss,
                                  name="flipping",
                                  aggregation="mean")
        training_model.add_loss(1.0 * flipping_loss)

    # Compile the model
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    categorical_crossentropy_loss_function = lambda y_true, y_pred: 1.0 * categorical_crossentropy(
        y_true, y_pred, from_logits=False, label_smoothing=0.0)
    classification_loss_function_list = [
        categorical_crossentropy_loss_function
    ] * len(classification_output_tensor_list)
    training_model.compile_kwargs = {
        "optimizer": Adam(),
        "loss": (miscellaneous_loss_function_list +
                 classification_loss_function_list)
    }
    if freeze_backbone_for_N_epochs > 0:
        specify_trainable(model=training_model,
                          trainable=False,
                          keywords=[block.name for block in blocks])
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the training model
    summarize_model(training_model)

    return training_model, inference_model
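Example #2 references `GlobalGeMPooling2D`, which is not a stock Keras layer. Generalized-mean (GeM) pooling (Radenović et al., arXiv:1711.02512) raises activations to a learnable power p before averaging, interpolating between average pooling (p = 1) and max pooling (p → ∞). The following is a minimal sketch under that assumption; the project's own layer may differ in details such as the initial value of p.

import tensorflow as tf
from tensorflow.keras.layers import Layer


class GlobalGeMPooling2D(Layer):
    # Generalized-mean pooling over the two spatial dimensions. With p = 1
    # it reduces to average pooling; as p grows it approaches max pooling.
    def __init__(self, initial_p=3.0, epsilon=1e-6, **kwargs):
        super().__init__(**kwargs)
        self.initial_p = initial_p
        self.epsilon = epsilon

    def build(self, input_shape):
        # The pooling exponent p is learned jointly with the network.
        self.p = self.add_weight(
            name="p",
            shape=(),
            initializer=tf.keras.initializers.Constant(self.initial_p),
            trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        # Clamp activations so the fractional power stays well-defined.
        clipped = tf.maximum(inputs, self.epsilon)
        pooled = tf.reduce_mean(tf.pow(clipped, self.p), axis=[1, 2])
        return tf.pow(pooled, 1.0 / self.p)

It drops in exactly like the built-in global pooling layers, e.g. `output_tensor = GlobalGeMPooling2D()(output_tensor)` inside `_add_pooling_module`.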