def create_model(Bias_layer,
                 low,
                 up,
                 F,
                 beta,
                 gamma,
                 Co,
                 m,
                 a0RNN,
                 batch_input_shape,
                 selectbias,
                 selectidx,
                 selectdk,
                 selectwk,
                 myDtype,
                 return_sequences=False,
                 unroll=False):

    batch_adjusted_shape = (batch_input_shape[2] + 1, )  # Adding state
    placeHolder = Input(shape=(batch_input_shape[2] + 1, ))  # Adding state

    filterBias = inputsSelection(batch_adjusted_shape, selectbias)(placeHolder)

    filterSig = inputsSelection(batch_adjusted_shape, selectidx)(placeHolder)

    filterdK = inputsSelection(batch_adjusted_shape, selectdk)(placeHolder)

    filterda = inputsSelection(batch_adjusted_shape, selectwk)(placeHolder)

    MLP_min = low
    MLP_range = up - low

    Bias_layer = Bias_layer(filterBias)
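    # Rescale the bias MLP output (assumed to lie in [0, 1]) to the physical range [low, up].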
    MLP = Lambda(lambda x: ((x * MLP_range) + MLP_min))(Bias_layer)

    Filter = Lambda(lambda x: sign(x))(filterSig)

    Bias_filtered_layer = Multiply()([MLP, Filter])

    dk_input_shape = filterdK.get_shape()

    dkLayer = StressIntensityRange(input_shape=dk_input_shape,
                                   dtype=myDtype,
                                   trainable=False)
    dkLayer.build(input_shape=dk_input_shape)
    dkLayer.set_weights([np.asarray([F], dtype=dkLayer.dtype)])
    dkLayer = dkLayer(filterdK)

    wmInput = Concatenate(axis=-1)([dkLayer, filterda])
    wm_input_shape = wmInput.get_shape()

    wmLayer = WalkerModel(input_shape=wm_input_shape,
                          dtype=myDtype,
                          trainable=False)
    wmLayer.build(input_shape=wm_input_shape)
    wmLayer.set_weights(
        [np.asarray([beta, gamma, Co, m], dtype=wmLayer.dtype)])
    wmLayer = wmLayer(wmInput)

    da_layer = Add()([Bias_filtered_layer, wmLayer])

    functionalModel = Model(inputs=[placeHolder], outputs=[da_layer])
    "-------------------------------------------------------------------------"
    CDMCellHybrid = CumulativeDamageCell(model=functionalModel,
                                         batch_input_shape=batch_input_shape,
                                         dtype=myDtype,
                                         initial_damage=a0RNN)

    CDMRNNhybrid = RNN(cell=CDMCellHybrid,
                       return_sequences=return_sequences,
                       return_state=False,
                       batch_input_shape=batch_input_shape,
                       unroll=unroll)

    model = Sequential()
    model.add(CDMRNNhybrid)
    model.compile(loss=mape, optimizer=RMSprop(1e-11), metrics=['mse'])
    return model
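
# A minimal, hypothetical usage sketch (not from the source): it assumes the custom
# PINN layers used above (inputsSelection, StressIntensityRange, WalkerModel,
# CumulativeDamageCell) are importable, and that the select* arguments are index masks
# over the concatenated [inputs, state] vector. All numeric values are placeholders.
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

bias_mlp = Sequential([Dense(20, activation='elu', input_shape=(3,)),
                       Dense(1, activation='sigmoid')])     # learns the bias correction
batch_input_shape = (10, 500, 2)                            # (series, cycles, inputs)
a0RNN = np.full((10, 1), 5e-3, dtype='float32')             # initial crack length
model = create_model(Bias_layer=bias_mlp, low=-1e-4, up=1e-4, F=2.8, beta=1e-3,
                     gamma=0.5, Co=1e-10, m=3.2, a0RNN=a0RNN,
                     batch_input_shape=batch_input_shape,
                     selectbias=[0, 1, 2], selectidx=[2], selectdk=[0, 2],
                     selectwk=[1], myDtype='float32')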
Example #2
def bn_feature_net_skip_3D(receptive_field=61,
                           input_shape=(5, 256, 256, 1),
                           fgbg_model=None,
                           last_only=True,
                           n_skips=2,
                           norm_method='std',
                           padding_mode='reflect',
                           **kwargs):
    """Creates a 3D featurenet with skip-connections.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): Create input tensor with this shape.
        fgbg_model (tensorflow.keras.Model): Concatenate output of this model
            with the inputs as a skip-connection.
        last_only (bool): Model will only output the final prediction,
            and not return any of the underlying model predictions.
        n_skips (int): The number of skip-connections
        norm_method (str): Normalization method to use with the
            :mod:`deepcell.layers.normalization.ImageNormalization3D` layer.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        kwargs (dict): Other model options defined in `~bn_feature_net_3D`

    Returns:
        tensorflow.keras.Model: 3D FeatureNet with skip-connections
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    inputs = Input(shape=input_shape)
    img = ImageNormalization3D(norm_method=norm_method,
                               filter_size=receptive_field)(inputs)

    models = []
    model_outputs = []

    if fgbg_model is not None:
        for layer in fgbg_model.layers:
            layer.trainable = False
        models.append(fgbg_model)
        fgbg_output = fgbg_model(inputs)
        if isinstance(fgbg_output, list):
            fgbg_output = fgbg_output[-1]
        model_outputs.append(fgbg_output)

    for _ in range(n_skips + 1):
        if model_outputs:
            model_input = Concatenate(axis=channel_axis)(
                [img, model_outputs[-1]])
        else:
            model_input = img

        new_input_shape = model_input.get_shape().as_list()[1:]
        models.append(
            bn_feature_net_3D(receptive_field=receptive_field,
                              input_shape=new_input_shape,
                              norm_method=None,
                              dilated=True,
                              padding=True,
                              padding_mode=padding_mode,
                              **kwargs))
        model_outputs.append(models[-1](model_input))

    if last_only:
        model = Model(inputs=inputs, outputs=model_outputs[-1])
    elif fgbg_model is None:
        model = Model(inputs=inputs, outputs=model_outputs)
    else:
        model = Model(inputs=inputs, outputs=model_outputs[1:])

    return model
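
# A short, hypothetical usage sketch, assuming the deepcell package (which provides
# this function and bn_feature_net_3D) is installed; n_features is forwarded to
# bn_feature_net_3D via **kwargs, and the shapes below are illustrative.
model = bn_feature_net_skip_3D(receptive_field=61,
                               input_shape=(5, 128, 128, 1),
                               n_skips=1,
                               norm_method='std',
                               n_features=3)
model.summary()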
Example #3
    def __init__(self,
                 sess,
                 ob_space,
                 action_space,
                 nbatch,
                 nsteps,
                 reuse=False):
        # This will be used to initialize our kernels
        gain = np.sqrt(2)

        self.tokenizer = Tokenizer(num_words=5000)
        # Based on the action space, select which probability distribution type to use
        # for the actions of our stochastic policy (in our case DiagGaussianPdType,
        # i.e. a diagonal Gaussian distribution)
        self.pdtype = make_pdtype(action_space)

        song_text_shape = (None, 200)

        category_embedding_shape = (None, 1)
        embeddings = []

        girl_1_inputs_ = tf.placeholder(tf.float32,
                                        category_embedding_shape,
                                        name="girl_1_inputs_")
        girl_1_inputs_keras = tf.keras.layers.Input(tensor=girl_1_inputs_)
        embedding_size = EXTRA_SMALL_EMBEDDINGS_DIM
        embedding = Embedding(EXTRA_SMALL_VOCAB_SIZE,
                              embedding_size,
                              input_length=1)(girl_1_inputs_keras)
        embeddings.append(Reshape(target_shape=(embedding_size, ))(embedding))

        girl_2_inputs_ = tf.placeholder(tf.float32,
                                        category_embedding_shape,
                                        name="girl_2_inputs_")
        girl_2_inputs_keras = tf.keras.layers.Input(tensor=girl_2_inputs_)
        embedding_size = EXTRA_SMALL_EMBEDDINGS_DIM
        embedding = Embedding(EXTRA_SMALL_VOCAB_SIZE,
                              embedding_size,
                              input_length=1)(girl_2_inputs_keras)
        embeddings.append(Reshape(target_shape=(embedding_size, ))(embedding))

        girl_3_inputs_ = tf.placeholder(tf.float32,
                                        category_embedding_shape,
                                        name="girl_3_inputs_")
        girl_3_inputs_keras = tf.keras.layers.Input(tensor=girl_3_inputs_)
        embedding_size = EXTRA_SMALL_EMBEDDINGS_DIM
        embedding = Embedding(EXTRA_SMALL_VOCAB_SIZE,
                              embedding_size,
                              input_length=1)(girl_3_inputs_keras)
        embeddings.append(Reshape(target_shape=(embedding_size, ))(embedding))

        girl_4_inputs_ = tf.placeholder(tf.float32,
                                        category_embedding_shape,
                                        name="girl_4_inputs_")
        girl_4_inputs_keras = tf.keras.layers.Input(tensor=girl_4_inputs_)
        embedding_size = EXTRA_SMALL_EMBEDDINGS_DIM
        embedding = Embedding(EXTRA_SMALL_VOCAB_SIZE,
                              embedding_size,
                              input_length=1)(girl_4_inputs_keras)
        embeddings.append(Reshape(target_shape=(embedding_size, ))(embedding))

        current_girl_inputs_ = tf.placeholder(tf.float32,
                                              category_embedding_shape,
                                              name="current_girl_inputs_")
        current_girl_inputs_keras = tf.keras.layers.Input(
            tensor=current_girl_inputs_)
        embedding_size = EXTRA_SMALL_EMBEDDINGS_DIM
        embedding = Embedding(EXTRA_SMALL_VOCAB_SIZE,
                              embedding_size,
                              input_length=1)(current_girl_inputs_keras)
        embeddings.append(Reshape(target_shape=(embedding_size, ))(embedding))

        # Create the input placeholder
        non_category_data_input_ = tf.placeholder(
            tf.float32, (None, GUMBALL_FIELD_REMAINDER),
            name="non_category_data_input")
        combined_inputs_ = tf.placeholder(
            tf.float32, (None, ob_space.shape[1] + MM_EMBEDDINGS_DIM * 2),
            name="combined_input")
        text_inputs_ = tf.placeholder(tf.float32,
                                      song_text_shape,
                                      name="text_input")

        available_moves = tf.placeholder(tf.float32, [None, action_space.n],
                                         name="availableActions")
        """
		Build the model
		Embedding
		LSTM

		3 FC for spatial dependencies
		1 common FC

		1 FC for policy (actor)
		1 FC for value (critic)

		"""
        with tf.variable_scope('model', reuse=reuse):
            # text reading LSTM
            #			lt_layer = lstm_layer()
            text_inputs_keras = tf.keras.layers.Input(tensor=text_inputs_)

            text_out = lstm_layer(text_inputs_keras)

            shape = text_out.get_shape().as_list()[1:]  # e.g. [9, 2]
            dim = np.prod(shape)  # e.g. prod([9, 2]) = 18
            print('text_flatten before reshape', text_out.shape)
            text_flatten = tf.reshape(text_out, [1, -1])  # -1 infers the remaining dimension

            print('embeds', len(embeddings))
            merged = Concatenate(axis=-1)(embeddings)

            # This returns a tensor
            non_category_data_input_keras = tf.keras.layers.Input(
                tensor=non_category_data_input_)
            categorical_dense = tf.keras.layers.Dense(
                512, activation='relu')(merged)
            categorical_dense = Reshape(
                target_shape=(512, ))(categorical_dense)
            non_categorical_dense = tf.keras.layers.Dense(
                512, activation='relu')(non_category_data_input_keras)

            combined_fields = Concatenate(axis=-1)(
                [non_categorical_dense, categorical_dense])
            # reshape to add a channel dimension for Conv1D
            comb_shape = combined_fields.get_shape()
            combined_fields = K.expand_dims(combined_fields, 2)
            print('combined_fields expanded dim', combined_fields.get_shape())

            conv1 = Conv1D(
                100,
                10,
                activation='relu',
                batch_input_shape=(
                    None, combined_fields.get_shape()[1]))(combined_fields)
            #			conv1 = Conv1D(100, 10, activation='relu', batch_input_shape=(None, ob_space.shape[1]))(field_inputs_)
            conv1 = Conv1D(100, 10, activation='relu')(conv1)
            conv1 = MaxPooling1D(3)(conv1)
            conv1 = Conv1D(160, 10, activation='relu')(conv1)
            conv1 = Conv1D(160, 10, activation='relu')(conv1)
            conv1 = GlobalAveragePooling1D()(conv1)
            conv1 = Dropout(0.5)(conv1)
            print('conv1 before reshape', conv1.get_shape())
            print('text_out before flatten', text_out.get_shape())

            text_out = Flatten()(text_out)
            print('text_out after flatten', text_out.get_shape())
            text_dense = tf.keras.layers.Dense(512,
                                               activation='relu')(text_out)
            field_dense = tf.keras.layers.Dense(512, activation='relu')(conv1)
            print('text_dense after dense', text_dense.get_shape())

            #			scaled_image = tf.keras.layers.Lambda(function=lambda tensors: tensors[0] * tensors[1])([image, scale])
            #			fc_common_dense = Lambda(lambda x:K.concatenate([x[0], x[1]], axis=1))([text_dense, field_dense])
            #			fc_common_dense = tf.keras.layers.Concatenate(axis=-1)(list([text_dense, field_dense]))
            fc_common_dense = tf.keras.layers.Concatenate(axis=-1)(list(
                [text_dense, field_dense]))
            fc_common_dense = tf.keras.layers.Dense(
                512, activation='relu')(fc_common_dense)

            #available_moves takes form [0, 0, -inf, 0, -inf...], 0 if action is available, -inf if not.
            fc_act = tf.keras.layers.Dense(256,
                                           activation='relu')(fc_common_dense)
            #			self.pi = tf.keras.layers.Dense(action_space.n, activation='relu')(fc_act)
            self.pi = fc(fc_act, 'pi', action_space.n, init_scale=0.01)

            # Calculate the v(s)
            h3 = tf.keras.layers.Dense(256, activation='relu')(fc_common_dense)
            fc_vf = tf.keras.layers.Dense(1, activation=None)(h3)[:, 0]

#			vf = fc_layer(fc_3, 1, activation_fn=None)[:,0]
#			vf = fc_layer(fc_common_dense, 1, activation_fn=None)[:,0]

        self.initial_state = None
        """
		# Take an action from the action distribution (remember we are using a
		# stochastic policy, so we don't always take the action with the highest
		# probability: with 2 actions at 0.7 and 0.3 we have a 30% chance to take the second)
		a0 = self.pd.sample()

		# Calculate the neg log of our probability
		neglogp0 = self.pd.neglogp(a0)
		"""

        # mask the policy logits with the available-moves vector (0 or -inf per action)
        availPi = tf.add(self.pi, available_moves)

        def sample():
            u = tf.random_uniform(tf.shape(availPi))
            return tf.argmax(availPi - tf.log(-tf.log(u)), axis=-1)

        a0 = sample()
        el0in = tf.exp(availPi -
                       tf.reduce_max(availPi, axis=-1, keep_dims=True))
        z0in = tf.reduce_sum(el0in, axis=-1, keep_dims=True)
        p0in = el0in / z0in
        onehot = tf.one_hot(a0, availPi.get_shape().as_list()[-1])
        neglogp0 = -tf.log(tf.reduce_sum(tf.multiply(p0in, onehot), axis=-1))
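
        # The sampling in sample() uses the Gumbel-max trick: adding -log(-log(u))
        # noise, with u ~ Uniform(0, 1), to the masked logits and taking the argmax
        # is equivalent to drawing from the softmax distribution over availPi.
        # el0in/z0in then rebuild that softmax in a numerically stable way (by
        # subtracting the row max) so that neglogp0 = -log p(a0) is obtained for
        # the sampled action.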

        # Function used to take a step; returns the action to take and V(s)
        def step(state_in, valid_moves, ob_texts, *_args, **_kwargs):
            # return a0, vf, neglogp0
            # pad the text
            #			print('ob_text', ob_texts)
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                self.tokenizer.fit_on_texts([ob_text])

            ob_text_input = []
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                token = self.tokenizer.texts_to_sequences([ob_text])
                token = sequence.pad_sequences(
                    token, maxlen=MM_MAX_SENTENCE_SIZE)  # pre_padding with 0
                ob_text_input.append(token)
#				print('token', token)
#				print('token shape', token.shape)
            orig_ob_text_input = np.array(ob_text_input)
            shape = orig_ob_text_input.shape
            #			print('ob_text_input shape', shape)
            ob_text_input = orig_ob_text_input.reshape(shape[0], shape[2])

            # Reshape for conv1
            #			state_in = np.expand_dims(state_in, axis=2)
            input_dict = dict({
                text_inputs_: ob_text_input,
                available_moves: valid_moves
            })
            input_dict.update(split_categories_from_state(state_in))

            return sess.run([a0, fc_vf, neglogp0], input_dict)

        # Function that calculates only the V(s)
        def value(state_in, valid_moves, ob_texts, *_args, **_kwargs):
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                self.tokenizer.fit_on_texts([ob_text])

            ob_text_input = []
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                token = self.tokenizer.texts_to_sequences([ob_text])
                token = sequence.pad_sequences(
                    token, maxlen=MM_MAX_SENTENCE_SIZE)  # pre_padding with 0
                ob_text_input.append(token)
#				print('token', token)
#				print('token shape', token.shape)
            ob_text_input = np.array(ob_text_input)
            shape = ob_text_input.shape
            #			print('ob_text_input shape', shape)
            ob_text_input = ob_text_input.reshape(shape[0], shape[2])

            # Reshape for conv1
            #			state_in = np.expand_dims(state_in, axis=2)
            input_dict = dict({
                text_inputs_: ob_text_input,
                available_moves: valid_moves
            })
            input_dict.update(split_categories_from_state(state_in))

            return sess.run(fc_vf, input_dict)
#			return sess.run(vf, {field_inputs_:state_in, text_inputs_:ob_text_input, available_moves:valid_moves})

        def select_action(state_in, valid_moves, ob_texts, *_args, **_kwargs):
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                self.tokenizer.fit_on_texts([ob_text])

            ob_text_input = []
            for ob_text in ob_texts:
                #				print('ob_text', ob_text)
                token = self.tokenizer.texts_to_sequences([ob_text])
                token = sequence.pad_sequences(
                    token, maxlen=MM_MAX_SENTENCE_SIZE)  # pre_padding with 0
                ob_text_input.append(token)
#				print('token', token)
#				print('token shape', token.shape)
            ob_text_input = np.array(ob_text_input)
            shape = ob_text_input.shape
            #			print('ob_text_input shape', shape)
            ob_text_input = ob_text_input.reshape(shape[0], shape[2])

            # Reshape for conv1
            #			state_in = np.expand_dims(state_in, axis=2)
            input_dict = dict({
                text_inputs_: ob_text_input,
                available_moves: valid_moves
            })
            input_dict.update(split_categories_from_state(state_in))

            return sess.run(a0, input_dict)
#			return sess.run(vf, {field_inputs_:state_in, text_inputs_:ob_text_input, available_moves:valid_moves})

        def split_categories_from_state(obs_datas):
            input_mappings = {}
            # Initialize buckets
            current_girl = np.empty([0, 1], dtype=np.float32)
            girl_1 = np.empty([0, 1], dtype=np.float32)
            girl_2 = np.empty([0, 1], dtype=np.float32)
            girl_3 = np.empty([0, 1], dtype=np.float32)
            girl_4 = np.empty([0, 1], dtype=np.float32)
            non_category_data = np.empty([0, GUMBALL_FIELD_REMAINDER],
                                         dtype=np.float32)

            input_mappings[current_girl_inputs_] = current_girl
            input_mappings[girl_1_inputs_] = girl_1
            input_mappings[girl_2_inputs_] = girl_2
            input_mappings[girl_3_inputs_] = girl_3
            input_mappings[girl_4_inputs_] = girl_4
            input_mappings[non_category_data_input_] = non_category_data

            # Everything above only happens once
            for obs_data in obs_datas:

                input_mappings[current_girl_inputs_] = np.append(
                    input_mappings[current_girl_inputs_],
                    np.array([[obs_data[0]]]),
                    axis=0)
                input_mappings[girl_1_inputs_] = np.append(
                    input_mappings[girl_1_inputs_],
                    np.array([[obs_data[1]]]),
                    axis=0)
                input_mappings[girl_2_inputs_] = np.append(
                    input_mappings[girl_2_inputs_],
                    np.array([[obs_data[2]]]),
                    axis=0)
                input_mappings[girl_3_inputs_] = np.append(
                    input_mappings[girl_3_inputs_],
                    np.array([[obs_data[3]]]),
                    axis=0)
                input_mappings[girl_4_inputs_] = np.append(
                    input_mappings[girl_4_inputs_],
                    np.array([[obs_data[4]]]),
                    axis=0)

                # the rest of the data is the numeric observation
                rest_details_index = 5
                input_mappings[non_category_data_input_] = np.append(
                    input_mappings[non_category_data_input_],
                    np.array([obs_data[rest_details_index:]]),
                    axis=0)

            return input_mappings

        self.availPi = availPi
        self.split_categories_from_state = split_categories_from_state
        self.text_inputs_ = text_inputs_
        self.available_moves = available_moves
        self.vf = fc_vf
        #		self.fc_vf = fc_vf
        self.step = step
        self.value = value
        self.select_action = select_action
        print('this did finish')
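
# A self-contained NumPy sketch (not part of the original class) illustrating the
# Gumbel-max sampling used in sample() above: taking the argmax of logits plus
# Gumbel noise draws from the softmax of those logits, and -inf entries (masked
# actions) are never selected.
import numpy as np

def gumbel_max_sample(logits, rng):
    u = rng.uniform(size=logits.shape)              # u ~ Uniform(0, 1)
    return np.argmax(logits - np.log(-np.log(u)))   # Gumbel-max draw

rng = np.random.default_rng(0)
logits = np.array([2.0, 0.5, -np.inf, 1.0])         # -inf masks an unavailable action
draws = np.array([gumbel_max_sample(logits, rng) for _ in range(10000)])
stable = np.exp(logits - logits.max())
print('empirical:', np.bincount(draws, minlength=4) / draws.size)
print('softmax:  ', stable / stable.sum())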
Example #4
def ssd_300(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    '''
    Build the SSD300 model with Keras.
    The base network used is VGG16.

    Note: Requires Keras>=v2.0 with a TensorFlow backend>=v1.0.

    Arguments:
        image_size (tuple): The size of the input image `(height, width, channels)`.
        n_classes (int): The number of classes, e.g. 20 for the Pascal VOC dataset, 80 for the MS COCO dataset.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'.
            'training' mode: The model output is the raw prediction tensor.
            'inference' and 'inference_fast' modes: The raw predictions are decoded into coordinates and filtered by
            thresholding.
        l2_regularization (float, optional): The L2-regularization rate. Applied to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the sizes of the anchor boxes, measured relative
            to the shorter side of the input image.
        max_scale (float, optional): The largest scaling factor for the sizes of the anchor boxes.
        scales (list, optional): A list of floats containing the scaling factors for the convolutional predictor layers.
            This list must be one element longer than the number of predictor layers, so that the next scale is
            available for the aspect ratio = 1 case. If `scales` is given, interpolating between `min_scale` and
            `max_scale` to build the list of scales is skipped.
        aspect_ratios_global (list, optional): The list of aspect ratios from which anchor boxes are generated. This
            list is applied globally to all prediction layers.
        aspect_ratios_per_layer (list, optional): A list of aspect ratio lists, one for each prediction layer.
            If passed, it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant if an aspect ratio list contains 1; ignored otherwise.
            If `True`, two anchor boxes are generated for aspect ratio = 1: the first using the layer's scale, the
            second using the geometric mean of that scale and the next scale.
        steps (list, optional): `None`, or a list with as many elements as there are predictor layers.
            Each element states, for its predictor layer, how many pixels apart the anchor box centers should be.
            An element may also be a pair of two numbers `(step_width, step_height)`.
            If no steps are given, they are computed so that the anchor box centers are spaced evenly.
        offsets (list, optional): `None`, or numbers stating, for each predictor layer, how many pixels from the top
            and left edges of the image the anchor grid starts.
        clip_boxes (bool, optional): If `True`, clip the anchor box coordinates to lie within the image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate is divided by its
            corresponding variance value.
        coords (str, optional): The box coordinate format used internally by the model (i.e. this is not the input
            format of the ground truth labels). Can be 'centroids' format `(cx, cy, w, h)` (box center coordinates,
            width, and height), 'minmax' format `(xmin, xmax, ymin, ymax)`, or 'corners' format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is meant to use relative instead of absolute
            coordinates, i.e. if the model predicts box coordinates within [0, 1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None`, or an array object of any shape that is broadcast-compatible
            with the image shape. Its values are subtracted from the pixel values of the image.
            For example, pass a list of 3 integers to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None`, or an array object. Analogous to `subtract_mean`, but the
            image pixel values are divided by it for normalization.
        swap_channels (list, optional): Either `False`, or a list of integers representing the desired order in which
            the input image channels should be swapped.
        confidence_thresh (float, optional): A float in [0, 1), the minimum classification confidence for a given
            class to be considered.
        iou_threshold (float, optional): A float in [0, 1]. All boxes with a Jaccard similarity of greater than or
            equal to `iou_threshold` are considered to contain the same object.
        top_k (int, optional): The number of highest-scoring predictions to keep for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximum number of predictions that will be passed through the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function returns not only the model, but also a list
            containing the dimensions of the predictor layers.

    Returns:
        model: The Keras SSD300 model.
        predictor_sizes (optional): A numpy array containing the `(height, width)` portion of the output tensor shape
            for each convolutional predictor layer.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 6  # The number of predictor convolutional layers in the network is 6 for the original SSD300.
    n_classes += 1  # The number of classes, +1 to account for the background class.
    l2_reg = l2_regularization  # The L2 regularization rate.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    ############################################################################
    # A few sanity checks on the arguments.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "Either aspect_ratios_per_layer must be None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    # Build the list of scales
    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "Either scales must be None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
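        # e.g. with min_scale=0.2 and max_scale=0.9 (the SSD paper defaults), this
        # yields 7 scales: approximately [0.2, 0.317, 0.433, 0.55, 0.667, 0.783, 0.9].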

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer (only needed for the anchor box layers).
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes predicted per cell for each predictor layer.
    # We need this to know how many channels the predictor layers must have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) + 1)  # +1 for the aspect ratio = 1 case
            else:
                n_boxes.append(len(ar))
    else:  # If only one global aspect ratio list was passed, the number of boxes is the same for every layer.
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
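    # With the default aspect_ratios_per_layer above and two_boxes_for_ar1=True, this
    # yields n_boxes = [4, 6, 6, 6, 4, 4], as in the original SSD300.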

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define the functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)
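
    # With the default swap_channels=[2, 1, 0], input_channel_swap simply reverses the
    # channel order (e.g. RGB -> BGR), matching the channel order the original Caffe
    # VGG16 weights were trained with.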

    ############################################################################
    # Step 1: Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    ############################################################################
    # Step 1.1: Compute the VGG16 base network
    ############################################################################

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    ############################################################################
    # Step 1.2: Apply (3 x 3) convolutional filters to compute the feature maps.
    ############################################################################

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)
    print('fully connected 6: ', fc6.get_shape())
    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)
    print('fully connected 7: ', fc7.get_shape())
    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)
    print('conv6_2: ', conv6_2.get_shape())
    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)
    print('conv7_2: ', conv7_2.get_shape())
    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)
    print('conv8_2: ', conv8_2.get_shape())
    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)
    print('conv9_2: ', conv9_2.get_shape())
    print('conv4_3: ', conv4_3.get_shape())

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)
    print('conv4_3_norm.shape: ', conv4_3_norm.get_shape())
    ############################################################################
    # Step 1.3: Build the class probability distribution output for each default bounding box.
    ############################################################################

    ### Build the convolutional predictor layers on top of the base network
    # We predict confidence values for each box, so the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conv4_3_norm_mbox_conf = Conv2D(
        n_boxes[0] * n_classes, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(l2_reg),
        name='conv4_3_norm_mbox_conf')(conv4_3_norm)
    print('conv4_3_norm_mbox_conf.shape: ', conv4_3_norm_mbox_conf.get_shape())
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3),
                           padding='same',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(l2_reg),
                           name='fc7_mbox_conf')(fc7)
    print('fc7_mbox_conf.shape: ', fc7_mbox_conf.get_shape())
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv6_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv7_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv8_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv9_2_mbox_conf')(conv9_2)
    print('conv9_2_mbox_conf: ', conv9_2_mbox_conf.get_shape())

    ############################################################################
    # Step 1.4: Build the offset parameter outputs of the default bounding boxes for each cell of the feature maps.
    ############################################################################

    # We predict 4 coordinates for each box, so the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   kernel_regularizer=l2(l2_reg),
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    print('conv4_3_norm_mbox_loc: ', conv4_3_norm_mbox_loc.get_shape())
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3),
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(l2_reg),
                          name='fc7_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv6_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv7_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv8_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv9_2_mbox_loc')(conv9_2)
    print('conv9_2_mbox_loc: ', conv9_2_mbox_loc.get_shape())

    ############################################################################
    # Step 1.5: Generate the anchor boxes used as the reference for predicting the offsets of the bounding boxes around objects
    ############################################################################

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation of the model)
    # Output shape of the anchors: `(batch, height, width, n_boxes, 8)`
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    print('conv4_3_norm_mbox_priorbox: ',
          conv4_3_norm_mbox_priorbox.get_shape())
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    print('fc7_mbox_priorbox: ', fc7_mbox_priorbox.get_shape())
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    print('conv6_2_mbox_priorbox: ', conv6_2_mbox_priorbox.get_shape())
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    print('conv7_2_mbox_priorbox: ', conv7_2_mbox_priorbox.get_shape())
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    print('conv8_2_mbox_priorbox: ', conv8_2_mbox_priorbox.get_shape())
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)
    print('conv9_2_mbox_priorbox: ', conv9_2_mbox_priorbox.get_shape())

    ############################################################################
    # Step 2: Reshape the output tensors
    ############################################################################

    ############################################################################
    # Step 2.1: Reshape the class prediction outputs
    ############################################################################

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated on the last axis so we can compute a softmax over them.
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    print('conv4_3_norm_mbox_conf_reshape: ',
          conv4_3_norm_mbox_conf_reshape.get_shape())
    print('fc7_mbox_conf_reshape: ', fc7_mbox_conf_reshape.get_shape())
    print('conv6_2_mbox_conf_reshape: ', conv6_2_mbox_conf_reshape.get_shape())
    print('conv7_2_mbox_conf_reshape: ', conv7_2_mbox_conf_reshape.get_shape())
    print('conv8_2_mbox_conf_reshape: ', conv8_2_mbox_conf_reshape.get_shape())
    print('conv9_2_mbox_conf_reshape: ', conv9_2_mbox_conf_reshape.get_shape())

    ############################################################################
    # Step 2.2: Reshape the bounding box prediction outputs
    ############################################################################

    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated on the last axis to compute the smooth L1 loss
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    print('conv4_3_norm_mbox_loc_reshape: ',
          conv4_3_norm_mbox_loc_reshape.get_shape())
    print('fc7_mbox_loc_reshape: ', fc7_mbox_loc_reshape.get_shape())
    print('conv6_2_mbox_loc_reshape: ', conv6_2_mbox_loc_reshape.get_shape())
    print('conv7_2_mbox_loc_reshape: ', conv7_2_mbox_loc_reshape.get_shape())
    print('conv8_2_mbox_loc_reshape: ', conv8_2_mbox_loc_reshape.get_shape())
    print('conv9_2_mbox_loc_reshape: ', conv9_2_mbox_loc_reshape.get_shape())

    ############################################################################
    # Step 2.3: Reshape the anchor box outputs
    ############################################################################

    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)
    print('conv4_3_norm_mbox_priorbox_reshape: ',
          conv4_3_norm_mbox_priorbox_reshape.get_shape())
    print('fc7_mbox_priorbox_reshape: ', fc7_mbox_priorbox_reshape.get_shape())
    print('conv6_2_mbox_priorbox_reshape: ',
          conv6_2_mbox_priorbox_reshape.get_shape())
    print('conv7_2_mbox_priorbox_reshape: ',
          conv7_2_mbox_priorbox_reshape.get_shape())
    print('conv8_2_mbox_priorbox_reshape: ',
          conv8_2_mbox_priorbox_reshape.get_shape())
    print('conv9_2_mbox_priorbox_reshape: ',
          conv9_2_mbox_priorbox_reshape.get_shape())
    ### Concatenate the predictions from the different layers

    ############################################################################
    # Step 3: Concatenate the boxes across layers
    ############################################################################

    ############################################################################
    # Step 3.1: Concatenate the confidence outputs
    ############################################################################

    # Axis 0 (batch) and axis 2 (n_classes or 4) are identical for all predictor layers,
    # so we concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape
    ])
    print('mbox_conf.shape: ', mbox_conf.get_shape())
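    # For the canonical 300x300 input the six feature maps are 38, 19, 10, 5, 3 and 1
    # cells wide, so n_boxes_total = 38^2*4 + 19^2*6 + 10^2*6 + 5^2*6 + 3^2*4 + 1*4 = 8732.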

    ############################################################################
    # Step 3.2: Concatenate the location outputs
    ############################################################################

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape
    ])

    print('mbox_loc.shape: ', mbox_loc.get_shape())

    ############################################################################
    # Step 3.3: Concatenate the anchor box outputs
    ############################################################################

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape
    ])

    print('mbox_priorbox.shape: ', mbox_priorbox.get_shape())

    ############################################################################
    # Step 4: Compute the output
    ############################################################################

    ############################################################################
    # Step 4.1: Apply the activation for the confidence predictions
    ############################################################################

    # The box coordinate predictions are passed to the loss function as they are,
    # but for the class predictions we first apply a softmax activation layer
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class predictions, box predictions and anchors into one large predictions vector
    # Output of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])
    print('predictions.shape: ', predictions.get_shape())
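    # Last-axis layout of `predictions`, which follows from the concatenation order:
    #   [:, :, :n_classes]              -> softmaxed class scores
    #   [:, :, n_classes:n_classes + 4] -> box offset predictions
    #   [:, :, n_classes + 4:]          -> anchor box tensor (the 8 values per box reshaped above)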
    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
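A minimal, self-contained sketch of the reshape-and-concatenate pattern used above, with two illustrative heads instead of six (the names, strides and box counts here are assumptions for illustration, not the values used in this model):

from tensorflow.keras.layers import Input, Conv2D, Reshape, Concatenate
from tensorflow.keras.models import Model

n_classes_sketch = 21  # illustrative class count

def multi_scale_heads_sketch():
    x = Input(shape=(300, 300, 3))
    # Two feature maps standing in for the six predictor layers above.
    feat_coarse = Conv2D(64, 3, strides=8, padding='same')(x)
    feat_coarser = Conv2D(64, 3, strides=4, padding='same')(feat_coarse)

    confs, locs = [], []
    for feat, n_boxes in [(feat_coarse, 4), (feat_coarser, 6)]:
        conf = Conv2D(n_boxes * n_classes_sketch, 3, padding='same')(feat)
        loc = Conv2D(n_boxes * 4, 3, padding='same')(feat)
        # Flatten each (h, w, n_boxes * k) map to (h * w * n_boxes, k) so that
        # predictions from differently sized grids can be stacked along axis 1.
        confs.append(Reshape((-1, n_classes_sketch))(conf))
        locs.append(Reshape((-1, 4))(loc))

    mbox_conf = Concatenate(axis=1)(confs)  # (batch, n_boxes_total, n_classes)
    mbox_loc = Concatenate(axis=1)(locs)    # (batch, n_boxes_total, 4)
    return Model(x, [mbox_conf, mbox_loc])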
Example #5
def build_model_hpconfig(args):
    """
    Description:
        Building models for hyperparameter Tuning

    Args:
        args: input arguments

    Returns:
        model (keras model)
    """

    #parsing and assigning hyperparameter variables from argparse
    conv1_filters = int(args.conv1_filters)
    conv2_filters = int(args.conv2_filters)
    conv3_filters = int(args.conv3_filters)
    window_size = int(args.window_size)
    kernel_regularizer = args.kernel_regularizer
    max_pool_size = int(args.pool_size)
    conv_dropout = float(args.conv_dropout)
    conv1d_initializer = args.conv_weight_initializer
    recurrent_layer1 = int(args.recurrent_layer1)
    recurrent_layer2 = int(args.recurrent_layer2)
    recurrent_dropout = float(args.recurrent_dropout)
    after_recurrent_dropout = float(args.after_recurrent_dropout)
    recurrent_recurrent_dropout = float(args.recurrent_recurrent_dropout)
    recurrent_initalizer = args.recurrent_weight_initializer
    optimizer = args.optimizer
    learning_rate = float(args.learning_rate)
    bidirection = args.bidirection
    recurrent_layer = str(args.recurrent_layer)
    dense_dropout = float(args.dense_dropout)
    dense_1 = int(args.dense_1)
    dense_initializer = args.dense_weight_initializer
    train_data = str(args.train_input_data)

    #main input is the protein sequence, one encoded amino acid per position, shape (700,)
    main_input = Input(shape=(700, ), dtype='float32', name='main_input')

    #Embedding Layer used as input to the neural network
    embed = Embedding(output_dim=21, input_dim=21,
                      input_length=700)(main_input)

    #secondary input is the protein profile features
    auxiliary_input = Input(shape=(700, 21), name='aux_input')

    #get shape of input layers
    print("Protein Sequence shape: ", main_input.get_shape())
    print("Protein Profile shape: ", auxiliary_input.get_shape())

    #concatenate input layers
    concat = Concatenate(axis=-1)([embed, auxiliary_input])
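    #resulting shape: (batch, 700, 21 + 21) = (batch, 700, 42)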

    #3x1D Convolutional Hidden Layers with BatchNormalization, Dropout and MaxPooling
    conv_layer1 = Conv1D(conv1_filters,
                         window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer1)
    conv_act = activations.relu(batch_norm)
    #use a distinct name for the layer output: conv_dropout holds the dropout rate
    conv1_dropout = Dropout(conv_dropout)(conv_act)
    max_pool_1D_1 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv1_dropout)

    conv_layer2 = Conv1D(conv2_filters,
                         window_size,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer2)
    conv_act = activations.relu(batch_norm)
    conv2_dropout = Dropout(conv_dropout)(conv_act)
    max_pool_1D_2 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv2_dropout)

    conv_layer3 = Conv1D(conv3_filters,
                         window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer3)
    conv_act = activations.relu(batch_norm)
    conv3_dropout = Dropout(conv_dropout)(conv_act)
    max_pool_1D_3 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv3_dropout)

    #concat pooling layers
    conv_features = Concatenate(axis=-1)(
        [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3])
    print("Shape of convolutional output: ", conv_features.get_shape())

    conv_features = Dense(600, activation='relu')(conv_features)

    ######## Recurrent Layers ########
    if (recurrent_layer == 'lstm'):
        if (bidirection):
            print('Entering LSTM Layers')
            #Creating Bidirectional LSTM layers
            lstm_f1 = Bidirectional(
                LSTM(recurrent_layer1,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(conv_features)
            lstm_f2 = Bidirectional(
                LSTM(recurrent_layer2,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(lstm_f1)

            #concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
            print('Concatenated LSTM layers')

        else:
            #Creating unidirectional LSTM Layers
            lstm_f1 = LSTM(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)

            lstm_f2 = LSTM(recurrent_layer2,
                           return_sequences=True,
                           activation='tanh',
                           recurrent_activation='sigmoid',
                           dropout=recurrent_dropout,
                           recurrent_dropout=recurrent_recurrent_dropout,
                           kernel_initializer=recurrent_initalizer)(lstm_f1)

            #concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)

    elif (recurrent_layer == 'gru'):
        if (bidirection):

            #Creating Bidirectional GRU layers
            gru_f1 = Bidirectional(
                GRU(recurrent_layer1,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(conv_features)

            gru_f2 = Bidirectional(
                GRU(recurrent_layer2,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(gru_f1)

            #concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)

        else:
            #Creating unidirectional GRU Layers
            gru_f1 = GRU(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)

            gru_f2 = GRU(recurrent_layer2,
                         return_sequences=True,
                         activation='tanh',
                         recurrent_activation='sigmoid',
                         dropout=recurrent_dropout,
                         recurrent_dropout=recurrent_recurrent_dropout,
                         kernel_initializer=recurrent_initalizer)(gru_f1)

            #concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
    else:
        raise ValueError(
            "`recurrent_layer` must be one of 'lstm' or 'gru', but received '{}'."
            .format(recurrent_layer))

    #Dense Fully-Connected DNN layers
    fc_dense1 = Dense(dense_1,
                      activation='relu',
                      kernel_initializer=dense_initializer)(concat_features)
    fc_dense1_dropout = Dropout(dense_dropout)(fc_dense1)

    #Final Output layer with 8 nodes for the 8 output classifications
    main_output = Dense(8, activation='softmax',
                        name='main_output')(fc_dense1_dropout)

    #create model from inputs and outputs
    model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

    #Set optimizer to be used with the model, default is Adam
    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate, name='adam')
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=learning_rate, momentum=0.0,
                        nesterov=False, name='SGD')
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate,
                            centered=True,
                            name='RMSprop')
    elif optimizer == 'adagrad':
        optimizer = Adagrad(learning_rate=learning_rate, name='Adagrad')
    elif optimizer == 'adamax':
        optimizer = Adamax(learning_rate=learning_rate, name='Adamax')
    else:
        #default to Adam if an unrecognised optimizer name is given
        optimizer = Adam(learning_rate=learning_rate, name='adam')

    #compile model using optimizer and the categorical crossentropy loss function
    model.compile(optimizer=optimizer,
                  loss={'main_output': 'categorical_crossentropy'},
                  metrics=[
                      'accuracy',
                      MeanSquaredError(),
                      FalseNegatives(),
                      FalsePositives(),
                      TrueNegatives(),
                      TruePositives(),
                      MeanAbsoluteError(),
                      Recall(),
                      Precision()
                  ])

    #get summary of model including its layers and num parameters
    model.summary()

    return model
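A hedged usage sketch for the function above: every attribute name below matches one read inside build_model_hpconfig, but the values are illustrative placeholders, not tuned recommendations.

from argparse import Namespace

#illustrative hyperparameters only
args = Namespace(
    conv1_filters=64, conv2_filters=128, conv3_filters=256,
    window_size=7, kernel_regularizer=None, pool_size=2,
    conv_dropout=0.2, conv_weight_initializer='glorot_uniform',
    recurrent_layer1=200, recurrent_layer2=200,
    recurrent_dropout=0.3, after_recurrent_dropout=0.3,
    recurrent_recurrent_dropout=0.1,
    recurrent_weight_initializer='glorot_uniform',
    optimizer='adam', learning_rate=1e-3, bidirection=True,
    recurrent_layer='lstm', dense_dropout=0.3, dense_1=600,
    dense_weight_initializer='glorot_uniform',
    train_input_data='train.npz')

model = build_model_hpconfig(args)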
Example #6
    def refine_model(self):
        input_ = Input(self.global_shape)
        input_mask = Input(self.mask_shape)

        x, mask = self.partial_conv(32,
                                    kernel_size=5,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(input_, input_mask)
        x, mask = self.partial_conv(64,
                                    kernel_size=3,
                                    strides=2,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(64,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=2,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=2,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=2,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=2,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(64,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)

        x = self.context_attention(x, mask)
        # context = Conv2D(128, kernel_size=3, strides=2, dilation_rate=1, padding='same', activation='elu')(x)

        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        x, mask = self.partial_conv(128,
                                    kernel_size=3,
                                    strides=1,
                                    dilation_rate=1,
                                    padding='same',
                                    activation='elu')(x, mask)
        # contextual attention

        y, mask_y = self.partial_conv(32,
                                      kernel_size=5,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(input_, input_mask)
        y, mask_y = self.partial_conv(64,
                                      kernel_size=3,
                                      strides=2,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)
        y, mask_y = self.partial_conv(64,
                                      kernel_size=3,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)
        y, mask_y = self.partial_conv(128,
                                      kernel_size=3,
                                      strides=2,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)
        y, mask_y = self.partial_conv(128,
                                      kernel_size=3,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)
        y, mask_y = self.partial_conv(128,
                                      kernel_size=3,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)

        while (y.get_shape().as_list()[1] > x.get_shape().as_list()[1]):
            y, mask_y = self.partial_conv(128,
                                          kernel_size=3,
                                          strides=2,
                                          dilation_rate=1,
                                          padding='same',
                                          activation='elu')(y, mask_y)
            y, mask_y = self.partial_conv(128,
                                          kernel_size=3,
                                          strides=1,
                                          dilation_rate=1,
                                          padding='same',
                                          activation='elu')(y, mask_y)

        y, mask_y = self.partial_conv(128,
                                      kernel_size=3,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)
        y, mask_y = self.partial_conv(128,
                                      kernel_size=3,
                                      strides=1,
                                      dilation_rate=1,
                                      padding='same',
                                      activation='elu')(y, mask_y)

        out = Concatenate(axis=-1)([x, y])
        out_mask = Concatenate(axis=-1)([mask, mask_y])
        while (input_.get_shape().as_list()[1] > out.get_shape().as_list()[1]):
            out, out_mask = self.partial_conv(128,
                                              kernel_size=3,
                                              strides=1,
                                              dilation_rate=1,
                                              padding='same',
                                              activation='elu')(out, out_mask)
            out, out_mask = self.partial_conv(128,
                                              kernel_size=3,
                                              strides=1,
                                              dilation_rate=1,
                                              padding='same',
                                              activation='elu')(out, out_mask)
            out = UpSampling2D()(out)
            out_mask = UpSampling2D()(out_mask)

        out, out_mask = self.partial_conv(32,
                                          kernel_size=3,
                                          strides=1,
                                          dilation_rate=1,
                                          padding='same',
                                          activation='elu')(out, out_mask)
        out, out_mask = self.partial_conv(16,
                                          kernel_size=3,
                                          strides=1,
                                          dilation_rate=1,
                                          padding='same',
                                          activation='elu')(out, out_mask)
        out, out_mask = self.partial_conv(3,
                                          kernel_size=3,
                                          strides=1,
                                          dilation_rate=1,
                                          padding='same',
                                          activation='elu')(out, out_mask)

        out = tf.clip_by_value(out, clip_value_min=-1, clip_value_max=1)

        out = self.paste_patch(input_, out, input_mask)
        self.refine = Model([input_, input_mask], out, name="refine")
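partial_conv and context_attention are helper factories defined elsewhere in this class. For reference, here is a minimal sketch of a standard partial convolution (after Liu et al., 2018) that matches the (x, mask) calling convention above; it is an assumption about the helper, not its actual implementation.

import tensorflow as tf
from tensorflow.keras import layers

class PartialConv2D(layers.Layer):
    """Convolution over valid (unmasked) pixels only, renormalized by coverage.

    Assumes a square kernel and a single-channel mask broadcastable over x.
    Usage: x, mask = PartialConv2D(32, 5)(image, image_mask)
    """

    def __init__(self, filters, kernel_size, strides=1, dilation_rate=1,
                 padding='same', activation='elu', **kwargs):
        super().__init__(**kwargs)
        self.window = float(kernel_size * kernel_size)
        self.conv = layers.Conv2D(filters, kernel_size, strides=strides,
                                  dilation_rate=dilation_rate,
                                  padding=padding, use_bias=False)
        # Fixed all-ones kernel that counts valid mask pixels under each window.
        self.mask_conv = layers.Conv2D(1, kernel_size, strides=strides,
                                       dilation_rate=dilation_rate,
                                       padding=padding, use_bias=False,
                                       kernel_initializer='ones',
                                       trainable=False)
        self.act = layers.Activation(activation)

    def call(self, x, mask):
        valid = self.mask_conv(mask)  # valid-pixel count per output position
        # Renormalize so sparsely covered windows are not attenuated;
        # fully masked windows output zero.
        out = self.conv(x * mask) * tf.math.divide_no_nan(self.window, valid)
        new_mask = tf.cast(valid > 0, x.dtype)  # valid if any input pixel was
        return self.act(out), new_mask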
Example #7
def BisenetV2(include_top=True,
              input_tensor=None,
              input_shape=(224, 224, 3),
              weights=None
              ):
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only tensorflow supported for now')
    name = "bisenetv2"
    input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=28, require_flatten=include_top,
                                      data_format=K.image_data_format())
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    img_input1 = Conv2D(16, kernel_size=(3, 3), strides=2, padding="same", use_bias=False, name="stem_block/conv_block_1")(img_input)
    img_input1 = BatchNormalization(axis=-1, name="stem_block/conv_block_1/bn_1")(img_input1)
    img_input1 = Activation(activation="relu", name="stem_block/conv_block_1/activate_1")(img_input1)

    branch_left_output = Conv2D(int(16/2), kernel_size=(1, 1), strides=1, padding="same", use_bias=False, name="stem_block/downsample_branch_left/1x1_conv_block")(img_input1)
    branch_left_output = BatchNormalization(axis=-1, name="stem_block/downsample_branch_left/1x1_conv_block/bn_1")(branch_left_output)
    branch_left_output = Activation(activation="relu", name="stem_block/downsample_branch_left/1x1_conv_block/activate_1")(branch_left_output)


    branch_left_output = Conv2D(16, kernel_size=(3, 3), strides=2, padding="same", use_bias=False,
                                name="stem_block/downsample_branch_left/3x3_conv_block")(branch_left_output)
    branch_left_output = BatchNormalization(axis=-1, name="stem_block/downsample_branch_left/3x3_conv_block/bn_1")(branch_left_output)
    branch_left_output = Activation(activation="relu", name="stem_block/downsample_branch_left/3x3_conv_block/activate_1")(branch_left_output)


    branch_right_output = MaxPool2D(pool_size=(3, 3), strides=2, padding='same', name="stem_block/downsample_branch_right/maxpooling_block")(img_input1)
    stem_result = Concatenate(axis=-1, name="stem_block/concate_features")([branch_left_output, branch_right_output])
    stem_result = Conv2D(16, kernel_size=(3, 3), strides=1, padding="same", use_bias=False, name="stem_block/final_conv_block")(stem_result)
    stem_result = BatchNormalization(axis=-1, name="stem_block/final_conv_block/bn_1")(stem_result)
    stem_result = Activation(activation="relu", name="stem_block/final_conv_block/activate_1")(stem_result)

    # k_reduce_mean = Lambda(lambda x: tf.reduce_mean(x, axis=[1, 2], keepdims=True, name='global_avg_pooling'))
    # embedding_result=k_reduce_mean(stem_result)
    # embedding_result = K.mean(stem_result, axis=[1, 2], keepdims=True)
    embedding_result = KerasReduceMean(axis=(1, 2), keep_dim=True, name="global_avg_pooling")(stem_result)

    embedding_result = BatchNormalization(axis=-1, name="context_embedding_block/bn")(embedding_result)
    output_channels = stem_result.get_shape().as_list()[-1]
    embedding_result = Conv2D(output_channels, kernel_size=(1, 1), strides=1, padding="same", use_bias=False,
                              name="context_embedding_block/conv_block_1")(embedding_result)
    embedding_result = BatchNormalization(axis=-1, name="context_embedding_block/conv_block_1/bn_1")(embedding_result)
    embedding_result = Activation(activation="relu", name="context_embedding_block/conv_block_1/activate_1")(embedding_result)
    embedding_result = Add(name="context_embedding_block/fused_features")([embedding_result, stem_result])
    embedding_result = Conv2D(output_channels, kernel_size=(3, 3), strides=1, padding="same", use_bias=False, name="context_embedding_block/final_conv_block")(embedding_result)


    output_channels = embedding_result.get_shape().as_list()[-1]
    gather_expansion_result = Conv2D(output_channels, kernel_size=(3, 3), strides=1, padding="same", use_bias=False,
                                     name="ge_block_with_stride_1/stride_equal_one_module/3x3_conv_block")(embedding_result)
    gather_expansion_result = BatchNormalization(axis=-1, name="ge_block_with_stride_1/stride_equal_one_module/3x3_conv_block/bn_1")(gather_expansion_result)
    gather_expansion_result = Activation(activation="relu", name="ge_block_with_stride_1/stride_equal_one_module/3x3_conv_block/activate_1")(gather_expansion_result)

    gather_expansion_result = DepthwiseConv2D(kernel_size=3, strides=1, depth_multiplier=6, padding='same',
                                              name="ge_block_with_stride_1/stride_equal_one_module/depthwise_conv_block")(gather_expansion_result)
    gather_expansion_result = BatchNormalization(axis=-1, name="ge_block_with_stride_1/stride_equal_one_module/dw_bn")(gather_expansion_result)

    gather_expansion_result = Conv2D(output_channels, kernel_size=(1, 1), strides=1, padding="same", use_bias=False,
                                     name="ge_block_with_stride_1/stride_equal_one_module/1x1_conv_block")(gather_expansion_result)
    gather_expansion_result = Add(name="ge_block_with_stride_1/stride_equal_one_module/fused_features")([embedding_result, gather_expansion_result])
    gather_expansion_result = Activation(activation="relu", name="ge_block_with_stride_1/stride_equal_one_module/ge_output")(gather_expansion_result)

    gather_expansion_proj_result = DepthwiseConv2D(kernel_size=3, depth_multiplier=1, strides=2, padding="same",
                                                   name="ge_block_with_stride_2/stride_equal_two_module/input_project_dw_conv_block")(gather_expansion_result)
    gather_expansion_proj_result = BatchNormalization(axis=-1, name="ge_block_with_stride_2/stride_equal_two_module/input_project_bn")(gather_expansion_proj_result)
    gather_expansion_proj_result = Conv2D(128, kernel_size=(1, 1), strides=1, padding="same", use_bias=False, activation=None)(gather_expansion_proj_result)
    input_tensor_channels = gather_expansion_result.get_shape().as_list()[-1]
    gather_expansion_stride2_result = Conv2D(input_tensor_channels, kernel_size=(3, 3), strides=1, padding="same",
                                             use_bias=False, name="ge_block_with_stride_2/stride_equal_two_module/3x3_conv_block")(gather_expansion_result)
    gather_expansion_stride2_result = BatchNormalization(axis=-1, name="ge_block_with_stride_2/stride_equal_two_module/3x3_conv_block/bn_1")(gather_expansion_stride2_result)
    gather_expansion_stride2_result = Activation(activation="relu", name="ge_block_with_stride_2/stride_equal_two_module/3x3_conv_block/activate_1")(gather_expansion_stride2_result)

    gather_expansion_stride2_result = DepthwiseConv2D(kernel_size=3, depth_multiplier=6, strides=2, padding="same",
                                                      name="ge_block_with_stride_2/stride_equal_two_module/depthwise_conv_block_1")(gather_expansion_stride2_result)
    gather_expansion_stride2_result = BatchNormalization(axis=-1, name="ge_block_with_stride_2/stride_equal_two_module/dw_bn_1")(gather_expansion_stride2_result)
    gather_expansion_stride2_result = DepthwiseConv2D(kernel_size=3, depth_multiplier=1, strides=1, padding="same",
                                                      name="ge_block_with_stride_2/stride_equal_two_module/depthwise_conv_block_2")(gather_expansion_stride2_result)
    gather_expansion_stride2_result = BatchNormalization(axis=-1, name="ge_block_with_stride_2/stride_equal_two_module/dw_bn_2")(gather_expansion_stride2_result)
    gather_expansion_stride2_result = Conv2D(128, kernel_size=(1, 1), strides=1, padding="same",
                                             use_bias=False, activation=None, name="ge_block_with_stride_2/stride_equal_two_module/1x1_conv_block")(gather_expansion_stride2_result)
    gather_expansion_total_result = Add(name="ge_block_with_stride_2/stride_equal_two_module/fused_features")([gather_expansion_proj_result, gather_expansion_stride2_result])
    gather_expansion_total_result = Activation(activation="relu", name="ge_block_with_stride_2/stride_equal_two_module/ge_output")(gather_expansion_total_result)


    gather_expansion_proj2_result = DepthwiseConv2D(kernel_size=3, depth_multiplier=1, strides=2, padding="same",
                                                   name="ge_block_with_stride_2_repeat/stride_equal_two_module/input_project_dw_conv_block")(gather_expansion_total_result)
    gather_expansion_proj2_result = BatchNormalization(axis=-1, name="ge_block_with_stride_2_repeat/stride_equal_two_module/input_project_bn")(gather_expansion_proj2_result)
    gather_expansion_proj2_result = Conv2D(128, kernel_size=(1, 1), strides=1, padding="same", use_bias=False, activation=None)(gather_expansion_proj2_result)
    input_tensor_channels = gather_expansion_total_result.get_shape().as_list()[-1]
    gather_expansion_stride2_result_repeat = Conv2D(input_tensor_channels, kernel_size=(3, 3), strides=1,  padding="same",
                                             use_bias=False, name="ge_block_with_stride_2_repeat/stride_equal_two_module/3x3_conv_block")(gather_expansion_total_result)
    gather_expansion_stride2_result_repeat = BatchNormalization(axis=-1, name="ge_block_with_stride_2_repeat/stride_equal_two_module/3x3_conv_block/bn_1")(gather_expansion_stride2_result_repeat)
    gather_expansion_stride2_result_repeat = Activation(activation="relu", name="ge_block_with_stride_2_repeat/stride_equal_two_module/3x3_conv_block/activate_1")(gather_expansion_stride2_result_repeat)

    gather_expansion_stride2_result_repeat = DepthwiseConv2D(kernel_size=3, depth_multiplier=6, strides=2, padding="same",
                                                      name="ge_block_with_stride_2_repeat/stride_equal_two_module/depthwise_conv_block_1")(gather_expansion_stride2_result_repeat)
    gather_expansion_stride2_result_repeat = BatchNormalization(axis=-1, name="ge_block_with_stride_2_repeat/stride_equal_two_module/dw_bn_1")(gather_expansion_stride2_result_repeat)
    gather_expansion_stride2_result_repeat = DepthwiseConv2D(kernel_size=3, depth_multiplier=1, strides=1, padding="same",
                                                      name="ge_block_with_stride_2_repeat/stride_equal_two_module/depthwise_conv_block_2")(gather_expansion_stride2_result_repeat)
    gather_expansion_stride2_result_repeat = BatchNormalization(axis=-1, name="ge_block_with_stride_2_repeat/stride_equal_two_module/dw_bn_2")(gather_expansion_stride2_result_repeat)
    gather_expansion_stride2_result_repeat = Conv2D(128, kernel_size=(1, 1), strides=1, padding="same",
                                             use_bias=False, activation=None, name="ge_block_with_stride_2_repeat/stride_equal_two_module/1x1_conv_block")(gather_expansion_stride2_result_repeat)
    gather_expansion_total_result_repeat = Add(name="ge_block_with_stride_2_repeat/stride_equal_two_module/fused_features")([gather_expansion_proj2_result, gather_expansion_stride2_result_repeat])
    gather_expansion_total_result_repeat = Activation(activation="relu", name="ge_block_with_stride_2_repeat/stride_equal_two_module/ge_output")(gather_expansion_total_result_repeat)

    detail_input_tensor = stem_result
    semantic_input_tensor = gather_expansion_total_result_repeat
    output_channels = stem_result.get_shape().as_list()[-1]
    detail_branch_remain = DepthwiseConv2D(kernel_size=3, strides=1, padding="same", depth_multiplier=1,
                                           name="guided_aggregation_block/detail_branch/3x3_dw_conv_block")(detail_input_tensor)
    detail_branch_remain = BatchNormalization(axis=-1, name="guided_aggregation_block/detail_branch/bn_1")(detail_branch_remain)
    detail_branch_remain = Conv2D(output_channels, kernel_size=(1, 1), padding="same", strides=1, use_bias=False,
                                  name="guided_aggregation_block/detail_branch/1x1_conv_block")(detail_branch_remain)

    detail_branch_downsample = Conv2D(output_channels, kernel_size=(3, 3), strides=2, use_bias=False, activation=None,
                                      padding="same", name="guided_aggregation_block/detail_branch/3x3_conv_block")(detail_input_tensor)

    detail_branch_downsample = AveragePooling2D(pool_size=(3, 3), strides=2, padding="same", name="guided_aggregation_block/detail_branch/avg_pooling_block")(detail_branch_downsample)

    semantic_branch_remain = DepthwiseConv2D(kernel_size=3, strides=1, padding="same", depth_multiplier=1,
                                             name="guided_aggregation_block/semantic_branch/3x3_dw_conv_block")(semantic_input_tensor)
    semantic_branch_remain = BatchNormalization(axis=-1, name="guided_aggregation_block/semantic_branch/bn_1")(semantic_branch_remain)
    semantic_branch_remain = Conv2D(output_channels, kernel_size=(1, 1), strides=1, use_bias=False, activation=None, padding="same",
                                    name="guided_aggregation_block/semantic_branch/1x1_conv_block")(semantic_branch_remain)
    # semantic_branch_remain = sigmoid(semantic_branch_remain)
    # keras_sigmoid = Lambda(lambda x: tf.nn.sigmoid(x, name="guided_aggregation_block/semantic_branch/semantic_remain_sigmoid"))
    # semantic_branch_remain = keras_sigmoid(semantic_branch_remain)
    semantic_branch_remain = Activation("sigmoid", name="guided_aggregation_block/semantic_branch/semantic_remain_sigmoid")(semantic_branch_remain)

    semantic_branch_upsample = Conv2D(output_channels, kernel_size=(3, 3), strides=1, padding="same", use_bias=False,
                                      activation=None, name="guided_aggregation_block/semantic_branch/3x3_conv_block")(semantic_input_tensor)
    # semantic_branch_upsample = resize_images(semantic_branch_upsample, 4, 4, data_format="channels_last", interpolation='bilinear')

    # upsample_bilinear0 = Lambda(lambda x: tf.image.resize_bilinear(x, size=stem_result.get_shape().as_list()[1:3],
    #                                                               name="guided_aggregation_block/semantic_branch/semantic_upsample_features"))
    # semantic_branch_upsample = upsample_bilinear0(semantic_branch_upsample)
    semantic_branch_upsample = BilinearUpSampling2D((4, 4), name="guided_aggregation_block/semantic_branch/semantic_upsample_features")(semantic_branch_upsample)
    semantic_branch_upsample = Activation("sigmoid", name="guided_aggregation_block/semantic_branch/semantic_branch_upsample_sigmoid")(semantic_branch_upsample)
    # keras_sigmoid_1 = Lambda(lambda x: tf.nn.sigmoid(x, name="guided_aggregation_block/semantic_branch/semantic_branch_upsample_sigmoid"))
    # semantic_branch_upsample = keras_sigmoid_1(semantic_branch_upsample)
    # semantic_branch_upsample = sigmoid(semantic_branch_upsample)

    guided_features_remain = Multiply(name="guided_aggregation_block/aggregation_features/guided_detail_features")([detail_branch_remain, semantic_branch_upsample])
    guided_features_downsample = Multiply(name="guided_aggregation_block/aggregation_features/guided_semantic_features")([detail_branch_downsample, semantic_branch_remain])

    # upsample_bilinear1 = Lambda(lambda x: tf.image.resize_bilinear(x, size=stem_result.get_shape().as_list()[1:3],
    #                                        name="guided_aggregation_block/aggregation_features/guided_upsample_features"))
    #
    # guided_features_upsample = upsample_bilinear1(guided_features_downsample)
    guided_features_upsample = BilinearUpSampling2D((4, 4), name="guided_aggregation_block/aggregation_features/guided_upsample_features")(guided_features_downsample)
    # guided_features_upsample = resize_images(guided_features_downsample, 4, 4, data_format="channels_last", interpolation='bilinear')

    guided_features = Add(name="guided_aggregation_block/aggregation_features/fused_features")([guided_features_remain, guided_features_upsample])
    guided_features = Conv2D(output_channels, kernel_size=(3, 3), strides=1, use_bias=False, padding="same",
                             name="guided_aggregation_block/aggregation_features/aggregation_feature_output")(guided_features)
    guided_features = BatchNormalization(axis=-1, name="guided_aggregation_block/aggregation_features/aggregation_feature_output/bn_1")(guided_features)
    guided_features = Activation(activation="relu", name="guided_aggregation_block/aggregation_features/aggregation_feature_output/activate_1")(guided_features)

    # input_tensor_size = [int(tmp * 4)for tmp in guided_features.get_shape().as_list()[1:3]]
    result = Conv2D(8, kernel_size=(3, 3), strides=1, use_bias=False, padding="same", name="seg_head_block/3x3_conv_block")(guided_features)
    result = BatchNormalization(axis=-1, name="seg_head_block/bn_1")(result)
    result = Activation("relu", name="seg_head_block/activate_1")(result)

    # upsample_bilinear2 = Lambda(lambda x: tf.image.resize_bilinear(x, size=input_tensor_size, name="seg_head_block/segmentation_head_logits"))
    # result = upsample_bilinear2(result)
    result = BilinearUpSampling2D((4, 4), name="seg_head_block/segmentation_head_upsample")(result)
    # result = resize_images(result, 4, 4, data_format="channels_last", interpolation='bilinear')

    result = Conv2D(1, kernel_size=(1, 1), strides=1, use_bias=False, padding="same",
                    name="seg_head_block/1x1_conv_block")(result)
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, result, name=name)

    if weights:
        model.load_weights(weights, by_name=True)

    return model
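KerasReduceMean and BilinearUpSampling2D are custom layers imported from elsewhere in this repo; the commented-out Lambda lines above show the operations they wrap. Minimal sketches that are compatible with how they are called here (assumptions, not the originals):

import tensorflow as tf
from tensorflow.keras import layers

class KerasReduceMean(layers.Layer):
    """Mean over the given axes; with axis=(1, 2) this is a global average pool."""

    def __init__(self, axis=(1, 2), keep_dim=True, **kwargs):
        super().__init__(**kwargs)
        self.axis = tuple(axis)
        self.keep_dim = keep_dim

    def call(self, inputs):
        return tf.reduce_mean(inputs, axis=self.axis, keepdims=self.keep_dim)

class BilinearUpSampling2D(layers.Layer):
    """Bilinear upsampling by a fixed integer factor per spatial dimension."""

    def __init__(self, factor=(2, 2), **kwargs):
        super().__init__(**kwargs)
        self.factor = factor

    def call(self, inputs):
        shape = tf.shape(inputs)
        new_size = tf.stack([shape[1] * self.factor[0],
                             shape[2] * self.factor[1]])
        return tf.image.resize(inputs, new_size, method='bilinear')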