Example #1
    def vggvox_resnet2d_icassp(self, inputs, trainable=True):
        """Build the VGGVox ResNet-2D GhostVLAD embedding graph (TF1 layers API).

        Args:
            inputs: input tensor fed to the ResNet backbone.
            trainable: whether the created variables are trainable.

        Returns:
            L2-normalized embeddings of size `self.embedding_dim`.
        """
        # Hoist the shared regularizer to avoid repeating the attribute lookup.
        reg = self.l2_regularizer

        feat = backbone.resnet_2D_v1(inputs, trainable=trainable)

        # --- Fully connected block 1 (implemented as a 7x1 convolution) ---
        fc_out = tf.layers.conv2d(feat,
                                  self.embedding_dim, [7, 1],
                                  strides=[1, 1],
                                  activation='relu',
                                  kernel_initializer=tf.orthogonal_initializer(),
                                  use_bias=True,
                                  trainable=trainable,
                                  kernel_regularizer=reg,
                                  bias_regularizer=reg,
                                  name='x_fc')

        # --- GhostVLAD cluster-assignment scores (real + ghost clusters) ---
        assignment = tf.layers.conv2d(
            feat,
            self.vlad_clusters + self.ghost_clusters, [7, 1],
            strides=[1, 1],
            kernel_initializer=tf.orthogonal_initializer(),
            use_bias=True,
            trainable=trainable,
            kernel_regularizer=reg,
            bias_regularizer=reg,
            name='gvlad_center_assignment')

        # --- Feature aggregation via VLAD pooling ---
        pooled = self.vladPooling(fc_out, assignment)

        # --- Fully connected block 2: project to the embedding dimension ---
        embeddings = tf.layers.dense(
            pooled,
            self.embedding_dim,
            kernel_initializer=tf.orthogonal_initializer(),
            use_bias=True,
            trainable=trainable,
            kernel_regularizer=reg,
            bias_regularizer=reg,
            name='fc6')

        # Unit-length embeddings so cosine similarity reduces to a dot product.
        return tf.nn.l2_normalize(embeddings, 1)
Example #2
def stutter_model(input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    """Build the stutter-classification model on a ResNet-2D backbone.

    Args:
        input_dim: input spectrogram shape (freq, time, channels).
        num_class: number of output classes for the softmax head.
        mode: 'train' compiles the model with an optimizer; any other value
            returns the uncompiled network.
        args: namespace providing `net`, `bottleneck_dim` and `optimizer`.

    Returns:
        A Keras model (multi-GPU-wrapped and compiled when mode == 'train').

    Raises:
        IOError: if `args.optimizer` is not 'adam', 'sgd' or 'rmsprop'.
    """
    net = args.net
    bottleneck_dim = args.bottleneck_dim
    # NOTE(review): `tensorflow_backend._get_available_gpus` is a private,
    # TF1-only Keras API — confirm it exists in the pinned Keras version.
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    # NOTE(review): `x` is used via `.add(...)`, so the backbone here is
    # assumed to return a Sequential-style model — verify against backbone.
    x.add(keras.layers.Dense(bottleneck_dim, activation='relu',
                             kernel_initializer='orthogonal',
                             use_bias=True, trainable=True,
                             kernel_regularizer=keras.regularizers.l2(weight_decay),
                             bias_regularizer=keras.regularizers.l2(weight_decay),
                             name='fc6_deepid3'))

    # ===============================================
    #            Classification head (softmax)
    # ===============================================
    # Fix: the layer is built with use_bias=False, so the bias_regularizer
    # previously passed here was a dead argument and has been removed.
    x.add(keras.layers.Dense(num_class, activation='softmax',
                             kernel_initializer='orthogonal',
                             use_bias=False, trainable=True,
                             kernel_regularizer=keras.regularizers.l2(weight_decay),
                             name='prediction'))
    trnloss = 'categorical_crossentropy'

    model = x

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # Set up the optimizer.
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.0,
                                       nesterov=True)
        elif args.optimizer == 'rmsprop':
            opt = keras.optimizers.RMSprop(lr=0.1, rho=0.9, decay=0.0)
        else:
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])

    # model.summary()
    # exit()
    return model
Example #3
    def init_inference_for_train(self):
        """Build the training graph: input placeholders, backbone features,
        and the attention module's training cost."""
        # Placeholders for images, per-step labels and per-step loss masks.
        self.inputs = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                     name='input')
        self.labels = tf.placeholder(tf.int32, [None, self.max_step],
                                     name='label')
        self.masks = tf.placeholder(tf.float32, [None, self.max_step],
                                    name='mask')

        # Backbone weights are trainable in training mode.
        features = backbone.resnet_2D_v1(self.inputs, trainable=True)
        feat_shape = features.get_shape().as_list()
        self.channels = feat_shape[-1]
        # Number of spatial context positions attended over (H * W).
        self.num_ctx = feat_shape[1] * feat_shape[2]

        self._cost = self.buildAttention(features,
                                         self.labels,
                                         self.masks,
                                         is_train=True)
        self._init_cost = True
Example #4
    def init_inference(self):
        """Build the single-step inference graph and return the tensors
        needed to run the attention decoder one step at a time."""
        # Placeholders for the image and the decoder's previous-step state.
        self.inputs = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                     name='inputs')
        self.last_word = tf.placeholder(tf.int32, [None], name='last_word')
        self.last_output = tf.placeholder(tf.float32,
                                          [None, self.num_lstm_units],
                                          name='last_output')
        self.last_memory = tf.placeholder(tf.float32,
                                          [None, self.num_lstm_units],
                                          name='last_memory')

        # Backbone weights are frozen at inference time.
        features = backbone.resnet_2D_v1(self.inputs, trainable=False)
        feat_shape = features.get_shape().as_list()
        self.channels = feat_shape[-1]
        # Number of spatial context positions attended over (H * W).
        self.num_ctx = feat_shape[1] * feat_shape[2]

        probs, last_output, last_memory = self.buildAttention(
            features,
            last_word=self.last_word,
            last_output=self.last_output,
            last_memory=self.last_memory,
            is_train=False)
        return probs, last_output, last_memory
Example #5
def vggvox_resnet2d_icassp(
        input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    """Build the VGGVox ResNet-2D speaker network (ICASSP version, Keras).

    Args:
        input_dim: input spectrogram shape (freq, time, channels).
        num_class: number of speaker classes in the classification head.
        mode: 'train' compiles the model; 'eval' outputs L2-normalized
            bottleneck embeddings instead of class predictions.
        args: namespace providing `net`, `loss`, `vlad_cluster`,
            `ghost_cluster`, `bottleneck_dim`, `aggregation_mode`
            and `optimizer`.

    Returns:
        A Keras model (multi-GPU-wrapped and compiled when training).

    Raises:
        IOError: on unknown aggregation mode, loss, or optimizer.
    """
    net = args.net
    loss = args.loss
    vlad_clusters = args.vlad_cluster
    ghost_clusters = args.ghost_cluster
    bottleneck_dim = args.bottleneck_dim
    aggregation = args.aggregation_mode
    mgpu = len(tf.config.experimental.list_physical_devices('GPU'))

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)
    # ===============================================
    #            Fully Connected Block 1
    # ===============================================
    # 7x1 conv collapses the frequency axis into a bottleneck feature map.
    x_fc = keras.layers.Conv2D(
        bottleneck_dim, (7, 1),
        strides=(1, 1),
        activation='relu',
        kernel_initializer='orthogonal',
        use_bias=True,
        trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name='x_fc')(x)

    # ===============================================
    #            Feature Aggregation
    # ===============================================
    if aggregation == 'avg':
        if mode == 'train':
            x = keras.layers.AveragePooling2D((1, 5),
                                              strides=(1, 1),
                                              name='avg_pool')(x)
            x = keras.layers.Reshape((-1, bottleneck_dim))(x)
        else:
            x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(x)
            x = keras.layers.Reshape((1, bottleneck_dim))(x)

    elif aggregation == 'vlad':
        # Soft cluster-assignment scores for VLAD pooling.
        x_k_center = keras.layers.Conv2D(
            vlad_clusters, (7, 1),
            strides=(1, 1),
            kernel_initializer='orthogonal',
            use_bias=True,
            trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='vlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, mode='vlad',
                        name='vlad_pool')([x_fc, x_k_center])

    elif aggregation == 'gvlad':
        # GhostVLAD: extra "ghost" clusters absorb noisy features and are
        # dropped from the aggregated descriptor.
        x_k_center = keras.layers.Conv2D(
            vlad_clusters + ghost_clusters, (7, 1),
            strides=(1, 1),
            kernel_initializer='orthogonal',
            use_bias=True,
            trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='gvlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters,
                        g_centers=ghost_clusters,
                        mode='gvlad',
                        name='gvlad_pool')([x_fc, x_k_center])

    else:
        raise IOError('==> unknown aggregation mode')

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    x = keras.layers.Dense(
        bottleneck_dim,
        activation='relu',
        kernel_initializer='orthogonal',
        use_bias=True,
        trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name='fc6')(x)

    # ===============================================
    #            Softmax Vs AMSoftmax
    # ===============================================
    # Fix: both prediction heads use use_bias=False, so the bias_regularizer
    # previously passed to them was a dead argument and has been removed.
    if loss == 'softmax':
        y = keras.layers.Dense(
            num_class,
            activation='softmax',
            kernel_initializer='orthogonal',
            use_bias=False,
            trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            name='prediction')(x)
        trnloss = 'categorical_crossentropy'

    elif loss == 'amsoftmax':
        # AM-Softmax: L2-normalize features and constrain weight columns to
        # unit norm so the logits are cosine similarities.
        x_l2 = keras.layers.Lambda(lambda x: K.l2_normalize(x, 1))(x)
        y = keras.layers.Dense(
            num_class,
            kernel_initializer='orthogonal',
            use_bias=False,
            trainable=True,
            kernel_constraint=keras.constraints.unit_norm(),
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            name='prediction')(x_l2)
        trnloss = amsoftmax_loss

    else:
        raise IOError('==> unknown loss.')

    if mode == 'eval':
        # At eval time the output is the normalized fc6 embedding, not the
        # class prediction built above.
        y = keras.layers.Lambda(lambda x: keras.backend.l2_normalize(x, 1))(x)

    model = keras.models.Model(inputs,
                               y,
                               name='vggvox_resnet2D_{}_{}'.format(
                                   loss, aggregation))

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # Set up the optimizer.
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1,
                                       momentum=0.9,
                                       decay=0.0,
                                       nesterov=True)
        else:
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])
    return model