def vggvox_resnet2d_icassp(self, inputs, trainable=True):
    """Build the GhostVLAD speaker-embedding head on a 2D ResNet backbone.

    Pipeline: backbone features -> 7x1 conv bottleneck -> 7x1 conv cluster
    assignment (real + ghost clusters) -> VLAD pooling -> dense projection
    -> L2-normalized embedding.

    Args:
        inputs: input tensor fed to ``backbone.resnet_2D_v1``.
        trainable: whether the created variables are trainable.

    Returns:
        L2-normalized embedding tensor of width ``self.embedding_dim``.
    """
    # Both projection layers share the same L2 regularizer instance.
    reg = self.l2_regularizer
    feat = backbone.resnet_2D_v1(inputs, trainable=trainable)

    # ===============================================
    #            Fully Connected Block 1
    # ===============================================
    # 7x1 conv collapses the frequency axis and projects the backbone
    # features down to the embedding dimension.
    fc1 = tf.layers.conv2d(
        feat,
        self.embedding_dim,
        [7, 1],
        strides=[1, 1],
        activation='relu',
        kernel_initializer=tf.orthogonal_initializer(),
        use_bias=True,
        trainable=trainable,
        kernel_regularizer=reg,
        bias_regularizer=reg,
        name='x_fc')

    # ===============================================
    #            Feature Aggregation
    # ===============================================
    # Soft-assignment logits over vlad + ghost clusters; ghost clusters
    # absorb uninformative frames and are discarded by the pooling.
    assignment = tf.layers.conv2d(
        feat,
        self.vlad_clusters + self.ghost_clusters,
        [7, 1],
        strides=[1, 1],
        kernel_initializer=tf.orthogonal_initializer(),
        use_bias=True,
        trainable=trainable,
        kernel_regularizer=reg,
        bias_regularizer=reg,
        name='gvlad_center_assignment')
    pooled = self.vladPooling(fc1, assignment)

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    # Project the pooled descriptor to the final embedding size, then
    # L2-normalize so embeddings live on the unit hypersphere.
    embeddings = tf.layers.dense(
        pooled,
        self.embedding_dim,
        kernel_initializer=tf.orthogonal_initializer(),
        use_bias=True,
        trainable=trainable,
        kernel_regularizer=reg,
        bias_regularizer=reg,
        name='fc6')
    return tf.nn.l2_normalize(embeddings, 1)
def stutter_model(input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    """Build the stutter-classification network on a 2D ResNet backbone.

    Args:
        input_dim: spectrogram input shape handed to the backbone.
        num_class: number of classes for the softmax prediction head.
        mode: 'train' compiles the model with the optimizer from ``args``;
            any other value returns the uncompiled network.
        args: namespace providing ``.net``, ``.bottleneck_dim`` and
            ``.optimizer``.

    Returns:
        The Keras model (multi-GPU wrapped and compiled when training).

    Raises:
        IOError: if ``args.optimizer`` is not 'adam', 'sgd' or 'rmsprop'.
    """
    net = args.net
    bottleneck_dim = args.bottleneck_dim
    # NOTE(review): args.loss was read here but never used -- the training
    # loss is hard-coded to categorical cross-entropy below, so the dead
    # local was dropped.
    # Count GPUs via the public API, consistent with vggvox_resnet2d_icassp,
    # instead of the private keras.backend.tensorflow_backend helper.
    mgpu = len(tf.config.experimental.list_physical_devices('GPU'))

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    # NOTE(review): x.add(...) assumes the backbone returns a
    # Sequential-style model here (unlike the functional-API usage in
    # vggvox_resnet2d_icassp) -- verify against backbone.resnet_2D_v*.
    x.add(keras.layers.Dense(bottleneck_dim, activation='relu',
                             kernel_initializer='orthogonal',
                             use_bias=True, trainable=True,
                             kernel_regularizer=keras.regularizers.l2(weight_decay),
                             bias_regularizer=keras.regularizers.l2(weight_decay),
                             name='fc6_deepid3'))

    # ===============================================
    #            Softmax Vs AMSoftmax
    # ===============================================
    x.add(keras.layers.Dense(num_class, activation='softmax',
                             kernel_initializer='orthogonal',
                             use_bias=False, trainable=True,
                             kernel_regularizer=keras.regularizers.l2(weight_decay),
                             bias_regularizer=keras.regularizers.l2(weight_decay),
                             name='prediction'))
    trnloss = 'categorical_crossentropy'
    model = x

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # Set up the optimizer.
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=True)
        elif args.optimizer == 'rmsprop':
            opt = keras.optimizers.RMSprop(lr=0.1, rho=0.9, decay=0.0)
        else:
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])
    return model
def init_inference_for_train(self):
    """Create the training placeholders and build the attention-loss graph.

    Side effects: sets ``self.inputs`` / ``self.labels`` / ``self.masks``
    placeholders, caches feature-map geometry in ``self.channels`` and
    ``self.num_ctx``, stores the loss in ``self._cost`` and flags
    ``self._init_cost``.
    """
    # ===============================================
    #            build network and loss
    # ===============================================
    # Placeholders: 224x224 RGB images, per-step integer labels, and a
    # float mask over the max_step decoding positions.
    self.inputs = tf.placeholder(tf.float32, [None, 224, 224, 3], name='input')
    self.labels = tf.placeholder(tf.int32, [None, self.max_step], name='label')
    self.masks = tf.placeholder(tf.float32, [None, self.max_step], name='mask')

    features = backbone.resnet_2D_v1(self.inputs, trainable=True)
    feat_shape = features.get_shape().as_list()
    self.channels = feat_shape[-1]
    # Number of spatial positions the attention can attend over (H * W).
    self.num_ctx = feat_shape[1] * feat_shape[2]

    self._cost = self.buildAttention(features, self.labels, self.masks,
                                     is_train=True)
    self._init_cost = True
def init_inference(self):
    """Build the single-step inference graph.

    Returns:
        A ``(probs, last_output, last_memory)`` tuple of tensors for one
        attention decoding step, computed from frozen backbone features.
    """
    # Placeholders fed at every decoding step: the image batch plus the
    # previous word id and the LSTM output/memory state.
    self.inputs = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                 name='inputs')
    self.last_word = tf.placeholder(tf.int32, [None], name='last_word')
    self.last_output = tf.placeholder(tf.float32,
                                      [None, self.num_lstm_units],
                                      name='last_output')
    self.last_memory = tf.placeholder(tf.float32,
                                      [None, self.num_lstm_units],
                                      name='last_memory')

    # Backbone is frozen (trainable=False) at inference time.
    features = backbone.resnet_2D_v1(self.inputs, trainable=False)
    feat_shape = features.get_shape().as_list()
    self.channels = feat_shape[-1]
    # Spatial positions available as attention context (H * W).
    self.num_ctx = feat_shape[1] * feat_shape[2]

    probs, last_output, last_memory = self.buildAttention(
        features,
        last_word=self.last_word,
        last_output=self.last_output,
        last_memory=self.last_memory,
        is_train=False)
    return probs, last_output, last_memory
def vggvox_resnet2d_icassp(
        input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    """Build the Keras VGGVox ResNet-2D speaker network (ICASSP variant).

    Backbone features are projected through a 7x1 conv bottleneck, pooled
    with avg / VLAD / GhostVLAD aggregation, projected again ('fc6'), and
    topped with a softmax or AM-softmax classification head. In 'eval'
    mode the head is dropped and the L2-normalized 'fc6' embedding is the
    model output.

    Args:
        input_dim: spectrogram input shape for the backbone.
        num_class: number of speaker classes for the prediction head.
        mode: 'train' compiles the model; 'eval' outputs embeddings.
        args: namespace with .net, .loss, .vlad_cluster, .ghost_cluster,
            .bottleneck_dim, .aggregation_mode and .optimizer.

    Returns:
        The Keras model (multi-GPU wrapped and compiled when training).

    Raises:
        IOError: on unknown aggregation mode, loss, or optimizer type.
    """
    net = args.net
    loss = args.loss
    vlad_clusters = args.vlad_cluster
    ghost_clusters = args.ghost_cluster
    bottleneck_dim = args.bottleneck_dim
    aggregation = args.aggregation_mode
    # GPU count decides whether the model is wrapped in ModelMGPU below.
    mgpu = len(tf.config.experimental.list_physical_devices('GPU'))

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)

    # ===============================================
    #            Fully Connected Block 1
    # ===============================================
    # 7x1 conv bottleneck over the backbone features; feeds the VLAD /
    # GhostVLAD descriptors below.
    x_fc = keras.layers.Conv2D(
        bottleneck_dim, (7, 1),
        strides=(1, 1),
        activation='relu',
        kernel_initializer='orthogonal',
        use_bias=True, trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name='x_fc')(x)

    # ===============================================
    #            Feature Aggregation
    # ===============================================
    if aggregation == 'avg':
        if mode == 'train':
            # NOTE(review): pools the raw backbone output `x` (not `x_fc`)
            # and reshapes to bottleneck_dim -- this assumes the backbone's
            # channel count equals bottleneck_dim; confirm against
            # backbone.resnet_2D_v*.
            x = keras.layers.AveragePooling2D((1, 5), strides=(1, 1),
                                              name='avg_pool')(x)
            x = keras.layers.Reshape((-1, bottleneck_dim))(x)
        else:
            x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(x)
            x = keras.layers.Reshape((1, bottleneck_dim))(x)
    elif aggregation == 'vlad':
        # Soft-assignment logits over the VLAD clusters.
        x_k_center = keras.layers.Conv2D(
            vlad_clusters, (7, 1),
            strides=(1, 1),
            kernel_initializer='orthogonal',
            use_bias=True, trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='vlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, mode='vlad',
                        name='vlad_pool')([x_fc, x_k_center])
    elif aggregation == 'gvlad':
        # GhostVLAD: extra ghost clusters soak up uninformative frames and
        # are discarded inside the pooling layer.
        x_k_center = keras.layers.Conv2D(
            vlad_clusters + ghost_clusters, (7, 1),
            strides=(1, 1),
            kernel_initializer='orthogonal',
            use_bias=True, trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='gvlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, g_centers=ghost_clusters,
                        mode='gvlad', name='gvlad_pool')([x_fc, x_k_center])
    else:
        raise IOError('==> unknown aggregation mode')

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    # 'fc6' is the embedding layer; in eval mode its normalized output is
    # what the model returns.
    x = keras.layers.Dense(
        bottleneck_dim, activation='relu',
        kernel_initializer='orthogonal',
        use_bias=True, trainable=True,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
        bias_regularizer=keras.regularizers.l2(weight_decay),
        name='fc6')(x)

    # ===============================================
    #            Softmax Vs AMSoftmax
    # ===============================================
    if loss == 'softmax':
        y = keras.layers.Dense(
            num_class, activation='softmax',
            kernel_initializer='orthogonal',
            use_bias=False, trainable=True,
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='prediction')(x)
        trnloss = 'categorical_crossentropy'
    elif loss == 'amsoftmax':
        # AM-softmax: L2-normalize the embedding and constrain the class
        # weights to unit norm so logits are cosine similarities.
        x_l2 = keras.layers.Lambda(lambda x: K.l2_normalize(x, 1))(x)
        y = keras.layers.Dense(
            num_class,
            kernel_initializer='orthogonal',
            use_bias=False, trainable=True,
            kernel_constraint=keras.constraints.unit_norm(),
            kernel_regularizer=keras.regularizers.l2(weight_decay),
            bias_regularizer=keras.regularizers.l2(weight_decay),
            name='prediction')(x_l2)
        # Custom margin loss defined elsewhere in the project.
        trnloss = amsoftmax_loss
    else:
        raise IOError('==> unknown loss.')

    # Eval mode discards the classification head: the output becomes the
    # L2-normalized 'fc6' embedding.
    if mode == 'eval':
        y = keras.layers.Lambda(lambda x: keras.backend.l2_normalize(x, 1))(x)

    model = keras.models.Model(inputs, y,
                               name='vggvox_resnet2D_{}_{}'.format(
                                   loss, aggregation))

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # set up optimizer.
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.0,
                                       nesterov=True)
        else:
            # NOTE(review): no 'rmsprop' branch here, unlike stutter_model.
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])
    return model