def _build_model(inputs_queue, clone_batch_size):
    """Builds a clone of the train model.

    Args:
        inputs_queue: A prefetch queue for images and labels.
        clone_batch_size: Per-clone, per-class batch size.

    Returns:
        The classification logits, plus the counts logits when
        `FLAGS.add_counts_logits` is set for the protein dataset.
    """
    samples = inputs_queue.dequeue()
    batch_size = clone_batch_size * FLAGS.num_classes
    inputs = tf.identity(samples['image'], name='image')
    labels = tf.identity(samples['label'], name='label')
    model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
    net, end_points = model.get_features(
        inputs,
        model_options=model_options,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    logits, _ = model.classification(net, end_points,
                                     num_classes=FLAGS.num_classes,
                                     is_training=True)

    if FLAGS.multi_label:
        with tf.name_scope('Multilabel_logits'):
            logits = slim.softmax(logits)
            half_batch_size = batch_size / 2
            for i in range(1, FLAGS.num_classes):
                class_logits = tf.identity(logits[:, i],
                                           name='class_logits_%02d' % i)
                class_labels = tf.identity(labels[:, i],
                                           name='class_labels_%02d' % i)
                num_positive = tf.reduce_sum(class_labels)
                num_negative = batch_size - num_positive
                # Reweight so positives and negatives each carry half of
                # the total batch weight.
                weights = tf.where(
                    tf.equal(class_labels, 1.0),
                    tf.tile([half_batch_size / num_positive], [batch_size]),
                    tf.tile([half_batch_size / num_negative], [batch_size]))
                train_utils.focal_loss(class_labels, class_logits,
                                       weights=weights,
                                       scope='class_loss_%02d' % i)
    else:
        logits = slim.softmax(logits)
        train_utils.focal_loss(labels, logits, scope='cls_loss')

    if FLAGS.dataset == 'protein' and FLAGS.add_counts_logits:
        counts = tf.identity(samples['counts'] - 1, name='counts')
        one_hot_counts = slim.one_hot_encoding(counts, 5)
        counts_logits, _ = model.classification(net, end_points,
                                                num_classes=5,
                                                is_training=True,
                                                scope='Counts_logits')
        counts_logits = slim.softmax(counts_logits)
        train_utils.focal_loss(one_hot_counts, counts_logits,
                               scope='counts_loss')
        return logits, counts_logits

    return logits
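# A minimal numeric sketch of the positive/negative reweighting above,
# assuming TensorFlow 1.x; the toy labels and batch size are illustrative,
# not values from the training pipeline.
import tensorflow as tf

batch_size = 4
class_labels = tf.constant([1.0, 0.0, 0.0, 1.0])  # one class column, 2 positives
half_batch_size = batch_size / 2

num_positive = tf.reduce_sum(class_labels)  # 2.0
num_negative = batch_size - num_positive    # 2.0
# Positives share half of the total batch weight, negatives the other half.
weights = tf.where(
    tf.equal(class_labels, 1.0),
    tf.tile([half_batch_size / num_positive], [batch_size]),
    tf.tile([half_batch_size / num_negative], [batch_size]))

with tf.Session() as sess:
    print(sess.run(weights))  # [1. 1. 1. 1.] -- uniform when classes balance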
def _add_pixellink_layers(self, basenet, end_points):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(self.weight_decay),
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer()):
        # Pixel (text/non-text) branch: fuse upsampled deep stages with
        # shallower ones, PixelLink-style.
        pixel_cls_pred = 2
        pixel_cls_stage_1 = (
            slim.conv2d(end_points['fc7'], pixel_cls_pred, [1, 1],
                        scope='stage_6_pixel_fuse') +
            slim.conv2d(end_points['conv5_3'], pixel_cls_pred, [1, 1],
                        scope='stage_5_pixel_fuse'))
        pixel_cls_stage_2 = (
            self.unpool(pixel_cls_stage_1) +
            slim.conv2d(end_points['conv4_3'], pixel_cls_pred, [1, 1],
                        scope='stage_4_pixel_fuse'))
        pixel_cls_stage_3 = (
            self.unpool(pixel_cls_stage_2) +
            slim.conv2d(end_points['conv3_3'], pixel_cls_pred, [1, 1],
                        scope='stage_3_pixel_fuse'))
        pixel_cls = slim.conv2d(pixel_cls_stage_3, pixel_cls_pred, [1, 1],
                                scope='text_predication')

        # Link branch: 8 neighbours x 2 logits = 16 channels.
        link_cls_pred = 16
        link_cls_stage_1 = (
            slim.conv2d(end_points['fc7'], link_cls_pred, [1, 1],
                        scope='stage_6_link_fuse') +
            slim.conv2d(end_points['conv5_3'], link_cls_pred, [1, 1],
                        scope='stage_5_link_fuse'))
        link_cls_stage_2 = (
            self.unpool(link_cls_stage_1) +
            slim.conv2d(end_points['conv4_3'], link_cls_pred, [1, 1],
                        scope='stage_4_link_fuse'))
        link_cls_stage_3 = (
            self.unpool(link_cls_stage_2) +
            slim.conv2d(end_points['conv3_3'], link_cls_pred, [1, 1],
                        scope='stage_3_link_fuse'))
        link_cls = slim.conv2d(link_cls_stage_3, link_cls_pred, [1, 1],
                               scope='link_predication')

        self.pixel_cls = pixel_cls
        self.link_cls = link_cls
        self.pixel_scores = slim.softmax(pixel_cls)
        link_scores = slim.softmax(link_cls[:, :, :, 0:2])

        # Summaries for the first image in the batch.
        pixel_pred_image = tf.expand_dims(self.pixel_scores[0, :, :, 1], 0)
        pixel_pred_image = tf.expand_dims(pixel_pred_image, 3)
        tf.summary.image('pixel_pred_image', pixel_pred_image)
        link_pred_image = tf.expand_dims(link_scores[0, :, :, 1], 0)
        link_pred_image = tf.expand_dims(link_pred_image, 3)
        tf.summary.image('link_pred_image', link_pred_image)
        # self.link_scores = tf.stack([link_scores[:, :, :, 0], link_scores[:, :, :, 2],
        #                              link_scores[:, :, :, 4], link_scores[:, :, :, 6],
        #                              link_scores[:, :, :, 8], link_scores[:, :, :, 10],
        #                              link_scores[:, :, :, 12], link_scores[:, :, :, 14]], axis=3)
        tf.summary.histogram('pixel_scores', self.pixel_scores)
        tf.summary.histogram('link_scores', link_scores)
        return pixel_cls, link_cls
def _add_seglink_layers(self):
    all_seg_scores = []
    all_seg_offsets = []
    all_within_layer_link_scores = []
    all_cross_layer_link_scores = []
    for layer_name in self.feat_layers:
        with tf.variable_scope(layer_name):
            seg_scores, seg_offsets, within_layer_link_scores, \
                cross_layer_link_scores = self._build_seg_link_layer(layer_name)
            all_seg_scores.append(seg_scores)
            all_seg_offsets.append(seg_offsets)
            all_within_layer_link_scores.append(within_layer_link_scores)
            all_cross_layer_link_scores.append(cross_layer_link_scores)

    self.seg_score_logits = reshape_and_concat(all_seg_scores)  # (batch_size, N, 2)
    self.seg_scores = slim.softmax(self.seg_score_logits)  # (batch_size, N, 2)
    self.seg_offsets = reshape_and_concat(all_seg_offsets)  # (batch_size, N, 5)
    self.cross_layer_link_scores = reshape_and_concat(
        all_cross_layer_link_scores)  # (batch_size, 8N, 2)
    self.within_layer_link_scores = reshape_and_concat(
        all_within_layer_link_scores)  # (batch_size, 4(N - N_conv4_3), 2)
    self.link_score_logits = tf.concat(
        [self.within_layer_link_scores, self.cross_layer_link_scores], axis=1)
    self.link_scores = slim.softmax(self.link_score_logits)

    tf.summary.histogram('link_scores', self.link_scores)
    tf.summary.histogram('seg_scores', self.seg_scores)
def __init__(self, learning_rate=5e-4, context_length=7):
    self.probability_tensor = tf.placeholder(dtype='float32', shape=(None, 4))
    self.neighbourhood_tensor = tf.placeholder(
        dtype='float32', shape=(None, context_length, context_length, 4))
    self.label_tensor = tf.placeholder(dtype='int32', shape=(None, 4))

    # This version runs during training.
    with tf.variable_scope("context_classifier"):
        with slim.arg_scope([slim.dropout], is_training=True), \
             slim.arg_scope([slim.fully_connected],
                            normalizer_params={'is_training': True}):
            self.prediction_logits = classifier_model(
                self.probability_tensor, self.neighbourhood_tensor)

    # This version runs during inference.
    with tf.variable_scope("context_classifier", reuse=True):
        with slim.arg_scope([slim.dropout], is_training=False), \
             slim.arg_scope([slim.fully_connected],
                            normalizer_params={'is_training': False}):
            self.inference_prediction_logits = classifier_model(
                self.probability_tensor, self.neighbourhood_tensor)

    (self.train_op, self.loss) = get_training_op(self.prediction_logits,
                                                 self.label_tensor,
                                                 learning_rate=learning_rate)

    # Compute useful output on top of the network predictions.
    self.predictions = slim.softmax(self.prediction_logits)
    self.inference_predictions = slim.softmax(self.inference_prediction_logits)
    self.dropout_accuracy = utils.get_accuracy(self.predictions, self.label_tensor)
    self.accuracy = utils.get_accuracy(self.inference_predictions, self.label_tensor)
    self.f1 = utils.get_weighted_f1(self.inference_predictions, self.label_tensor)
    self.confusion = utils.get_confusion(self.inference_predictions, self.label_tensor)
def build_model(self):
    """Extract feature model."""
    self.X_feature_S = self.feature_extractor(self.source_images)
    self.X_feature_T = self.feature_extractor(self.target_images)

    self.class_pred_S = self.classifier(self.X_feature_S)
    self.class_pred_T = self.classifier(self.X_feature_T)

    self.D_S, self.D_logit_S = self.discriminator(self.X_feature_S)
    self.D_T, self.D_logit_T = self.discriminator(self.X_feature_T)

    self.D_S_sum = tf.summary.histogram("D_S", self.D_S)
    self.D_T_sum = tf.summary.histogram("D_T", self.D_T)

    # Supervised classification loss on the labelled source domain.
    self.C_loss_S = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y_label_S, logits=self.class_pred_S))

    self.class_pred_T_softmax = slim.softmax(self.class_pred_T)
    self.class_pred_S_softmax = slim.softmax(self.class_pred_S)
    self.C_T_softmax_sum_h = tf.summary.histogram(
        "class_pred_T_softmax", self.class_pred_T_softmax)

    # Entropy-minimization loss on the unlabelled target domain.
    self.C_loss_T = -self.lambda_T * tf.reduce_mean(
        tf.reduce_sum(
            self.class_pred_T_softmax * tf.log(self.class_pred_T_softmax),
            axis=1))

    self.C_loss_S_sum = tf.summary.scalar("C_loss_S", self.C_loss_S)
    self.C_loss_T_sum = tf.summary.scalar("C_loss_T", self.C_loss_T)

    # Discriminator and generator losses for the adversarial feature alignment.
    self.D_loss_S = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_S,
                                                labels=self.label_sd))
    self.D_loss_T = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_T,
                                                labels=self.label_td))
    self.D_loss = self.BetaGD * (self.D_loss_S + self.D_loss_T)
    self.D_loss_S_sum = tf.summary.scalar("D_loss_S", self.D_loss_S)
    self.D_loss_T_sum = tf.summary.scalar("D_loss_T", self.D_loss_T)
    self.D_loss_sum = tf.summary.scalar("D_loss", self.D_loss)

    self.G_loss_S = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_S,
                                                labels=self.label_sg))
    self.G_loss_T = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_T,
                                                labels=self.label_tg))
    self.G_loss = self.BetaGD * (self.G_loss_T + self.G_loss_S)
    self.G_loss_S_sum = tf.summary.scalar("G_loss_S", self.G_loss_S)
    self.G_loss_T_sum = tf.summary.scalar("G_loss_T", self.G_loss_T)
    self.G_loss_sum = tf.summary.scalar("G_loss", self.G_loss)

    self.saver = tf.train.Saver()
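# Intuition for C_loss_T above: sum(p * log(p)) is non-positive, so
# -lambda_T * mean(sum(p * log(p))) equals lambda_T times the mean prediction
# entropy, and minimizing it sharpens the target-domain predictions.
# A small sketch assuming TensorFlow 1.x; the probabilities are toy values.
import tensorflow as tf

p = tf.constant([[0.5, 0.5],   # maximally uncertain row
                 [0.9, 0.1]])  # fairly confident row
entropy = -tf.reduce_sum(p * tf.log(p), axis=1)

with tf.Session() as sess:
    print(sess.run(entropy))  # ~[0.693, 0.325]: confident rows cost less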
def construct_xybin_v1(images, is_training, n_bins):
    batch_norm_params = {
        'is_training': is_training,
        'decay': 0.8,
        'updates_collections': None,
        'center': True,
        'scale': True,
        'trainable': True
    }
    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        mynet = slim.repeat(images, 2, slim.conv2d, 16, [3, 3], scope='conv1')
        mynet = slim.max_pool2d(mynet, [2, 2], scope='pool1')
        mynet = slim.repeat(mynet, 2, slim.conv2d, 32, [3, 3], scope='conv2')
        mynet = slim.max_pool2d(mynet, [2, 2], scope='pool2')
        mynet = slim.repeat(mynet, 2, slim.conv2d, 64, [3, 3], scope='conv3')
        mynet = slim.max_pool2d(mynet, [2, 2], scope='pool3')
        mynet = slim.repeat(mynet, 2, slim.conv2d, 128, [3, 3], scope='conv4')
        mynet = slim.max_pool2d(mynet, [2, 2], scope='pool4')
        mynet = slim.repeat(mynet, 2, slim.conv2d, 256, [3, 3], scope='conv5')
        mynet = slim.max_pool2d(mynet, [2, 2], scope='pool5')
        features = slim.flatten(mynet, scope='flatten')

    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        # To add additional fully connected layers...
        # Our tests showed no substantial difference
        # mynet = slim.fully_connected(mynet, 4096, scope='fc5')
        # mynet = slim.dropout(mynet, 0.5, scope='dropout5')
        # mynet = slim.fully_connected(mynet, 4096, scope='fc6')
        # mynet = slim.dropout(mynet, 0.5, scope='dropout6')
        xbins = slim.fully_connected(features, n_bins, activation_fn=None,
                                     scope='xbins')
        xbins = slim.softmax(xbins, scope='smx')
        ybins = slim.fully_connected(features, n_bins, activation_fn=None,
                                     scope='ybins')
        ybins = slim.softmax(ybins, scope='smy')
        mynet = tf.stack([xbins, ybins])

    return mynet, features
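# A hedged usage sketch for construct_xybin_v1, assuming TensorFlow 1.x;
# the 64x64 input size and 32 bins are illustrative assumptions, not values
# from the original training script.
images = tf.placeholder(tf.float32, [None, 64, 64, 3])
bins, features = construct_xybin_v1(images, is_training=False, n_bins=32)
# `bins` stacks the two softmax heads: bins[0] holds the x-bin probabilities
# and bins[1] the y-bin probabilities, each of shape [batch, n_bins].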
def build_model(self):
    """Classifier net model."""
    self.C_W1 = tf.Variable(self.xavier_init([self.feature_dim, self.category_num]))
    self.C_b1 = tf.Variable(tf.zeros(shape=[self.category_num]))
    self.theta_C = [self.C_W1, self.C_b1]

    # Discriminator parameters.
    self.D_W1 = tf.Variable(self.xavier_init([self.feature_dim, self.h1_dim]))
    self.D_b1 = tf.Variable(tf.zeros(shape=[self.h1_dim]))
    self.D_W2 = tf.Variable(self.xavier_init([self.h1_dim, self.h2_dim]))
    self.D_b2 = tf.Variable(tf.zeros(shape=[self.h2_dim]))
    self.D_W3 = tf.Variable(self.xavier_init([self.h2_dim, self.category_num + 1]))
    self.D_b3 = tf.Variable(tf.zeros(shape=[self.category_num + 1]))
    self.theta_D = [self.D_W1, self.D_W2, self.D_W3,
                    self.D_b1, self.D_b2, self.D_b3]

    # Extract feature model (shared weights via reuse).
    self.X_feature_S = self.feature_extractor(self.source_images, reuse=None)
    self.X_feature_T = self.feature_extractor(self.target_images, reuse=True)

    self.D_S, self.D_logit_S = self.discriminator(self.X_feature_S)
    self.D_T, self.D_logit_T = self.discriminator(self.X_feature_T)
    self.class_pred_S = self.classifier(self.X_feature_S)
    self.class_pred_T = self.classifier(self.X_feature_T)

    self.D_S_sum = tf.summary.histogram("D_S", self.D_S)
    self.D_T_sum = tf.summary.histogram("D_T", self.D_T)

    self.C_loss_S = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y_label_S, logits=self.class_pred_S))
    self.class_pred_T_softmax = slim.softmax(self.class_pred_T)
    self.class_pred_S_softmax = slim.softmax(self.class_pred_S)
    self.C_T_softmax_sum_h = tf.summary.histogram(
        "class_pred_T_softmax", self.class_pred_T_softmax)
    # self.C_T_softmax_sum_d = tf.summary.scalar("class_pred_T_softmax", self.class_pred_T_softmax)

    # Entropy-minimization loss on the unlabelled target domain.
    self.C_loss_T = -self.lambda_T * tf.reduce_mean(
        tf.reduce_sum(
            self.class_pred_T_softmax * tf.log(self.class_pred_T_softmax),
            axis=1))
    self.C_loss_S_sum = tf.summary.scalar("C_loss_S", self.C_loss_S)
    self.C_loss_T_sum = tf.summary.scalar("C_loss_T", self.C_loss_T)

    self.D_loss_S = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_S,
                                                labels=self.label_sd))
    self.D_loss_T = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_T,
                                                labels=self.label_td))
    self.D_loss = self.BetaGD * (self.D_loss_S + self.D_loss_T)
    # self.D_loss = self.BetaGD * self.D_loss_T
    self.D_loss_S_sum = tf.summary.scalar("D_loss_S", self.D_loss_S)
    self.D_loss_T_sum = tf.summary.scalar("D_loss_T", self.D_loss_T)
    self.D_loss_sum = tf.summary.scalar("D_loss", self.D_loss)

    self.G_loss_S = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_S,
                                                labels=self.label_sg))
    self.G_loss_T = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.D_logit_T,
                                                labels=self.label_tg))
    self.G_loss = self.BetaGD * (self.G_loss_T + self.G_loss_S)
    self.G_loss_S_sum = tf.summary.scalar("G_loss_S", self.G_loss_S)
    self.G_loss_T_sum = tf.summary.scalar("G_loss_T", self.G_loss_T)
    self.G_loss_sum = tf.summary.scalar("G_loss", self.G_loss)

    self.saver = tf.train.Saver(max_to_keep=1)
def fast_rcnn_predict(self):
    with tf.variable_scope('fast_rcnn_predict'):
        fast_rcnn_softmax_scores = slim.softmax(
            self.fast_rcnn_scores)  # [-1, num_classes+1]
        fast_rcnn_softmax_scores_rotate = slim.softmax(
            self.fast_rcnn_scores_rotate)  # [-1, num_classes+1]

        fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
        fast_rcnn_encode_boxes_rotate = tf.reshape(
            self.fast_rcnn_encode_boxes_rotate, [-1, 5])

        reference_boxes = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])  # [N, 4*num_classes]
        reference_boxes = tf.reshape(reference_boxes, [-1, 4])  # [N*num_classes, 4]
        fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=fast_rcnn_encode_boxes,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)
        fast_rcnn_decode_boxes_rotate = encode_and_decode.decode_boxes_rotate(
            encode_boxes=fast_rcnn_encode_boxes_rotate,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            fast_rcnn_decode_boxes, img_shape=self.img_shape)

        # multiclass NMS
        fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes,
                                            [-1, self.num_classes * 4])
        fast_rcnn_decode_boxes_rotate = tf.reshape(
            fast_rcnn_decode_boxes_rotate, [-1, self.num_classes * 5])

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(fast_rcnn_decode_boxes,
                                     scores=fast_rcnn_softmax_scores)
        fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \
            num_of_objects_rotate, detection_category_rotate = \
            self.fast_rcnn_proposals_rotate(
                fast_rcnn_decode_boxes_rotate,
                scores=fast_rcnn_softmax_scores_rotate,
                head_quadrant=self.fast_rcnn_head_quadrant)

        return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category, \
               fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \
               num_of_objects_rotate, detection_category_rotate
def build_model(x, y,
                num_classes=2,
                num_estimator=3,  # we misuse num_estimator for the number of convolutions
                num_filter=128,
                is_training=True,
                reuse=None):
    """
    Handle the model: calculate the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.

    Here we do boosting without additive training.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)

    # model
    with tf.variable_scope('model_v1'):
        predictions = classify(x, num_classes=num_classes, num_filter=num_filter,
                               route=num_estimator, is_training=is_training,
                               reuse=reuse, scope='wk')
        loss = loss_fkt(predictions, y)

    # results
    predictions = tf.argmax(slim.softmax(predictions), 1)

    return loss, predictions
def rpn_proposals(self):
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        if not self.is_training:  # at test time, clip proposals to image boundaries
            img_shape = tf.shape(self.img_batch)
            rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                rpn_decode_boxes, img_shape)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column is the object score

        if self.top_k_nms:
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=self.top_k_nms)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        valid_indices = nms.non_maximal_suppression(
            boxes=rpn_decode_boxes,
            scores=rpn_object_score,
            max_output_size=self.max_proposals_num,
            iou_threshold=self.rpn_nms_iou_threshold)
        valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        valid_scores = tf.gather(rpn_object_score, valid_indices)

        # Pad with zeros when NMS keeps fewer than max_proposals_num boxes.
        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
            lambda: boxes_utils.padd_boxes_with_zeros(valid_boxes, valid_scores,
                                                      self.max_proposals_num),
            lambda: (valid_boxes, valid_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def slim_model(inputs):
    with tf.variable_scope("slim-fc", [inputs]) as vs:
        end_points_collection = vs.original_name_scope + "_endpoints"
        with slim.arg_scope(
                [slim.fully_connected],
                activation_fn=tf.nn.relu,
                weights_initializer=tf.random_normal_initializer(stddev=0.1),
                biases_initializer=tf.constant_initializer(1.0),
                weights_regularizer=slim.l2_regularizer(0.05),
                outputs_collections=end_points_collection):
            net = slim.fully_connected(inputs, num_outputs=n_hidden1, scope="fc1")
            net = slim.fully_connected(net, num_outputs=n_hidden2, scope="fc2")
            net = slim.fully_connected(net, num_outputs=n_outputs,
                                       activation_fn=None, scope="outputs")
            # Note: these are post-softmax probabilities, not raw logits.
            predictions = slim.softmax(net, scope="softmax")
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            return predictions, end_points
def inference(self):
    """
    LeNet
    :return:
    """
    def init_weights(shape):
        return tf.Variable(tf.random_normal(shape, stddev=0.01))

    # input reshaping
    # X = tf.random_normal([1, 28, 28, 1])
    X = tf.reshape(self.X, [-1, 28, 28, 1])

    net = slim.conv2d(X, 32, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 1024, scope='fc3')
    self.logits = slim.fully_connected(net, 10, activation_fn=None, scope='fc4')
    self.Y_hat = slim.softmax(self.logits, scope='Predictions')  # softmax
def train(epoch_num=200, batch_size=20):
    images, labels = mnist_util.load_mnist(mnist_params.MNIST_DATASET_PATH, 'train')
    images = np.reshape(images, (-1, 28, 28, 1)).astype(np.float32)
    labels = dense_to_one_hot(labels, mnist_params.MNIST_NUM_CLASSES).astype(np.float32)
    watch = log_util.Watch('TrainInfo')

    with tf.Session() as sess:
        with tf.variable_scope('mnist'), \
             slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            activation_fn=tf.nn.relu,
                            trainable=True):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            image_batch = tf.placeholder(tf.float32, shape=(None, 28, 28, 1),
                                         name='image')
            x = slim.flatten(image_batch)
            logits = slim.fully_connected(x, 10)
            label_batch = tf.placeholder(
                tf.float32, shape=(None, mnist_params.MNIST_NUM_CLASSES),
                name='label')
            cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=label_batch)
            # Predictions come from the logits, not from the loss values.
            predict = slim.softmax(logits)
            loss = tf.reduce_mean(cross_entropy)
            optimizer = tf.train.AdamOptimizer(learning_rate=0.05,
                                               epsilon=1.0e-08, name='adam')
            optimizer.minimize(loss, global_step=global_step, name='train_op')

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(sess, coord)
        train_op = sess.graph.get_operation_by_name('mnist/train_op')

        total_loss = 0
        for i in range(epoch_num):
            for j in range(len(images) // batch_size):
                try:
                    input_image = images[batch_size * j:batch_size * (j + 1)]
                    input_label = labels[batch_size * j:batch_size * (j + 1)]
                    [r_step, r_loss, _] = sess.run(
                        [global_step, loss, train_op],
                        feed_dict={image_batch: input_image,
                                   label_batch: input_label})
                    total_loss += r_loss
                    watch.display('step %d, loss is %.3f' % (r_step, r_loss))
                    # watch.display('step %d, loss is %.3f, label is %s, image is %s'
                    #               % (r_step, r_loss, str(input_label.argmax(axis=1)),
                    #                  str(input_image[:, 14, 14, 0])))
                except tf.errors.OutOfRangeError:
                    saver.save(sess, mnist_params.MNIST_MODEL)
                    print('done training mnist')
                    break
def create(self, is_training=False):
    with tf.variable_scope(self.scope, reuse=self.reuse):
        with slim.arg_scope([slim.fully_connected], activation_fn=tf.nn.relu):
            with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
                                padding='VALID'):
                net = self.inputs
                net = slim.conv2d(net, 64, 5, scope='conv1')
                self.conv1 = net
                net = slim.max_pool2d(net, 2, stride=2, scope='pool1')
                self.pool1 = net
                net = slim.conv2d(net, 128, 5, scope='conv2')
                self.conv2 = net
                net = slim.max_pool2d(net, 2, stride=2, scope='pool2')
                self.pool2 = net
                self.cmp = net
                net = tf.contrib.layers.flatten(net)
                self.att_flat = net
                net = slim.fully_connected(net, 1024, activation_fn=tf.nn.relu,
                                           scope='fc3')
                self.fc3 = net
                net = slim.dropout(net, 0.5, is_training=self.training_flag)
                net = slim.fully_connected(net, 64, activation_fn=tf.tanh,
                                           scope='fc4')
                self.fc4 = net
                net = slim.fully_connected(net, 10, activation_fn=None,
                                           scope='fc5')
                self.fc5 = net
                self.softmax_output = slim.softmax(net, scope='prediction')
def inference(self):
    with slim.arg_scope([slim.conv2d], kernel_size=3, reuse=None, trainable=True):
        with slim.arg_scope([slim.max_pool2d], kernel_size=2, stride=2,
                            padding='VALID'):
            net = slim.conv2d(self.input_image, 32, scope='layer1_conv')
            print_activations(net)
            net = slim.max_pool2d(net, scope='pool1')
            print_activations(net)
            net = slim.conv2d(net, 64, scope='layer2_conv')
            print_activations(net)
            net = slim.max_pool2d(net, scope='pool2')
            print_activations(net)
            net = slim.conv2d(net, 128, scope='layer3_conv')
            print_activations(net)
            net = slim.max_pool2d(net, scope='pool3')
            print_activations(net)
            net = slim.conv2d(net, 256, scope='layer4_conv')
            print_activations(net)
            net = slim.max_pool2d(net, scope='pool4')
            print_activations(net)
            net = tf.reshape(net, [-1, 3 * 3 * 256])
            net = slim.fully_connected(net, 512, scope='layer5_fc')
            print_activations(net)
            net = slim.fully_connected(net, 256, scope='layer6_fc')
            print_activations(net)
            net = slim.fully_connected(net, self.num_class, activation_fn=None,
                                       scope='layer7_fc')
            print_activations(net)
            net = slim.softmax(net)
            print_activations(net)
            return net
def qmoe_attention(im, quest, embed_dim, keep_prob=1.0, num_units=4,
                   reuse=False, scope="MoeAttention"):
    with tf.variable_scope(scope, reuse=reuse):
        im_ctx = mlb(im, quest, embed_dim, keep_prob)
        # soft attention
        s_logits = slim.conv2d(im_ctx, num_units, [1, 1], activation_fn=None,
                               scope='sp_logits')
        # spatial softmax
        att_maps = _spatial_softmax(s_logits)
        # aggregate visual features
        att_basis = _spatial_aggregate(im, att_maps, normalize=False)
        # compute gate
        with tf.variable_scope('Gate'):
            gate_basis = _3d_mlb(att_basis, quest, embed_dim, keep_prob)
            gate_logits = slim.fully_connected(gate_basis, 1, activation_fn=None,
                                               scope='logits')
            gates = tf.squeeze(gate_logits, axis=[2])
        gates = tf.expand_dims(slim.softmax(gates), 2)
        output = tf.reduce_sum(tf.multiply(att_basis, gates), axis=1)
    return output
def moe_attention(im, quest, embed_dim, keep_prob=1.0, num_units=4,
                  reuse=False, scope="MoeAttention"):
    with tf.variable_scope(scope, reuse=reuse):
        im_ctx = mlb(im, quest, embed_dim, keep_prob)
        # soft attention
        s_logits = slim.conv2d(im_ctx, num_units, [1, 1], activation_fn=None,
                               scope='sp_logits')
        # spatial softmax
        att_maps = _spatial_softmax(s_logits)
        # aggregate visual features
        att_basis = _spatial_aggregate(im, att_maps, normalize=False)
        # compute gate
        with tf.variable_scope('Gate'):
            im_ctx = tf.reduce_mean(im, axis=[1, 2])
            # Integer division keeps the embedding size an int under Python 3.
            vq_embed = mlb(im_ctx, quest, embed_dim // 4, keep_prob,
                           'gate_prelogit')
            # vq_ctx = tf.concat(axis=1, values=[im_ctx, quest])
            # vq_embed = slim.fully_connected(vq_ctx, embed_dim, scope='vq_embed')
            # vq_embed = slim.dropout(vq_embed, keep_prob)
            gates = slim.fully_connected(vq_embed, num_units, activation_fn=None,
                                         scope='gate')
        gates = tf.expand_dims(slim.softmax(gates), 2)
        output = tf.reduce_sum(tf.multiply(att_basis, gates), axis=1)
    return output
def vgg16Model(input_data, num_classes, keep_prob):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # Block 1: two stacked conv layers; conv3-64 means 64 filters of size 3x3.
        net = slim.repeat(input_data, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2: two stacked layers of 128 3x3 filters (note the growing receptive field).
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3: three stacked layers of 256 3x3 filters.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4: three stacked layers of 512 3x3 filters.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5: three stacked layers of 512 3x3 filters.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        # Flatten collapses the multi-dimensional input to 1-D for the transition
        # from conv to fully connected layers; the batch dimension is unaffected.
        net = slim.flatten(net, scope='flat')
        # Fully connected layers.
        net = slim.fully_connected(net, 4096, scope='fc1')
        net = slim.dropout(net, keep_prob=keep_prob, scope='dropt1')
        net = slim.fully_connected(net, 4096, scope='fc2')
        net = slim.dropout(net, keep_prob=keep_prob, scope='dropt2')
        net = slim.fully_connected(net, num_classes, scope='fc3')
        net = slim.softmax(net, scope='net')
        return net
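# A hedged usage sketch, assuming TensorFlow 1.x; the 224x224 input (the
# standard VGG size, which the five 2x poolings reduce to 7x7) and the class
# count are assumptions, not taken from the surrounding project.
input_data = tf.placeholder(tf.float32, [None, 224, 224, 3])
probs = vgg16Model(input_data, num_classes=1000, keep_prob=0.5)
predicted_class = tf.argmax(probs, 1)  # probs is already softmax-normalized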
def rpn(self, inputs):
    rpn_conv3x3 = slim.conv2d(inputs, 512, [3, 3],
                              trainable=self.is_training,
                              weights_initializer=self.cfgs.INITIALIZER,
                              activation_fn=tf.nn.relu,
                              scope='rpn_conv/3x3')
    rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location * 2,
                                [1, 1], stride=1,
                                trainable=self.is_training,
                                weights_initializer=self.cfgs.INITIALIZER,
                                activation_fn=None,
                                scope='rpn_cls_score')
    rpn_box_pred = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location * 4,
                               [1, 1], stride=1,
                               trainable=self.is_training,
                               weights_initializer=self.cfgs.BBOX_INITIALIZER,
                               activation_fn=None,
                               scope='rpn_bbox_pred')
    rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    return rpn_box_pred, rpn_cls_score, rpn_cls_prob
def build_model(x, y, num_classes=2, is_training=True, reuse=None):
    """
    Handle the model: calculate the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)

    # model
    with slim.arg_scope(densenet_arg_scope(is_training)):
        x = tf.expand_dims(x, -1)
        logits = densenet(x, num_classes, reuse=reuse)

    # results
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits,
                                                          onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
def create_network(self, inputs, dropout=0.0):
    # Create the SegNet model as described in the paper.
    with tf.variable_scope("segmentor", reuse=None):
        ## Encoder
        x = self.conv_block(inputs, no_convs=2, filters=64, dropout=dropout)
        skip1 = x
        x = self.conv_block(x, no_convs=2, filters=128, dropout=dropout)
        skip2 = x
        x = self.conv_block(x, no_convs=3, filters=256, dropout=dropout)
        skip3 = x
        x = self.conv_block(x, no_convs=3, filters=512, dropout=dropout)
        skip4 = x
        x = self.conv_block(x, no_convs=3, filters=512, dropout=dropout)

        ## Decoder
        x = self.conv_transpose_block(x, no_convs=3, filters=512, dropout=dropout)
        x = tf.add(x, skip4)
        x = self.conv_transpose_block(x, no_convs=3, filters=512, dropout=dropout,
                                      recude_on_last=True)
        x = tf.add(x, skip3)
        x = self.conv_transpose_block(x, no_convs=3, filters=256, dropout=dropout,
                                      recude_on_last=True)
        x = tf.add(x, skip2)
        x = self.conv_transpose_block(x, no_convs=2, filters=128, dropout=dropout,
                                      recude_on_last=True)
        x = tf.add(x, skip1)
        x = self.conv_transpose_block(x, no_convs=2, filters=64, dropout=dropout)

        ## Out
        x = slim.conv2d(x, self.classes, 1, activation_fn=None, scope='logits')
        x = slim.softmax(x)
        return x
def init_shufflenet(self, param):
    nr_groups = param['filter_group']
    out_channel = param['out_channel']
    complexity = param['complexity_scale_factor']
    H_W = 224
    out_channel = int(out_channel * complexity)

    input = tf.placeholder(tf.float32, [1, H_W, H_W, 3], name='input_tensor')
    layer = slim.convolution2d(input, 24, kernel_size=[3, 3])
    layer = tf.layers.max_pooling2d(layer, pool_size=3, strides=2, padding='valid')

    # Stage repeats (3, 7, 3 extra units after each strided unit, i.e. 4/8/4
    # units per stage) are an assumption taken from the ShuffleNet v1 paper
    # (arXiv:1707.01083).
    # Stage 2
    layer = self.shuffle_unit(layer, nr_groups, first=True)
    for _ in range(3):
        layer = self.shuffle_unit(layer, nr_groups)

    # Stage 3
    layer = self.shuffle_unit(layer, nr_groups, stride=2)
    for _ in range(7):
        layer = self.shuffle_unit(layer, nr_groups)

    # Stage 4
    layer = self.shuffle_unit(layer, nr_groups, stride=2)
    for _ in range(3):
        layer = self.shuffle_unit(layer, nr_groups)

    # Outputs: keep the spatial dims so the squeeze below has axes to remove.
    global_pool = tf.reduce_mean(layer, axis=[1, 2], keepdims=True)
    spatial_reduction = tf.squeeze(global_pool, [1, 2], name='SpatialSqueeze')
    logits = slim.fully_connected(spatial_reduction, 1000,
                                  activation_fn=None, scope='fc')
    output = slim.softmax(logits, scope='Predictions')
    output = tf.identity(output, name="output_tensor")

    return {'input': input, 'output': output, 'logits': logits}
def rpn_proposals(self):
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column is the object score

        if self.top_k_nms:
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=self.top_k_nms)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        # NMS
        valid_indices = tf_wrapper.nms_rotate_tf(
            boxes_list=rpn_decode_boxes,
            scores=rpn_object_score,
            iou_threshold=self.rpn_nms_iou_threshold,
            max_output_size=self.max_proposals_num,
            use_gpu=cfgs.NMS_USE_GPU)
        valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        valid_scores = tf.gather(rpn_object_score, valid_indices)
        # print_tensors(valid_scores, 'rpn_score')

        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
            lambda: boxes_utils.padd_boxes_with_zeros(valid_boxes, valid_scores,
                                                      self.max_proposals_num),
            lambda: (valid_boxes, valid_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def slim_model(inputs):
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            biases_initializer=tf.constant_initializer(1.0),
            weights_regularizer=slim.l2_regularizer(0.05)):
        with slim.arg_scope([slim.conv2d], kernel_size=[3, 3], padding="SAME",
                            biases_initializer=tf.constant_initializer(0.0)):
            with slim.arg_scope([slim.max_pool2d], kernel_size=[2, 2],
                                padding="SAME"):
                net = slim.repeat(inputs, 2, slim.conv2d, 64, scope="conv1")
                net = slim.max_pool2d(net, scope="poo1")
                net = slim.repeat(net, 2, slim.conv2d, 128, scope="conv2")
                net = slim.max_pool2d(net, scope="poo2")
                net = slim.flatten(net)
                net = slim.fully_connected(net, 1024, scope="fc3")
                net = slim.dropout(net, 0.5, scope="dropout3")
                net = slim.fully_connected(net, 256, scope="fc4")
                net = slim.dropout(net, 0.5, scope="dropout4")
                net = slim.fully_connected(net, 10, activation_fn=None,
                                           scope="linear")
                outputs = slim.softmax(net, scope="softmax4")
                return outputs
def rpn_proposals(self):
    '''
    :param self.anchors: shape [-1, 4] -> [ymin, xmin, ymax, xmax]
    :param self.rpn_scores: shape [-1, 2] -> [background, foreground]
    :param self.rpn_encode_boxes: shape [-1, 4] -> [ycenter, xcenter, h, w]
    :return: valid_boxes [2000, 4]
    :return: valid_scores [2000, ]
    '''
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        if not self.is_training:
            image_shape = tf.shape(self.img_batch)
            rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                rpn_decode_boxes, image_shape)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]

        if self.top_k_nms:
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=self.top_k_nms)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        nms_indices = boxes_utils.non_maximal_suppression(
            rpn_decode_boxes, rpn_object_score,
            self.rpn_nms_iou_threshold, self.max_proposal_num)
        valid_scores = tf.gather(rpn_object_score, nms_indices)
        valid_boxes = tf.gather(rpn_decode_boxes, nms_indices)

        return valid_boxes, valid_scores
def _bulid_fc(self, x, out_num, name):
    with tf.variable_scope('fc' + name):
        x = slim.layers.fully_connected(x, 512)
        x = slim.layers.fully_connected(x, 256)
        x = slim.layers.fully_connected(x, out_num, activation_fn=None)
        after_softmax_x = slim.softmax(x)
    return x, after_softmax_x
def build_model(x, y, num_classes=2, is_training=True,
                num_estimator=None, num_filter=None, reuse=None):
    """
    Handle the model: calculate the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)
    print('input: ', x.get_shape())

    # model
    logits = RNN_deepcough(x, num_outputs=num_classes, reuse=reuse,
                           is_training=is_training)

    # results
    loss = tf.reduce_mean(softmax_cross_entropy(logits=logits, onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
def fast_rcnn_predict(self):
    with tf.variable_scope('fast_rcnn_predict'):
        fast_rcnn_softmax_scores = slim.softmax(
            self.fast_rcnn_scores)  # [-1, num_classes+1]

        # fast_rcnn_encode_boxes originally has shape [-1, 4*num_classes].
        fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

        # Tile the proposals so their ordering stays consistent with the
        # per-class encoded boxes above.
        reference_boxes = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])  # [N, 4*num_classes]
        reference_boxes = tf.reshape(reference_boxes, [-1, 4])  # [N*num_classes, 4]
        fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=fast_rcnn_encode_boxes,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            fast_rcnn_decode_boxes, img_shape=self.img_shape)

        # multiclass NMS
        fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes,
                                            [-1, self.num_classes * 4])
        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(fast_rcnn_decode_boxes,
                                     scores=fast_rcnn_softmax_scores)

        return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category
def build_model(x, y, num_classes=2, num_estimator=10, is_training=True,
                reuse=None):
    """
    Handle the model: calculate the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.

    Here we do boosting without additive training.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)

    # model: blend the estimators with a running weighted average
    logits = 0
    offset = 30 // num_estimator
    for i in range(num_estimator):
        # x = tf.image.crop_to_bounding_box(x, 0, offset * i, 16, 16)
        predictions, gamma = classify(x, num_estimator=num_estimator,
                                      num_classes=num_classes,
                                      is_training=is_training,
                                      reuse=reuse, scope='c%d' % i)
        zeta = gamma * 2 / (i + 1)
        logits = (1 - zeta) * logits + zeta * predictions

    # results
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=y, label_smoothing=0.05))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
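# The update above is a running weighted average: if gamma were the constant
# 0.5 (in the model it comes from classify()), zeta = 1/(i+1) and the
# recursion reduces to the plain mean of the estimators' outputs.
# A pure-Python sketch with toy scalars:
preds = [2.0, 4.0, 6.0]  # toy estimator outputs
logits, gamma = 0.0, 0.5
for i, p in enumerate(preds):
    zeta = gamma * 2 / (i + 1)  # 1, 1/2, 1/3, ...
    logits = (1 - zeta) * logits + zeta * p
print(logits)  # 4.0 == mean(preds)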
def model_slim(images, labels, is_training):
    net = slim.conv2d(images, 32, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool1')
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool2')
    net = slim.flatten(net, scope='flatten')
    net = slim.fully_connected(net, 1024, scope='fully_connected1')
    net = slim.dropout(net, keep_prob=0.6, is_training=is_training)
    logits = slim.fully_connected(net, 10, activation_fn=None,
                                  scope='fully_connected2')
    prob = slim.softmax(logits)
    loss = slim.losses.sparse_softmax_cross_entropy(logits, labels)

    global_step = tf.train.get_or_create_global_step()
    num_batches_per_epoch = TRAIN_EXAMPLES_NUM / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * 10)
    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(learning_rate=0.001,
                                    global_step=global_step,
                                    decay_steps=decay_steps,
                                    decay_rate=0.1,
                                    staircase=True)
    opt = tf.train.AdamOptimizer(learning_rate=lr)

    return opt, loss, prob
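# A quick sketch of the staircase schedule above: the learning rate drops by
# 10x after each block of decay_steps steps. Pure Python; decay_steps=1000 is
# an illustrative stand-in for ten epochs of batches.
def lr_at(step, base=0.001, decay_steps=1000, rate=0.1):
    # staircase=True floors the exponent, so the rate changes in discrete steps
    return base * rate ** (step // decay_steps)

print([lr_at(s) for s in (0, 999, 1000, 2500)])  # ~[1e-3, 1e-3, 1e-4, 1e-5]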
def build_model(x, y,
                num_classes=2,
                num_estimator=None,  # we misuse num_estimator for the number of convolutions
                num_filter=16,
                is_training=True,
                reuse=None):
    """
    Handle the model: calculate the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)

    # model
    logits = classify(x, num_classes=num_classes, num_filter=num_filter,
                      route=num_estimator, is_training=is_training, reuse=reuse)

    # results
    loss = tf.reduce_mean(softmax_cross_entropy(logits=logits, onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
def rpn_losses(self):
    with tf.variable_scope('rpn_losses'):
        minibatch_indices, minibatch_anchor_matched_gtboxes, \
            object_mask, minibatch_labels_one_hot = self.make_minibatch(self.anchors)

        minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
        minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes, minibatch_indices)
        minibatch_boxes_scores = tf.gather(self.rpn_scores, minibatch_indices)

        # encode gtboxes
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_anchor_matched_gtboxes,
            reference_boxes=minibatch_anchors,
            scale_factors=self.scale_factors)

        positive_anchors_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_anchors * tf.expand_dims(object_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

        negative_mask = tf.cast(tf.logical_not(tf.cast(object_mask, tf.bool)),
                                tf.float32)
        negative_anchors_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_anchors * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

        minibatch_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=minibatch_encode_boxes,
            reference_boxes=minibatch_anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_anchors_in_img)
        tf.summary.image('/negative_anchors', negative_anchors_in_img)

        minibatch_boxes_softmax_scores = tf.gather(slim.softmax(self.rpn_scores),
                                                   minibatch_indices)
        top_k_scores, top_k_indices = tf.nn.top_k(
            minibatch_boxes_softmax_scores[:, 1], k=20)
        top_k_boxes = tf.gather(minibatch_decode_boxes, top_k_indices)
        top_detections_in_img = draw_boxes_with_scores(self.img_batch,
                                                       boxes=top_k_boxes,
                                                       scores=top_k_scores)
        tf.summary.image('/top_20', top_detections_in_img)

        temp_indices = tf.reshape(
            tf.where(tf.greater(top_k_scores, cfgs.FINAL_SCORE_THRESHOLD)), [-1])
        rpn_predict_boxes = tf.gather(top_k_boxes, temp_indices)
        rpn_predict_scores = tf.gather(top_k_scores, temp_indices)

        # losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=object_mask)
            slim.losses.add_loss(location_loss)  # add smooth l1 loss to losses collection

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=minibatch_boxes_scores,
                onehot_labels=minibatch_labels_one_hot)

        return location_loss, classification_loss, rpn_predict_boxes, rpn_predict_scores
def fast_rcnn_predict(self):
    with tf.variable_scope('fast_rcnn_predict'):
        fast_rcnn_softmax_scores = slim.softmax(
            self.fast_rcnn_scores)  # [-1, num_classes+1]

        fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 5])

        # reference_boxes = tf.tile(self.fast_rcnn_all_level_horizontal_proposals, [1, self.num_classes])
        reference_boxes = tf.tile(self.fast_rcnn_all_level_rotate_proposals,
                                  [1, self.num_classes])  # [N, 5*num_classes]
        reference_boxes = tf.reshape(reference_boxes, [-1, 5])  # [N*num_classes, 5]
        fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=fast_rcnn_encode_boxes,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        # multiclass NMS
        fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes,
                                            [-1, self.num_classes * 5])
        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(fast_rcnn_decode_boxes,
                                     scores=fast_rcnn_softmax_scores)

        return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category
def mobilenet(inputs,
              num_classes=1000,
              is_training=True,
              width_multiplier=1,
              scope='MobileNet'):
    """MobileNet.

    For more detail, please refer to Google's paper
    (https://arxiv.org/abs/1704.04861).

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      width_multiplier: channel multiplier applied to every layer.
      scope: Optional scope for the variables.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, `num_classes`]
      end_points: a dictionary from components of the network to the
        corresponding activation.
    """

    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier,
                                  sc, downsample=False):
        """Helper function to build the depth-wise separable convolution layer."""
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=_stride,
                                                      depth_multiplier=1,
                                                      kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                activation_fn=tf.nn.relu,
                                fused=True):
                net = slim.convolution2d(inputs, round(32 * width_multiplier),
                                         [3, 3], stride=2, padding='SAME',
                                         scope='conv_1')
                net = slim.batch_norm(net, scope='conv_1/batch_norm')
                net = _depthwise_separable_conv(net, 64, width_multiplier,
                                                sc='conv_ds_2')
                net = _depthwise_separable_conv(net, 128, width_multiplier,
                                                downsample=True, sc='conv_ds_3')
                net = _depthwise_separable_conv(net, 128, width_multiplier,
                                                sc='conv_ds_4')
                net = _depthwise_separable_conv(net, 256, width_multiplier,
                                                downsample=True, sc='conv_ds_5')
                net = _depthwise_separable_conv(net, 256, width_multiplier,
                                                sc='conv_ds_6')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                downsample=True, sc='conv_ds_7')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                sc='conv_ds_12')
                net = _depthwise_separable_conv(net, 1024, width_multiplier,
                                                downsample=True, sc='conv_ds_13')
                net = _depthwise_separable_conv(net, 1024, width_multiplier,
                                                sc='conv_ds_14')
                net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')

        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['squeeze'] = net
        logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                      scope='fc_16')
        predictions = slim.softmax(logits, scope='Predictions')
        end_points['Logits'] = logits
        end_points['Predictions'] = predictions

    return logits, end_points
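# A hedged usage sketch, assuming TensorFlow 1.x with tf.contrib.slim; the
# 224x224 input is what makes the final 7x7 average pool line up.
import tensorflow as tf
slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = mobilenet(inputs, num_classes=1000, is_training=False)
probs = end_points['Predictions']   # softmax over num_classes
top1 = tf.argmax(probs, axis=1)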
def fast_rcnn_loss(self):
    with tf.variable_scope('fast_rcnn_loss'):
        minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, \
            minibatch_object_mask, minibatch_label_one_hot = \
            self.fast_rcnn_minibatch(self.fast_rcnn_all_level_rotate_proposals)

        # minibatch_reference_boxes = tf.gather(self.fast_rcnn_all_level_horizontal_proposals, minibatch_indices)
        minibatch_reference_boxes = tf.gather(
            self.fast_rcnn_all_level_rotate_proposals, minibatch_indices)

        minibatch_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes, minibatch_indices)  # [minibatch_size, num_classes*5]
        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)

        positive_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes * tf.expand_dims(minibatch_object_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1.0)))[0])

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)), tf.float32)
        negative_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 0.0)))[0])

        tf.summary.image('/positive_proposals', positive_proposals_in_img)
        tf.summary.image('/negative_proposals', negative_proposals_in_img)

        minibatch_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=minibatch_encode_boxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        minibatch_softmax_scores = tf.gather(slim.softmax(self.fast_rcnn_scores),
                                             minibatch_indices)
        top_k_scores, top_k_indices = tf.nn.top_k(minibatch_softmax_scores[:, 1],
                                                  k=5)

        top_detections_in_img = draw_boxes_with_scores(
            self.img_batch,
            boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
            scores=top_k_scores)
        tf.summary.image('/top_5', top_detections_in_img)

        # encode gtboxes
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)
        # [minibatch_size, num_classes*5]
        minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                           [1, self.num_classes])

        class_weights_list = []
        category_list = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights = tf.ones(
                shape=[tf.shape(minibatch_encode_boxes)[0], 5], dtype=tf.float32)
            tmp_class_weights = tmp_class_weights * tf.expand_dims(
                category_list[i], axis=1)
            class_weights_list.append(tmp_class_weights)
        class_weights = tf.concat(class_weights_list, axis=1)  # [minibatch_size, num_classes*5]

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
                logits=minibatch_scores,
                onehot_labels=minibatch_label_one_hot)
            # if DEBUG:
            #     print_tensors(minibatch_scores, 'minibatch_scores')
            #     print_tensors(classification_loss, '2nd_cls_loss')

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=class_weights)
            slim.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss