def tower_loss(scope):
    """Build the network for one tower and return that tower's total loss.

    A batch is obtained from ``read_and_decode()``, the backbone named by the
    module-level ``net`` is built, ``cal_loss`` is invoked on its logits, and
    every tensor found in the ``'losses'`` collection under ``scope`` is
    summed. A scalar summary is emitted for each individual loss and for the
    total.

    Args:
        scope: Scope used to filter the ``'losses'`` collection.

    Returns:
        The tensor named ``'total_loss'`` (``tf.add_n`` over the losses).

    Raises:
        Exception: If ``net`` is not one of the supported backbone names.
    """
    images, labels = read_and_decode()

    if net in ('vgg_16', 'vgg_19'):
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = getattr(vgg, net)(
                images, num_classes=FLAGS.num_classes)
    elif net in ('resnet_v1_101', 'resnet_v1_50', 'resnet_v2_50'):
        resnet_module = resnet_v2 if net == 'resnet_v2_50' else resnet_v1
        with slim.arg_scope(resnet_module.resnet_arg_scope()):
            logits, end_points = getattr(resnet_module, net)(
                images, num_classes=FLAGS.num_classes)
        # The ResNet builders are followed by an explicit reshape to 2-D.
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)

    # Only the side effect matters here; the return value is discarded.
    cal_loss(logits, labels)

    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # Strip the per-tower prefix so summaries from all towers share a name.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for loss_tensor in tower_losses + [total_loss]:
        summary_name = re.sub(tower_prefix, '', loss_tensor.op.name)
        tf.summary.scalar(summary_name, loss_tensor)
    return total_loss
def get_slim_resnet_v1_byname(net_name,
                              inputs,
                              num_classes=None,
                              is_training=True,
                              global_pool=True,
                              output_stride=None,
                              weight_decay=0.):
    """Build a slim ResNet-v1 network selected by name.

    Args:
        net_name: ``'resnet_v1_50'`` or ``'resnet_v1_101'``.
        inputs: Forwarded as ``inputs`` to the slim builder.
        num_classes: Forwarded to the slim builder.
        is_training: Forwarded to the slim builder.
        global_pool: Forwarded to the slim builder.
        output_stride: Forwarded to the slim builder.
        weight_decay: Passed to ``resnet_v1.resnet_arg_scope``.

    Returns:
        ``(logits, end_points)`` from the slim builder, or ``None`` when
        ``net_name`` is not one of the two supported names.
    """
    if net_name not in ('resnet_v1_50', 'resnet_v1_101'):
        # Unsupported names yield no result (the original fell off the end).
        return None

    network_fn = getattr(resnet_v1, net_name)
    scope_defaults = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(scope_defaults):
        logits, end_points = network_fn(
            inputs=inputs,
            num_classes=num_classes,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
        )
    return logits, end_points
def build_2(self, inputs, input_pixel_size, is_training, scope='resnet_v1_101', weight_decay=0.0001):
    """Build ResNet-v1-101 and return feature maps looked up in ``self.share_net``.

    Args:
        inputs: Forwarded to ``resnet_v1.resnet_v1_101``.
        input_pixel_size: Not used by this method.
        is_training: Forwarded to ``resnet_v1.resnet_v1_101``.
        scope: Not used by this method.
        weight_decay: Passed to ``resnet_v1.resnet_arg_scope``.

    Returns:
        ``(feature_maps_out, feature_maps_dict)`` where the dict has keys
        ``'C2'``..``'C5'`` and ``feature_maps_out`` is the ``'C5'`` entry.

    NOTE(review): the maps are read from ``self.share_net`` rather than from
    the ``end_points`` returned by the builder call below — confirm that is
    intended.
    """
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        _logits, _end_points = resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=None,
            is_training=is_training,
            global_pool=False,
            output_stride=None,
            spatial_squeeze=False)

    pyramid_sources = (
        ('C2', 'resnet_v1_101/block1/unit_2/bottleneck_v1'),
        ('C3', 'resnet_v1_101/block2/unit_3/bottleneck_v1'),
        ('C4', 'resnet_v1_101/block3/unit_22/bottleneck_v1'),
        ('C5', 'resnet_v1_101/block4'),
    )
    feature_maps_dict = {
        level: self.share_net[endpoint_name]
        for level, endpoint_name in pyramid_sources
    }
    return feature_maps_dict['C5'], feature_maps_dict
def getCNNFeatures(self, input_tensor, out_dim, fc_initializer):
    """Run ResNet-v1-50 on ``input_tensor`` and project block4 through one fc layer.

    Args:
        input_tensor: Forwarded to ``resnet_v1.resnet_v1_50``.
        out_dim: Output width of the fully connected layer.
        fc_initializer: Initializer used for both the fc weight and bias.

    Returns:
        ``(init_fn, output)``: the checkpoint-assignment function for the
        ``'resnet_v1'`` model variables and the ReLU-activated fc output.

    NOTE(review): ``fc_dim`` is not defined in this method; it is presumably a
    module-level constant — confirm.
    NOTE(review): the network is built inside a freshly created ``tf.Graph``;
    confirm that ``input_tensor`` is meant to be usable from that graph.
    """
    graph = tf.Graph()
    with graph.as_default():
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                     num_classes=None)
        model_path = os.path.join(self.checkpoints_dir, self.ckpt_name)
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            model_path, slim.get_model_variables('resnet_v1'))
        flattened = tf.reshape(end_points["resnet_v1_50/block4"],
                               [-1, fc_dim])
        # Call form of print: valid on Python 3 and prints the same single
        # value on Python 2 (the old statement form is a py3 SyntaxError).
        print(flattened.get_shape())
        with vs.variable_scope('fc_resnet'):
            W = vs.get_variable("W", [fc_dim, out_dim],
                                initializer=fc_initializer)
            b = vs.get_variable("b", [out_dim], initializer=fc_initializer)
            output = tf.nn.relu(tf.matmul(flattened, W) + b)
    return init_fn, output
# TEST:
# cnn_f_extractor = CNN_FeatureExtractor()
# inputt = tf.constant(np.arange(12288, dtype=np.float32), shape=[1, 64, 64, 3])
# inputfn, features = cnn_f_extractor.getCNNFeatures(inputt, 256, tf.contrib.layers.variance_scaling_initializer())
# print features.get_shape()
def build_model(self, inp, mode, regularizer=None):
    """Encode ``inp['img']`` with ResNet-v1-50 and classify with a 1x1 convolution.

    Args:
        inp: Mapping whose ``'img'`` entry is fed to the encoder.
        mode: Compared against ``tf.estimator.ModeKeys.TRAIN`` to decide
            whether the encoder is built in training mode.
        regularizer: Kernel regularizer for the classification convolution.

    Returns:
        Logits: the 1x1 convolution output squeezed over axes 1 and 2.
    """
    images = inp['img']
    is_train_mode = mode == tf.estimator.ModeKeys.TRAIN

    with tf.variable_scope('encode'):
        encoder_arg_scope = resnet_v1.resnet_arg_scope(
            weight_decay=self.config_dict['ext']['encoder_l2_decay'])
        with slim.arg_scope(encoder_arg_scope):
            features, _ = resnet_v1.resnet_v1_50(images,
                                                 num_classes=None,
                                                 is_training=is_train_mode,
                                                 global_pool=True)

    with tf.variable_scope('classify'):
        # One output channel per label; kernel size 1.
        class_map = tf.layers.conv2d(features,
                                     self.config_dict['label_cnt'],
                                     1,
                                     kernel_regularizer=regularizer)
        logits = tf.squeeze(class_map, axis=(1, 2))
    return logits
def top_feature_net(input, anchors, inds_inside, num_bases):
    """Build the top-view ResNet-50 feature extractor plus an RPN-style head.

    The ``block4`` end point of a ResNet-v1-50 (output stride 8) built under
    the ``top_base`` scope feeds a score/delta head, whose outputs are passed
    to ``tf_rpn_nms``.

    Args:
        input: Input tensor; its static shape is unpacked into four values.
        anchors: Forwarded to ``tf_rpn_nms``.
        inds_inside: Forwarded to ``tf_rpn_nms``.
        num_bases: Number of base anchors; the score head has ``2*num_bases``
            channels and the delta head ``4*num_bases``.

    NOTE(review): no ``return`` statement is visible in this block, so as
    written the function returns ``None`` — confirm whether a return of the
    computed tensors was lost.
    """
    stride=8
    with tf.variable_scope("top_base") as sc:
        arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
        with slim.arg_scope(arg_scope) :
            net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
            #pdb.set_trace()
            # End-point keys carry the enclosing "top_base" scope prefix.
            block=end_points['top_base/resnet_v1_50/block4']
            # block = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
            tf.summary.histogram('rpn_top_block', block)
            # tf.summary.histogram('rpn_top_block_weights', tf.get_collection('2/conv_weight')[0])
    with tf.variable_scope('top') as scope:
        #up = upsample2d(block, factor = 2, has_bias=True, trainable=True, name='1')
        #up = block
        up = conv2d_bn_relu(block, num_kernels=128, kernel_size=(3,3), stride=[1,1,1,1], padding='SAME', name='2')
        scores = conv2d(up, num_kernels=2*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='score')
        # Scores are flattened to two columns before the softmax.
        probs = tf.nn.softmax( tf.reshape(scores,[-1,2]), name='prob')
        deltas = conv2d(up, num_kernels=4*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='delta')
    #<todo> flip to train and test mode nms (e.g. different nms_pre_topn values): use tf.cond
    with tf.variable_scope('top-nms') as scope:    #non-max
        batch_size, img_height, img_width, img_channel = input.get_shape().as_list()
        img_scale = 1
        # pdb.set_trace()
        rois, roi_scores = tf_rpn_nms( probs, deltas, anchors, inds_inside,
                                       stride, img_width, img_height, img_scale,
                                       nms_thresh=0.7, min_size=stride, nms_pre_topn=300, nms_post_topn=50,
                                       name ='nms')
    #<todo> feature = upsample2d(block, factor = 4, ...)
    feature = block
def Encoder_resnet_v1_101(x, weight_decay, is_training=True, reuse=False):
    """
    Resnet v1-101 encoder; returns the backbone output flattened per example.

    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - weight_decay: float
    - is_training: bool, forwarded to the slim builder
    - reuse: bool-> True if test

    Outputs:
    - net: N x F
    - variables: tf variables found under 'resnet_v1_101'

    NOTE(review): the flatten uses the static batch size of the network
    output; presumably the batch dimension must be statically known — confirm.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v1
    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, not the list of input tensors.
    with tf.name_scope("Encoder_resnet_v1_101", values=[x]):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            net, end_points = resnet_v1.resnet_v1_101(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v1_101')
            # Collapse everything except the batch axis.
            net = tf.reshape(net, [net.shape.as_list()[0], -1])
    variables = tf.contrib.framework.get_variables('resnet_v1_101')
    return net, variables
def resnet_50(input_image):
    """Return ResNet-v1-50 features for ``input_image`` with spatial axes removed.

    The network is built with the builder's default arguments (no
    ``num_classes`` or pooling options are passed here).

    Args:
        input_image: Forwarded to ``resnet_v1.resnet_v1_50``.

    Returns:
        The network output squeezed over axes 1 and 2.
    """
    arg_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        features, _ = resnet_v1.resnet_v1_50(input_image)
    # feature flatten: squeeze only the two spatial axes. A bare tf.squeeze
    # would also drop the batch axis whenever the batch size is 1.
    features = tf.squeeze(features, axis=[1, 2])
    return features
def build_graph(self):
    """Build ResNet-v1-50 on ``self.input`` and store 2-D logits in ``self.logits``.

    The network is created under scope ``'resnet_v1_50'`` with
    ``self.num_classes`` outputs and a weight decay of 1e-5.
    """
    resnet_defaults = resnet_v1.resnet_arg_scope(weight_decay=1e-5)
    with slim.arg_scope(resnet_defaults):
        raw_logits, _end_points = resnet_v1.resnet_v1_50(
            self.input,
            num_classes=self.num_classes,
            scope='resnet_v1_50')
    # raw_logits is [-1, 1, 1, dim] (global pooling); flatten to [-1, dim].
    num_outputs = raw_logits.get_shape()[-1]
    assert num_outputs == self.num_classes
    self.logits = tf.reshape(raw_logits, [-1, num_outputs])
def build(self):
    """Create the input placeholders and the classifier selected by ``self.base_net``.

    Sets ``self.input``, ``self.input_mean``, ``self.prob``, ``self.logits``
    and ``self.gt``; calls ``self.loss()`` when ``self.is_train`` is truthy.

    Raises:
        ValueError: If ``self.base_net`` is not ``'vgg16'``, ``'res50'``,
            ``'res101'`` or ``'res152'``.
    """
    # Input
    input_shape = [None, self.img_size[0], self.img_size[1], self.img_size[2]]
    self.input = tf.placeholder(dtype=tf.float32, shape=input_shape)
    self.input_mean = tfutils.mean_value(self.input, self.img_mean)

    resnet_builders = {
        'res50': 'resnet_v1_50',
        'res101': 'resnet_v1_101',
        'res152': 'resnet_v1_152',
    }

    if self.base_net == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            outputs, end_points = vgg.vgg_16(self.input_mean,
                                             self.num_classes)
        self.prob = tf.nn.softmax(outputs, -1)
        self.logits = outputs
    elif self.base_net in resnet_builders:
        build_resnet = getattr(resnet_v1, resnet_builders[self.base_net])
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = build_resnet(self.input_mean,
                                           self.num_classes,
                                           is_training=self.is_train)
        # Take spatial position (0, 0) of the 4-D network output.
        self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
        self.logits = net[:, 0, 0, :]
    else:
        raise ValueError(
            'base network should be vgg16, res50, -101, -152...')

    self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
    # self.var_list = tf.trainable_variables()
    if self.is_train:
        self.loss()
def main(_):
    """Extract ResNet-v1-101 ``pool5`` features and save one ``.npy`` per image.

    Image names are read from ``FLAGS.input_fname`` (first comma-separated
    field with its last four characters removed). Names whose ``.npy`` file
    already exists in ``FLAGS.output_dir`` are skipped.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    with tf.Graph().as_default() as g:
        filenames = []
        with open(FLAGS.input_fname, 'r') as listing:
            for row in listing.readlines():
                stem = row.split(',')[0][:-4]
                already_done = os.path.exists(
                    os.path.join(FLAGS.output_dir, stem + '.npy'))
                if not already_done:
                    filenames.append(os.path.join(FLAGS.image_dir, stem))

        filename_queue = tf.train.string_input_producer(filenames)
        reader = tf.WholeFileReader()
        key, value = reader.read(filename_queue)
        image = tf.image.decode_jpeg(value, channels=3)
        image_size = resnet_v1.resnet_v1.default_image_size
        processed_image = vgg_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_images, keys = tf.train.batch(
            [processed_image, key],
            FLAGS.batch_size,
            num_threads=8,
            capacity=8 * FLAGS.batch_size * 5,
            allow_smaller_final_batch=True)

        # Create the model
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(
                processed_images, num_classes=1000, is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            FLAGS.checkpoint_dir, slim.get_model_variables())

        pool5 = g.get_operation_by_name('resnet_v1_101/pool5').outputs[0]
        pool5 = tf.transpose(pool5, perm=[0, 3, 1, 2])  # (batch_size, 2048, 1, 1)

        with tf.Session() as sess:
            init_fn(sess)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            num_steps = len(filenames) // FLAGS.batch_size + 1
            try:
                for _step in tqdm(range(num_steps), ncols=70):
                    if coord.should_stop():
                        break
                    file_names, pool5_value = sess.run([keys, pool5])
                    for file_key, feature in zip(file_names, pool5_value):
                        out_name = os.path.basename(file_key).decode('utf-8') + '.npy'
                        np.save(os.path.join(FLAGS.output_dir, out_name),
                                feature.astype(np.float32))
            except tf.errors.OutOfRangeError:
                print("Done feature extraction -- epoch limit reached")
            finally:
                coord.request_stop()
                coord.join(threads)
def inference(self):
    """Build ResNet-v1-50 on the reshaped input and return squeezed logits.

    ``self.x`` is reshaped to ``[-1, H, W, C]`` using the first three entries
    of ``self.input_shape``.

    Returns:
        The network output with axes 1 and 2 squeezed out (op name
        ``'SpatialSqueeze'``).
    """
    target_shape = [
        -1,
        self.input_shape[0],
        self.input_shape[1],
        self.input_shape[2],
    ]
    images = tf.reshape(self.x, shape=target_shape)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, end_points = resnet_v1.resnet_v1_50(
            images,
            num_classes=self.nclasses,
            is_training=self.is_training,
            global_pool=True)

    # remove in the future if squeeze build in resnet_v1 function
    return array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')
def resNet(images, is_training=True, reuse=False, scope=None):
    """Constructs network based on resnet_v1_50.

    Blocks 3 and 4 are built with stride 1. Two feature maps are taken (the
    final network output and the ``block1`` end point) and each is passed
    through a 1x1 convolution without activation.

    Args:
      images: A tensor of size [batch, height, width, channels].
      is_training: Whether or not in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      scope: Optional scope name forwarded to ``resnet_v1.resnet_v1``. When
        ``None`` (or when ``scope + '/block1'`` is not an end-point key) the
        ``block1`` end point is located by its ``'/block1'`` suffix.

    Returns:
      A dict with keys ``'x60'`` (projected ``block1`` output, 64 channels)
      and ``'x30'`` (projected final output, 512 channels).

    Raises:
      KeyError: If the ``block1`` end point cannot be identified uniquely.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)):
        block = resnet_v1.resnet_v1_block
        blocks = [
            block('block1', base_depth=64, num_units=3, stride=2),
            block('block2', base_depth=128, num_units=4, stride=2),
            block('block3', base_depth=256, num_units=6, stride=1),
            block('block4', base_depth=512, num_units=3, stride=1)
        ]
        x30, end_points = resnet_v1.resnet_v1(images,
                                              blocks,
                                              is_training=is_training,
                                              global_pool=False,
                                              reuse=reuse,
                                              scope=scope,
                                              include_root_block=True)

        # `scope + '/block1'` raised TypeError for the default scope=None;
        # fall back to a suffix search over the end-point names.
        block1_key = None if scope is None else scope + '/block1'
        if block1_key not in end_points:
            candidates = [
                name for name in end_points if name.endswith('/block1')
            ]
            if len(candidates) != 1:
                raise KeyError(
                    "could not uniquely locate the 'block1' end point; "
                    "candidates: %r" % (candidates,))
            block1_key = candidates[0]

        x60 = end_points[block1_key]
        x60 = slim.conv2d(x60,
                          64, [1, 1],
                          1,
                          padding='SAME',
                          activation_fn=None,
                          reuse=reuse,
                          scope='conv2d_final_x60')
        x30 = slim.conv2d(x30,
                          512, [1, 1],
                          1,
                          padding='SAME',
                          activation_fn=None,
                          reuse=reuse,
                          scope='conv2d_final_x30')

    # get layer outputs we want
    end_points_ = {}
    end_points_['x60'] = x60
    end_points_['x30'] = x30
    return end_points_
def rgb_feature_net(input):
    """Return the ``block4`` end point of a ResNet-v1-50 built on ``input``.

    The network is built without a classification layer or global pooling,
    with output stride 8 and zero weight decay. A histogram summary named
    ``'rgb_top_block'`` is recorded for the returned tensor.
    """
    resnet_defaults = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(resnet_defaults):
        _net, end_points = resnet_v1.resnet_v1_50(
            input, None, global_pool=False, output_stride=8)
        top_block = end_points['resnet_v1_50/block4']
    #<todo> feature = upsample2d(block, factor = 4, ...)
    tf.summary.histogram('rgb_top_block', top_block)
    return top_block
def _vision(preprocessed_inputs, reuse=True):
    """Embed images with ResNet-v1-50 followed by dropout and a linear layer.

    Args:
        preprocessed_inputs: Forwarded to ``resnet_v1.resnet_v1_50``.
        reuse: ``reuse`` flag for the ``"vision"`` variable scope.

    Returns:
        ``(vision_result, resnet_output)``: the fully connected output and
        the squeezed, dropout-applied ResNet features it was computed from.
    """
    with tf.variable_scope("vision", reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            backbone_out, _ = resnet_v1.resnet_v1_50(preprocessed_inputs,
                                                     is_training=True)
        if not config["fine_tune_vision"]:
            # Block gradients from flowing into the backbone.
            backbone_out = tf.stop_gradient(backbone_out)
        squeezed = tf.squeeze(backbone_out, axis=[1, 2])
        resnet_output = tf.nn.dropout(squeezed,
                                      keep_prob=self.vision_keep_prob_ph)
        vision_result = slim.fully_connected(resnet_output,
                                             num_hidden_hyper,
                                             activation_fn=None)
    return vision_result, resnet_output
def inference(hypes, images, train=True):
    """
    Build ResNet encoder

    :param hypes: Hyper-parameter dict. ``hypes['arch']['layers']`` selects
        the depth (50, 101 or 152); optional ``'deep_feat'`` and
        ``'early_feat'`` entries name the blocks to return (defaults
        ``'block4'`` and ``'block1'``). When ``train`` is truthy,
        ``hypes['init_function']`` and ``hypes['restore']`` are set.
    :param images: Input forwarded to the selected ResNet builder.
    :param train: Whether the encoder is being built for training.
    :return: Dict with the ``'early_feat'`` and ``'deep_feat'`` end points.
    """
    is_training = tf.convert_to_tensor(train,
                                       dtype='bool',
                                       name='is_training')
    layers = hypes['arch']['layers']
    deep_feat = hypes['arch'].get('deep_feat', 'block4')
    early_feat = hypes['arch'].get('early_feat', 'block1')
    blocks = ['block1', 'block2', 'block3', 'block4']
    assert early_feat in blocks
    assert deep_feat in blocks[1:]

    if layers == 50:
        resnet = resnet_v1.resnet_v1_50
    elif layers == 101:
        resnet = resnet_v1.resnet_v1_101
    elif layers == 152:
        resnet = resnet_v1.resnet_v1_152
    else:
        # The message needs a placeholder for `layers`; without one the
        # logging module reports a formatting error instead of this message.
        logging.error('Resnet only has 50, 101, or 152 layers. Got %s',
                      layers)
        exit(1)

    # NOTE(review): `is_training` is passed as the first positional argument
    # of resnet_arg_scope; confirm this matches the installed slim signature.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training)):
        logits, endpoints = resnet(images)

    for name in blocks:
        layer_name = 'resnet_v1_%d/%s' % (layers, name)
        tf.summary.histogram('/%s_activation' % name, endpoints[layer_name])
        tf.summary.scalar('/%s_sparsity' % name,
                          tf.nn.zero_fraction(endpoints[layer_name]))

    if train:
        restore = tf.global_variables()
        hypes['init_function'] = _initalize_variables
        hypes['restore'] = restore

    return {
        'early_feat': endpoints['resnet_v1_%d/%s' % (layers, early_feat)],
        'deep_feat': endpoints['resnet_v1_%d/%s' % (layers, deep_feat)]
    }
def teacher(self, x, j):
    """Run a ResNet-v1-50 teacher and feed one intermediate feature to ``vnect``.

    Also stores in ``self.init_fn_1`` a function that assigns the
    ``'resnet_v1_50'`` model variables from
    ``self.pre_dir + '/resnet_v1_50.ckpt'``.

    Args:
        x: Input converted with ``utils.nchw_to_nhwc`` before the ResNet.
        j: Forwarded as the second argument of ``vnect``.

    Returns:
        The ``(x, var)`` pair returned by ``vnect``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        nhwc_input = utils.nchw_to_nhwc(x)
        batch_out, batch_list = resnet_v1.resnet_v1_50(nhwc_input,
                                                       1000,
                                                       is_training=True)
    feature = batch_list['resnet_v1_50/block2/unit_4/bottleneck_v1/conv1']

    checkpoint_path = self.pre_dir + '/resnet_v1_50.ckpt'
    self.init_fn_1 = slim.assign_from_checkpoint_fn(
        checkpoint_path, slim.get_model_variables('resnet_v1_50'))

    # del which has no gradient
    # print(batch_list)
    nchw_feature = utils.nhwc_to_nchw(feature)
    x, var = vnect(nchw_feature, j)
    return x, var
def build_resnet50_v1(img_input, l2_weight_decay=0.01, is_training=True, prefix=''):
    """Build slim's resnet_v1_50 (no global pooling) and return five feature maps.

    Args:
        img_input: Forwarded to ``resnet_v1_50``.
        l2_weight_decay: Weight decay passed to ``resnet_v1.resnet_arg_scope``.
        is_training: Forwarded to ``resnet_v1_50``.
        prefix: String prepended to ``'resnet_v1_50/...'`` when looking up
            end points.

    Returns:
        ``(conv1, block1, block2, block3, block4)`` where ``block4`` is the
        network output and the others are end-point lookups.
    """
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=l2_weight_decay)
    with slim.arg_scope(arg_scope):
        block4, endpoints = resnet_v1_50(img_input,
                                         is_training=is_training,
                                         global_pool=False)

    def endpoint(layer):
        return endpoints[f'{prefix}resnet_v1_50/{layer}']

    # Looked up deepest-first, matching the original access order.
    block3, block2, block1, conv1 = (
        endpoint(layer) for layer in ('block3', 'block2', 'block1', 'conv1'))
    return conv1, block1, block2, block3, block4
def batch_pred(models_path, images_list, labels_nums, data_format):
    """Time batched ResNet-v1-50 inference over a list of JPEG files.

    Args:
        models_path: Checkpoint path passed to ``saver.restore``.
        images_list: Sequence of image file paths.
        labels_nums: Number of classes for the network.
        data_format: ``[batch_size, resize_height, resize_width, depths]``.

    The per-batch wall-clock time of the ``sess.run`` call is printed;
    predictions are computed but not returned.
    """
    [batch_size, resize_height, resize_width, depths] = data_format
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, resize_height, resize_width, depths],
        name='input')

    # model
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        out, end_points = resnet_v1.resnet_v1_50(inputs=input_images,
                                                 num_classes=labels_nums,
                                                 is_training=False)
    out = tf.squeeze(out, [1, 2])
    score = tf.nn.softmax(out, name='pre')
    class_id = tf.argmax(score, 1)

    gpu_options = tf.GPUOptions(allow_growth=False)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, models_path)

        # Build the decode/preprocess ops once and feed the bytes per image.
        # Creating them inside the loop added new ops (and a constant holding
        # each file's bytes) to the graph for every image.
        encoded_image = tf.placeholder(dtype=tf.string, shape=[])
        decoded_image = tf.image.decode_jpeg(encoded_image, channels=3)
        processed_image_op = preprocess_image(decoded_image, resize_height,
                                              resize_width)

        tot = len(images_list)
        for idx in range(0, tot, batch_size):
            images = []
            idx_end = min(tot, idx + batch_size)
            print(idx)
            for i in range(idx, idx_end):
                image_path = images_list[i]
                # Context manager so the file handle is always released.
                with open(image_path, 'rb') as image_file:
                    image_bytes = image_file.read()
                processed_image = sess.run(
                    processed_image_op,
                    feed_dict={encoded_image: image_bytes})
                # print("processed_image.shape", processed_image.shape)
                images.append(processed_image)
            images = np.array(images)
            start = time.time()
            sess.run([score, class_id], feed_dict={input_images: images})
            end = time.time()
            print("time of batch {} is %f".format(batch_size) % (end - start))
    # The `with` block closes the session; no explicit close is needed.
def backbone(self):
    """Build a ResNet-v1-101 body with a 1x1-conv classification head.

    The ResNet is built with ``trainable=False`` applied to ``slim.conv2d``.
    A 1x1 convolution with ``cfgs.NUM_CLASS`` outputs (scope ``'logits'``)
    follows, and its output is averaged over axes 1 and 2.

    Returns:
        ``(output, logits)``: the spatially averaged class scores and their
        softmax. Note that the second element, despite its name in the
        original code, holds the softmax probabilities.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with slim.arg_scope([slim.conv2d], trainable=False):
            # output, end_points = resnet_v1.resnet_v1_50(self.inputs, num_classes=cfgs.NUM_CLASS, is_training=self.is_training)
            feature_map, end_points = resnet_v1.resnet_v1_101(
                self.inputs,
                num_classes=None,
                is_training=self.is_training,
                global_pool=False)
        class_map = slim.conv2d(feature_map,
                                cfgs.NUM_CLASS, [1, 1],
                                activation_fn=None,
                                normalizer_fn=None,
                                scope='logits')
        pooled_scores = tf.reduce_mean(class_map, [1, 2], name='global_pool')
    probabilities = tf.nn.softmax(pooled_scores)
    return pooled_scores, probabilities
def resnet(x, num_classes=1000, is_train=False, reuse=False):
    """Wrap the module-level slim model ``model_`` in a TensorLayer layer.

    Args:
        x: Input wrapped in a ``tl.layers.InputLayer`` named ``'input_layer'``.
        num_classes: Passed to the slim model; also the width of ``y``.
        is_train: Passed to the slim model as ``is_training``.
        reuse: Passed to the slim model as ``reuse``.

    Returns:
        ``(network, y)``: the ``SlimNetsLayer`` and its outputs reshaped to
        ``[-1, num_classes]``.
    """
    input_layer = tl.layers.InputLayer(x, name='input_layer')
    slim_kwargs = {
        'num_classes': num_classes,
        'is_training': is_train,
        'reuse': reuse
    }
    with slim.arg_scope(resnet_arg_scope()):
        network = tl.layers.SlimNetsLayer(
            prev_layer=input_layer,
            slim_layer=model_,
            slim_args=slim_kwargs,
            # the name should be the same with the ckpt model
            name=model_name)
    y = tf.reshape(network.outputs, [-1, num_classes])
    # y = tf.nn.softmax(y)
    return network, y
def main():
    """Restore a ResNet-v1-50 checkpoint and write a frozen graph.

    The graph definition is written to ``./pb_model/model.pb`` and then
    frozen into ``pb_model/frozen_resnet_v1_50.pb`` with
    ``resnet_v1_50/logits/BiasAdd`` as the output node. The checkpoint is
    read from the module-level ``model_path``.
    """
    tf.reset_default_graph()

    input_node = tf.placeholder(tf.float32,
                                shape=(1, 224, 224, 3),
                                name="input")
    print("input_node:", input_node)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = resnet_v1.resnet_v1_50(input_node, 1000, is_training=False)
    print("net:", net)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        tf.train.write_graph(sess.graph_def, './pb_model', 'model.pb')
        # Arguments are positional, exactly as in the original call.
        freeze_args = (
            'pb_model/model.pb',
            '',
            False,
            model_path,
            'resnet_v1_50/logits/BiasAdd',
            'save/restore_all',
            'save/Const:0',
            'pb_model/frozen_resnet_v1_50.pb',
            False,
            "",
        )
        freeze_graph.freeze_graph(*freeze_args)
    print("done")
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):
    """Build the image-encoder + point-feature + GVF-head graph.

    Args:
        input_pls: Dict of inputs; the keys read here are ``'imgs'``,
            ``'pnts'``, ``'gvfs'``, ``'onedge'``, ``'trans_mats'`` and
            ``'obj_rot_mats'``.
        is_training: Forwarded to the encoders and the ``gvfnet`` builders.
        bn: Forwarded to ``res_sim_encoder`` and the ``gvfnet`` builders.
        bn_decay: Forwarded alongside ``bn``.
        img_size: Side length the reference image is resized to when its
            height or width differs.
        FLAGS: Configuration object; attributes read include ``act``,
            ``rot``, ``edgeweight``, ``alpha``, ``encoder``, ``wd``,
            ``num_classes``, ``batch_size``, ``img_feat_onestream``,
            ``img_h``, ``img_w``, ``decoder``, ``multi_view`` and ``XYZ``.

    Returns:
        ``end_points``: dict of the intermediate and predicted tensors
        assembled below.

    NOTE(review): ``activation_fn`` is only assigned when ``FLAGS.act`` is
    ``"relu"`` or ``"elu"``, and ``ref_feats_embedding`` only when
    ``FLAGS.encoder`` matches one of the branches — other values would fail
    later with an unbound local; confirm inputs are validated upstream.
    """
    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    # Static batch size taken from the image tensor's shape.
    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        # Apply the object rotation to both the ground truth and the points.
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            # First three channels are resized bilinearly, channel 3 with
            # nearest-neighbour, then the two are concatenated again.
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img

    # Encoder selection. Each ResNet branch also records the four block
    # end-point names in `scopelst` for the feature sampling below.
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        # NOTE(review): the v2 branches use resnet_v1's arg scope — confirm
        # this is intentional.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding

    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts, input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            # In every branch: resize selected encoder feature maps to
            # (img_h, img_w), sample them at the projected point locations,
            # and concatenate the samples along axis 2.
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2,
                        values=[point_conv1, point_conv2,
                                point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                # Remaining encoders: end points are indexed by position.
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)

            # Decoder variant selected by FLAGS.decoder.
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        # NOTE(review): when FLAGS.multi_view is truthy, gvfs_feat stays None
        # here and is still passed to the heads below — confirm.
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)

    end_points['pred_gvfs_xyz'], end_points['pred_gvfs_dist'], end_points[
        'pred_gvfs_direction'] = None, None, None
    if FLAGS.XYZ:
        # Predict the xyz vector directly; derive its norm and direction.
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        # The 1e-6 floor avoids dividing by a zero distance.
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        # Predict distance and direction separately; xyz is their product.
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat
    return end_points
def rgb_feature_net(input):
    """Fuse ResNet-v1-50 blocks 2-4 into a single 256-channel feature map.

    A ResNet-v1-50 is built on ``input`` (no classifier, no global pooling,
    output stride 8, zero weight decay). Under the ``"rgb_up"`` scope each of
    ``block4``, ``block3`` and ``block2`` goes through a 1x1
    ``conv2d_bn_relu`` with 256 kernels and a factor-2 ``upsample2d``; the
    three results are added and passed through a 3x3 ``conv2d_bn_relu``. A
    histogram summary named ``'rgb_top_block'`` is recorded for the result.

    Returns:
        The fused feature tensor.
    """
    resnet_defaults = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(resnet_defaults):
        _net, end_points = resnet_v1.resnet_v1_50(
            input, None, global_pool=False, output_stride=8)
        # pdb.set_trace()
        block4 = end_points['resnet_v1_50/block4']
        block3 = end_points['resnet_v1_50/block3']
        block2 = end_points['resnet_v1_50/block2']

    def lateral_then_up(tensor, conv_name, up_name):
        """1x1 projection to 256 channels followed by factor-2 upsampling."""
        projected = conv2d_bn_relu(tensor,
                                   num_kernels=256,
                                   kernel_size=(1, 1),
                                   stride=[1, 1, 1, 1],
                                   padding='SAME',
                                   name=conv_name)
        return upsample2d(projected,
                          factor=2,
                          has_bias=True,
                          trainable=True,
                          name=up_name)

    with tf.variable_scope("rgb_up") as sc:
        # Same creation order as before: level 4, then 3, then 2.
        up4 = lateral_then_up(block4, '4', 'up4')
        up3 = lateral_then_up(block3, '3', 'up3')
        up2 = lateral_then_up(block2, '2', 'up2')
        up_34 = tf.add(up4, up3, name="up_add_3_4")
        merged = tf.add(up_34, up2, name="up_add_3_4_2")
        feature = conv2d_bn_relu(merged,
                                 num_kernels=256,
                                 kernel_size=(3, 3),
                                 stride=[1, 1, 1, 1],
                                 padding='SAME',
                                 name='rgb_ft')
    #<todo> feature = upsample2d(block, factor = 4, ...)
    tf.summary.histogram('rgb_top_block', feature)
    return feature
def evaluate():
    """Evaluate a pretrained valence/arousal model and print its metrics.

    Images and labels are read from ``FLAGS.input_file``, the network named
    by ``FLAGS.network`` is built, weights are restored from
    ``FLAGS.pretrained_model_checkpoint_path``, and predictions are collected
    for ``int(len(image_list) / FLAGS.batch_size)`` batches.

    Returns:
        ``(conc_valence, conc_arousal, mean_of_both, mse_arousal,
        mse_valence)``; column 0 of the predictions/labels is used as valence
        and column 1 as arousal.

    NOTE(review): ``prediction`` is only assigned for the four network names
    handled below; any other ``FLAGS.network`` value would fail at
    ``sess.run`` — confirm inputs are validated upstream.
    """
    g = tf.Graph()
    with g.as_default():
        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # split into sequences, note: in the cnn models case this is splitting into batches of length: seq_length ;
        # for the cnn-rnn models case, I do not check whether the images in a sequence are consecutive or the images are from the same video/the images are displaying the same person
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        # `images` is passed twice so the third slot carries the image
        # locations through the queue alongside the decoded image.
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values in range: [-1,1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()
        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()
        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                is_training=False,
                                                num_classes=None)

                with tf.variable_scope('rnn') as scope:
                    # NOTE(review): this branch reads FLAGS.sequence_length
                    # while the rest of the function reads FLAGS.seq_length —
                    # confirm both flags exist and agree.
                    cnn = tf.reshape(
                        net, [FLAGS.batch_size, FLAGS.sequence_length, -1])
                    # Two stacked GRU cells of 128 units each.
                    cell = tf.nn.rnn_cell.MultiRNNCell(
                        [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                    outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                    outputs = tf.reshape(
                        outputs,
                        (FLAGS.batch_size * FLAGS.sequence_length, 128))

                    # Linear layer mapping the 128 RNN features to 2 outputs.
                    weights_initializer = tf.truncated_normal_initializer(
                        stddev=0.01)
                    weights = tf.get_variable('weights_output',
                                              shape=[128, 2],
                                              initializer=weights_initializer,
                                              trainable=True)
                    biases = tf.get_variable('biases_output',
                                             shape=[2],
                                             initializer=tf.zeros_initializer,
                                             trainable=True)

                    prediction = tf.nn.xw_plus_b(outputs, weights, biases)
        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            # NOTE(review): the queue runners are started without `coord`,
            # and the Coordinator is created afterwards, so it is not handed
            # to the runner threads here — confirm this is intended.
            tf.train.start_queue_runners(sess=sess)

            coord = tf.train.Coordinator()

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):
                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                # Any failure stops the coordinator; evaluation then continues
                # with whatever batches were collected so far.
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            # Column 1 is treated as arousal, column 0 as valence.
            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            for i in range(len(predictions)):
                print("Labels: ", labels[i], "Predictions: ", predictions[i],
                      "Error: ", (abs(labels[i] - predictions[i])))
            print(
                "------------------------------------------------------------------------------"
            )
            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            # Mean squared error per output column.
            mse_arousal = sum(
                (predictions[:, 1] - labels[:, 1])**2) / len(labels[:, 1])
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = sum(
                (predictions[:, 0] - labels[:, 0])**2) / len(labels[:, 0])
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
def get_featuremap(net_name, input, num_classes=None):
    """Build a slim backbone on ``input`` and return its feature map(s).

    Args:
        net_name: Backbone selector; one of 'resnet_v1_50', 'resnet_v1_101'
            or 'vgg_16'.
        input: Tensor handed to the backbone constructor as ``inputs``.
        num_classes: Forwarded to the ResNet constructors. The 'vgg_16'
            branch ignores it and always builds with ``num_classes=7``.

    Returns:
        * ResNet backbones with ``cfg.USE_FPN`` truthy: a dict with keys
          'C2', 'C3', 'C4' and 'C5' mapping to backbone end points.
        * 'resnet_v1_50' otherwise: the 'block3/unit_5/bottleneck_v1'
          end point.
        * 'resnet_v1_101' otherwise: the first value returned by
          ``resnet_v1.resnet_v1_101``.
        * 'vgg_16': the 'vgg_16/conv5/conv5_3' end point.

    Raises:
        ValueError: If ``net_name`` is not one of the supported backbones
            (previously the function silently returned ``None``).
    """
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            # NOTE(review): this branch builds with is_training=False while
            # the resnet_v1_101 branch uses True -- confirm that is intended.
            _, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            return {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4'],  # [7, 7]
            }
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            return {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4'],
            }
        return featuremap

    if net_name == 'vgg_16':
        # NOTE(review): VGG is built under resnet_arg_scope (not
        # vgg.vgg_arg_scope) and with a fixed num_classes=7. Both are kept
        # as-is because changing them may alter the variables that get
        # created -- confirm whether this is intentional.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            _, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )
        return layer_dic['vgg_16/conv5/conv5_3']

    # Fail here with a clear message instead of handing None to the caller.
    raise ValueError(
        'Unsupported net_name %r; expected one of '
        "'resnet_v1_50', 'resnet_v1_101', 'vgg_16'." % (net_name,))
def main(_):
    """Build the i / mv / r multi-stream classification graph and train it.

    Graph: four frame placeholders (mv, flow, i, r) are flattened over the
    segment axis. A six-layer conv stack ('FLMG_*') maps concat(mv, r) to a
    3-channel map that is regressed onto the flow input with a summed
    squared error. Three ResNets (152 for i, 50 for the FLMG output and 50
    for r) feed per-stream heads built by ``model.inference_feature`` and a
    fusion head built by ``model.inference_fusion``. Adam minimises the sum
    of the four softmax cross-entropy losses plus the FLMG loss.

    Run loop: weights are restored from ``FLAGS.continue_training``. Every
    10 steps progress is printed and summaries are written. Every 1000 steps
    (except step 0) 100 test batches are evaluated and a checkpoint is
    written under ``FLAGS.save_path``.

    NOTE(review): the nesting of name/variable/device scopes below was
    reconstructed from a flattened source. Confirm that variable names
    (e.g. 'fusion_var/fusion') and summary tags still match existing
    checkpoints and dashboards.

    Args:
        _: Unused positional argument.
    """
    frame_shape = (None, FLAGS.num_segments, 224, 224, 3)
    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(
            tf.float32, shape=frame_shape, name='mv_frame')
        flow_placeholder = tf.placeholder(
            tf.float32, shape=frame_shape, name='flow_frame')
        i_placeholder = tf.placeholder(
            tf.float32, shape=frame_shape, name='i_frame')
        r_placeholder = tf.placeholder(
            tf.float32, shape=frame_shape, name='r_frame')
    with tf.name_scope('label_placeholder'):
        # The original passed `(None)`, which is plain None (unknown shape),
        # not a 1-tuple.
        label_placeholder = tf.placeholder(
            tf.int32, shape=None, name='labels')
    with tf.name_scope('accuracy'):
        # Latest validation accuracies are fed in so they show up in the
        # training summaries.
        combine_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)
    print('Finish placeholder.')

    with tf.name_scope('flatten_input'):
        # Each video carries several segments; fold them into the batch axis.
        b_size = tf.shape(mv_placeholder)[0]
        flat_shape = [b_size * FLAGS.num_segments, 224, 224, 3]
        flat_mv = tf.reshape(mv_placeholder, flat_shape)
        flat_flow = tf.reshape(flow_placeholder, flat_shape)
        flat_i = tf.reshape(i_placeholder, flat_shape)
        flat_r = tf.reshape(r_placeholder, flat_shape)

    with tf.variable_scope('fc_var'):
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512, N_CLASS], 0.0005),
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [512], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [N_CLASS], 0.00),
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512, N_CLASS], 0.0005),
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [512], 0.00),
            'b2': _variable_with_weight_decay('bi2', [N_CLASS], 0.00),
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512, N_CLASS], 0.0005),
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [512], 0.00),
            'b2': _variable_with_weight_decay('br2', [N_CLASS], 0.00),
        }
    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable(
            'fusion', [3],
            initializer=tf.contrib.layers.xavier_initializer())
    print('Finish Flatten.')

    with tf.device('/gpu:0'):
        with tf.name_scope('FLMG'):
            mv_res = tf.concat([flat_mv, flat_r], axis=-1)
            mv = slim.conv2d(mv_res, 8, kernel_size=[3, 3], scope='FLMG_1')
            mv = slim.conv2d(mv, 8, kernel_size=[3, 3], scope='FLMG_2')
            mv = slim.conv2d(mv, 6, kernel_size=[3, 3], scope='FLMG_3')
            mv = slim.conv2d(mv, 4, kernel_size=[3, 3], scope='FLMG_4')
            mv = slim.conv2d(mv, 2, kernel_size=[3, 3], scope='FLMG_5')
            mv = slim.conv2d(mv, 3, kernel_size=[3, 3], scope='FLMG_6')
        with tf.name_scope('FLMG_LOSS'):
            # Per-sample summed squared error against the flow input,
            # averaged over the batch.
            matrix_pow_2 = tf.pow(tf.subtract(mv, flat_flow), 2)
            matrix_norm = tf.reduce_sum(matrix_pow_2, axis=[1, 2, 3])
            flmg_loss = tf.reduce_mean(matrix_norm)
            tf.summary.scalar('flmg_loss', flmg_loss)
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            i_feature, _ = resnet_v1.resnet_v1_152(
                flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(
                mv, num_classes=None, is_training=True, scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(
                flat_r, num_classes=None, is_training=True, scope='r_resnet')
        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])
        with tf.name_scope('inference_model'):
            i_sc, i_pred = model.inference_feature(
                i_feature, i_weights, i_biases, FLAGS.num_segments, N_CLASS,
                name='i_inf')
            mv_sc, mv_pred = model.inference_feature(
                mv_feature, mv_weights, mv_biases, FLAGS.num_segments,
                N_CLASS, name='mv_inf')
            r_sc, r_pred = model.inference_feature(
                r_feature, r_weights, r_biases, FLAGS.num_segments, N_CLASS,
                name='r_inf')
            combine_sc, pred_class = model.inference_fusion(
                i_sc, mv_sc, r_sc, fusion)
    print('Finish Model.')

    with tf.name_scope('classiciation_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)

        def _summed_cross_entropy(logits):
            # Softmax cross-entropy summed over the batch.
            return tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=one_hot_labels, dim=1))

        mv_class_loss = _summed_cross_entropy(mv_sc)
        i_class_loss = _summed_cross_entropy(i_sc)
        r_class_loss = _summed_cross_entropy(r_sc)
        tf.summary.scalar('mv_cls_loss', mv_class_loss)
        tf.summary.scalar('i_cls_loss', i_class_loss)
        tf.summary.scalar('r_cls_loss', r_class_loss)
        combine_loss = _summed_cross_entropy(combine_sc)
        tf.summary.scalar('fuse_cls_loss', combine_loss)
        total_loss = (combine_loss + i_class_loss + mv_class_loss +
                      r_class_loss + flmg_loss)
        tf.summary.scalar('tot_cls_loss', total_loss)
    with tf.name_scope('weigh_decay'):
        # Reported only: weight_loss is not part of total_loss.
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('eight_decay_loss', weight_loss)

    with tf.name_scope('optimizer'):
        train_opt = tf.train.AdamOptimizer(FLAGS.tot_lr).minimize(
            total_loss, var_list=tf.trainable_variables())
    print('Finish Optimizer.')

    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()

    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(
            FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, augment=False, shuffle=True,
            num_threads=2, buffer=100)
        test_data = dataset.buildTestDataset(
            FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, num_threads=2, buffer=30)
    with tf.name_scope('dataset_iterator'):
        it = tf.contrib.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        next_data = it.get_next()
        init_data = it.make_initializer(train_data)
        it_test = tf.contrib.data.Iterator.from_structure(
            test_data.output_types, test_data.output_shapes)
        next_test_data = it_test.get_next()
        init_test_data = it_test.make_initializer(test_data)
    print('Finish Dataset.')

    # my_saver covers every trainable variable; first_saver leaves out the
    # FLMG layers and is the fallback when the full restore fails.
    restore_var = [
        v for v in tf.trainable_variables() if 'Adam' not in v.name
    ]
    first_restore_var = [
        v for v in tf.trainable_variables()
        if 'Adam' not in v.name and 'FLMG' not in v.name
    ]
    first_saver = tf.train.Saver(var_list=first_restore_var)
    my_saver = tf.train.Saver(var_list=restore_var, max_to_keep=5)

    config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)
    sess = tf.Session(config=config)
    writer = None
    try:
        with tf.name_scope('writer'):
            merged = tf.summary.merge_all()
            if not tf.gfile.Exists(FLAGS.log_path):
                tf.gfile.MakeDirs(FLAGS.log_path)
            # Each run logs into the next free 'run_XX' directory.
            run_number = len(os.listdir(FLAGS.log_path)) + 1
            logdir = 'run_%02d' % run_number
            tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
            writer = tf.summary.FileWriter(
                os.path.join(FLAGS.log_path, logdir), sess.graph)

        if not tf.gfile.Exists(FLAGS.save_path):
            tf.gfile.MakeDirs(FLAGS.save_path)

        sess.run(init_var)
        sess.run(init_data)
        sess.run(init_test_data)

        try:
            my_saver.restore(sess, FLAGS.continue_training)
        except Exception:
            # Full restore failed (presumably a first run from a checkpoint
            # without FLMG variables): restore everything except FLMG.
            # `except Exception` instead of a bare `except` so Ctrl-C and
            # SystemExit still propagate.
            first_saver.restore(sess, FLAGS.continue_training)
        print('Finish Loading Pretrained Model.')

        # Main training loop.
        combine_acc = 0
        i_acc = 0
        mv_acc = 0
        r_acc = 0
        start_time = time.time()
        for step in range(FLAGS.max_steps):
            # Validation
            if step % 1000 == 0 and step > 0:
                combine_classes = []
                mv_classes = []
                i_classes = []
                r_classes = []
                gt_label = []
                for _batch in range(100):
                    ti_arr, tmv_arr, tr_arr, tlabel = sess.run(next_test_data)
                    i_class, mv_class, r_class, com_class = sess.run(
                        [i_pred, mv_pred, r_pred, pred_class],
                        feed_dict={
                            mv_placeholder: tmv_arr,
                            i_placeholder: ti_arr,
                            r_placeholder: tr_arr,
                            label_placeholder: tlabel,
                        })
                    combine_classes = np.append(combine_classes, com_class)
                    mv_classes = np.append(mv_classes, mv_class)
                    i_classes = np.append(i_classes, i_class)
                    r_classes = np.append(r_classes, r_class)
                    gt_label = np.append(gt_label, tlabel)
                # np.mean gives a true fraction even under Python 2 integer
                # division.
                combine_acc = np.mean(combine_classes == gt_label)
                i_acc = np.mean(i_classes == gt_label)
                mv_acc = np.mean(mv_classes == gt_label)
                r_acc = np.mean(r_classes == gt_label)
                print('Step %d finished with accuracy: %f , %f , %f, %f' %
                      (step, i_acc, mv_acc, r_acc, combine_acc))

            # Training procedure
            i_arr, mv_arr, r_arr, flow_arr, label = sess.run(next_data)
            summary, _, pred, loss1, loss2, loss3, loss4, loss5 = sess.run(
                [merged, train_opt, pred_class, mv_class_loss, i_class_loss,
                 r_class_loss, combine_loss, flmg_loss],
                feed_dict={
                    mv_placeholder: mv_arr,
                    i_placeholder: i_arr,
                    flow_placeholder: flow_arr,
                    r_placeholder: r_arr,
                    label_placeholder: label,
                    combine_value_: combine_acc,
                    i_value_: i_acc,
                    mv_value_: mv_acc,
                    r_value_: r_acc,
                })

            if step % 10 == 0:
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration),
                      'mv_loss:', loss1, 'i_loss:', loss2, 'r_loss:', loss3,
                      'fusion_loss:', loss4, 'flmg_loss:', loss5)
                print('GT:', label)
                print('Pred:', pred)
                writer.add_summary(summary, step)
                start_time = time.time()

            # Model Saving
            if step % 1000 == 0 and not step == 0:
                my_saver.save(
                    sess, os.path.join(FLAGS.save_path, 'all_net.chkp'),
                    global_step=step)
    finally:
        # Release the event file and the session even if training aborts.
        if writer is not None:
            writer.close()
        sess.close()
def _get_resnet_features(inputs):
    """Build ResNet-v1-50 on ``inputs`` and return its last block-4 ReLU.

    The network is constructed in training mode without a classification
    head (``num_classes=None``). The constructor's return values are
    discarded; the result is looked up in the default graph by its fixed
    tensor name instead.

    Args:
        inputs: Tensor passed to ``resnet_v1.resnet_v1_50``.

    Returns:
        The default-graph tensor named
        'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0'.
    """
    feature_tensor_name = 'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0'
    scope_args = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(scope_args):
        resnet_v1.resnet_v1_50(inputs, num_classes=None, is_training=True)
    graph = tf.get_default_graph()
    return graph.get_tensor_by_name(feature_tensor_name)
def run_training():
    """Fine-tune a pre-trained VGG-16 / ResNet-v1-152 regressor.

    Reads the annotation table at ``ANNOS_CSV``, shuffles it and splits it
    by its ``partition`` column into train and val sets. Images are loaded
    through ``dataset.input_batches`` and fed to the network via
    placeholders. Adam, with an exponentially decayed learning rate,
    minimises the mean squared error between the network output and the
    'pop_density_log2' labels.

    Every 10 steps the training loss is printed and summaries are written.
    Every 50 steps one validation batch is evaluated. About once per epoch,
    on a validation step, a checkpoint is written to ``LOG_DIR/model.ckpt``
    if the validation R2 beat the best value so far.

    Raises:
        ValueError: If the module-level ``DATA`` or ``MODEL`` setting is not
            supported (previously this surfaced later as a NameError).
    """
    path_cols_by_data = {
        'l8': ['l8_vis_jpg'],
        's1': ['s1_vis_jpg'],
        'l8s1': ['l8_vis_jpg', 's1_vis_jpg'],
    }
    if DATA not in path_cols_by_data:
        raise ValueError("Unsupported DATA %r; expected 'l8', 's1' or 'l8s1'."
                         % (DATA,))
    if MODEL not in ('vgg', 'resnet'):
        raise ValueError("Unsupported MODEL %r; expected 'vgg' or 'resnet'."
                         % (MODEL,))
    path_col = path_cols_by_data[DATA]

    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    summary_writer_train = None
    summary_writer_val = None
    try:
        # Create input paths and label arrays from the csv annotations.
        df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
        # Shuffle the whole dataset.
        df_annos = df_annos.sample(frac=1).reset_index(drop=True)

        is_train = df_annos.partition == 'train'
        is_val = df_annos.partition == 'val'
        input_files_train = JPG_DIR + df_annos.loc[is_train, path_col].values
        input_labels_train = df_annos.loc[is_train, 'pop_density_log2'].values
        input_files_val = JPG_DIR + df_annos.loc[is_val, path_col].values
        input_labels_val = df_annos.loc[is_val, 'pop_density_log2'].values
        input_id_train = df_annos.loc[is_train, 'village_id'].values
        input_id_val = df_annos.loc[is_val, 'village_id'].values
        print('input_files_train shape:', input_files_train.shape)
        train_set_size = len(input_labels_train)

        # Data input
        with tf.device('/cpu:0'):
            train_images_batch, train_labels_batch, _ = \
                dataset.input_batches(FLAGS.batch_size,
                                      FLAGS.output_size,
                                      input_files_train,
                                      input_labels_train,
                                      input_id_train,
                                      IMAGE_HEIGHT,
                                      IMAGE_WIDTH,
                                      IMAGE_CHANNEL,
                                      regression=True,
                                      augmentation=True,
                                      normalization=True)
            val_images_batch, val_labels_batch, _ = \
                dataset.input_batches(FLAGS.batch_size,
                                      FLAGS.output_size,
                                      input_files_val,
                                      input_labels_val,
                                      input_id_val,
                                      IMAGE_HEIGHT,
                                      IMAGE_WIDTH,
                                      IMAGE_CHANNEL,
                                      regression=True,
                                      augmentation=False,
                                      normalization=True)
        images_placeholder = tf.placeholder(
            tf.float32,
            shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
        labels_placeholder = tf.placeholder(tf.float32, shape=[None, ])
        print('finish data input')

        # Number of training batches/steps in each epoch.
        TRAIN_BATCHES_PER_EPOCH = int(train_set_size / FLAGS.batch_size)
        # Total number of training batches/steps.
        MAX_STEPS = TRAIN_BATCHES_PER_EPOCH * FLAGS.max_epoch

        # CNN forward pass. The same is_training=True graph is used for
        # both the training and the validation feeds below.
        if MODEL == 'vgg':
            with slim.arg_scope(
                    vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
                outputs, _ = vgg.vgg_16(
                    images_placeholder,
                    num_classes=FLAGS.output_size,
                    dropout_keep_prob=FLAGS.dropout_keep,
                    is_training=True)
        else:  # MODEL == 'resnet' (validated above)
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                outputs, _ = resnet_v1.resnet_v1_152(
                    images_placeholder,
                    num_classes=FLAGS.output_size,
                    is_training=True)
        # Change shape from (B,1) to (B,), same as the label input.
        outputs = tf.squeeze(outputs)

        # Loss
        labels_real = tf.pow(2.0, labels_placeholder)
        outputs_real = tf.pow(2.0, outputs)
        # Only loss_log2_mse is used for the gradients; the others are
        # reported in summaries.
        loss_log2_mse = tf.reduce_mean(
            tf.squared_difference(labels_placeholder, outputs),
            name='loss_log2_mse')
        loss_real_rmse = tf.sqrt(
            tf.reduce_mean(tf.squared_difference(labels_real, outputs_real)),
            name='loss_real_rmse')
        loss_real_mae = tf.losses.absolute_difference(labels_real,
                                                      outputs_real)
        tf.summary.scalar('loss_log2_mse', loss_log2_mse)
        tf.summary.scalar('loss_real_rmse', loss_real_rmse)
        tf.summary.scalar('loss_real_mae', loss_real_mae)

        # Accuracy (R2)
        def _r_squared(labels, predictions):
            sst = tf.reduce_sum(
                tf.squared_difference(labels, tf.reduce_mean(labels)))
            sse = tf.reduce_sum(tf.squared_difference(labels, predictions))
            return (1.0 - tf.div(sse, sst))

        r2_log2 = _r_squared(labels_placeholder, outputs)
        r2_real = _r_squared(labels_real, outputs_real)
        tf.summary.scalar('r2_log2', r2_log2)
        tf.summary.scalar('r2_real', r2_real)

        # Model variables to restore from the pre-trained checkpoint.
        if MODEL == 'vgg':
            if DATA == 'l8s1':
                model_variables = slim.get_variables_to_restore(
                    exclude=['vgg_16/fc8', 'vgg_16/conv1'])
            else:
                model_variables = slim.get_variables_to_restore(
                    exclude=['vgg_16/fc8'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

        # Training step and learning rate
        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate,  # initial learning rate
            global_step=global_step,  # current step
            decay_steps=MAX_STEPS,  # total number of steps to decay over
            decay_rate=FLAGS.lr_decay_rate
        )  # final learning rate = FLAGS.learning_rate * decay_rate
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_log2_mse, global_step=global_step)

        # Summary output for tensorboard
        summary = tf.summary.merge_all()
        summary_writer_train = tf.summary.FileWriter(
            os.path.join(LOG_DIR, 'log_train'), sess.graph)
        summary_writer_val = tf.summary.FileWriter(
            os.path.join(LOG_DIR, 'log_val'), sess.graph)

        # Initialise variables, then overwrite the restorable ones.
        init = tf.global_variables_initializer()
        sess.run(init)
        restorer = tf.train.Saver(model_variables)
        restorer.restore(sess, PRETRAIN_WEIGHTS)
        print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

        # Saver used to write checkpoints during training.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        eval_every = 50
        # Last validation step inside an epoch. Clamped to eval_every so an
        # epoch shorter than 50 batches no longer yields a zero modulus
        # (ZeroDivisionError).
        checkpoint_every = max(
            eval_every,
            TRAIN_BATCHES_PER_EPOCH - TRAIN_BATCHES_PER_EPOCH % eval_every)

        print('start training...')
        epoch = 0
        best_r2 = -float('inf')
        for step in range(MAX_STEPS):
            if step % TRAIN_BATCHES_PER_EPOCH == 0:
                epoch += 1
            start_time = time.time()  # time used for each batch
            # Inputs of this batch as numpy arrays.
            images_out, labels_out = sess.run(
                [train_images_batch, train_labels_batch])
            duration_batch = time.time() - start_time
            if step == 0:
                print("finished reading batch data")
                print("images_out shape:", images_out.shape)
            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }
            _, train_loss, train_accuracy, lr = sess.run(
                [train_op, loss_log2_mse, r2_log2, learning_rate],
                feed_dict=feed_dict)
            duration = time.time() - start_time

            # Print the training loss every 10 batches.
            if step % 10 == 0 or (step + 1) == MAX_STEPS:
                print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                      % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_train.add_summary(summary_str, step)
                summary_writer_train.flush()

            # Compute and print the validation loss every 50 batches.
            if step % eval_every == 0 or (step + 1) == MAX_STEPS:
                images_out, labels_out = sess.run(
                    [val_images_batch, val_labels_batch])
                feed_dict = {
                    images_placeholder: images_out,
                    labels_placeholder: labels_out
                }
                val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                                  feed_dict=feed_dict)
                print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f ' %
                      (step, epoch, val_loss, val_accuracy))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_val.add_summary(summary_str, step)
                summary_writer_val.flush()

                # Once per epoch, save if the validation R2 beat the best so
                # far. checkpoint_every is a multiple of eval_every, so
                # val_accuracy is always fresh here.
                if step % checkpoint_every == 0 and val_accuracy > best_r2:
                    best_r2 = val_accuracy
                    checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_file,
                               global_step=step,
                               write_state=True)
    finally:
        # Release event files and the session even if training aborts.
        if summary_writer_train is not None:
            summary_writer_train.close()
        if summary_writer_val is not None:
            summary_writer_val.close()
        sess.close()
def mem_encoder(img, seg, is_training):
    """Encode image + segmentation pairs into key / value / feature maps.

    The two leading axes of ``img`` and ``seg`` are merged, the inputs are
    mean-centred and concatenated on the channel axis, and the result runs
    through a ResNet-v1-50 style trunk (a ``conv2d_seg`` stem, max-pool, and
    blocks 1-3 made of ``bottleneck`` units). Two 1x1 convolutions then
    project the trunk output to a key (channels // 8) and a value
    (channels // 2). All three outputs are reshaped back to a leading
    ``config.batch_size`` axis.

    NOTE(review): the variable-scope nesting was reconstructed from a
    flattened source. Confirm that 'mem_key' / 'mem_value' sit directly
    under 'mem_encoder' before loading existing checkpoints.

    Args:
        img: Image tensor; assumed to have two leading axes followed by
            H, W, 3 -- TODO confirm against the caller.
        seg: Segmentation tensor with the same two leading axes as ``img``.
        is_training: Passed to ``slim.batch_norm`` as ``is_training``.

    Returns:
        Tuple ``(key, value, net)``, each reshaped to
        ``[config.batch_size, -1] + per-frame shape``.
    """
    # Fold the two leading axes into one so the trunk sees a flat batch.
    image = tf.reshape(img, [-1] + list(img.get_shape())[2:])
    seg = tf.reshape(seg, [-1] + list(seg.get_shape())[2:])
    image = image - tf.constant(
        _RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))
    seg = seg - 127.5
    image_seg = tf.concat([image, seg], axis=-1)

    # (scope name, bottleneck depth, number of units). The last unit of each
    # block has stride 2; all other units have stride 1.
    block_specs = (
        ('block1', 64, 3),
        ('block2', 128, 4),
        ('block3', 256, 6),
    )

    with tf.contrib.slim.arg_scope(
            resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        with tf.variable_scope('mem_encoder'):
            with tf.variable_scope('resnet_v1_50', values=[image]) as sc:
                end_points_collection = sc.name + '_end_points'
                with slim.arg_scope(
                        [slim.conv2d, bottleneck],
                        outputs_collections=end_points_collection):
                    with slim.arg_scope([slim.batch_norm],
                                        is_training=is_training):
                        net = image_seg
                        net = conv2d_seg(
                            net, 64, 7, stride=2, scope='mem_conv1_seg')
                        net = slim.max_pool2d(
                            net, [3, 3], stride=2, scope='pool1')
                        for block_name, base_depth, num_units in block_specs:
                            with tf.variable_scope(block_name, values=[net]):
                                for unit in range(1, num_units + 1):
                                    unit_stride = 2 if unit == num_units else 1
                                    with tf.variable_scope(
                                            'unit_%d' % unit, values=[net]):
                                        net = bottleneck(
                                            net,
                                            depth=4 * base_depth,
                                            depth_bottleneck=base_depth,
                                            stride=unit_stride)

            # Floor division keeps `filters` an int under Python 3 as well.
            channels = int(net.get_shape()[-1])
            key = tf.layers.conv2d(
                net,
                filters=channels // 8,
                kernel_size=(1, 1),
                activation=None,
                padding='SAME',
                name='mem_key')
            value = tf.layers.conv2d(
                net,
                filters=channels // 2,
                kernel_size=(1, 1),
                activation=None,
                padding='SAME',
                name='mem_value')

    # Restore the leading batch axis that was folded away above.
    net = tf.reshape(
        net, [config.batch_size, -1] + list(net.get_shape())[1:])
    key = tf.reshape(
        key, [config.batch_size, -1] + list(key.get_shape())[1:])
    value = tf.reshape(
        value, [config.batch_size, -1] + list(value.get_shape())[1:])
    return key, value, net