def extract_features(self, inputs):
    """Run the configured backbone network on mean-centered images.

    The backbone family is chosen by substring match on
    ``self.cfg['net_type']``: 'resnet', 'mobilenet' or 'efficientnet'.

    Args:
        inputs: Image tensor. ``self.cfg['mean_pixel']`` is shaped to
            [1, 1, 1, 3] and subtracted from it, so a 3-channel,
            channels-last batch is presumably expected -- TODO confirm.

    Returns:
        Tuple ``(net, end_points)`` as returned by the selected backbone.

    Raises:
        ValueError: If ``net_type`` matches none of the supported families
            (previously this surfaced as an UnboundLocalError at ``return``).
    """
    net_type = self.cfg['net_type']
    mean = tf.constant(
        self.cfg['mean_pixel'],
        dtype=tf.float32,
        shape=[1, 1, 1, 3],
        name="img_mean",
    )
    im_centered = inputs - mean

    if 'resnet' in net_type:
        # The slim ResNet API differs between TensorFlow releases.
        # Updated based on https://github.com/AlexEMG/DeepLabCut/issues/44
        vers = tf.__version__.split(".")
        net_fun = net_funcs[net_type]
        if int(vers[0]) == 1 and int(vers[1]) < 4:
            # Older than 1.4: the flag is passed to the arg scope instead of
            # the network function.
            with slim.arg_scope(resnet_v1.resnet_arg_scope(False)):
                net, end_points = net_fun(
                    im_centered, global_pool=False, output_stride=16
                )
        else:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = net_fun(
                    im_centered,
                    global_pool=False,
                    output_stride=16,
                    is_training=False,
                )
    elif 'mobilenet' in net_type:
        net_fun = net_funcs[net_type]
        with slim.arg_scope(mobilenet_v2.training_scope()):
            net, end_points = net_fun(im_centered)
    elif 'efficientnet' in net_type:
        # EfficientNet additionally divides by the per-channel std-dev.
        im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3])
        net, end_points = eff.build_model_base(
            im_centered,
            net_type,
            use_batch_norm=self.cfg['use_batch_norm'],
            drop_out=self.cfg['use_drop_out'],
        )
    else:
        raise ValueError("Unsupported net_type: %r" % (net_type,))
    return net, end_points
def extract_features(self, inputs):
    """Mean-center ``inputs`` and run the configured ResNet backbone.

    Args:
        inputs: Image tensor; ``self.cfg.mean_pixel`` (shaped [1, 1, 1, 3])
            is subtracted from it.

    Returns:
        Tuple ``(net, end_points)`` returned by ``net_funcs[self.cfg.net_type]``.
    """
    backbone = net_funcs[self.cfg.net_type]
    pixel_mean = tf.constant(
        self.cfg.mean_pixel, dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean"
    )
    centered = inputs - pixel_mean

    # The slim ResNet API depends on the installed TensorFlow version.
    # Updated based on https://github.com/AlexEMG/DeepLabCut/issues/44
    version_parts = tf.__version__.split(".")
    older_than_1_4 = int(version_parts[0]) == 1 and int(version_parts[1]) < 4

    call_kwargs = {'global_pool': False, 'output_stride': 16}
    if older_than_1_4:
        scope_args = resnet_v1.resnet_arg_scope(False)
    else:
        scope_args = resnet_v1.resnet_arg_scope()
        call_kwargs['is_training'] = False

    with slim.arg_scope(scope_args):
        net, end_points = backbone(centered, **call_kwargs)
    return net, end_points
def eval(x, num_classes=110):
    """Build Inception-v1, ResNet-v1-50 and VGG-16 on ``x`` and return predictions.

    Args:
        x: Input image batch. The inline rescale treats it as lying in
            [-1, 1] before mapping to [0, 255] for ResNet/VGG preprocessing.
        num_classes: Number of classes passed to each network.

    Returns:
        List of three argmax tensors, in the order Inception-v1,
        ResNet-v1-50, VGG-16.
    """
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_end_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False, scope='InceptionV1')
    inception_pred = tf.argmax(inc_end_points['Predictions'], 1)

    # Rescale pixel range from [-1, 1] to [0, 255] for resnet_v1 and vgg input.
    pixels = (((x + 1.0) * 0.5) * 255.0)

    resnet_input = preprocess_for_model(pixels, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_end_points = resnet_v1.resnet_v1_50(
            resnet_input,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_logits = tf.squeeze(res_end_points['resnet_v1_50/logits'], [1, 2])
    res_end_points['logits'] = res_logits
    res_end_points['probs'] = tf.nn.softmax(res_end_points['logits'])
    resnet_pred = tf.argmax(res_end_points['probs'], 1)

    vgg_input = preprocess_for_model(pixels, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_end_points = vgg.vgg_16(
            vgg_input,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_end_points['logits'] = vgg_end_points['vgg_16/fc8']
    vgg_end_points['probs'] = tf.nn.softmax(vgg_end_points['logits'])
    vgg_pred = tf.argmax(vgg_end_points['probs'], 1)

    return [inception_pred, resnet_pred, vgg_pred]
def _load_tf_model(checkpoint_file):
    """Build ResNet-v1-152 and restore its weights from ``checkpoint_file``.

    Args:
        checkpoint_file: Path handed to ``tf.train.Saver.restore``.

    Returns:
        Tuple ``(sess, logits, probabilities, input_tensor)``. The caller
        owns the returned session.

    Raises:
        mlhub.utils.DataResourceNotFoundException: If restoring raises
            ``ValueError``. The session is closed before re-raising so it is
            not leaked.
    """
    # Placeholder for a batch of 224x224 RGB images.
    input_tensor = tf.compat.v1.placeholder(
        tf.float32, shape=(None, 224, 224, 3), name='input_image')

    # Load the model.
    sess = tf.compat.v1.Session()
    arg_scope = resnet_v1.resnet_arg_scope()
    with tf.contrib.slim.arg_scope(arg_scope):
        logits, _ = resnet_v1.resnet_v1_152(
            input_tensor,
            num_classes=1000,
            is_training=False,
            reuse=tf.AUTO_REUSE)
    probabilities = tf.nn.softmax(logits)

    saver = tf.train.Saver()
    try:
        saver.restore(sess, checkpoint_file)
    except ValueError:
        # The caller never receives the session on failure, so release it here.
        sess.close()
        raise mlhub.utils.DataResourceNotFoundException(checkpoint_file)
    return sess, logits, probabilities, input_tensor
def forward(self, input_tensor, is_training):
    """Build a ResNet-v1-101 encoder followed by a transposed-conv decoder.

    Args:
        input_tensor: Image batch; resized to 512x512 before the encoder.
            (The original comment gives [batch, 513, 513, 3] as the shape.)
        is_training: Forwarded to ``resnet_arg_scope`` and to both dropout layers.

    Returns:
        Output of the final 1x1 convolution, with ``len(self.classes) + 1``
        channels.
    """
    resized = tf.image.resize_images(input_tensor, [512, 512])
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training)):
        net, end_points = resnet_v1.resnet_v1_101(
            resized, None, global_pool=False, output_stride=16)
    print(net.get_shape())

    # Two upsampling stages: transposed conv -> ReLU -> dropout.
    h = net
    for channels, stride in ((64, [4, 4]), (32, [2, 2])):
        h = L.convolution2d_transpose(h, channels, [5, 5], stride, activation_fn=None)
        h = tf.nn.relu(h)
        h = L.dropout(h, keep_prob=0.5, is_training=is_training)
    print(h)

    n_outputs = len(self.classes) + 1
    h = L.convolution2d(h, n_outputs, [1, 1], [1, 1], activation_fn=None)
    print(h)
    return h
def build_net(self, x, is_training):
    """Define the network: ResNet-50 feature extractor plus a conv classifier.

    Args:
        x: Input tensor given to ``resnet_v1.resnet_v1_50``.
        is_training: Forwarded to the ResNet builder.

    Returns:
        Tuple ``(logits, z)``: the 2-channel output of the 1x1 logit layer and
        the ``self.config.dim_z``-channel bottleneck feature it is computed from.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            x, num_classes=2, is_training=is_training)

    with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu):
        # Last bottleneck before logits.
        block4 = end_points['resnet_v1_50/block4']
        with tf.variable_scope('resnet_v1_50'):
            z = slim.conv2d(
                block4,
                self.config.dim_z,
                [7, 7],
                padding='VALID',
                activation_fn=tf.nn.relu,
                scope='bottleneck_layer')
            logits = slim.conv2d(
                z, 2, [1, 1], activation_fn=None, scope='logit_layer')
    return logits, z
def get_resnet(image_tensor, reuse):
    """Build ResNet-v1-101 features and the list of layer prefixes to ignore.

    Args:
        image_tensor: Image batch; ``cfg.pixel_mean`` is subtracted from it.
        reuse: Forwarded to the ResNet builder.

    Returns:
        Tuple ``(block2_out, stride, ignore_prefixes, end_points)`` where
        ``block2_out`` is the ``block3/unit_22/bottleneck_v1`` end point,
        ``stride`` is 16, and ``ignore_prefixes`` holds the first
        ``cfg.gnet.freeze_n_imfeat_layers`` layer prefixes plus every prefix
        after ``block3/unit_22``.
    """
    assert cfg.resnet_type == '101'

    pixel_mean = tf.constant(
        cfg.pixel_mean, dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
    centered = image_tensor - pixel_mean

    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
        net, end_points = resnet_v1.resnet_v1_101(
            centered, global_pool=False, output_stride=16, reuse=reuse)

    feature_key = 'resnet_v1_{}/block3/unit_22/bottleneck_v1'.format(
        cfg.resnet_type)
    block2_out = end_points[feature_key]
    stride = 16

    # Ordered prefixes for conv1 and every unit of the four blocks.
    layer_prefixes = ['resnet_v1_101/conv1']
    for block_no, n_units in ((1, 3), (2, 4), (3, 23), (4, 3)):
        template = 'resnet_v1_101/block%d/unit_{}' % block_no
        layer_prefixes.extend(
            template.format(unit + 1) for unit in range(n_units))

    cut = layer_prefixes.index('resnet_v1_101/block3/unit_22') + 1
    frozen = layer_prefixes[:cfg.gnet.freeze_n_imfeat_layers]
    ignore_prefixes = frozen + layer_prefixes[cut:]
    return block2_out, stride, ignore_prefixes, end_points
def test_base_fcn(net_name, img_fnames, weight_fname):
    """Quick effort to test the base FCNs and find the appropriate preprocessing.

    Builds the named classifier on top of ``_vgg_preprocess``, restores its
    weights, and prints the predicted ImageNet class name for each image.

    Args:
        net_name: Network to build; only 'resnet_v1_152' is recognized.
        img_fnames: Iterable of image paths read with ``cv2.imread``.
        weight_fname: Checkpoint path given to ``tf.train.Saver.restore``.

    Raises:
        Exception: If ``net_name`` is not recognized.
    """
    # Keep the placeholder separate from the preprocessed tensor. Feeding the
    # preprocessed tensor (as the previous code did by rebinding ``inputs``)
    # overrides its value and silently skips the preprocessing.
    image_ph = tf.placeholder(tf.float32, [None, None, None, 3])
    inputs = _vgg_preprocess(image_ph)

    if net_name == 'resnet_v1_152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            base_net, _ = resnet_v1.resnet_v1_152(
                inputs, is_training=False, num_classes=1000)
    else:
        raise Exception('net_name not recognized.')

    pred = tf.argmax(base_net, -1)
    saver = tf.train.Saver()
    sess = tf.InteractiveSession()
    saver.restore(sess, weight_fname)

    for img_fname in img_fnames:
        img = cv2.imread(img_fname)
        prediction = sess.run(
            pred, feed_dict={image_ph: img[np.newaxis, :, :, :]})
        print('pred for %s:' % img_fname)
        # A single image is fed, so the result holds exactly one class index.
        print(imagenet_names[int(np.ravel(prediction)[0])])
        print('=========')
def Eval(x_img_224, x_img_299, y):
    """Classify the first image with ResNet-v1-50 and VGG-16.

    Args:
        x_img_224: Image batch; a fixed RGB mean is subtracted before both nets.
        x_img_299: Unused by this function.
        y: Label tensor; only ``y[0]`` is read.

    Returns:
        Tuple ``(res_label, vgg_label, y_r, y_v)``: each network's argmax class
        for the first image, and the probability each assigns to class ``y[0]``.
    """
    rgb_mean = tf.reshape(tf.constant([123.68, 116.78, 103.94]), [1, 1, 1, 3])
    input_image = x_img_224 - rgb_mean

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_end_points = resnet_v1.resnet_v1_50(
            input_image,
            num_classes=110,
            is_training=False,
            scope='resnet_v1_50',
            reuse=tf.AUTO_REUSE)
    res_end_points['logits'] = tf.squeeze(
        res_end_points['resnet_v1_50/logits'], [1, 2])
    res_end_points['probs'] = tf.nn.softmax(res_end_points['logits'])
    res_label = tf.argmax(res_end_points['probs'][0], -1)
    y_r = res_end_points['probs'][0][y[0]]

    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_end_points = vgg.vgg_16(
            input_image,
            num_classes=110,
            is_training=False,
            scope='vgg_16',
            reuse=tf.AUTO_REUSE)
    vgg_end_points['logits'] = vgg_end_points['vgg_16/fc8']
    vgg_end_points['probs'] = tf.nn.softmax(vgg_end_points['logits'])
    vgg_label = tf.argmax(vgg_end_points['probs'][0], -1)
    y_v = vgg_end_points['probs'][0][y[0]]

    return res_label, vgg_label, y_r, y_v
def _build_model(self, visual_images):
    """Build a ResNet-50 network using slim and store it on the instance.

    Sets ``self.output``, ``self.network`` (the end-points dict extended with
    the 'input', 'is_training' and 'keep_prob' tensors), ``self.train_vars``
    and ``self.train_vars2``.

    Args:
        visual_images: Input image tensor for the network.
    """
    is_training = tf.placeholder(tf.bool, name='is_training')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    with slim.arg_scope(resnet.resnet_arg_scope(weight_decay=5e-4)):
        output, network = resnet50.resnet_v1_50(
            visual_images,
            num_classes=self.num_classes,
            is_training=is_training,
            global_pool=False)

    network.update({
        'input': visual_images,
        'is_training': is_training,
        'keep_prob': keep_prob,
    })
    self.output = output
    self.network = network

    # Variables under '<scope>/block*' and '<scope>/conv1'.
    block_vars = slim.get_trainable_variables(self.scope + '/block')
    conv1_vars = slim.get_trainable_variables(self.scope + '/conv1')
    self.train_vars2 = block_vars + conv1_vars

    # Variables under '<scope>/logits' and '<scope>/conv_map'.
    logits_vars = slim.get_trainable_variables(self.scope + '/logits')
    conv_map_vars = slim.get_trainable_variables(self.scope + '/conv_map')
    self.train_vars = logits_vars + conv_map_vars
def _load_tf_model(checkpoint_file):
    """Build ResNet-v1-152, restore its weights and return a predict function.

    Args:
        checkpoint_file: Path handed to ``tf.train.Saver.restore``.

    Returns:
        Callable ``predict_for(image)`` that runs the network in the created
        session and returns the softmax probabilities.
    """
    # Placeholder for a batch of 224x224 RGB images.
    image_ph = tf.placeholder(
        tf.float32, shape=(None, 224, 224, 3), name='input_image')

    # Load the model.
    session = tf.Session()
    with tf.contrib.slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_152(
            image_ph,
            num_classes=1000,
            is_training=False,
            reuse=tf.AUTO_REUSE)
    probabilities = tf.nn.softmax(logits)

    tf.train.Saver().restore(session, checkpoint_file)

    def predict_for(image):
        # Both tensors are evaluated; only the probabilities are returned.
        _, class_probs = session.run(
            [logits, probabilities], feed_dict={image_ph: image})
        return class_probs

    return predict_for
def _build_model(self):
    """Build the one-stream network using slim and average per-frame outputs.

    Returns:
        Tuple ``(output, network)``. ``output`` is the network output reshaped
        to [-1, nr_frames, num_classes] and averaged over the frame axis;
        ``network`` is the end-points dict extended with the 'input',
        'is_training' and 'keep_prob' placeholders.
    """
    input_shape = [None, self.height, self.width, self.channels]
    visual_images = tf.placeholder(
        tf.float32, input_shape, name='visual_images')
    is_training = tf.placeholder(tf.bool, name='is_training')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    scope_args = resnet_v1.resnet_arg_scope(batch_norm_decay=0.997)
    with slim.arg_scope(scope_args):
        frame_output, network = resnet_v2.resnet_one_stream_main(
            visual_images,
            nr_frames=self.nr_frames,
            num_classes=self.num_classes,
            is_training=is_training,
            scope=self.scope)

    # Predictions for each video are the average of its frames' predictions.
    per_video = tf.reshape(
        frame_output, [-1, self.nr_frames, self.num_classes])
    output = tf.reduce_mean(per_video, axis=1)

    network.update({
        'input': visual_images,
        'is_training': is_training,
        'keep_prob': keep_prob,
    })
    return output, network
def _build_model(self):
    """Build a ResNet-50 network using slim.

    Returns:
        Tuple ``(output, network)``. ``output`` is the network output with
        axes 1 and 2 squeezed away; ``network`` is the end-points dict
        extended with the 'input', 'is_training' and 'keep_prob' placeholders.
    """
    input_shape = [None, self.height, self.width, self.channels]
    visual_images = tf.placeholder(
        tf.float32, input_shape, name='visual_images')
    is_training = tf.placeholder(tf.bool, name='is_training')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=5e-4)):
        raw_output, network = resnet_v1.resnet_v1_50(
            visual_images,
            num_classes=self.num_classes,
            is_training=is_training)
    output = tf.squeeze(raw_output, [1, 2])

    network.update({
        'input': visual_images,
        'is_training': is_training,
        'keep_prob': keep_prob,
    })
    return output, network
def localizationNet(input, is_train=False, reuse=False, scope='resnet_v1_50'):
    """Regress 50 parameters from an image with ResNet-v1-50 plus dense layers.

    Args:
        input: Image tensor; passed through ``scale_RGB`` before the ResNet.
        is_train: Forwarded to the ResNet builder as ``is_training``.
        reuse: Variable reuse flag for the enclosing scope and the ResNet.
        scope: Name of the enclosing variable scope.

    Returns:
        Output tensor of the final 50-unit dense layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, _ = resnet_v1.resnet_v1_50(
                scale_RGB(input),
                global_pool=True,
                is_training=is_train,
                reuse=reuse)
        net = tl.layers.InputLayer(net)
        net = tl.layers.FlattenLayer(net, name='flatten')
        # Four linear (identity-activation) dense layers: df/dense1..df/dense4.
        for index, n_units in enumerate((2048, 1024, 512, 50), 1):
            net = tl.layers.DenseLayer(
                net,
                n_units=n_units,
                act=tf.identity,
                name='df/dense%d' % index)
        thetas_affine = net.outputs
    return thetas_affine
def single_stream(self, images, modality, is_training, reuse=False):
    """Build one ResNet-v1-50 stream under the ``modality`` variable scope.

    Args:
        images: Input tensor for the ResNet.
        modality: Variable-scope name; also the prefix of the end-point keys.
        is_training: Forwarded to the ResNet builder.
        reuse: Variable reuse flag for all scopes created here.

    Returns:
        Depends on ``self.mode``:
        - contains 'autoencoder': the block4 feature map;
        - contains 'train_hallucination', 'test_disc' or 'train_eccv':
          tuple ``(logits, bottleneck)``;
        - otherwise: the logits only.
    """
    with tf.variable_scope(modality, reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_50(
                images,
                self.no_classes,
                is_training=is_training,
                reuse=reuse)

    # Last bottleneck before logits.
    block4 = end_points[modality + '/resnet_v1_50/block4']
    if 'autoencoder' in self.mode:
        return block4

    with tf.variable_scope(modality + '/resnet_v1_50', reuse=reuse):
        bottleneck = slim.conv2d(
            block4,
            self.hidden_repr_size,
            [7, 7],
            padding='VALID',
            activation_fn=tf.nn.relu,
            scope='f_repr')
        logits = slim.conv2d(
            bottleneck,
            self.no_classes,
            [1, 1],
            activation_fn=None,
            scope='_logits_')

    modes_needing_bottleneck = ('train_hallucination', 'test_disc', 'train_eccv')
    if any(tag in self.mode for tag in modes_needing_bottleneck):
        return logits, bottleneck
    return logits
def inference (images, train=True, resnet_stride=8):
    """Build the ResNet feature extractor and upscale its output to image size.

    Args:
        images: Input image batch.
        train: Passed positionally to ``resnet_v1.resnet_arg_scope``.
        resnet_stride: Output stride requested from the ResNet; also the
            stride of the transposed convolution.

    Returns:
        Logits from the transposed convolution, shaped like ``images`` with
        the last dimension replaced by ``FLAGS.out_channels``.

    NOTE(review): ``tf.unpack`` / ``tf.pack`` were renamed ``tf.unstack`` /
    ``tf.stack`` in TensorFlow 1.0; this block targets the older API.
    """
    # Replace resnet_v1_slim with resnet_v1.resnet_v1_50/101/... to use the
    # standard architectures.
    #   num_classes=None  -> return the features before the logit layer.
    #   global_pool=False -> dense prediction (True is for classification).
    #   output_stride     -> requested ratio of input to output resolution;
    #                        None means the nominal network stride.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(train)):
        net, end_points = resnet_v1_slim(
            images,
            num_classes=None,
            global_pool=False,
            output_stride=resnet_stride)
    resnet_depth = utils.last_dimension(net.get_shape(), min_rank=4)

    # Output shape: the input's dynamic shape with the channel dim swapped.
    out_dims = tf.unpack(tf.shape(images))
    print(out_dims.__class__)
    del out_dims[-1]
    out_dims.append(tf.constant(FLAGS.out_channels, dtype=tf.int32))
    print(len(out_dims))

    kernel_size = resnet_stride * 2 + 1
    initial_filters = tf.truncated_normal(
        [kernel_size, kernel_size, FLAGS.out_channels, resnet_depth],
        dtype=tf.float32,
        stddev=0.01)
    filters = tf.Variable(initial_filters, name='filters')

    logits = tf.nn.conv2d_transpose(
        net,
        filters,
        tf.pack(out_dims),
        [1, resnet_stride, resnet_stride, 1],
        padding='SAME',
        name='upscale')
    return logits
def feature_extractor(patch):
    """Map an image patch to a 512-d glimpse feature via ResNet-v1-50.

    Reads ``self.train_mode`` and ``self.drop1`` from the enclosing scope.

    Args:
        patch: Input image batch for the ResNet.

    Returns:
        Output of the 512-unit ReLU dense layer applied to the spatially
        averaged, dropout-regularized block4 features.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            patch, 1000, is_training=self.train_mode, reuse=tf.AUTO_REUSE)

    # Average block4 over the spatial axes, then flatten to [-1, 2048].
    block4 = end_points['resnet_v1_50/block4']
    pooled = tf.reduce_mean(block4, [1, 2], keepdims=True)
    pooled = tf.reshape(tf.squeeze(pooled), [-1, 2048])

    dropped = tf.layers.dropout(
        pooled, rate=self.drop1, training=self.train_mode)
    return tf.layers.dense(
        inputs=dropped,
        units=512,
        activation=tf.nn.relu,
        kernel_initializer=tf.glorot_uniform_initializer(),
        bias_initializer=tf.constant_initializer(0.1),
        name='glimpse_feature/fc',
        reuse=tf.AUTO_REUSE)
def build_graph(self, orig_image):
    """Build ResNet-v1-50 under guided ReLU and add the saliency-map op.

    Args:
        orig_image: Single image tensor; the 'resnet_v1_50/mean_rgb' variable
            is subtracted and a leading batch axis is added.
    """
    rgb_mean = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    with guided_relu():
        batched = tf.expand_dims(orig_image - rgb_mean, 0)
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, _ = resnet_v1.resnet_v1_50(batched, 1000)
        saliency_map(logits, orig_image, name="saliency")
def det_lesion_resnet(inputs, is_training_option=False, scope='det_lesion'):
    """Define the lesion-detection network (ResNet-v1-50 plus a sigmoid unit).

    Args:
        inputs: Tensorflow placeholder that contains the input image.
        is_training_option: Forwarded to the ResNet builder as ``is_training``.
        scope: Scope name for the network.

    Returns:
        net: Output tensor of the network (single sigmoid unit).
        end_points: Dictionary built from the '<scope>_end_points' collection.
    """
    with tf.variable_scope(scope, 'det_lesion', [inputs]) as sc:
        collection_name = sc.name + '_end_points'
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            features, _ = resnet_v1.resnet_v1_50(
                inputs, is_training=is_training_option)
            flat = slim.flatten(features, scope='flatten5')
            net = slim.fully_connected(
                flat,
                1,
                activation_fn=tf.nn.sigmoid,
                weights_initializer=initializers.xavier_initializer(),
                scope='output')
            utils.collect_named_outputs(
                collection_name, 'det_lesion/output', net)

    end_points = slim.utils.convert_collection_to_dict(collection_name)
    return net, end_points
def non_target_graph(x, y, i, x_max, x_min, grad):
    """One iteration of the non-targeted momentum attack on a 3-model ensemble.

    Args:
        x: Current adversarial batch (treated as lying in [-1, 1]).
        y: Labels; replaced by the ensemble prediction when ``i == 0``.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        Tuple ``(x, y, i, x_max, x_min, noise)`` with the updated image,
        labels, incremented counter and new accumulated gradient.
    """
    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_end_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False, scope='InceptionV1')

    # Rescale pixel range from [-1, 1] to [0, 255] for resnet_v1 and vgg input.
    pixels = (((x + 1.0) * 0.5) * 255.0)

    resnet_input = preprocess_for_model(pixels, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_end_points = resnet_v1.resnet_v1_50(
            resnet_input,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_end_points['logits'] = tf.squeeze(
        res_end_points['resnet_v1_50/logits'], [1, 2])
    res_end_points['probs'] = tf.nn.softmax(res_end_points['logits'])

    vgg_input = preprocess_for_model(pixels, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_end_points = vgg.vgg_16(
            vgg_input,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_end_points['logits'] = vgg_end_points['vgg_16/fc8']
    vgg_end_points['probs'] = tf.nn.softmax(vgg_end_points['logits'])

    # Use model predictions as ground truth to avoid label leaking.
    ensemble_probs = (inc_end_points['Predictions'] +
                      res_end_points['probs'] +
                      vgg_end_points['probs'])
    pred = tf.argmax(ensemble_probs, 1)
    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    logits = (inc_end_points['Logits'] +
              res_end_points['logits'] +
              vgg_end_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    # Normalize the gradient by its mean absolute value, then add momentum.
    noise = tf.gradients(cross_entropy, x)[0]
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise

    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def _build_graph(self, inputs):
    """Build ResNet-v1-50 under guided ReLU and add the saliency-map op.

    Args:
        inputs: Sequence whose first element is the image tensor; the
            'resnet_v1_50/mean_rgb' variable is subtracted from it and a
            leading batch axis is added.
    """
    orig_image = inputs[0]
    rgb_mean = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    with guided_relu():
        batched = tf.expand_dims(orig_image - rgb_mean, 0)
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
            logits, _ = resnet_v1.resnet_v1_50(batched, 1000)
        tp.symbolic_functions.saliency_map(logits, orig_image, name="saliency")
def _build_graph(self, inputs):
    """Build ResNet-v1-50 under tensorpack's guided ReLU and add a saliency op.

    Args:
        inputs: Sequence whose first element is the image tensor; the
            'resnet_v1_50/mean_rgb' variable is subtracted from it and a
            leading batch axis is added.
    """
    orig_image = inputs[0]
    rgb_mean = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    symbolic = tp.symbolic_functions
    with symbolic.guided_relu():
        batched = tf.expand_dims(orig_image - rgb_mean, 0)
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
            logits, _ = resnet_v1.resnet_v1_50(batched, 1000)
        symbolic.saliency_map(logits, orig_image, name="saliency")
def net_graph(inputs_X):
    """Build ResNet-v1-50 with four side encoders and one regression output.

    Each encoder taps the last ReLU of one ResNet block by tensor name,
    applies a 1x1 and a 3x3 convolution, and global-average-pools the result.
    The four pooled vectors are concatenated and fed to one fully connected
    unit, which is also added to the "predictions" collection.

    Args:
        inputs_X: Input image batch for the ResNet.

    Returns:
        Tuple ``(predictions, current_epoch)`` where ``current_epoch`` is a
        variable initialized to 0.
    """
    def encoder(tensor_name, layer_name):
        """Encode the graph tensor ``tensor_name`` into a pooled 256-d vector."""
        with tf.variable_scope(layer_name):
            encoder_tensor = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [1, 1],
                stride=1,
                padding='VALID',
                scope="conv1",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                weights_regularizer=layers_lib.l2_regularizer(1e-4))
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [3, 3],
                stride=1,
                padding='VALID',
                scope="conv3",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                weights_regularizer=layers_lib.l2_regularizer(1e-4))
            # Global average pooling over the spatial axes.
            out_tensor = math_ops.reduce_mean(
                encoder_tensor, [1, 2], name='gap', keepdims=False)
            return out_tensor

    # Original network.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(inputs_X, is_training=True)

    with tf.variable_scope("encoder"):
        encoder1 = encoder(
            "resnet_v1_50/block1/unit_3/bottleneck_v1/Relu:0", "encoder1")
        encoder2 = encoder(
            "resnet_v1_50/block2/unit_4/bottleneck_v1/Relu:0", "encoder2")
        encoder3 = encoder(
            "resnet_v1_50/block3/unit_6/bottleneck_v1/Relu:0", "encoder3")
        encoder4 = encoder(
            "resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0", "encoder4")
        concat = tf.concat(
            [encoder1, encoder2, encoder3, encoder4], -1, name='concat')
        # fully_connected takes ``scope``, not ``name``; passing ``name``
        # raises a TypeError in the contrib layers API.
        predictions = layers_lib.fully_connected(
            concat,
            1,
            scope="fintune_FC",
            weights_regularizer=layers_lib.l2_regularizer(1e-4))
        tf.add_to_collection("predictions", predictions)

    current_epoch = tf.Variable(0, name="current_epoch")
    return predictions, current_epoch
def target_graph(x, y, i, x_max, x_min, grad):
    """One iteration of the targeted attack with input diversity and a center patch.

    Args:
        x: Current adversarial batch (treated as lying in [-1, 1]).
        y: Target labels.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        Tuple ``(x, y, i, x_max, x_min, noise)`` with the updated image,
        unchanged labels, incremented counter and new accumulated gradient.
    """
    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    def _to_pixel_range(batch):
        # Rescale pixel range from [-1, 1] to [0, 255].
        return (((batch + 1.0) * 0.5) * 255.0)

    def _divide_by_std(tensor):
        # Divide each example by the std-dev of its flattened values.
        flat = tf.reshape(tensor, [batch_size, -1])
        per_example_std = tf.contrib.keras.backend.std(flat, axis=1)
        return tensor / tf.reshape(per_example_std, [batch_size, 1, 1, 1])

    # Each model gets its own input_diversity(x) call. Input image size is
    # [224, 224, 3].
    inception_input = tf.image.resize_bilinear(
        input_diversity(x), [224, 224], align_corners=False)
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_end_points = inception.inception_v1(
            inception_input,
            num_classes=num_classes,
            is_training=False,
            scope='InceptionV1')

    resnet_pixels = _to_pixel_range(input_diversity(x))
    resnet_input = preprocess_for_model(resnet_pixels, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_end_points = resnet_v1.resnet_v1_50(
            resnet_input,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_end_points['logits'] = tf.squeeze(
        res_end_points['resnet_v1_50/logits'], [1, 2])
    res_end_points['probs'] = tf.nn.softmax(res_end_points['logits'])

    vgg_pixels = _to_pixel_range(input_diversity(x))
    vgg_input = preprocess_for_model(vgg_pixels, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_end_points = vgg.vgg_16(
            vgg_input,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_end_points['logits'] = vgg_end_points['vgg_16/fc8']
    vgg_end_points['probs'] = tf.nn.softmax(vgg_end_points['logits'])

    one_hot = tf.one_hot(y, num_classes)
    logits = (inc_end_points['Logits'] +
              res_end_points['logits'] +
              vgg_end_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    # Smooth the gradient, normalize, add momentum, normalize again.
    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(
        noise, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')
    noise = _divide_by_std(noise)
    noise = momentum * grad + noise
    noise = _divide_by_std(noise)

    # Shrink the step to 140x140 and zero-pad it by 42 pixels on each side.
    patch = tf.image.resize_images(
        noise, [140, 140], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    print("noise shape:", noise.shape)
    patch = alpha * tf.clip_by_value(tf.round(patch), -2, 2)
    padded_patch = tf.pad(
        patch, [[0, 0], [42, 42], [42, 42], [0, 0]], constant_values=0.)

    x = x - padded_patch
    x = tf.clip_by_value(x, x_min, x_max)
    print("x.shape:", x.shape)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def target_graph(x, y, i, x_max, x_min, grad):
    """One iteration of the targeted momentum attack on a 3-model ensemble.

    Args:
        x: Current adversarial batch (treated as lying in [-1, 1]).
        y: Target labels.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        Tuple ``(x, y, i, x_max, x_min, noise)`` with the updated image,
        unchanged labels, incremented counter and new accumulated gradient.
    """
    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    def _divide_by_std(tensor):
        # Divide each example by the std-dev of its flattened values.
        flat = tf.reshape(tensor, [batch_size, -1])
        per_example_std = tf.contrib.keras.backend.std(flat, axis=1)
        return tensor / tf.reshape(per_example_std, [batch_size, 1, 1, 1])

    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_end_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False, scope='InceptionV1')

    # Rescale pixel range from [-1, 1] to [0, 255] for resnet_v1 and vgg input.
    pixels = (((x + 1.0) * 0.5) * 255.0)

    resnet_input = preprocess_for_model(pixels, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_end_points = resnet_v1.resnet_v1_50(
            resnet_input,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_end_points['logits'] = tf.squeeze(
        res_end_points['resnet_v1_50/logits'], [1, 2])
    res_end_points['probs'] = tf.nn.softmax(res_end_points['logits'])

    vgg_input = preprocess_for_model(pixels, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_end_points = vgg.vgg_16(
            vgg_input,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_end_points['logits'] = vgg_end_points['vgg_16/fc8']
    vgg_end_points['probs'] = tf.nn.softmax(vgg_end_points['logits'])

    one_hot = tf.one_hot(y, num_classes)
    logits = (inc_end_points['Logits'] +
              res_end_points['logits'] +
              vgg_end_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    # Normalize the gradient, add momentum, normalize again.
    noise = tf.gradients(cross_entropy, x)[0]
    noise = _divide_by_std(noise)
    noise = momentum * grad + noise
    noise = _divide_by_std(noise)

    # Step against the loss gradient (targeted attack), with a clipped step.
    x = x - alpha * tf.clip_by_value(tf.round(noise), -2, 2)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def net_graph_debug(inputs_X):
    """Debug variant of the ResNet-v1-50 graph with four side encoders.

    Each encoder taps the last ReLU of one ResNet block by tensor name,
    applies a stride-2 1x1 and a stride-2 3x3 convolution, and
    global-average-pools the result. The pooled vectors are concatenated and
    fed to a single ReLU fully connected unit.

    Args:
        inputs_X: Input image batch for the ResNet.

    Returns:
        Tuple ``(predictions, current_epoch)`` where ``current_epoch`` is a
        variable initialized to 0.
    """
    def encoder(tensor_name, layer_name):
        """Encode the graph tensor ``tensor_name`` into a pooled 256-d vector."""
        with tf.variable_scope(layer_name):
            encoder_tensor = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [1, 1],
                stride=2,
                padding='SAME',
                scope="conv1",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                trainable=True)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [3, 3],
                stride=2,
                padding='SAME',
                scope="conv3",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                trainable=True)
            # Global average pooling over the spatial axes.
            out_tensor = math_ops.reduce_mean(
                encoder_tensor, [1, 2], name='gap', keepdims=False)
            return out_tensor

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(inputs_X, is_training=True)

    with tf.variable_scope("encoder"):
        encoder1 = encoder(
            "resnet_v1_50/block1/unit_3/bottleneck_v1/Relu:0", "encoder1")
        encoder2 = encoder(
            "resnet_v1_50/block2/unit_4/bottleneck_v1/Relu:0", "encoder2")
        encoder3 = encoder(
            "resnet_v1_50/block3/unit_6/bottleneck_v1/Relu:0", "encoder3")
        encoder4 = encoder(
            "resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0", "encoder4")
        concat = tf.concat(
            [encoder1, encoder2, encoder3, encoder4], -1, name='concat')
        predictions = layers_lib.fully_connected(
            concat, 1, activation_fn=tf.nn.relu, scope="fintune_FC")

    current_epoch = tf.Variable(0, name="current_epoch")
    # The former second ``return end_points, current_epoch`` was unreachable
    # and has been removed.
    return predictions, current_epoch
def def_net(self):
    """Build the network selected by ``net`` and store its end points.

    ``net`` is read from the enclosing scope. Supported values are 'vgg_16',
    'resnet_v1_101' and 'resnet_v1_50'. The result is stored in
    ``self.end_points``.

    Raises:
        Exception: If ``net`` matches none of the supported names.
    """
    if net not in ('vgg_16', 'resnet_v1_101', 'resnet_v1_50'):
        raise Exception('No network matched with net %s' % net)

    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            _, end_points = vgg.vgg_16(
                self.images,
                num_classes=FLAGS.num_classes,
                dropout_keep_prob=1.0,
                is_training=False)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_101(
                self.images, num_classes=FLAGS.num_classes)
    else:
        # net == 'resnet_v1_50'
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=True)):
            _, end_points = resnet_v1.resnet_v1_50(
                self.images, num_classes=FLAGS.num_classes)

    self.end_points = end_points
def build_single_resnet(train_tfdata,
                        is_train,
                        name_scope='resnet_v1_50',
                        variable_scope=''):
    """Build ResNet-v1-50 and return its output and flattened pool5 feature.

    Args:
        train_tfdata: Input image batch for the ResNet.
        is_train: Forwarded to ``resnet_arg_scope`` as ``is_training``.
        name_scope: Scope name used to locate the ``pool5`` tensor.
        variable_scope: Prefix prepended to ``name_scope`` in the tensor name.

    Returns:
        Tuple ``(identity, feature)``: the network output for
        ``FLAGS.num_class`` classes and the flattened
        '<variable_scope><name_scope>/pool5:0' tensor.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=is_train)):
        identity, end_points = resnet_v1.resnet_v1_50(
            train_tfdata, num_classes=FLAGS.num_class, global_pool=True)

    # Fetch the pooled feature map by name from the default graph.
    pool5_name = '%s%s/pool5:0' % (variable_scope, name_scope)
    pool5 = tf.get_default_graph().get_tensor_by_name(pool5_name)
    feature = slim.flatten(pool5)
    return identity, feature
def extract_features(self, inputs):
    """Mean-center ``inputs`` and run the configured ResNet backbone.

    Args:
        inputs: Image tensor; ``self.cfg.mean_pixel`` (shaped [1, 1, 1, 3])
            is subtracted from it.

    Returns:
        Tuple ``(net, end_points)`` returned by ``net_funcs[self.cfg.net_type]``.
    """
    net_fun = net_funcs[self.cfg.net_type]
    mean = tf.constant(
        self.cfg.mean_pixel,
        dtype=tf.float32,
        shape=[1, 1, 1, 3],
        name='img_mean')
    im_centered = inputs - mean

    # The slim ResNet API depends on the installed TensorFlow version.
    # Compare (major, minor) as integers: parsing the first three characters
    # as a float reads "1.10.0" as 1.1 and wrongly takes the legacy path.
    version_parts = tf.__version__.split('.')
    major_minor = (int(version_parts[0]), int(version_parts[1]))

    if major_minor < (1, 4):
        with slim.arg_scope(resnet_v1.resnet_arg_scope(False)):
            net, end_points = net_fun(
                im_centered, global_pool=False, output_stride=16)
    else:
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = net_fun(
                im_centered,
                global_pool=False,
                output_stride=16,
                is_training=False)
    return net, end_points
def resnet_v1_101_base(input_image):
    """Build ResNet-v1-101 without a logits layer and return its feature map.

    Args:
        input_image: Input image batch for the ResNet.

    Returns:
        The network output built with ``num_classes=None``,
        ``global_pool=False``, ``output_stride=16`` and ``is_training=True``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        features, _ = resnet_v1.resnet_v1_101(
            input_image,
            num_classes=None,
            global_pool=False,
            output_stride=16,
            is_training=True)
    return features
def get_backbone(self,features):
    """Build the backbone named by ``self.flags.model_variant`` and print it.

    Supported variants are 'resnet_v1_50', 'resnet_v1_101', 'resnet_v2_50'
    and 'resnet_v2_101'. Variants starting with 'xception' and any other
    value trip an assertion. The end-point keys and the output tensor are
    printed; nothing is returned in the visible code.

    Args:
        features: Input tensor (the original comment gives the shape
            [batch, 513, 513, 3]).
    """
    variant = self.flags.model_variant
    if variant.startswith('xception'):
        assert False,'not implement'
    elif variant in ('resnet_v2_50', 'resnet_v2_101'):
        resnet_module = resnet_v2
    elif variant in ('resnet_v1_50', 'resnet_v1_101'):
        # The key difference of the full preactivation 'v2' variant compared
        # to 'v1' is the use of batch normalization before every weight layer.
        resnet_module = resnet_v1
    else:
        assert False,'not implement'

    with slim.arg_scope(resnet_module.resnet_arg_scope()):
        # The builder function has the same name as the variant string.
        build_fn = getattr(resnet_module, variant)
        net, end_points = build_fn(
            features,
            self.num_classes,
            is_training=False,
            global_pool=False,
            output_stride=self.output_stride)

    print(end_points.keys())
    print(net)
def resnet_feature(self, images, scope_name, train_mode=True):
    """Build ResNet-v1-50 and return its block4 map and pooled feature.

    Args:
        images: Input image batch for the ResNet.
        scope_name: Prefix of the end-point key
            '<scope_name>/resnet_v1_50/block4'.
        train_mode: Forwarded to the ResNet builder as ``is_training``.

    Returns:
        Tuple ``(resnet_block_4, resnet_feature)``: the block4 end point and
        its spatial mean reshaped to [-1, 2048].
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            images, 1000, is_training=train_mode, reuse=tf.AUTO_REUSE)

    block4_key = scope_name + '/resnet_v1_50/block4'
    resnet_block_4 = end_points[block4_key]

    # Average over the spatial axes, then flatten to [-1, 2048].
    pooled = tf.reduce_mean(resnet_block_4, [1, 2], keepdims=True)
    pooled = tf.reshape(tf.squeeze(pooled), [-1, 2048])
    return resnet_block_4, pooled
def _get_endpoints(model_name, img_tensor):
    """Build the named classifier and return its prediction end point.

    Args:
        model_name: "res50", "res152", or any name starting with "mobilenet".
        img_tensor: Input image batch.

    Returns:
        The "predictions" end point for the ResNets, the "Predictions" end
        point for MobileNet-v2, or ``None`` for any other ``model_name``.
    """
    if model_name == "res50" or model_name == "res152":
        if model_name == "res50":
            build_fn = resnet_v1.resnet_v1_50
        else:
            build_fn = resnet_v1.resnet_v1_152
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = build_fn(img_tensor, 1000, is_training=False)
        return end_points["predictions"]

    if model_name.startswith("mobilenet"):
        mobilenet_scope = mobilenet_v2.training_scope(is_training=False)
        with tf.contrib.slim.arg_scope(mobilenet_scope):
            _, endpoints = mobilenet_v2.mobilenet(img_tensor)
        return endpoints["Predictions"]

    return None
def extract_features(self, inputs):
    """Mean-center ``inputs`` and run the configured ResNet backbone.

    Args:
        inputs: Image tensor; ``self.cfg.mean_pixel`` (shaped [1, 1, 1, 3])
            is subtracted from it.

    Returns:
        Tuple ``(net, end_points)`` returned by ``net_funcs[self.cfg.net_type]``.
    """
    backbone = net_funcs[self.cfg.net_type]
    pixel_mean = tf.constant(
        self.cfg.mean_pixel,
        dtype=tf.float32,
        shape=[1, 1, 1, 3],
        name='img_mean')
    centered = inputs - pixel_mean

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = backbone(
            centered,
            global_pool=False,
            output_stride=16,
            is_training=False)
    return net, end_points
def setUp(self):
    """Build and restore ResNet-v1-50 and create the Grad-CAM inspector.

    Sets ``self.nbclasses``, ``self.graph_origin`` (a snapshot of the graph
    definition taken before the inspector is created), ``self.insp`` and
    ``self.sess``.
    """
    tf.reset_default_graph()
    self.nbclasses = 1000

    image_ph = tf.placeholder(tf.float32, [1, 224, 224, 3])
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(
            image_ph, self.nbclasses, is_training=False)

    saver = tf.train.Saver(tf.global_variables())
    session = tf.InteractiveSession()
    saver.restore(session, 'test/data/resnet_v1_50.ckpt')

    # Snapshot the graph before Gradcam is constructed.
    self.graph_origin = tf.get_default_graph().as_graph_def()
    target_conv = 'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu'
    self.insp = darkon.Gradcam(image_ph, self.nbclasses, target_conv)
    self.sess = session
def build_single_resnet(train_tfdata, is_train, name_scope = 'resnet_v1_50', variable_scope = ''):
    """Build ResNet-v1-50 and return its output and flattened pool5 feature.

    Args:
        train_tfdata: Input image batch for the ResNet.
        is_train: Forwarded to ``resnet_arg_scope`` as ``is_training``.
        name_scope: Scope name used to locate the ``pool5`` tensor.
        variable_scope: Prefix prepended to ``name_scope`` in the tensor name.

    Returns:
        Tuple ``(identity, feature)``: the network output for
        ``FLAGS.num_class`` classes and the flattened
        '<variable_scope><name_scope>/pool5:0' tensor.
    """
    scope_args = resnet_v1.resnet_arg_scope(is_training=is_train)
    with slim.arg_scope(scope_args):
        identity, end_points = resnet_v1.resnet_v1_50(
            train_tfdata, num_classes=FLAGS.num_class, global_pool=True)

    # Look up the pooled feature map by name in the default graph.
    graph = tf.get_default_graph()
    pooled = graph.get_tensor_by_name(
        '%s%s/pool5:0' % (variable_scope, name_scope))
    feature = slim.flatten(pooled)
    return identity, feature