def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnet_v2_101 model."""
    # Parse input arguments into class variables
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v2_101.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    self.train_layers = train_layers

    with tf.variable_scope("input"):
        self.image_size = resnet_v2.resnet_v2_101.default_image_size
        self.x_input = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name="x_input")
        self.y_input = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # train
    with arg_scope(resnet_v2.resnet_arg_scope()):
        self.logits, _ = resnet_v2.resnet_v2_101(self.x_input,
                                                 num_classes=num_classes,
                                                 is_training=True,
                                                 reuse=tf.AUTO_REUSE)

    # validation
    with arg_scope(resnet_v2.resnet_arg_scope()):
        self.logits_val, _ = resnet_v2.resnet_v2_101(self.x_input,
                                                     num_classes=num_classes,
                                                     is_training=False,
                                                     reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                       labels=self.y_input))
        self.loss_val = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_val,
                                                       labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = [v for v in tf.trainable_variables()
                    if v.name.split('/')[-2] in train_layers
                    or v.name.split('/')[-3] in train_layers]
        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                grads_and_vars=self.grads_and_vars,
                global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction,
                                      tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"),
                                       name="accuracy")
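A minimal training-step sketch for the class above. The class name ResNetModel and the numpy batches batch_x/batch_y are assumptions for illustration, not part of the original source:

# Hedged sketch: ResNetModel, batch_x and batch_y are hypothetical names.
model = ResNetModel(num_classes=10, train_layers=['logits'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss, _ = sess.run([model.loss, model.train_op],
                       feed_dict={model.x_input: batch_x,
                                  model.y_input: batch_y,
                                  model.learning_rate: 1e-3})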
def get_embeddings(instances):
    image_size = 299
    query_embeddings = {}
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        processed_image = inception_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_101(processed_image, 1001,
                                                is_training=False)
        pool5 = tf.get_default_graph().get_tensor_by_name(
            "resnet_v2_101/pool5:0")

        init_fn = slim.assign_from_checkpoint_fn(
            'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            for cls_id in instances.keys():
                ins, patch = instances[cls_id]
                scaled_img, logit_vals, embedding = sess.run(
                    [processed_image, logits, pool5],
                    feed_dict={image: patch})
                query_embeddings[cls_id] = embedding[0, 0, 0, :]
    return query_embeddings
def graph(x, y, i, x_max, x_min, grad):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            x + momentum * grad, num_classes=num_classes, is_training=False)

    pred = tf.argmax(end_points_resnet['predictions'], 1)
    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    logits = logits_resnet / 7.25
    cross_entropy = tf.losses.softmax_cross_entropy(one_hot, logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    noise = tf.gradients(cross_entropy, x)[0]
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise
    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
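The function above is a tf.while_loop body for momentum iterative FGSM. A minimal driver sketch, assuming a batch placeholder already scaled to [-1, 1] and a counter-based stop condition (both assumptions; the original driver is not shown here):

# Hedged sketch: batch shape and stop() condition are assumptions.
def stop(x, y, i, x_max, x_min, grad):
    return tf.less(i, FLAGS.num_iter)

eps = 2.0 * FLAGS.max_epsilon / 255.0
x_input = tf.placeholder(tf.float32, [FLAGS.batch_size, 299, 299, 3])
x_max = tf.clip_by_value(x_input + eps, -1.0, 1.0)
x_min = tf.clip_by_value(x_input - eps, -1.0, 1.0)
y0 = tf.zeros([FLAGS.batch_size], dtype=tf.int64)
grad0 = tf.zeros_like(x_input)
x_adv, _, _, _, _, _ = tf.while_loop(
    stop, graph, [x_input, y0, tf.constant(0), x_max, x_min, grad0])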
def test_resnet_v2_101(img_dir):
    """
    Test ResNet-V2-101 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224)) / 255
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v2_101(inputs, 1001, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v2_101.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v2_101/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]
        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred]
        print('Result of ResNet-V2-101:', name, prob)
    return name, prob
def inference_resnet_v2_101(x_input, dropout_keep_prob=1, num_classes=1001):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_101(x_input,
                                            num_classes=num_classes,
                                            is_training=False)
    probs = tf.nn.softmax(logits)
    model_vars = [var for var in tf.global_variables()
                  if var.name.startswith('resnet_v2_101/')]
    return probs, logits, model_vars
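A usage sketch for the helper above; the checkpoint path and the images batch are assumptions:

# Hedged sketch: checkpoint path and input batch are assumptions.
x_input = tf.placeholder(tf.float32, [None, 299, 299, 3])
probs, logits, model_vars = inference_resnet_v2_101(x_input)
saver = tf.train.Saver(model_vars)
with tf.Session() as sess:
    saver.restore(sess, 'resnet_v2_101.ckpt')  # assumed checkpoint path
    p = sess.run(probs, feed_dict={x_input: images})  # images: batch in [-1, 1]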
def graph(x, y, i, x_max, x_min, grad, grad2):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_classes = 1001

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            input_diversity(x), num_classes=num_classes, is_training=False,
            scope='resnet_v2_101', reuse=tf.AUTO_REUSE)

    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet) / 4
    auxlogits = (end_points_v3['AuxLogits'] + end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(y, logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(y, auxlogits,
                                                     label_smoothing=0.0,
                                                     weights=0.4)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(noise, stack_kernel,
                                   strides=[1, 1, 1, 1], padding='SAME')
    noise1 = grad + 1.5 * noise
    noise2 = grad2 + 1.9 * noise * noise
    x = x + (eps / 17.6786) * (
        (1 - 0.9**(i + 1)) / tf.sqrt(1 - 0.99**(i + 1))) * tf.tanh(
            1.3 * noise1 / tf.sqrt(noise2))
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise1, noise2
def graph(x, y, i, grad):
    num_classes = 1001
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            x, num_classes=num_classes, is_training=False)
    pred = tf.argmax(end_points_resnet['predictions'], 1)
    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    return x, y, i, grad
def get_patch_score(query_embedding, images, num_cutoff=50):
    query_embedding = query_embedding / (
        np.linalg.norm(query_embedding, ord=2) + np.finfo(float).eps)

    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        if image.dtype != tf.float32:
            processed_image = tf.image.convert_image_dtype(image,
                                                           dtype=tf.float32)
        else:
            processed_image = image
        processed_image = tf.subtract(processed_image, 0.5)
        processed_image = tf.multiply(processed_image, 2.0)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            postnorm, _ = resnet_v2.resnet_v2_101(processed_image, None,
                                                  is_training=False,
                                                  global_pool=False,
                                                  output_stride=8)

        init_fn = slim.assign_from_checkpoint_fn(
            'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))

        image_similar_embeddings = {}
        with tf.Session() as sess:
            init_fn(sess)
            for im in tqdm(images):
                img = cv2.imread(im)
                input_img, embedding = sess.run([processed_image, postnorm],
                                                feed_dict={image: img})
                embedding = embedding / (np.expand_dims(
                    np.linalg.norm(embedding, axis=3, ord=2), axis=3) +
                    np.finfo(float).eps)
                similarity = np.tensordot(embedding, query_embedding, axes=1)
                similarity_peaks = np.unravel_index(
                    np.argsort(-similarity, axis=None), similarity.shape)
                similarity_sorted = similarity[similarity_peaks]
                similarity_coords = [np.expand_dims(c, axis=0)
                                     for c in similarity_peaks]
                similarity_coords = np.transpose(
                    np.concatenate(similarity_coords, axis=0))
                image_similar_embeddings[im] = {
                    'locs': similarity_coords[:num_cutoff],
                    'embeddings': embedding[similarity_peaks][:num_cutoff].copy(),
                    'similarity': similarity
                }
    return image_similar_embeddings
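The scoring above is plain cosine similarity between an L2-normalized query vector and every spatial position of an L2-normalized feature map. A small numpy sketch of the same computation on random data (the shapes are illustrative assumptions):

import numpy as np

# Hedged sketch of the similarity math above, on dummy data.
q = np.random.rand(2048)
q = q / (np.linalg.norm(q, ord=2) + np.finfo(float).eps)    # unit query vector
fmap = np.random.rand(1, 38, 38, 2048)                       # fake stride-8 feature map
fmap = fmap / (np.expand_dims(np.linalg.norm(fmap, axis=3, ord=2), axis=3)
               + np.finfo(float).eps)
sim = np.tensordot(fmap, q, axes=1)                          # (1, 38, 38) cosine scores
peaks = np.unravel_index(np.argsort(-sim, axis=None), sim.shape)
print(sim[peaks][:5])                                        # top-5 scores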
def model_resnet_v2_101(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implemention of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v2.resnet_v2_101(images,
                                                     is_training=is_training,
                                                     scope='resnet_v2_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape,
                                                         i, g[i].shape))

            # Here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
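A usage sketch for the EAST-style head above. mean_image_subtraction, unpool and FLAGS.text_scale come from the surrounding project; the 512x512 input size is an assumption:

# Hedged sketch: input size is assumed; outputs are at 1/4 input resolution.
images = tf.placeholder(tf.float32, [None, 512, 512, 3])
F_score, F_geometry = model_resnet_v2_101(images, is_training=False)
# F_score:    (N, 128, 128, 1) text/no-text probability map
# F_geometry: (N, 128, 128, 5) four box distances plus one rotation angle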
def resnet_v2_101(inputs, is_training, opts):
    with slim.arg_scope(resnet_v2.resnet_arg_scope(
            weight_decay=opts.weight_decay,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon,
            activation_fn=tf.nn.relu)):
        return resnet_v2.resnet_v2_101(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
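A usage sketch for the wrapper above. The opts object is an assumption; any attribute container with these fields would work:

# Hedged sketch: opts values are illustrative, not from the original source.
from argparse import Namespace

opts = Namespace(weight_decay=1e-4, batch_norm_decay=0.997,
                 batch_norm_epsilon=1e-5, num_classes=1001,
                 global_pool=True, spatial_squeeze=True)
inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
logits, end_points = resnet_v2_101(inputs, is_training=False, opts=opts)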
def create(self, images, num_classes, is_training):
    """See baseclass."""
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, endpoints = resnet_v2.resnet_v2_101(
            images, num_classes, is_training=is_training,
            spatial_squeeze=False)

    # Resnet's "predictions" endpoint is (n, 1, 1, m) but we really
    # want to have an (n, m) "Predictions" endpoint. We add a squeeze
    # op here to make that happen.
    endpoints['Predictions'] = tf.squeeze(
        endpoints['predictions'], [1, 2], name='SqueezePredictions')
    # Likewise, the endpoint "resnet_v2_101/logits" should be squeezed to
    # "Logits".
    endpoints['Logits'] = tf.squeeze(
        endpoints['resnet_v2_101/logits'], [1, 2], name='SqueezeLogits')
    return endpoints
def _build(self, x_input=None):
    reuse = True if self.built else None
    if x_input is None:
        x_input = self.input
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_101(
            x_input, num_classes=self.num_classes,
            is_training=False, reuse=reuse)
    self.built = True
    self.end_points = end_points
    self.logits = logits
    if not self.ckpt_loaded:
        saver = tf.train.Saver(slim.get_model_variables())
        saver.restore(self.sess, ckpt_dir + 'resnet_v2_101.ckpt')
        self.ckpt_loaded = True
def __call__(self, x_input, batch_size=None, is_training=False):
    """Constructs the model and returns logits for the given input."""
    reuse = True if self.built else None
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with tf.variable_scope(self.ckpt):
            logits, end_points = resnet_v2.resnet_v2_101(
                x_input, num_classes=self.num_classes,
                is_training=is_training, reuse=reuse)
        preds = tf.argmax(logits, axis=1)
    self.built = True
    self.logits = logits
    self.preds = preds
    return logits
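A usage sketch for the callable wrapper above. The constructor arguments are hypothetical; the original class definition is not shown, only its __call__:

# Hedged sketch: the constructor signature is an assumption for illustration.
model = ResNetV2Model(num_classes=1001, ckpt='resnet_v2_101')  # hypothetical
x = tf.placeholder(tf.float32, [None, 299, 299, 3])
logits = model(x)        # builds the graph on the first call
logits_again = model(x)  # later calls reuse the same variables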
def graph(x, y, i, x_max, x_min, grad, eg):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    num_classes = 1001
    ro = 0.9
    beta = 0.89
    v = 0.1

    eg = ro * eg + (1 - ro) * tf.square(grad)
    rms = tf.sqrt(eg + 0.000000001)
    x_n = x + (alpha / rms) * grad

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x_n), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x_n), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x_n), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            input_diversity(x_n), num_classes=num_classes, is_training=False,
            scope='resnet_v2_101', reuse=tf.AUTO_REUSE)

    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet) / 4
    auxlogits = (end_points_v3['AuxLogits'] + end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(y, logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(y, auxlogits,
                                                     label_smoothing=0.0,
                                                     weights=0.4)

    noise = tf.gradients(cross_entropy, x_n)[0]
    noise = tf.nn.depthwise_conv2d(noise, stack_kernel,
                                   strides=[1, 1, 1, 1], padding='SAME')
    noise1 = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = beta * grad + (1 - beta) * noise1
    noise2 = (1 - v) * noise + v * noise1
    x = x + alpha * tf.sign(noise2)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise, eg
def output_features(image_batch):
    if FLAGS.model == 'inceptionv4':
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            net, _ = inception_v4.inception_v4(image_batch, None,
                                               is_training=False)
        net = tf.squeeze(net, [1, 2])
    elif FLAGS.model == 'resnet101v2':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, _ = resnet_v2.resnet_v2_101(image_batch, None,
                                             is_training=False,
                                             global_pool=True)
        net = tf.squeeze(net, [1, 2])
    else:
        raise KeyError('{} is not supported'.format(FLAGS.model))
    return net
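A usage sketch for the feature extractor above, assuming FLAGS.model is set to 'resnet101v2' and the batch is already preprocessed to [-1, 1]:

# Hedged sketch: input size and preprocessing are assumptions.
image_batch = tf.placeholder(tf.float32, [None, 299, 299, 3])
features = output_features(image_batch)  # (N, 2048) pooled ResNet features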
def get_embeddings(instances, model_name, return_dict):
    image_size = 299
    query_embeddings = []
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        processed_image = inception_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            if model_name == 'resnet_v2_101':
                logits, _ = resnet_v2.resnet_v2_101(processed_image, 1001,
                                                    is_training=False)
                pool5 = tf.get_default_graph().get_tensor_by_name(
                    "resnet_v2_101/pool5:0")
            elif model_name == 'resnet_v2_50':
                logits, _ = resnet_v2.resnet_v2_50(processed_image, 1001,
                                                   is_training=False)
                pool5 = tf.get_default_graph().get_tensor_by_name(
                    "resnet_v2_50/pool5:0")
            else:
                print("Unknown model")
                exit(0)

        if model_name == 'resnet_v2_101':
            init_fn = slim.assign_from_checkpoint_fn(
                'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))
        elif model_name == 'resnet_v2_50':
            init_fn = slim.assign_from_checkpoint_fn(
                'resnet_v2_50.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            for ins, patch, vis_img in instances:
                scaled_img, logit_vals, embedding = sess.run(
                    [processed_image, logits, pool5],
                    feed_dict={image: patch})
                query_embeddings.append((ins, patch, vis_img,
                                         embedding[0, 0, 0, :]))
    return_dict['query_embeddings'] = query_embeddings
def get_embeddings(instances):
    image_size = 299
    names = imagenet.create_readable_names_for_imagenet_labels()
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        processed_image = inception_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_101(processed_image, 1001,
                                                is_training=False)
        pool5 = tf.get_default_graph().get_tensor_by_name(
            "resnet_v2_101/pool5:0")

        init_fn = slim.assign_from_checkpoint_fn(
            'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            for im_id in tqdm(instances.keys()):
                img = cv2.imread(instances[im_id]['path'])
                embedding_list = []
                for b in instances[im_id]['detections']:
                    x1 = int(b['bbox'][0])
                    x2 = int(b['bbox'][2])
                    y1 = int(b['bbox'][1])
                    y2 = int(b['bbox'][3])
                    if y2 - y1 >= 8 and x2 - x1 >= 8:
                        patch = img[y1:y2, x1:x2, :]
                        scaled_img, logit_vals, embedding = sess.run(
                            [processed_image, logits, pool5],
                            feed_dict={image: patch})
                        b['pool5_resnet_v2_101'] = embedding[0, 0, 0, :]
def model(images, valid_affines, seq_len, mask, weight_decay=1e-5,
          is_training=True, model=FLAGS.base_model):
    '''
    define the model, we use slim's implemention of resnet
    '''
    images = mean_image_subtraction(images, [128, 128, 128])

    if model == "resnet_v1_50":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v1_101":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v2_101":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_101(
                images, is_training=is_training, scope='resnet_v2_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "inception_v4":
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, end_points = inception_v4.inception_v4(
                images, num_classes=None, is_training=is_training,
                scope='inception_v4')
        features = ['Mixed_7b', 'Mixed_6b', 'Mixed_5a', 'Mixed_3a']
    elif model == "inception_resnet_v2":
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2.inception_resnet_v2(
                images, num_classes=None, is_training=is_training,
                scope='inception_resnet_v2')
        features = ['Mixed_7a', 'Mixed_6a', 'Mixed_5b', 'MaxPool_3a_3x3']
    # pretty(end_points)

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points[fea] for fea in features]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    c1_2 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    a = slim.conv2d(slim.conv2d(c1_1, num_outputs[i], 3),
                                    num_outputs[i] // 2, 3)
                    b = slim.conv2d(c1_2, num_outputs[i] // 2, 3)
                    h[i] = tf.concat([a, b], axis=-1)
                    # h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    # g[i] = slim.conv2d(slim.conv2d(h[i], num_outputs[i], 3), num_outputs[i], 3)
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            print('Shape before ROI rotate: {}'.format(g[3].shape))
            text_proposals = roi_rotate(g[3], valid_affines, mask)
            rotated_image = roi_rotate_test(images, valid_affines, mask)
            print('Shape after ROI rotate: {}'.format(text_proposals.shape))

            recon_f = slim.conv2d(text_proposals, 64, 3)
            recon_f = slim.conv2d(recon_f, 64, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            logits = lstm_ctc(recon_f, seq_len)

            # Here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, logits, text_proposals, g[3], rotated_image
def _construct_model(model_type='resnet_v1_50'):
    """Constructs model for the desired type of CNN.

    Args:
      model_type: Type of model to be used.

    Returns:
      end_points: A dictionary from components of the network to the
        corresponding activations.

    Raises:
      ValueError: If the model_type is not supported.
    """
    # Placeholder input.
    images = array_ops.placeholder(
        dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

    # Construct model.
    if model_type == 'inception_resnet_v2':
        _, end_points = inception.inception_resnet_v2_base(images)
    elif model_type == 'inception_resnet_v2-same':
        _, end_points = inception.inception_resnet_v2_base(
            images, align_feature_maps=True)
    elif model_type == 'inception_v2':
        _, end_points = inception.inception_v2_base(images)
    elif model_type == 'inception_v2-no-separable-conv':
        _, end_points = inception.inception_v2_base(
            images, use_separable_conv=False)
    elif model_type == 'inception_v3':
        _, end_points = inception.inception_v3_base(images)
    elif model_type == 'inception_v4':
        _, end_points = inception.inception_v4_base(images)
    elif model_type == 'alexnet_v2':
        _, end_points = alexnet.alexnet_v2(images)
    elif model_type == 'vgg_a':
        _, end_points = vgg.vgg_a(images)
    elif model_type == 'vgg_16':
        _, end_points = vgg.vgg_16(images)
    elif model_type == 'mobilenet_v1':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images)
    elif model_type == 'mobilenet_v1_075':
        _, end_points = mobilenet_v1.mobilenet_v1_base(
            images, depth_multiplier=0.75)
    elif model_type == 'resnet_v1_50':
        _, end_points = resnet_v1.resnet_v1_50(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_101':
        _, end_points = resnet_v1.resnet_v1_101(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_152':
        _, end_points = resnet_v1.resnet_v1_152(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_200':
        _, end_points = resnet_v1.resnet_v1_200(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_50':
        _, end_points = resnet_v2.resnet_v2_50(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_101':
        _, end_points = resnet_v2.resnet_v2_101(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_152':
        _, end_points = resnet_v2.resnet_v2_152(
            images, num_classes=None, is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_200':
        _, end_points = resnet_v2.resnet_v2_200(
            images, num_classes=None, is_training=False, global_pool=False)
    else:
        raise ValueError('Unsupported model_type %s.' % model_type)
    return end_points
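A usage sketch for the builder above: it lists the endpoint activations that downstream analysis (e.g. receptive-field computation) can pick from. _INPUT_NODE is defined in the surrounding module:

# Hedged sketch: just enumerates the endpoints of one supported model type.
end_points = _construct_model('resnet_v2_101')
for name, tensor in end_points.items():
    print(name, tensor.shape)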
def graph(x, y, i, x_max, x_min, grad, y_target, y_logits):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    # should keep original x here for output
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x), num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x), num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x), num_classes=num_classes, is_training=False)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_152(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    # with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    #     logits_resnet_50, end_points_resnet_50 = resnet_v2.resnet_v2_50(
    #         input_diversity(x), num_classes=num_classes, is_training=False,
    #         reuse=tf.AUTO_REUSE)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet_101, end_points_resnet_101 = resnet_v2.resnet_v2_101(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)

    logits = (logits_resnet + logits_v3 + logits_v4 + logits_res_v2 +
              logits_resnet_101) / 5
    y_oh = tf.one_hot(y, num_classes)
    y_target_oh = tf.one_hot(y_target, num_classes)

    # loss = -FLAGS.W_crs * tf.losses.softmax_cross_entropy(
    #     y_oh, logits, label_smoothing=0.0, weights=1.0)
    # loss = -Poincare_dis(tf.clip_by_value((y_oh - 0.01), 0.0, 1.0),
    #                      logits / tf.reduce_sum(tf.abs(logits), [1], keep_dims=True))
    loss_ce = tf.losses.softmax_cross_entropy(y_target_oh, logits,
                                              label_smoothing=0.0,
                                              weights=1.0)
    loss_po = Poincare_dis(
        tf.clip_by_value((y_target_oh - 0.00001), 0.0, 1.0),
        logits / tf.reduce_sum(tf.abs(logits), [1], keep_dims=True))
    loss_cos = tf.clip_by_value(
        (Cos_dis(y_oh, logits) - Cos_dis(y_target_oh, logits) + FLAGS.a),
        0.0, 2.1)
    # loss_cos = tf.maximum(loss_ce - tf.losses.softmax_cross_entropy(
    #     y_oh, logits, label_smoothing=0.0, weights=1.0) + FLAGS.a, 0.0)

    if FLAGS.loss == "ce":
        loss = loss_ce
    elif FLAGS.loss == "po":
        loss = loss_po
    elif FLAGS.loss == "trip_po":
        loss = loss_po + FLAGS.W_cos * loss_cos
    # loss += cross_entropy
    # loss += FLAGS.W_cos * loss_cos

    noise = -tf.gradients(loss, x)[0]
    # TI: translation-invariant smoothing of the gradient
    noise = tf.nn.depthwise_conv2d(noise, stack_kernel,
                                   strides=[1, 1, 1, 1], padding='SAME')
    # The cross-entropy loss must add this normalization term
    if FLAGS.loss == 'ce':
        noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3],
                                       keep_dims=True)
    noise = momentum * grad + noise
    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise, y_target, logits
def main(_):
    start = time.clock()
    # tf.set_random_seed(1)
    # np.random.seed(1)

    # Images for the inception classifier are normalized to be in the [-1, 1]
    # interval; eps is a difference between pixels, so it should be in the
    # [0, 2] interval. Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]

    tf.logging.set_verbosity(tf.logging.INFO)

    x = tf.placeholder(dtype=tf.float32, shape=batch_shape)
    x_max = tf.clip_by_value(x + eps, -1.0, 1.0)
    x_min = tf.clip_by_value(x - eps, -1.0, 1.0)
    grad = tf.zeros(shape=batch_shape)

    with tf.Session() as sess:
        num_iter = FLAGS.num_iter
        alpha = eps / num_iter
        momentum = FLAGS.momentum
        num_classes = 1001

        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_v3, end_points_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='Ens3AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='Ens4AdvInceptionV3')
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits_v4, end_points_v4 = inception_v4.inception_v4(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x, num_classes=num_classes, is_training=False,
                scope='EnsAdvInceptionResnetV2')
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
                x, num_classes=num_classes, is_training=False)

        pred_dict = (end_points_v3['Predictions'] +
                     end_points_adv_v3['Predictions'] +
                     end_points_ens3_adv_v3['Predictions'] +
                     end_points_ens4_adv_v3['Predictions'] +
                     end_points_v4['Predictions'] +
                     end_points_res_v2['Predictions'] +
                     end_points_ensadv_res_v2['Predictions'] +
                     end_points_resnet['predictions'])
        pred = tf.argmax(pred_dict, axis=1)

        logits_dict = [logits_v3, logits_ens3_adv_v3, logits_ens4_adv_v3,
                       logits_v4, logits_res_v2, logits_ensadv_res_v2,
                       logits_resnet]

        # NOTE: shape should be FLAGS.batch_size (hard-coded to 2 here).
        y = tf.placeholder(dtype=tf.int32, shape=[2])
        one_hot = tf.one_hot(y, num_classes)
        logits_gravity = tf.placeholder(dtype=tf.float32,
                                        shape=[FLAGS.batch_size, 7])

        # Weighted ensemble of the 7 models' logits, one sample at a time
        # (2 is the FLAGS.batch_size here).
        logits1 = (logits_dict[0][0] * logits_gravity[0][0] +
                   logits_dict[1][0] * logits_gravity[0][1] +
                   logits_dict[2][0] * logits_gravity[0][2] +
                   logits_dict[3][0] * logits_gravity[0][3] +
                   logits_dict[4][0] * logits_gravity[0][4] +
                   logits_dict[5][0] * logits_gravity[0][5] +
                   logits_dict[6][0] * logits_gravity[0][6]) / 28.0
        logits2 = (logits_dict[0][1] * logits_gravity[1][0] +
                   logits_dict[1][1] * logits_gravity[1][1] +
                   logits_dict[2][1] * logits_gravity[1][2] +
                   logits_dict[3][1] * logits_gravity[1][3] +
                   logits_dict[4][1] * logits_gravity[1][4] +
                   logits_dict[5][1] * logits_gravity[1][5] +
                   logits_dict[6][1] * logits_gravity[1][6]) / 28.0
        logits1 = tf.reshape(logits1, [1, 1001])
        logits2 = tf.reshape(logits2, [1, 1001])
        logits = tf.concat([logits1, logits2], 0)

        cross_entropy = tf.losses.softmax_cross_entropy(
            one_hot,
            tf.clip_by_value(logits, 1e-8, tf.reduce_max(logits)),
            label_smoothing=0.0,
            weights=1.0)
        noise = tf.gradients(cross_entropy, x)[0]
        noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3],
                                       keep_dims=True)
        grad = momentum * grad + noise
        adv = x + alpha * tf.sign(grad)
        adv = tf.clip_by_value(adv, x_min, x_max)

        # Run computation
        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s4 = tf.train.Saver(slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='InceptionV4'))
        s6 = tf.train.Saver(slim.get_model_variables(scope='InceptionResnetV2'))
        s7 = tf.train.Saver(slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s8 = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))

        s1.restore(sess, FLAGS.checkpoint_path_inception_v3)
        s2.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)
        s3.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
        s4.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
        s5.restore(sess, FLAGS.checkpoint_path_inception_v4)
        s6.restore(sess, FLAGS.checkpoint_path_inception_resnet_v2)
        s7.restore(sess, FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
        s8.restore(sess, FLAGS.checkpoint_path_resnet)

        sum = 0
        success_num = 0
        l2_distance = 0
        label_distance = 0
        for filenames, images in load_images(FLAGS.input_dir, batch_shape):
            sum += len(filenames)
            images = images.astype(np.float32)
            images_flatten_initial = images.reshape((2, 268203))
            prediction = []
            for i in range(FLAGS.num_iter):
                logits_proportion = []
                if i == 0:
                    prediction = sess.run(pred, feed_dict={x: images})
                    print('true_label::::::::', prediction)
                    (end_points_v3_run, end_points_ens3_adv_v3_run,
                     end_points_ens4_adv_v3_run, end_points_v4_run,
                     end_points_res_v2_run, end_points_ensadv_res_v2_run,
                     end_points_resnet_run) = sess.run(
                        [end_points_v3, end_points_ens3_adv_v3,
                         end_points_ens4_adv_v3, end_points_v4,
                         end_points_res_v2, end_points_ensadv_res_v2,
                         end_points_resnet],
                        feed_dict={x: images, y: prediction})
                else:
                    (adv_pred, end_points_v3_run, end_points_ens3_adv_v3_run,
                     end_points_ens4_adv_v3_run, end_points_v4_run,
                     end_points_res_v2_run, end_points_ensadv_res_v2_run,
                     end_points_resnet_run) = sess.run(
                        [pred, end_points_v3, end_points_ens3_adv_v3,
                         end_points_ens4_adv_v3, end_points_v4,
                         end_points_res_v2, end_points_ensadv_res_v2,
                         end_points_resnet],
                        feed_dict={x: images, y: prediction})
                    if i == FLAGS.num_iter - 1:
                        print('adv_pred:::::::', adv_pred)
                        for l in range(len(filenames)):
                            label_distance += abs(prediction[l] - adv_pred[l])
                            # The test tag is always 1 more than the real tag,
                            # so the two are compared directly here.
                            if int(prediction[l]) == adv_pred[l]:
                                success_num += 1

                for j in range(FLAGS.batch_size):
                    end_points_v3_Pred = end_points_v3_run['Predictions'][j][prediction[j]]
                    end_points_ens3_adv_v3_Pred = end_points_ens3_adv_v3_run['Predictions'][j][prediction[j]]
                    end_points_ens4_adv_v3_Pred = end_points_ens4_adv_v3_run['Predictions'][j][prediction[j]]
                    end_points_v4_Pred = end_points_v4_run['Predictions'][j][prediction[j]]
                    end_points_res_v2_Pred = end_points_res_v2_run['Predictions'][j][prediction[j]]
                    end_points_ensadv_res_v2_Pred = end_points_ensadv_res_v2_run['Predictions'][j][prediction[j]]
                    end_points_resnet_Pred = end_points_resnet_run['predictions'][j][prediction[j]]

                    ens_Pred_Value = [end_points_v3_Pred,
                                      end_points_ens3_adv_v3_Pred,
                                      end_points_ens4_adv_v3_Pred,
                                      end_points_v4_Pred,
                                      end_points_res_v2_Pred,
                                      end_points_ensadv_res_v2_Pred,
                                      end_points_resnet_Pred]
                    TopKFitIndx = np.argsort(ens_Pred_Value)
                    a = [0.0] * 7
                    for m in range(7):
                        a[TopKFitIndx[m]] = 7 - m
                    logits_proportion.append(a)

                images = sess.run(adv, feed_dict={
                    x: images,
                    logits_gravity: logits_proportion,
                    y: prediction
                })
                if i == FLAGS.num_iter - 1:
                    images_flatten_adv = images.reshape((2, 268203))
                    l2_distance_list = np.linalg.norm(
                        (images_flatten_initial - images_flatten_adv),
                        axis=1, keepdims=True)
                    for n in range(len(filenames)):
                        l2_distance += l2_distance_list[n]

            save_images(images, filenames, FLAGS.output_dir)

        print('sum::::', sum)
        print('success_num::::', success_num)
        rate_wrong = success_num / sum
        print('rate_wrong:::::::', rate_wrong)
        print('l2_distance:::::::', l2_distance)
        print('label_distance:::::::', label_distance)
    end = time.clock()
    print('run time:::::', end - start)
def Evaluator(x, y):
    num_classes = 1001
    # should keep original x here for output
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            x, num_classes=num_classes, is_training=False, reuse=True)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_152(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet_101, end_points_resnet_101 = resnet_v2.resnet_v2_101(
            x, num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet_50, end_points_resnet_50 = resnet_v2.resnet_v2_50(
            x, num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='AdvInceptionV3')
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='Ens3AdvInceptionV3')
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='Ens4AdvInceptionV3')
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
            x, num_classes=num_classes, is_training=False,
            scope='EnsAdvInceptionResnetV2')

    # Ensemble logits: drop the attacked model from the white-box ensemble,
    # or fold in the adversarially trained models for the ens* settings.
    logits_esm = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet +
                  logits_resnet_50 + logits_resnet_101)
    if FLAGS.att_model == "incep_v3":
        logits_esm = (logits_esm - logits_v3) / 5
    elif FLAGS.att_model == "incep_v4":
        logits_esm = (logits_esm - logits_v4) / 5
    elif FLAGS.att_model == "incep_res_v2":
        logits_esm = (logits_esm - logits_res_v2) / 5
    elif FLAGS.att_model == "resnet_50":
        logits_esm = (logits_esm - logits_resnet_50) / 5
    elif FLAGS.att_model == "resnet_101":
        logits_esm = (logits_esm - logits_resnet_101) / 5
    elif FLAGS.att_model == "resnet_152":
        logits_esm = (logits_esm - logits_resnet) / 5
    elif FLAGS.att_model == "ens3_adv_3":
        logits_esm = (logits_esm + logits_ens4_adv_v3 + logits_ensadv_res_v2) / 8
    elif FLAGS.att_model == "ens4_adv_3":
        logits_esm = (logits_esm + logits_ens3_adv_v3 + logits_ensadv_res_v2) / 8
    elif FLAGS.att_model == "ensadv_res_2":
        logits_esm = (logits_esm + logits_ens4_adv_v3 + logits_ens3_adv_v3) / 8

    # top-k accuracy counts
    top_k = FLAGS.top_k
    acc_v3 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_v3['Predictions'], y, k=top_k), tf.float32))
    acc_v4 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_v4['Predictions'], y, k=top_k), tf.float32))
    acc_res_v2 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_res_v2['Predictions'], y, k=top_k),
        tf.float32))
    acc_resnet = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_resnet['predictions'], y, k=top_k),
        tf.float32))
    acc_resnet_50 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_resnet_50['predictions'], y, k=top_k),
        tf.float32))
    acc_resnet_101 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_resnet_101['predictions'], y, k=top_k),
        tf.float32))
    acc_adv_v3 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_adv_v3['Predictions'], y, k=top_k),
        tf.float32))
    acc_ens3_adv_v3 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_ens3_adv_v3['Predictions'], y, k=top_k),
        tf.float32))
    acc_ens4_adv_v3 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_ens4_adv_v3['Predictions'], y, k=top_k),
        tf.float32))
    acc_ensadv_res_v2 = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(end_points_ensadv_res_v2['Predictions'], y, k=top_k),
        tf.float32))
    acc_esm = tf.reduce_sum(tf.cast(
        tf.nn.in_top_k(slim.softmax(logits_esm, scope='predictions'), y,
                       k=top_k),
        tf.float32))

    return acc_v3, acc_v4, acc_res_v2, acc_resnet, acc_resnet_50, acc_resnet_101, \
        acc_adv_v3, acc_ens3_adv_v3, acc_ens4_adv_v3, acc_ensadv_res_v2, acc_esm, \
        end_points_v3, end_points_v4, end_points_res_v2, end_points_resnet
def graph(x, y, i, x_max, x_min, g1, g2, e):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    num_classes = 1001
    beta = 0.91
    k1 = 0.99
    k2 = 0.72
    k3 = 1.44

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x), num_classes=num_classes, is_training=False,
            reuse=tf.AUTO_REUSE)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            input_diversity(x), num_classes=num_classes, is_training=False,
            scope='resnet_v2_101', reuse=tf.AUTO_REUSE)

    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet) / 4
    auxlogits = (end_points_v3['AuxLogits'] + end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(y, logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(y, auxlogits,
                                                     label_smoothing=0.0,
                                                     weights=0.4)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(noise, stack_kernel,
                                   strides=[1, 1, 1, 1], padding='SAME')
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    g1 = beta * g1 + (1 - beta) * (noise - e)
    g2 = g2 + noise
    x = x + alpha * tf.sign(k1 * noise + k2 * g1 + k3 * g2)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    e = noise
    return x, y, i, x_max, x_min, g1, g2, e
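A driver sketch for the loop body above. Note that y is consumed as one-hot labels by tf.losses.softmax_cross_entropy; the stop condition, placeholder shapes, and eps bounds are assumptions:

# Hedged sketch: the original driver is not shown; these feeds are assumed.
def stop(x, y, i, x_max, x_min, g1, g2, e):
    return tf.less(i, FLAGS.num_iter)

eps = 2.0 * FLAGS.max_epsilon / 255.0
x_input = tf.placeholder(tf.float32, [FLAGS.batch_size, 299, 299, 3])
y_one_hot = tf.placeholder(tf.float32, [FLAGS.batch_size, 1001])
x_max = tf.clip_by_value(x_input + eps, -1.0, 1.0)
x_min = tf.clip_by_value(x_input - eps, -1.0, 1.0)
zeros = tf.zeros_like(x_input)
x_adv, _, _, _, _, _, _, _ = tf.while_loop(
    stop, graph,
    [x_input, y_one_hot, tf.constant(0), x_max, x_min, zeros, zeros, zeros])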
def main(_):
    start = time.time()  # time.clock() was removed in Python 3.8
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    tf.logging.set_verbosity(tf.logging.INFO)

    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    with tf.Session() as sess:
        x = tf.placeholder(dtype=tf.float32, shape=batch_shape)
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_v3, end_points_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='Ens3AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                x, num_classes=num_classes, is_training=False,
                scope='Ens4AdvInceptionV3')
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits_v4, end_points_v4 = inception_v4.inception_v4(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x, num_classes=num_classes, is_training=False,
                scope='EnsAdvInceptionResnetV2')
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
                x, num_classes=num_classes, is_training=False)

        # Ensemble prediction: sum of the eight models' softmax outputs
        # (the slim ResNet uses a lowercase 'predictions' key).
        pred = tf.argmax(
            end_points_v3['Predictions'] + end_points_adv_v3['Predictions'] +
            end_points_ens3_adv_v3['Predictions'] +
            end_points_ens4_adv_v3['Predictions'] +
            end_points_v4['Predictions'] + end_points_res_v2['Predictions'] +
            end_points_ensadv_res_v2['Predictions'] +
            end_points_resnet['predictions'], 1)

        y = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
        one_hot = tf.one_hot(y, num_classes)

        logits_dict = [
            logits_v3, logits_adv_v3, logits_ens3_adv_v3, logits_ens4_adv_v3,
            logits_v4, logits_res_v2, logits_ensadv_res_v2, logits_resnet
        ]
        # Per-model weights fed at run time; each row holds the ranks 1..8,
        # which sum to 36, hence the normalization below.
        logits_gravity = tf.placeholder(dtype=tf.float32,
                                        shape=[FLAGS.batch_size, 8])
        # NOTE: the fusion below hardcodes FLAGS.batch_size == 2.
        logits0 = tf.add_n([logits_dict[k][0] * logits_gravity[0][k]
                            for k in range(8)]) / 36
        logits1 = tf.add_n([logits_dict[k][1] * logits_gravity[1][k]
                            for k in range(8)]) / 36
        logits0 = tf.reshape(logits0, [1, 1001])
        logits1 = tf.reshape(logits1, [1, 1001])
        logits = tf.concat([logits0, logits1], 0)

        cross_entropy = tf.losses.softmax_cross_entropy(one_hot,
                                                        logits,
                                                        label_smoothing=0.0,
                                                        weights=1.0)
        noise = tf.gradients(cross_entropy, x)[0]
        # normalize the gradient; an alternative is
        # noise = noise / tf.reduce_sum(tf.abs(noise), [1, 2, 3], keep_dims=True)
        noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
        grad = tf.placeholder(tf.float32, shape=batch_shape)
        noise = momentum * grad + noise
        adv = x + alpha * tf.sign(noise)
        x_max = tf.placeholder(tf.float32, shape=batch_shape)
        x_min = tf.placeholder(tf.float32, shape=batch_shape)
        adv = tf.clip_by_value(adv, x_min, x_max)

        # Run computation
        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s4 = tf.train.Saver(slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='InceptionV4'))
        s6 = tf.train.Saver(slim.get_model_variables(scope='InceptionResnetV2'))
        s7 = tf.train.Saver(slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s8 = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))

        s1.restore(sess, FLAGS.checkpoint_path_inception_v3)
        s2.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)
        s3.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
        s4.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
        s5.restore(sess, FLAGS.checkpoint_path_inception_v4)
        s6.restore(sess, FLAGS.checkpoint_path_inception_resnet_v2)
        s7.restore(sess, FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
        s8.restore(sess, FLAGS.checkpoint_path_resnet)

        num_images = 0  # renamed from `sum`, which shadowed the builtin
        failure_num = 0
        l2_distance = 0
        label_distance = 0
        for filenames, images in load_images(FLAGS.input_dir, batch_shape):
            # the cast may be unnecessary if load_images already yields float32
            images = images.astype(np.float32)
            # 268203 = 299 * 299 * 3
            images_flatten_initial = images.reshape((FLAGS.batch_size, -1))
            num_images += len(filenames)
            # x_max and x_min stay fixed for each image across all iterations.
            x_Max = np.clip(images + eps, -1.0, 1.0)
            x_Min = np.clip(images - eps, -1.0, 1.0)
            prediction = []
            Noise = []
            for i in range(FLAGS.num_iter):
                if i == 0:
                    # the ensemble's prediction serves as the label to attack
                    prediction = sess.run(pred, feed_dict={x: images})
                    print('true_label::::::::', prediction)
                End_points_v3, End_points_adv_v3, End_points_ens3_adv_v3, \
                    End_points_ens4_adv_v3, End_points_v4, End_points_res_v2, \
                    End_points_ensadv_res_v2, End_points_resnet = sess.run(
                        [end_points_v3, end_points_adv_v3,
                         end_points_ens3_adv_v3, end_points_ens4_adv_v3,
                         end_points_v4, end_points_res_v2,
                         end_points_ensadv_res_v2, end_points_resnet],
                        feed_dict={x: images, y: prediction})
                logits_proportion = []
                for j in range(FLAGS.batch_size):
                    # each model's confidence in the current label
                    model_preds = [
                        ('end_points_v3_Pred',
                         End_points_v3['Predictions'][j][prediction[j]]),
                        ('end_points_adv_v3_Pred',
                         End_points_adv_v3['Predictions'][j][prediction[j]]),
                        ('end_points_ens3_adv_v3_Pred',
                         End_points_ens3_adv_v3['Predictions'][j][prediction[j]]),
                        ('end_points_ens4_adv_v3_Pred',
                         End_points_ens4_adv_v3['Predictions'][j][prediction[j]]),
                        ('end_points_v4_Pred',
                         End_points_v4['Predictions'][j][prediction[j]]),
                        ('end_points_res_v2_Pred',
                         End_points_res_v2['Predictions'][j][prediction[j]]),
                        ('end_points_ensadv_res_v2_Pred',
                         End_points_ensadv_res_v2['Predictions'][j][prediction[j]]),
                        ('end_points_resnet_Pred',
                         End_points_resnet['predictions'][j][prediction[j]]),
                    ]
                    for name, value in model_preds:
                        print(name + '::::::', value)
                    ens_Pred_Value = np.array([v for _, v in model_preds])
                    print('ens_Pred_Value:::::', ens_Pred_Value)
                    # Rank the models by confidence: the least confident model
                    # gets the largest weight (8), the most confident gets 1.
                    TopKFitIndx = np.argsort(ens_Pred_Value)
                    a = [0.0] * 8
                    for m in range(8):
                        a[TopKFitIndx[m]] = 8 - m  # or a[m] = 1 for uniform weights
                    logits_proportion.append(a)
                if i == 0:
                    Grad = np.zeros(shape=[FLAGS.batch_size, 299, 299, 3],
                                    dtype=np.float32)
                else:
                    Grad = Noise  # accumulated momentum from the previous step
                Noise, images = sess.run([noise, adv],
                                         feed_dict={
                                             x: images,
                                             y: prediction,
                                             logits_gravity: logits_proportion,
                                             grad: Grad,
                                             x_max: x_Max,
                                             x_min: x_Min
                                         })
            print('images::::::', images)
            adv_prediction = sess.run(pred, feed_dict={x: images})
            images_flatten_adv = images.reshape((FLAGS.batch_size, -1))
            save_images(images, filenames, FLAGS.output_dir)
            l2_distance_list = np.linalg.norm(
                images_flatten_initial - images_flatten_adv,
                axis=1, keepdims=True)
            for n in range(len(filenames)):
                l2_distance += l2_distance_list[n]
            for j in range(len(filenames)):
                label_distance += abs(prediction[j] - adv_prediction[j])
                if int(prediction[j]) == adv_prediction[j]:
                    failure_num += 1
                    print('failure_num:::::', failure_num)
            print('Prediction:::::::', adv_prediction)

        print('num_images::::', num_images)
        print('failure_num::::', failure_num)
        rate_wrong = failure_num / num_images
        print('rate_wrong:::::::', rate_wrong)
        print('l2_distance:::::::', l2_distance)
        print('label_distance::::::', label_distance)
        end = time.time()
        print('run time::::::::', end - start)
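# The rank-based weighting above is the core of this attack variant. A small
# NumPy sketch (illustrative values only, not from the original code) shows
# how the per-model weights are derived and why the fused logits are divided
# by 36 = 8 + 7 + ... + 1:
import numpy as np

ens_pred_value = np.array([0.91, 0.12, 0.55, 0.73, 0.88, 0.34, 0.66, 0.98])
weights = np.zeros(8)
for rank, model_idx in enumerate(np.argsort(ens_pred_value)):
    weights[model_idx] = 8 - rank
print(weights)       # -> [2. 8. 6. 4. 3. 7. 5. 1.]: weight 8 goes to the model
                     #    least confident in the current label, focusing the
                     #    gradient on the models that are hardest to fool
print(weights.sum()) # -> 36.0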
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_input, num_classes=num_classes, is_training=False)
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ens_adv_res_v2, end_points_ens_adv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_input, num_classes=num_classes, is_training=False,
                scope='EnsAdvInceptionResnetV2')
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
                x_input, num_classes=num_classes, is_training=False)

        pred_v3 = tf.argmax(end_points_v3['Predictions'], 1)
        pred_adv_v3 = tf.argmax(end_points_adv_v3['Predictions'], 1)
        pred_ens3_adv_v3 = tf.argmax(end_points_ens3_adv_v3['Predictions'], 1)
        pred_ens4_adv_v3 = tf.argmax(end_points_ens4_adv_v3['Predictions'], 1)
        pred_v4 = tf.argmax(end_points_v4['Predictions'], 1)
        pred_res_v2 = tf.argmax(end_points_res_v2['Predictions'], 1)
        pred_ens_adv_res_v2 = tf.argmax(end_points_ens_adv_res_v2['Predictions'], 1)
        pred_resnet = tf.argmax(end_points_resnet['predictions'], 1)

        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='Ens3AdvInceptionV3'))
def graph(x, y, i, x_max, x_min, grad):
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='AdvInceptionV3')
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='Ens3AdvInceptionV3')
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
            x, num_classes=num_classes, is_training=False,
            scope='Ens4AdvInceptionV3')
    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            x, num_classes=num_classes, is_training=False)
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
            x, num_classes=num_classes, is_training=False,
            scope='EnsAdvInceptionResnetV2')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            x, num_classes=num_classes, is_training=False)

    pred = tf.argmax(
        end_points_v3['Predictions'] + end_points_adv_v3['Predictions'] +
        end_points_ens3_adv_v3['Predictions'] +
        end_points_ens4_adv_v3['Predictions'] +
        end_points_v4['Predictions'] + end_points_res_v2['Predictions'] +
        end_points_ensadv_res_v2['Predictions'] +
        end_points_resnet['predictions'], 1)

    # On the first iteration the ensemble's own prediction becomes the label.
    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    # Fuse the logits; AdvInceptionV3 is down-weighted to 0.25, so the weights
    # sum to 7.25 (6.25 for the aux logits: resnet_v2 has no AuxLogits head).
    logits = (logits_v3 + 0.25 * logits_adv_v3 + logits_ens3_adv_v3 +
              logits_ens4_adv_v3 + logits_v4 +
              logits_res_v2 + logits_ensadv_res_v2 + logits_resnet) / 7.25
    auxlogits = (end_points_v3['AuxLogits'] +
                 0.25 * end_points_adv_v3['AuxLogits'] +
                 end_points_ens3_adv_v3['AuxLogits'] +
                 end_points_ens4_adv_v3['AuxLogits'] +
                 end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits'] +
                 end_points_ensadv_res_v2['AuxLogits']) / 6.25
    cross_entropy = tf.losses.softmax_cross_entropy(one_hot,
                                                    logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(one_hot,
                                                     auxlogits,
                                                     label_smoothing=0.0,
                                                     weights=0.4)
    noise = tf.gradients(cross_entropy, x)[0]
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise
    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
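# A sketch of how a step function like `graph` is typically driven: paired
# with a `stop` predicate inside tf.while_loop, as in the standard MI-FGSM
# scripts. The names below (stop, x_input, x_adv) are illustrative, not from
# the original file.
def stop(x, y, i, x_max, x_min, grad):
    return tf.less(i, FLAGS.num_iter)

batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
eps = 2.0 * FLAGS.max_epsilon / 255.0
x_input = tf.placeholder(tf.float32, shape=batch_shape)
x_max = tf.clip_by_value(x_input + eps, -1.0, 1.0)
x_min = tf.clip_by_value(x_input - eps, -1.0, 1.0)
y = tf.constant(np.zeros([FLAGS.batch_size]), tf.int64)
i = tf.constant(0)
grad = tf.zeros(shape=batch_shape)
x_adv, _, _, _, _, _ = tf.while_loop(stop, graph,
                                     [x_input, y, i, x_max, x_min, grad])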
def train():
    eps = 2.0 * float(FLAGS.max_epsilon) / 256.0
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Design architecture
        # input
        x_data = tf.placeholder(tf.float32,
                                [None, FLAGS.img_height, FLAGS.img_width, 3],
                                name="x_data")
        y_label = tf.placeholder(tf.float32, [None, FLAGS.num_classes],
                                 name="y_label")

        # generator
        x_generated, g_params = build_generator(x_data, FLAGS)
        x_generated = x_generated * eps
        x_generated = x_data + x_generated

        # discriminator (Inception v3)
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            _, end_points = inception.inception_v3(
                x_generated, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels = tf.argmax(end_points['Predictions'], 1)
        predicted_logits = end_points['Logits']
        disc_var_list = slim.get_model_variables()

        # discriminator (ResNet v2 50); the ResNets expect 224x224 inputs
        x_generated2 = tf.image.resize_bilinear(x_generated, [224, 224],
                                                align_corners=False)
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            _, end_points2 = resnet_v2.resnet_v2_50(
                x_generated2, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels2 = tf.argmax(end_points2['predictions'], 1)
        predicted_logits2 = end_points2['predictions']
        disc_var_list2 = slim.get_model_variables()[len(disc_var_list):]

        # discriminator (ResNet v2 152)
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            _, end_points3 = resnet_v2.resnet_v2_152(
                x_generated2, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels3 = tf.argmax(end_points3['predictions'], 1)
        predicted_logits3 = end_points3['predictions']
        disc_var_list3 = slim.get_model_variables()[(len(disc_var_list) +
                                                     len(disc_var_list2)):]

        # discriminator (ResNet v2 101)
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            _, end_points4 = resnet_v2.resnet_v2_101(
                x_generated2, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels4 = tf.argmax(end_points4['predictions'], 1)
        predicted_logits4 = end_points4['predictions']
        disc_var_list4 = slim.get_model_variables()[(len(disc_var_list) +
                                                     len(disc_var_list2) +
                                                     len(disc_var_list3)):]

        # discriminator (Inception v4)
        with slim.arg_scope(inception.inception_v4_arg_scope()):
            _, end_points5 = inception.inception_v4(
                x_generated, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels5 = tf.argmax(end_points5['Predictions'], 1)
        predicted_logits5 = end_points5['Logits']  # fixed: was end_points['Logits']
        disc_var_list5 = slim.get_model_variables()[(len(disc_var_list) +
                                                     len(disc_var_list2) +
                                                     len(disc_var_list3) +
                                                     len(disc_var_list4)):]

        """
        # discriminator (VGG 19)
        with slim.arg_scope(vgg.vgg_arg_scope()):
            _, end_points3 = vgg.vgg_19(
                x_generated2, num_classes=FLAGS.num_classes, is_training=False)
        predicted_labels3 = tf.argmax(end_points3['vgg_19/fc8'], 1)
        predicted_logits3 = end_points3['vgg_19/fc8']
        disc_var_list3 = slim.get_model_variables()[(len(disc_var_list) + len(disc_var_list2)):]
        """

        # loss and optimizer
        gen_acc = tf.reduce_mean(
            tf.cast(tf.equal(predicted_labels, tf.argmax(y_label, 1)),
                    tf.float32))
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=predicted_logits,
                                                    labels=y_label))
        gen_acc2 = tf.reduce_mean(
            tf.cast(tf.equal(predicted_labels2, tf.argmax(y_label, 1)),
                    tf.float32))
        cross_entropy2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=predicted_logits2,
                                                    labels=y_label))
        gen_acc3 = tf.reduce_mean(
            tf.cast(tf.equal(predicted_labels3, tf.argmax(y_label, 1)),
                    tf.float32))
        cross_entropy3 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=predicted_logits3,
                                                    labels=y_label))
        gen_acc4 = tf.reduce_mean(
            tf.cast(tf.equal(predicted_labels4, tf.argmax(y_label, 1)),
                    tf.float32))
        cross_entropy4 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=predicted_logits4,
                                                    labels=y_label))
        gen_acc5 = tf.reduce_mean(
            tf.cast(tf.equal(predicted_labels5, tf.argmax(y_label, 1)),
                    tf.float32))
        cross_entropy5 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=predicted_logits5,
                                                    labels=y_label))
        infi_norm = tf.reduce_mean(
            tf.norm(tf.reshape(abs(x_data - x_generated),
                               [-1, FLAGS.img_size]),
                    ord=np.inf, axis=1))

        # the generator maximizes every discriminator's cross-entropy
        g_loss = -1 * cross_entropy - 1 * cross_entropy2 - 1 * cross_entropy3 \
                 - 1 * cross_entropy4 - 1 * cross_entropy5

        optimizer = tf.train.AdamOptimizer(0.0001)
        g_trainer = optimizer.minimize(g_loss, var_list=g_params)

        # get the data and label
        img_list = np.sort(glob.glob(FLAGS.input_folder + "*.png"))
        total_data = np.zeros(
            (len(img_list), FLAGS.img_height, FLAGS.img_width, 3), dtype=float)
        for i in range(len(img_list)):
            total_data[i] = imread(img_list[i], mode='RGB').astype(
                np.float) / 255.0
            total_data[i] = total_data[i] * 2.0 - 1.0  # 0~1 -> -1~1
        val_data = np.copy(total_data[0])
        f = open(FLAGS.label_folder + "true_label", "r")
        total_label2 = np.array([i[:-1].split(",")[1] for i in f.readlines()],
                                dtype=int)
        total_label = np.zeros((len(total_data), FLAGS.num_classes), dtype=int)
        for i in range(len(total_data)):
            total_label[i, total_label2[i]] = 1
        val_label = np.copy(total_label[0])

        # shuffle (np.arange instead of range, so the index array can be
        # shuffled and used for fancy indexing under Python 3)
        total_idx = np.arange(len(total_data))
        np.random.shuffle(total_idx)
        total_data = total_data[total_idx]
        total_label = total_label[total_idx]

        # Run computation
        saver = tf.train.Saver(disc_var_list)
        saver2 = tf.train.Saver(disc_var_list2)
        saver3 = tf.train.Saver(disc_var_list3)
        # NOTE: disc_var_list4 (ResNet v2 101) and disc_var_list5 (Inception
        # v4) are never restored below, so those discriminators run with
        # randomly initialized weights.
        saver_gen = tf.train.Saver(g_params)

        # initialization
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess,
                          FLAGS.checkpoint_path + FLAGS.checkpoint_file_name)
            saver2.restore(sess,
                           FLAGS.checkpoint_path + FLAGS.checkpoint_file_name2)
            saver3.restore(sess,
                           FLAGS.checkpoint_path + FLAGS.checkpoint_file_name3)

            # training
            for i in range(FLAGS.max_epoch):
                tr_infi = 0
                tr_ce = 0
                tr_gen_acc = 0
                tr_ce2 = 0
                tr_gen_acc2 = 0
                tr_ce3 = 0
                tr_gen_acc3 = 0
                # ce4/ce5 accumulators are kept for symmetry but never updated
                tr_ce4 = 0
                tr_gen_acc4 = 0
                tr_ce5 = 0
                tr_gen_acc5 = 0
                # integer division so the loop count is an int under Python 3
                for j in range(len(total_data) // FLAGS.batch_size):
                    batch_data = total_data[j * FLAGS.batch_size:
                                            (j + 1) * FLAGS.batch_size]
                    batch_label = total_label[j * FLAGS.batch_size:
                                              (j + 1) * FLAGS.batch_size]
                    acc_p3, ce_p3, acc_p2, ce_p2, acc_p, ce_p, infi_p, _ = sess.run(
                        [gen_acc3, cross_entropy3, gen_acc2, cross_entropy2,
                         gen_acc, cross_entropy, infi_norm, g_trainer],
                        feed_dict={x_data: batch_data, y_label: batch_label})
                    tr_infi += infi_p
                    tr_ce += ce_p
                    tr_gen_acc += acc_p
                    tr_ce2 += ce_p2
                    tr_gen_acc2 += acc_p2
                    tr_ce3 += ce_p3
                    tr_gen_acc3 += acc_p3
                print(str(i + 1) + " Epoch InfiNorm:" + str(tr_infi / (j + 1)) +
                      ",CE: " + str(tr_ce / (j + 1)) +
                      ",Acc: " + str(tr_gen_acc / (j + 1)) +
                      ",CE2: " + str(tr_ce2 / (j + 1)) +
                      ",Acc2: " + str(tr_gen_acc2 / (j + 1)) +
                      ",CE3: " + str(tr_ce3 / (j + 1)) +
                      ",Acc3: " + str(tr_gen_acc3 / (j + 1)))
                total_idx = np.arange(len(total_data))
                np.random.shuffle(total_idx)
                total_data = total_data[total_idx]
                total_label = total_label[total_idx]
            saver_gen.save(
                sess, "my-models_iv3_rv250_rv2152/my-model_" +
                str(FLAGS.max_epsilon) + ".ckpt")
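# Once train() finishes, the saved generator can be reused for inference. A
# minimal sketch (an assumption: it presumes the same graph-building code as
# above has run, so x_data, x_generated, total_data, and saver_gen exist; the
# clip is added to keep outputs in the valid [-1, 1] range):
with tf.Session() as sess:
    saver_gen.restore(sess, "my-models_iv3_rv250_rv2152/my-model_" +
                      str(FLAGS.max_epsilon) + ".ckpt")
    adv_imgs = sess.run(x_generated, feed_dict={x_data: total_data})
    adv_imgs = np.clip(adv_imgs, -1.0, 1.0)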
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    itr = 30
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        img_resize_tensor = tf.placeholder(tf.int32, [2])
        x_input_resize = tf.image.resize_images(
            x_input, img_resize_tensor,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        shape_tensor = tf.placeholder(tf.int32, [3])
        padded_input = padding_layer_iyswim(x_input_resize, shape_tensor)
        # 330 is the last value to keep 8*8 output, 362 is the last value to
        # keep 9*9 output, stride = 32
        padded_input.set_shape(
            (FLAGS.batch_size, FLAGS.image_resize, FLAGS.image_resize, 3))

        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                padded_input, num_classes=num_classes, is_training=False,
                create_aux_logits=True)
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_v3 = inception_v3.inception_v3(
                padded_input, num_classes=num_classes, is_training=False,
                scope='AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                padded_input, num_classes=num_classes, is_training=False,
                scope='Ens3AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            # fixed: this assignment originally clobbered logits_ens3_adv_v3,
            # so Ens4AdvInceptionV3's logits never entered the ensemble
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                padded_input, num_classes=num_classes, is_training=False,
                scope='Ens4AdvInceptionV3')
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
                padded_input, num_classes=num_classes, is_training=False)

        logits = (logits_ensadv_res_v2 + logits_adv_v3 + logits_ens3_adv_v3 +
                  logits_ens4_adv_v3 + logits_resnet) / 5
        Aux = (end_points_ensadv_res_v2['AuxLogits'] +
               end_points_v3['AuxLogits'] +
               end_points_ens3_adv_v3['AuxLogits'] +
               end_points_ens4_adv_v3['AuxLogits']) * 0.1
        # The loop below accumulates per-class scores over `itr` randomized
        # passes, so fetch a probability vector per image rather than a single
        # argmax label (the original assigned a label array into a
        # [batch, num_classes] slot, which does not broadcast).
        predicted_probs = tf.nn.softmax(logits + Aux)
        predicted_labels = tf.argmax((logits + Aux), 1)
        #predicted_labels = tf.argmax(end_points['Predictions'], 1)

        # Run computation
        #saver = tf.train.Saver(slim.get_model_variables())
        #session_creator = tf.train.ChiefSessionCreator(
        #    scaffold=tf.train.Scaffold(saver=saver),
        #    checkpoint_filename_with_path=[FLAGS.checkpoint_path_ens3_adv_inception_v3, FLAGS.checkpoint_path_ens4_adv_inception_v3],
        #    master=FLAGS.master)
        #with tf.train.MonitoredSession(session_creator=session_creator) as sess:
        s1 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s4 = tf.train.Saver(slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))

        with tf.Session() as sess:
            s1.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)
            s2.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
            s3.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
            s4.restore(sess, FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
            s5.restore(sess, FLAGS.checkpoint_path_resnet)

            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    final_preds = np.zeros(
                        [FLAGS.batch_size, num_classes, itr])
                    for j in range(itr):
                        if np.random.randint(0, 2) == 1:
                            images = images[:, :, ::-1, :]  # random horizontal flip
                        resize_shape_ = np.random.randint(310, 331)
                        final_preds[..., j] = sess.run(
                            predicted_probs,
                            feed_dict={
                                x_input: images,
                                img_resize_tensor: [resize_shape_] * 2,
                                shape_tensor: np.array([
                                    random.randint(
                                        0, FLAGS.image_resize - resize_shape_),
                                    random.randint(
                                        0, FLAGS.image_resize - resize_shape_),
                                    FLAGS.image_resize
                                ])
                            })
                    final_probs = np.sum(final_preds, axis=-1)
                    labels = np.argmax(final_probs, 1)
                    for filename, label in zip(filenames, labels):
                        out_file.write('{0},{1}\n'.format(filename, label))
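# padding_layer_iyswim is called above but not defined in this snippet. From
# the call site (shape_tensor = [pad_top, pad_left, canvas_size]) it evidently
# pads the randomly resized image into an image_resize x image_resize canvas
# at a random offset, as in the randomization defense of Xie et al. A minimal
# sketch consistent with that usage (an inference, not the original code):
def padding_layer_iyswim(inputs, shape):
    pad_top, pad_left, out_size = shape[0], shape[1], shape[2]
    in_shape = tf.shape(inputs)
    paddings = tf.stack([[0, 0],
                         [pad_top, out_size - pad_top - in_shape[1]],
                         [pad_left, out_size - pad_left - in_shape[2]],
                         [0, 0]])
    return tf.pad(inputs, paddings)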
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    # max_epsilon over checking
    # get original images
    origin_img_list = np.sort(glob.glob(FLAGS.origin_img_dir + "*.png"))
    origin_imgs = np.zeros((len(origin_img_list), FLAGS.image_height,
                            FLAGS.image_width, 3), dtype=float)
    for i in range(len(origin_img_list)):
        origin_imgs[i] = imread(origin_img_list[i], mode='RGB').astype(np.float)
    # get adv images
    adv_img_list = np.sort(glob.glob(FLAGS.input_dir + "*.png"))
    adv_imgs = np.zeros((len(adv_img_list), FLAGS.image_height,
                         FLAGS.image_width, 3), dtype=float)
    for i in range(len(adv_img_list)):
        adv_imgs[i] = imread(adv_img_list[i], mode='RGB').astype(np.float)

    # flag every image whose L-inf perturbation exceeds max_epsilon
    epsilon_list = np.linalg.norm(
        np.reshape(abs(origin_imgs - adv_imgs),
                   [-1, FLAGS.image_height * FLAGS.image_width * 3]),
        ord=np.inf, axis=1)
    over_epsilon_list = np.zeros((len(origin_img_list), 2), dtype=object)
    cnt = 0
    for i in range(len(origin_img_list)):
        file_name = origin_img_list[i].split("/")[-1]
        file_name = file_name.split(".")[0]
        over_epsilon_list[i, 0] = file_name
        if epsilon_list[i] > FLAGS.max_epsilon:
            over_epsilon_list[i, 1] = "1"
            cnt += 1

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        if FLAGS.checkpoint_file_name == "inception_v3.ckpt":
            with slim.arg_scope(inception.inception_v3_arg_scope()):
                _, end_points = inception.inception_v3(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v4.ckpt":
            with slim.arg_scope(inception.inception_v4_arg_scope()):
                _, end_points = inception.inception_v4(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_resnet_v2_2016_08_30.ckpt":
            with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                _, end_points = inception.inception_resnet_v2(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_101.ckpt":
            # the inputs are resized to the ResNet's native 224x224
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_101(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_50.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_50(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_152.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_152(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v1.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(inception.inception_v1_arg_scope()):
                _, end_points = inception.inception_v1(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v2.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(inception.inception_v2_arg_scope()):
                _, end_points = inception.inception_v2(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        # Resnet v1 and vgg are not working now
        elif FLAGS.checkpoint_file_name == "vgg_16.ckpt":
            # these 1000-class models lack the background class, hence
            # num_classes - 1 and the +1 label shift
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_16(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "vgg_19.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_19(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['vgg_19/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_50.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                # fixed: this branch originally passed the unpreprocessed x_input
                _, end_points = resnet_v1.resnet_v1_50(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_101.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_101(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_152.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_152(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path +
            FLAGS.checkpoint_file_name,
            master=FLAGS.master)
        f = open(FLAGS.true_label, "r")
        t_label_list = np.array([i[:-1].split(",") for i in f.readlines()])
        score = 0
        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    labels = sess.run(predicted_labels,
                                      feed_dict={x_input: images})
                    for filename, label in zip(filenames, labels):
                        f_name = filename.split(".")[0]
                        t_label = int(
                            t_label_list[t_label_list[:, 0] == f_name, 1][0])
                        if t_label != label:
                            # score a point only if the perturbation stayed
                            # within max_epsilon
                            if over_epsilon_list[
                                    over_epsilon_list[:, 0] == f_name, 1] != "1":
                                score += 1
                        #out_file.write('{0},{1}\n'.format(filename, label))
    print("Over max epsilon#: " + str(cnt))
    print(str(FLAGS.max_epsilon) + " max epsilon Score: " + str(score))
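# The scripts above all rely on a load_images helper that is not shown here.
# A minimal sketch following the usual NIPS 2017 adversarial-competition
# convention (PNG inputs scaled to [-1, 1], yielded in fixed-size batches);
# the exact helper in this repository may differ:
import os

import numpy as np
import tensorflow as tf
from scipy.misc import imread


def load_images(input_dir, batch_shape):
    images = np.zeros(batch_shape)
    filenames = []
    idx = 0
    batch_size = batch_shape[0]
    for filepath in sorted(tf.gfile.Glob(os.path.join(input_dir, '*.png'))):
        with tf.gfile.Open(filepath, 'rb') as f:
            image = imread(f, mode='RGB').astype(np.float) / 255.0
        images[idx, :, :, :] = image * 2.0 - 1.0  # [0, 1] -> [-1, 1]
        filenames.append(os.path.basename(filepath))
        idx += 1
        if idx == batch_size:
            yield filenames, images
            filenames = []
            images = np.zeros(batch_shape)
            idx = 0
    if idx > 0:
        yield filenames, images  # final partial batch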
def get_query_similarity(gpu_id, query_embedding, image_id_queue, model_name,
                         embeddings_queue, num_cutoff=50):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    query_embedding_size = query_embedding.shape[0]
    num_queries = 1
    query_mat = np.zeros((num_queries, query_embedding_size), dtype=np.float32)
    # L2-normalize the query so the tensordot below is a cosine similarity
    norm_embedding = query_embedding / (
        np.linalg.norm(query_embedding, ord=2) + np.finfo(float).eps)
    query_mat[0] = norm_embedding

    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        query_in = tf.placeholder(tf.float32,
                                  (num_queries, query_embedding_size))
        # inception-style preprocessing: scale pixel values to [-1, 1]
        if image.dtype != tf.float32:
            processed_image = tf.image.convert_image_dtype(image,
                                                           dtype=tf.float32)
        else:
            processed_image = image
        processed_image = tf.subtract(processed_image, 0.5)
        processed_image = tf.multiply(processed_image, 2.0)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            if model_name == 'resnet_v2_101':
                postnorm, _ = resnet_v2.resnet_v2_101(processed_image, None,
                                                      is_training=False,
                                                      global_pool=False,
                                                      output_stride=8)
            elif model_name == 'resnet_v2_50':
                postnorm, _ = resnet_v2.resnet_v2_50(processed_image, None,
                                                     is_training=False,
                                                     global_pool=False,
                                                     output_stride=8)
            else:
                print('Unknown model')
                exit(0)

        # cosine similarity between the query and every spatial position of
        # the feature map, keeping the indices of the num_cutoff best matches
        postnorm = tf.nn.l2_normalize(postnorm, axis=3)
        query_similarity = tf.tensordot(query_in, postnorm, axes=[[1], [3]])
        query_sorted_idxs = tf.argsort(tf.reshape(query_similarity,
                                                  [num_queries, -1]),
                                       direction='DESCENDING')
        query_sorted_idxs = query_sorted_idxs[:, :num_cutoff]

        if model_name == 'resnet_v2_101':
            init_fn = slim.assign_from_checkpoint_fn(
                'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))
        elif model_name == 'resnet_v2_50':
            init_fn = slim.assign_from_checkpoint_fn(
                'resnet_v2_50.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            while True:
                image_id, im_path = image_id_queue.get()
                if image_id is None:  # sentinel: no more work
                    break
                img = cv2.imread(im_path)
                height, width = img.shape[:2]
                # downscale very large images to at most 1920x1080 pixels
                max_image_size = 1920 * 1080
                if height * width > max_image_size:
                    scale_factor = math.sqrt((height * width) / max_image_size)
                    img = cv2.resize(img, (int(width / scale_factor),
                                           int(height / scale_factor)))
                input_img, embedding, similarity, sorted_idxs = sess.run(
                    [processed_image, postnorm, query_similarity,
                     query_sorted_idxs],
                    feed_dict={image: img, query_in: query_mat})
                query_embedding = query_mat[0]
                similarity_peaks = np.unravel_index(sorted_idxs[0],
                                                    similarity[0].shape)
                query_similar_embeddings = {
                    'embeddings': embedding[similarity_peaks].copy(),
                    'similarity': similarity[0].copy()
                }
                embeddings_queue.put((image_id, query_similar_embeddings))
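# A hypothetical driver for the worker above (all names other than
# get_query_similarity are illustrative, not from the original file): one
# process per GPU, fed through a shared queue and shut down with the
# (None, None) sentinel that the worker's `image_id is None` check expects.
import multiprocessing as mp

if __name__ == '__main__':
    image_id_queue = mp.Queue()
    embeddings_queue = mp.Queue()
    query_embedding = np.load('query_embedding.npy')  # assumed precomputed
    worker = mp.Process(target=get_query_similarity,
                        args=(0, query_embedding, image_id_queue,
                              'resnet_v2_101', embeddings_queue))
    worker.start()
    image_paths = ['frame_0001.jpg', 'frame_0002.jpg']  # placeholder inputs
    for image_id, im_path in enumerate(image_paths):
        image_id_queue.put((image_id, im_path))
    image_id_queue.put((None, None))  # tell the worker to exit
    for _ in image_paths:
        image_id, result = embeddings_queue.get()
        print(image_id, result['embeddings'].shape)
    worker.join()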