def test_inception_v1(img_dir):
    """
    Test Inception-V1 with a single image.
    :param img_dir: path of the image to be classified
    :return: classification result and probability for the image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224)) / 255
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(inception_v1_arg_scope()):
        _, _ = inception_v1(inputs, 1001, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/inception_v1.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'InceptionV1/Logits/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]
        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred]
        print('Result of Inception-V1:', name, prob)
    return name, prob
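# A minimal driver for test_inception_v1 above. The function relies on
# module-level imports and a global label_dict mapping class indices to
# names; everything below is an assumption about that surrounding module.
import cv2
import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets.inception_v1 import inception_v1, inception_v1_arg_scope

label_dict = {}  # placeholder: index -> class name for all 1001 classes

if __name__ == '__main__':
    name, prob = test_inception_v1('./images/example.jpg')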
def make_inceptionv1bn_multi_embeddings(batch_imgs,
                                        embedding_dims,
                                        n_heads,
                                        phase_is_train,
                                        uniform_bias=False,
                                        weight_decay=0.00004,
                                        pooling='avg'):
    # Slim endpoint names used as attachment points for the embedding heads:
    # 'Mixed_3b', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
    # 'Mixed_4e', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'
    emb_info = [
        'Mixed_3b', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
        'Mixed_4e', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'
    ]
    if n_heads == 1:
        emb_info = ['Mixed_5c']

    left_embedding = embedding_dims
    with slim.arg_scope(
            inception_v1.inception_v1_arg_scope(weight_decay=weight_decay)):
        net, endpoints = inception_v1.inception_v1(
            batch_imgs,
            num_classes=0,  # output before the dropout layer is returned
            dropout_keep_prob=1.0,
            is_training=phase_is_train)

    for i in range(n_heads):
        # Spread embedding_dims across the heads;
        # the residual goes into the preceding (earlier) embeddings.
        emb_dim = int(math.ceil(left_embedding / float(n_heads - i)))
        left_embedding -= emb_dim
        with tf.variable_scope('loss%d' % i) as scope:
            emb1 = tf.reduce_mean(endpoints[emb_info[i]], [1, 2])
            final_emb = emb1
            if pooling == 'avgnmax':
                emb2 = tf.reduce_max(endpoints[emb_info[i]], [1, 2])
                final_emb = tf.concat([emb1, emb2], 1)
            endpoints['emb_%d' % i] = slim.fully_connected(
                final_emb, emb_dim, activation_fn=None)
            endpoints['embedding%d' % i] = tf.nn.l2_normalize(
                endpoints['emb_%d' % i], dim=1)

    with tf.variable_scope('fc_embedding') as scope:
        embs = [endpoints['embedding%d' % i] for i in range(n_heads)]
        endpoints['fc_embedding'] = tf.concat(embs, 1) / np.sqrt(n_heads)

    # print('Endpoints')
    # for k, v in endpoints.items():
    #     print((k, v))
    return endpoints, None
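# The ceil-based split above spreads embedding_dims across the heads and
# puts any residual dimensions into the earlier heads. A standalone check
# of that arithmetic (plain Python; the values are illustrative):
import math

def split_dims(embedding_dims, n_heads):
    left, dims = embedding_dims, []
    for i in range(n_heads):
        d = int(math.ceil(left / float(n_heads - i)))
        dims.append(d)
        left -= d
    return dims

assert split_dims(512, 9) == [57, 57, 57, 57, 57, 57, 57, 57, 56]
assert sum(split_dims(512, 9)) == 512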
def run_benchmark():
    """Run the benchmark on Inception_V1."""
    with tf.Graph().as_default():
        # Generate some dummy images and labels. Note that slim's
        # inception_v1 expects NHWC inputs, i.e. [batch, height, width,
        # channels], so the dummy batch is created in that layout.
        image_size = 224
        images = tf.Variable(tf.random_normal(
            [FLAGS.batch_size, image_size, image_size, 3],
            dtype=tf.float32,
            stddev=1e-1),
                             trainable=False)
        labels = tf.Variable(tf.ones([FLAGS.batch_size], dtype=tf.int32),
                             trainable=False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, end_points = inception_v1.inception_v1(images)

        # Build an initialization operation.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto()
        # config.gpu_options.allocator_type = 'BFC'
        sess = tf.Session(config=config)
        sess.run(init)

        # Run the forward benchmark.
        time_tensorflow_run(sess, logits, "Forward")

        # Add a simple objective so we can calculate the backward pass.
        objective = loss(logits, labels)
        # Compute the gradient with respect to all trainable parameters.
        variables_to_train = tf.trainable_variables()
        grad = tf.gradients(objective, variables_to_train)
        # Run the backward benchmark.
        time_tensorflow_run(sess, grad, "Forward-backward")
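# run_benchmark assumes two helpers it does not define, time_tensorflow_run
# and loss. A plausible sketch in the style of TensorFlow's classic
# alexnet_benchmark.py; the names and the num_batches value are assumptions.
import math
import time
from datetime import datetime
import tensorflow as tf

num_batches = 100  # assumed benchmark length

def time_tensorflow_run(session, target, info_string):
    """Runs `target` num_batches times and reports mean/stddev per batch."""
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches
    sd = math.sqrt(total_duration_squared / num_batches - mn * mn)
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
          (datetime.now(), info_string, num_batches, mn, sd))

def loss(logits, labels):
    """Simple objective so the backward pass has something to optimize."""
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                       logits=logits))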
def train(dataset_dir,
          base_lr=0.01,
          max_steps=30000,
          train_log_dir='./logs/inception_v1_Momentum_0.01',
          preprocessing_name='inception_v1'):
    """
    :param dataset_dir: root directory of the data
    :param base_lr: learning rate
    :param max_steps: maximum number of training steps
    :param train_log_dir: where the checkpoint files are stored
    :param preprocessing_name: name of the preprocessing to use
    :return:
    """
    dataset = convert_flower_to_tfrecord.read_tfrecords(
        split_name='train', dataset_dir=dataset_dir)
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=4,  # number of parallel readers pulling from the dataset
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size)
    [images, labels] = provider.get(['image', 'label'])
    images_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=True)
    images = images_preprocessing_fn(images, resize_height, resize_width)

    # Assemble images and labels into batches of batch_size.
    train_batch_images, train_batch_labels = tf.train.batch(
        [images, labels],
        batch_size=batch_size,
        num_threads=4,  # number of threads used to build the batches
        capacity=5 * batch_size)
    # One-hot encode the labels.
    train_batch_labels = tf.one_hot(train_batch_labels,
                                    num_classes,
                                    on_value=1,
                                    off_value=0)

    # Note: the parentheses after inception_v1_arg_scope() must not be
    # dropped; it is a function call, not a reference.
    with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
        out, end_points = inception_v1.inception_v1(
            inputs=input_images,
            num_classes=num_classes,
            is_training=is_training,
            dropout_keep_prob=keep_prob)

    # Loss, accuracy, and the optimizer.
    loss = tf.losses.softmax_cross_entropy(onehot_labels=input_labels,
                                           logits=out)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32)) * 100.0
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=base_lr, momentum=0.9)  # other optimizers also work

    # Because the model uses batch_norm layers, the moving average and
    # variance of every layer must be updated during training. A normal
    # training step does not include these updates, so we run the ops in
    # tf.GraphKeys.UPDATE_OPS before each train step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = slim.learning.create_train_op(total_loss=loss,
                                                 optimizer=optimizer)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for steps in np.arange(max_steps):
            input_batch_images, input_batch_labels = sess.run(
                [train_batch_images, train_batch_labels])
            _, train_loss = sess.run(
                [train_op, loss],
                feed_dict={
                    input_images: input_batch_images,
                    input_labels: input_batch_labels,
                    keep_prob: 0.8,
                    is_training: True
                })
            # Report the loss and accuracy during training.
            if steps % 50 == 0 or (steps + 1) == max_steps:
                train_acc = sess.run(accuracy,
                                     feed_dict={
                                         input_images: input_batch_images,
                                         input_labels: input_batch_labels,
                                         keep_prob: 1.0,
                                         is_training: False
                                     })
                print('Step: %d, loss: %.4f, accuracy: %.4f' %
                      (steps, train_loss, train_acc))
            # Save a checkpoint every 2000 steps.
            if steps % 2000 == 0 or (steps + 1) == max_steps:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=steps)
        coord.request_stop()
        coord.join(threads)
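# train() above reads several names from module scope. One consistent set
# of definitions (shapes and values are assumptions, not the original code):
import tensorflow as tf

batch_size = 32
num_classes = 5                      # e.g. the five flower categories
resize_height = resize_width = 224

input_images = tf.placeholder(tf.float32,
                              [None, resize_height, resize_width, 3],
                              name='input_images')
input_labels = tf.placeholder(tf.float32, [None, num_classes],
                              name='input_labels')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
is_training = tf.placeholder(tf.bool, name='is_training')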
def compute_feature_of_batch_ts_with_cnn(file_path_of_ts, file_path_of_feature,
                                         cnn_model_name,
                                         file_path_of_pretrained_model):
    r'''
    Compute features of some time series with a pretrained CNN.
    :param file_path_of_ts: file path of the time series
    :param file_path_of_feature: file path for saving the features
    :param cnn_model_name: name of the CNN model
    :param file_path_of_pretrained_model: file path of the pretrained CNN
    :return: None
    '''
    #tf.reset_default_graph()
    # Read the data and convert the dataframe to a list.
    data = pd.read_csv(file_path_of_ts)
    #data = data.sample(20)
    id_list = data.iloc[:, 0].tolist()
    data_list = change_dataframe_to_dict_(data)

    model = cnn_model_name
    checkpoint_file = file_path_of_pretrained_model

    # Input sizes per architecture: most of these networks take 224x224,
    # while inception_resnet_v2 takes 299x299.
    if 'inception' in model:
        height, width, channels = 224, 224, 3
    if 'resnet' in model:
        height, width, channels = 224, 224, 3
    if 'vgg' in model:
        height, width, channels = 224, 224, 3
    if model == 'inception_resnet_v2':
        height, width, channels = 299, 299, 3

    x = tf.placeholder(tf.float32, shape=(1, height, width, channels))

    # Load the model-specific network and pick its feature endpoint.
    if model == 'inception_v1':
        from nets import inception_v1
        arg_scope = inception_v1.inception_v1_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v1.inception_v1(
                x, is_training=False, num_classes=None)
            features = end_points['AvgPool_0a_7x7']
    elif model == 'inception_v2':
        from nets import inception_v2
        arg_scope = inception_v2.inception_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v2.inception_v2(
                x, is_training=False, num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_v3':
        from nets import inception_v3
        arg_scope = inception_v3.inception_v3_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v3.inception_v3(
                x, is_training=False, num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_resnet_v2':
        from nets import inception_resnet_v2
        arg_scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_resnet_v2.inception_resnet_v2(
                x, is_training=False, num_classes=1001)
            features = end_points['PreLogitsFlatten']
    elif model == 'resnet_v1_50':
        from nets import resnet_v1
        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_50(x,
                                                        is_training=False,
                                                        num_classes=1000)
            features = end_points['global_pool']
    elif model == 'resnet_v1_101':
        from nets import resnet_v1
        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_101(x,
                                                         is_training=False,
                                                         num_classes=1000)
            features = end_points['global_pool']
    elif model == 'vgg_16':
        from nets import vgg
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_16(x, is_training=False)
            features = end_points['vgg_16/fc8']
    elif model == 'vgg_19':
        from nets import vgg
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_19(x, is_training=False)
            features = end_points['vgg_19/fc8']

    #cpu_config = tf.ConfigProto(intra_op_parallelism_threads=8,
    #                            inter_op_parallelism_threads=8,
    #                            device_count={'CPU': 3})
    #sess = tf.Session(config=cpu_config)
    sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_file)

    feature_list = []
    count_temp = 0
    for i in range(len(data_list)):
        count_temp = count_temp + 1
        # Turn the time series into an image via a recurrence plot.
        ts_dict = data_list[i]
        ts = ts_dict['ts']
        id = ts_dict['id']
        new_ts = min_max_transform(ts)
        normalized = np.array(new_ts)
        fig, ax = plt.subplots()
        plt.imshow(recurrence_plot.rec_plot(normalized))
        ax.set_xticks([])
        ax.set_yticks([])
        path = "inception-v1/" + id + ".jpg"
        plt.savefig(path)
        plt.close(fig)

        # Compute the feature of the rendered image.
        image = misc.imread(path)
        image = misc.imresize(image, (height, width))
        image = np.expand_dims(image, 0)
        feature = np.squeeze(sess.run(features, feed_dict={x: image}))
        feature_list.append(feature)
        os.remove(path)
        if count_temp % 100 == 0:
            print(count_temp)

    # Collect the parallel results and write them to CSV, with the id
    # column inserted first.
    feature_array = np.array(feature_list)
    feature_df = pd.DataFrame(feature_array)
    feature_df.insert(loc=0, column='id', value=id_list)
    feature_df.to_csv(file_path_of_feature, index=False)
    gc.collect()
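# Hypothetical invocation of the extractor above; all paths are
# placeholders, and the checkpoint must match the chosen architecture.
compute_feature_of_batch_ts_with_cnn(
    file_path_of_ts='./data/time_series.csv',
    file_path_of_feature='./data/ts_features.csv',
    cnn_model_name='inception_v1',
    file_path_of_pretrained_model='./checkpoints/inception_v1.ckpt')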
print('constructing model')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# Build the graph: placeholders for images, labels, and the scalar
# loss-weight and learning-rate inputs.
img_place_holder = tf.placeholder(
    tf.float32, [None, default_image_size, default_image_size, 3])
label_place_holder = tf.placeholder(tf.float32,
                                    [None, num_training_category])
alpha_place_holder = tf.placeholder(tf.float32, shape=())
lr_place_holder = tf.placeholder(tf.float32, shape=())

# Build the backbone (a modified InceptionV1 that outputs an embedding).
if args.base_network == 'InceptionV1':
    with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
        net_output, _ = inception_v1.inception_v1(
            img_place_holder,
            embedding_dim=args.embedding_dim,
            use_bn=args.bn)
        test_net_output, _ = inception_v1.inception_v1(
            img_place_holder,
            embedding_dim=args.embedding_dim,
            reuse=True,
            is_training=False,
            use_bn=args.bn)
else:
    print('Unknown network.')
    quit()

# Build the final classifier.
with tf.variable_scope('retrieval'):
    retrieval_layer = layers.retrieval_layer_2(embedding_dim,
                                               num_training_category)
def main():
    data_path = '<train-CARLA-VP.tfrecords>'
    model_type = 'vgg-16'
    train_dir = '<saved_model_path>'
    est_label = 'horvpz'
    num_bins = 500

    sphere_params = np.load('<carlavp_label_to_horvpz_fov_pitch.npz>')
    all_bins = sphere_params['all_bins']
    all_sphere_centres = sphere_params['all_sphere_centres']
    all_sphere_radii = sphere_params['all_sphere_radii']

    if est_label == 'horfov':
        fov_bins = np.arange(15, 115, 100 / num_bins)
        half_fov_bin_size = (fov_bins[1] - fov_bins[0]) / 2

    if model_type == 'inceptionv4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224

    if model_type == 'vgg-m':
        model = pickle.load(open("<vggm-tf.p>", "rb"))
        average_image = np.load('<vgg_average_image.npy>')
    elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
        _R_MEAN = 123.68
        _G_MEAN = 116.78
        _B_MEAN = 103.94
        resnet_average_channels = np.array(np.concatenate(
            (np.tile(_R_MEAN, (net_height, net_width, 1)),
             np.tile(_G_MEAN, (net_height, net_width, 1)),
             np.tile(_B_MEAN, (net_height, net_width, 1))),
            axis=2),
                                           dtype=np.float32)
    elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
        print("Nothing needs to be initialized for this cnn model")
    else:
        print("ERROR: No such CNN exists")

    if est_label == 'horfov':
        no_params_model = 3
    elif est_label == 'horvpz':
        no_params_model = 4
    else:
        print("ERROR: No such 'est_label'")

    max_batch_size = 60
    total_examples = sum(1 for _ in tf.python_io.tf_record_iterator(data_path))
    print("Total examples: ", total_examples)
    # Pick the largest divisor of the dataset size below max_batch_size so
    # that every example is evaluated.
    divs = np.array(list(factors(total_examples)))
    sorted_divs = divs[divs.argsort()]
    batch_size = sorted_divs[sorted_divs < max_batch_size][-1]
    print("Batch Size:", batch_size)

    ct = np.arange(11, 12, 4)
    best_avg_man_loss = np.inf
    for en, consider_top in enumerate(ct):
        total_manhattan_loss = np.zeros(5)
        with tf.Graph().as_default():
            tf.logging.set_verbosity(tf.logging.INFO)
            filename_queue = tf.train.string_input_producer([data_path])
            image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
                filename_queue, num_classes=8, dtype=tf.float64)
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            if model_type == 'vgg-m':
                image = image - average_image
            elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
                image = image - resnet_average_channels
            elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
                image = tf.cast(image, tf.float32) * (1. / 255)
                image = (image - 0.5) * 2
            else:
                print("ERROR: No such CNN exists")

            images, labels, carla_widths, carla_heights = tf.train.batch(
                [image, label, carla_width, carla_height],
                batch_size=batch_size,
                num_threads=1,
                capacity=5 * batch_size)
            print(images)

            if model_type == 'vgg-m':
                logits = vgg_m.cnn_vggm(images,
                                        num_classes=num_bins * no_params_model,
                                        model=model)
            elif model_type == 'resnet50':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_50(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)
            elif model_type == 'resnet101':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_101(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)
            elif model_type == 'vgg-16':
                with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
                    logits, _ = vgg.vgg_16(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False)
            elif model_type == 'inceptionv1':
                with slim.arg_scope(
                        inception_v1.inception_v1_arg_scope()) as scope:
                    logits, _ = inception_v1.inception_v1(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False)
            elif model_type == 'inceptionv4':
                with slim.arg_scope(
                        inception_v4.inception_v4_arg_scope()) as scope:
                    logits, _ = inception_v4.inception_v4(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False)
            else:
                print("ERROR: No such CNN exists")

            checkpoint_path = train_dir
            init_fn = slim.assign_from_checkpoint_fn(
                checkpoint_path, slim.get_variables_to_restore())
            print("--------------------------------------------------------")
            print("No. of examples not evaluated because of batch size:",
                  np.mod(total_examples, batch_size))
            print("--------------------------------------------------------")

            with tf.Session() as sess:
                with slim.queues.QueueRunners(sess):
                    sess.run(tf.initialize_local_variables())
                    init_fn(sess)

                    for loop_no in range(
                            int(np.floor(total_examples / batch_size))):
                        np_rawpreds, np_images_raw, np_labels, np_width, np_height = sess.run(
                            [logits, images, labels, carla_widths,
                             carla_heights])

                        for i in range(batch_size):
                            # Decode each parameter as a probability-weighted
                            # average over the top-k bins.
                            pred_indices = np.zeros(no_params_model,
                                                    dtype=np.int32)
                            output_vals = np_rawpreds[i, :].squeeze().reshape(
                                no_params_model, -1)
                            for ln in range(no_params_model):
                                predsoft = my_softmax(
                                    output_vals[ln, :][np.newaxis]).squeeze()
                                topindices = predsoft.argsort(
                                )[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                pred_indices[ln] = np.abs(
                                    int(np.round(
                                        np.sum(probsindices * topindices))))

                            if est_label == 'horfov':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    np.hstack((pred_indices[:2], 0, 0)),
                                    all_bins, all_sphere_centres,
                                    all_sphere_radii)
                                my_fov = fov_bins[
                                    pred_indices[2]] + half_fov_bin_size
                                fx, fy, roll_from_horizon, my_tilt = get_intrinisic_extrinsic_params_from_horfov(
                                    img_dims=(np_width[i], np_height[i]),
                                    horizonvector=estimated_input_points,
                                    fov=my_fov,
                                    net_dims=(net_width, net_height))
                            elif est_label == 'horvpz':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    pred_indices[:4], all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                fx, fy, roll_from_horizon, my_tilt = \
                                    get_intrinisic_extrinsic_params_from_horizonvector_vpz(
                                        img_dims=(np_width[i], np_height[i]),
                                        horizonvector_vpz=estimated_input_points,
                                        net_dims=(net_width, net_height))

                            my_fov_fx = degrees(
                                np.arctan(np_width[i] / (2 * fx)) * 2)
                            my_fov_fy = degrees(
                                np.arctan(np_width[i] / (2 * fy)) * 2)
                            my_tilt = -degrees(my_tilt)

                            gt_label = np_labels[i, :].reshape(4, -1)
                            gt_fov = gt_label[3, 0]
                            gt_pitch = gt_label[3, 1]
                            gt_roll = degrees(
                                atan((gt_label[1, 1] - gt_label[0, 1]) /
                                     (gt_label[1, 0] - gt_label[0, 0])))

                            manhattan_loss = [
                                np.abs(my_fov_fx - gt_fov),
                                np.abs(my_fov_fy - gt_fov),
                                np.abs(((my_fov_fx + my_fov_fy) / 2) - gt_fov),
                                np.abs(my_tilt - gt_pitch),
                                np.abs(roll_from_horizon - gt_roll)
                            ]
                            total_manhattan_loss += manhattan_loss

        avg_manhattan_loss = total_manhattan_loss / total_examples
        print("ct:", consider_top, "Average manhattan loss per scalar: ",
              avg_manhattan_loss)
        print("-------------------------------------------------------------------")

        this_loss = np.mean(
            np.hstack((avg_manhattan_loss[1], avg_manhattan_loss[3:])))
        if this_loss < best_avg_man_loss:
            best_avg_man_loss = this_loss
            display_loss = [
                consider_top, -1, avg_manhattan_loss[1],
                avg_manhattan_loss[3], avg_manhattan_loss[4]
            ]
            print("Best loss:", display_loss)
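# Both this script and the training script further below call two helpers
# that are not shown, my_softmax and factors. Minimal versions consistent
# with how they are called (a sketch, not the original code):
import numpy as np

def my_softmax(x):
    """Numerically stable softmax over the last axis of a (1, N) array."""
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def factors(n):
    """All divisors of n; used to pick a batch size that divides the data."""
    return set(f for i in range(1, int(n ** 0.5) + 1) if n % i == 0
               for f in (i, n // i))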
def network_fn(images, **kwargs):
    with slim.arg_scope(arg_scope):
        return inception_v1.inception_v1(images,
                                         num_classes,
                                         is_training=is_training)
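# network_fn above reads arg_scope, num_classes, and is_training from an
# enclosing scope. A factory in the style of TF-Slim's nets_factory could
# supply them as below; this wrapper is an assumption, not the original.
import tensorflow.contrib.slim as slim
from nets import inception_v1

def get_network_fn(num_classes, is_training=False, weight_decay=0.00004):
    arg_scope = inception_v1.inception_v1_arg_scope(weight_decay=weight_decay)

    def network_fn(images, **kwargs):
        with slim.arg_scope(arg_scope):
            return inception_v1.inception_v1(images,
                                             num_classes,
                                             is_training=is_training)

    network_fn.default_image_size = 224
    return network_fn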
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    dropout_val = 0.8
    is_flip = True
    is_smoothing = True
    maintain_aspect_ratio = True
    min_perc = 0.90
    is_random_crops = False
    max_rotation = 0
    num_bins = 500
    no_output_params = 4
    num_classes = no_output_params * num_bins
    eval_num_classes = 7 * num_bins

    num_samples = sum(
        1 for _ in tf.python_io.tf_record_iterator(FLAGS.dataset_dir))
    print("No. of training examples: ", num_samples)
    assert max_rotation >= 0
    print('---------------------------------------------------------')
    print('Make sure that no. of training samples is actually ' +
          str(num_samples))
    print('---------------------------------------------------------')

    if FLAGS.model_name == 'inception-v4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()

        data_path = FLAGS.dataset_dir
        filename_queue = tf.train.string_input_producer([data_path])
        image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
            filename_queue, num_classes=8, dtype=tf.float64)
        print(image)
        print(label)

        # ------------------------------------------------------------------
        degree_angle = tf.random_uniform([],
                                         minval=-max_rotation,
                                         maxval=max_rotation,
                                         dtype=tf.float32)
        radian_angle = util_tfgeometry.tf_deg2rad(degree_angle)

        label = tf.reshape(label, (4, 2))
        # my_fov = label[3, 0]
        # my_pitch = label[3, 1]
        label = label[:3, :]

        if is_flip:
            image, bool_flip = util_tfimage.random_flip_left_right(image)

            def flip_gt():
                return tf.stack(
                    ([[tf.cast(carla_width, label.dtype) - label[1, 0],
                       label[1, 1]],
                      [tf.cast(carla_width, label.dtype) - label[0, 0],
                       label[0, 1]],
                      [tf.cast(carla_width, label.dtype) - label[2, 0],
                       label[2, 1]]]))

            def gt():
                return label

            label = tf.cond(bool_flip, flip_gt, gt)

        if max_rotation > 0:
            # Image rotation is buggy on GPU, so run it on the CPU.
            with tf.device('/cpu:0'):
                image = tf.contrib.image.rotate(image,
                                                radian_angle,
                                                interpolation='BILINEAR')
            max_width, max_height = util_tfgeometry.rotatedRectWithMaxArea_tf(
                carla_width, carla_height, radian_angle)
            max_height = tf.cast(tf.floor(max_height), tf.int32)
            max_width = tf.cast(tf.floor(max_width), tf.int32)
            print("max_width, height", max_width, max_height)
            image = tf.image.resize_image_with_crop_or_pad(
                image, target_height=max_height, target_width=max_width)
            rot_vps = util_tfgeometry.rotate_vps(
                (carla_width / 2, carla_height / 2), label,
                tf.cast(radian_angle, dtype=tf.float64))
            crop_rot_vps = util_tfgeometry.center_crop_vps(
                rot_vps,
                orig_dims=(carla_width, carla_height),
                crop_dims=(max_width, max_height))
        else:
            max_width = carla_width
            max_height = carla_height
            crop_rot_vps = label

        if maintain_aspect_ratio:
            image, max_width, max_height = util_tfimage.square_random_crop(
                image, max_width, max_height)

        if not is_random_crops:
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)
            float_max_height = tf.cast(max_height, tf.float64)
            float_max_width = tf.cast(max_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                crop_rot_vps,
                orig_dims=(float_max_width, float_max_height),
                resize_dims=(net_width, net_height))
        else:
            rand_perc = tf.random_uniform([], minval=min_perc, maxval=1.0)
            crop_height = tf.maximum(
                net_height,
                tf.cast(tf.floor(rand_perc * tf.cast(max_height, tf.float32)),
                        dtype=tf.int32))
            crop_width = tf.maximum(
                net_width,
                tf.cast(tf.floor(rand_perc * tf.cast(max_width, tf.float32)),
                        dtype=tf.int32))
            image, off_height, off_width = vgg_preprocessing._custom_random_crop(
                [image], crop_height, crop_width)[0]
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)
            temp_final_vps = util_tfgeometry.offset_vps(
                crop_rot_vps, off_height, off_width)
            float_crop_height = tf.cast(crop_height, tf.float64)
            float_crop_width = tf.cast(crop_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                temp_final_vps,
                orig_dims=(float_crop_width, float_crop_height),
                resize_dims=(net_width, net_height))

        image = util_tfimage.distort_color(image,
                                           color_ordering=tf.random_uniform(
                                               [],
                                               minval=0,
                                               maxval=4,
                                               dtype=tf.int32),
                                           fast_mode=False)

        # Pixel values here, before the model-specific preprocessing below,
        # are in [0, 255].
        if FLAGS.model_name == 'vgg-m':
            model = pickle.load(open("<vggm-tf.p>", "rb"))
            average_image = np.load('<vgg_average_image.npy>')
            image = image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            image = vgg_preprocessing.my_preprocess_image(image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            image = tf.cast(image, tf.float32) * (1. / 255)
            image = (image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        label = tf.reshape(final_vps, (3, 2))
        all_label = tf.concat([label, [[0], [0], [0]]], axis=1)
        output_label, output_indices = util_tfprojection.get_all_projected_from_3vps_modified_tf(
            all_label,
            no_bins=num_bins,
            img_dims=(net_width, net_height),
            verbose=False)

        if is_smoothing:
            # Replace the one-hot bin targets with a narrow Gaussian around
            # the argmax bin.
            stddev = 0.5
            max_indices = tf.argmax(output_label, axis=1)
            normalized = tf.distributions.Normal(
                loc=tf.reshape(tf.cast(max_indices, dtype=tf.float64),
                               (no_output_params, 1)),
                scale=tf.constant(stddev, dtype=tf.float64))
            probs = normalized.prob(
                tf.tile(
                    tf.reshape(
                        tf.cast(tf.range(output_label.shape[1]),
                                dtype=tf.float64), (1, -1)),
                    (no_output_params, 1)))
            act_normalized = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
            label = tf.reshape(act_normalized, [-1])
        else:
            label = tf.reshape(output_label, [-1])
        print("SHAPE AT END:", image, label)

        # ------------------------------------------------------------------
        # shuffle_batch requires a 'min_after_dequeue' parameter, the
        # minimum number of examples to keep in the queue.
        images, labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=6 * FLAGS.batch_size,
            min_after_dequeue=4 * FLAGS.batch_size)
        labels = tf.stop_gradient(labels)

        ###########################
        # Reading evaluation data #
        ###########################
        if FLAGS.model_name == 'inception-v4':
            eval_path = ''
        else:
            eval_path = '<eval-CARLA-VP.tfrecords>'
        eval_max_batch_size = min(50, FLAGS.batch_size)
        no_eval_examples = sum(
            1 for _ in tf.python_io.tf_record_iterator(eval_path))
        divs = np.array(list(factors(no_eval_examples)))
        sorted_divs = divs[divs.argsort()]
        eval_batch_size = sorted_divs[sorted_divs < eval_max_batch_size][-1]
        print("EVALUATION BATCH SIZE:", eval_batch_size)
        print("Number of examples in evaluation dataset: ", no_eval_examples)

        eval_filename_queue = tf.train.string_input_producer(
            [eval_path])  # , num_epochs=2)
        e_image, e_label = util_tfio.read_and_decode_evaluation(
            eval_filename_queue, eval_num_classes, net_height, net_width)
        print("eval_num_classes:", eval_num_classes)

        # Pixel values here, before the preprocessing below, are in [0, 255].
        if FLAGS.model_name == 'vgg-m':
            e_image = e_image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            e_image = vgg_preprocessing.my_preprocess_image(e_image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            e_image = tf.cast(e_image, tf.float32) * (1. / 255)
            e_image = (e_image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        e_images, e_labels = tf.train.batch(
            [e_image, e_label],
            batch_size=eval_batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * eval_batch_size)
        # --------------------------

        print("PREFETCH_QUEUE, CAPACITY:", FLAGS.batch_size, ", NUM_THREADS:",
              FLAGS.num_preprocessing_threads)
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels],
            capacity=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads)
        images, labels = batch_queue.dequeue()

        if FLAGS.model_name == 'vgg-m':
            logits = vgg_m.cnn_vggm(images, num_classes=num_classes,
                                    model=model)
            eval_logits = vgg_m.cnn_vggm(e_images,
                                         num_classes=num_classes,
                                         model=model,
                                         reuse=True)
        elif FLAGS.model_name == 'vgg-16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                logits, end_points = vgg.vgg_16(images,
                                                num_classes=num_classes,
                                                is_training=True,
                                                dropout_keep_prob=dropout_val)
                eval_logits, _ = vgg.vgg_16(e_images,
                                            num_classes=num_classes,
                                            is_training=False,
                                            reuse=True)
        elif FLAGS.model_name == 'resnet-50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_50(
                    images, num_classes=num_classes, is_training=True)
                eval_logits, _ = resnet_v1.resnet_v1_50(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'resnet-101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_101(
                    images, num_classes=num_classes, is_training=True)
                eval_logits, _ = resnet_v1.resnet_v1_101(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v1':
            with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
                logits, end_points = inception_v1.inception_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)
                eval_logits, _ = inception_v1.inception_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v4':
            with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
                logits, end_points = inception_v4.inception_v4(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)
                eval_logits, _ = inception_v4.inception_v4(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'mobilenet-v1':
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                logits, end_points = mobilenet_v1.mobilenet_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)
                eval_logits, _ = mobilenet_v1.mobilenet_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        else:
            sys.exit("Invalid value for model name!")

        # One softmax cross-entropy loss per output parameter, over its own
        # block of num_bins logits.
        jumps = int(num_classes / no_output_params)
        classification_loss_1 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels[:, :jumps],
                                                    logits=logits[:, :jumps]))
        classification_loss_2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, jumps:2 * jumps],
                logits=logits[:, jumps:2 * jumps]))
        classification_loss_3 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 2 * jumps:3 * jumps],
                logits=logits[:, 2 * jumps:3 * jumps]))
        classification_loss_4 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 3 * jumps:4 * jumps],
                logits=logits[:, 3 * jumps:4 * jumps]))

        ######################################################################
        # Also track an L1 loss between the argmax bins of logits and labels,
        # to help compare against the validation loss.
        logits_ind = tf.argmax(tf.reshape(logits,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        labels_ind = tf.argmax(tf.reshape(labels,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        print("Logits_ind shape:", logits_ind.shape)
        train_l1_loss = tf.reduce_sum(tf.abs(logits_ind - labels_ind))

        regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
        total_loss = (classification_loss_1 + classification_loss_2 +
                      classification_loss_3 + classification_loss_4 +
                      regularization_loss)

        print("After classification loss:")
        print(logits.shape)
        print(labels.shape)
        print("---------------------------------------")

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for losses.
        # for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #########################################
        # Configure the optimization procedure. #
        #########################################
        learning_rate = tf.placeholder(tf.float32,
                                       shape=[],
                                       name="learning_rate")
        optimizer = util_tftraining.configure_optimizer(learning_rate,
                                                        FLAGS=FLAGS)
        print("learning rate tensor:", learning_rate)

        # Variables to train.
        variables_to_train = util_tftraining.get_variables_to_train(
            FLAGS=FLAGS)
        print("-----------------------------------------")
        print("variables to train: ", variables_to_train)
        print("-----------------------------------------")

        train_op = slim.learning.create_train_op(
            total_loss=total_loss,
            optimizer=optimizer,
            variables_to_train=variables_to_train,
            global_step=global_step)

        if classification_loss_1 is not None:
            tf.summary.scalar('Losses/classification_loss_1',
                              classification_loss_1)
        if classification_loss_2 is not None:
            tf.summary.scalar('Losses/classification_loss_2',
                              classification_loss_2)
        if classification_loss_3 is not None:
            tf.summary.scalar('Losses/classification_loss_3',
                              classification_loss_3)
        if classification_loss_4 is not None:
            tf.summary.scalar('Losses/classification_loss_4',
                              classification_loss_4)
        if regularization_loss is not None:
            tf.summary.scalar('Losses/regularization_loss',
                              regularization_loss)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Merge all summaries together.
        tf.summary.merge(list(summaries), name='summary_op')

        session_config = tf.ConfigProto()
        session_config.allow_soft_placement = True
        session_config.gpu_options.allow_growth = True

        init_fn = util_tftraining.get_init_fn(FLAGS=FLAGS)
        print("Before learning.train", flush=True)
        print("---------------------------------------------------")
        print("---------------------------------------------------")

        early_stop_epochs = 10
        no_steps_in_epoch = int(np.ceil(num_samples / FLAGS.batch_size))
        scaffold = tf.train.Scaffold(saver=tf.train.Saver(
            max_to_keep=early_stop_epochs + 3))
        show_eval_loss_every_steps = int(no_steps_in_epoch / 5)
        save_checkpoint_every_steps = int(no_steps_in_epoch / 5)

        with tf.train.MonitoredTrainingSession(
                master='',
                is_chief=True,
                checkpoint_dir=FLAGS.train_dir,
                scaffold=scaffold,
                hooks=None,
                chief_only_hooks=None,
                save_checkpoint_steps=save_checkpoint_every_steps,
                save_summaries_secs=FLAGS.save_summaries_secs,
                config=session_config,
                stop_grace_period_secs=120,
                log_step_count_steps=0,
                max_wait_secs=10) as mon_sess:
            print("-----------------------------------------")
            if init_fn is not None:
                init_fn(mon_sess)
                print("Successfully loaded model")
            else:
                print("A model already exists in the 'train_dir' path")
            print("-----------------------------------------")

            last_sum_train_loss = 0
            last_sum_tl1_loss = 0
            best_sum_train_loss = np.inf
            step_no = 0
            current_lr = FLAGS.learning_rate

            no_params = 7
            consider_params = 4
            consider_top = 11

            best_eval_wa = np.inf
            best_eval_epoch = 0

            while True:
                _, train_loss, tl1_loss = mon_sess.run(
                    [train_op, total_loss, train_l1_loss],
                    feed_dict={learning_rate: current_lr})
                last_sum_train_loss += train_loss
                last_sum_tl1_loss += tl1_loss
                epoch_no = int(
                    np.floor((step_no * FLAGS.batch_size) / num_samples))

                if np.mod(step_no, FLAGS.log_every_n_steps) == 0:
                    print("Epoch {}, Step {}, lr={:0.5f}, Loss: {}".format(
                        epoch_no, step_no, current_lr, train_loss),
                          flush=True)

                # Calculate the evaluation loss alongside training.
                if np.mod(step_no, show_eval_loss_every_steps) == 0:
                    print("--In eval block--")
                    total_l1_loss = 0
                    total_wa_loss = 0
                    for loop_no in range(
                            int(np.floor(no_eval_examples / eval_batch_size))):
                        np_rawpreds, np_labels = mon_sess.run(
                            [eval_logits, e_labels])

                        for i in range(eval_batch_size):
                            predicted_label = np.argmax(
                                np_rawpreds[i, :].reshape(consider_params, -1),
                                axis=1)
                            gt_label = np.argmax(np_labels[i, :].reshape(
                                no_params, -1)[:consider_params, :],
                                                 axis=1)
                            l1_loss = np.sum(
                                np.abs(predicted_label - gt_label))

                            wa = 0
                            for ln in range(consider_params):
                                predsoft = my_softmax(
                                    np_rawpreds[i, :].reshape(
                                        consider_params,
                                        -1)[ln, :][np.newaxis])
                                predsoft = predsoft.squeeze()
                                labsoft = np_labels[i, :].reshape(
                                    no_params, -1)[ln, :]
                                topindices = predsoft.argsort(
                                )[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                wa += np.abs(
                                    int(np.round(
                                        np.sum(probsindices * topindices))) -
                                    labsoft.argmax())

                            total_l1_loss += l1_loss
                            total_wa_loss += wa

                    avg_manhattan_loss = total_l1_loss / no_eval_examples
                    avg_wa_loss = total_wa_loss / no_eval_examples
                    print("-------------------------------------------------------------------")
                    print("Average manhattan loss per scalar:",
                          avg_manhattan_loss / consider_params)
                    print("Average manhattan loss (Weighted avg. top 10 bins) per scalar:",
                          avg_wa_loss / consider_params)
                    print("-------------------------------------------------------------------",
                          flush=True)

                    if avg_wa_loss < best_eval_wa:
                        best_eval_wa = avg_wa_loss
                        best_eval_epoch = epoch_no
                    if avg_wa_loss > best_eval_wa and (
                            epoch_no - best_eval_epoch
                    ) > early_stop_epochs and current_lr < 1e-3 and epoch_no > 10:
                        print("STOPPING TRAINING at epoch: ", epoch_no,
                              ", best epoch was:", best_eval_epoch, "(step: ",
                              best_eval_epoch * num_samples / FLAGS.batch_size,
                              ")")
                        print("Current eval_wa:", avg_wa_loss,
                              ", best eval_wa:", best_eval_wa)
                        break

                    if step_no > 0:
                        last_sum_train_loss /= show_eval_loss_every_steps
                        last_sum_tl1_loss /= (no_steps_in_epoch *
                                              FLAGS.batch_size *
                                              no_output_params)
                        if last_sum_train_loss > best_sum_train_loss:
                            if current_lr > FLAGS.end_learning_rate:
                                print("Dividing learning rate by 10.0")
                                current_lr /= 10.0
                                best_sum_train_loss = last_sum_train_loss
                            else:
                                print("Already reached lowest possible lr i.e. ",
                                      current_lr)
                        else:
                            best_sum_train_loss = last_sum_train_loss
                        print("last_sum_train_loss:", last_sum_train_loss)
                        print("L1_train_loss:", last_sum_tl1_loss)
                        last_sum_train_loss = 0
                        last_sum_tl1_loss = 0

                ################################################################
                step_no += 1

                if FLAGS.max_number_of_steps is not None:
                    if step_no >= FLAGS.max_number_of_steps:
                        break

            print("Final Step {}, Loss: {}".format(step_no, train_loss))
        print("---------------------The End-----------------------")
        print("---------------------------------------------------")
        print("---------------------------------------------------")
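# The evaluation blocks above and in the earlier CARLA script decode each
# parameter by a probability-weighted average over the top-k bins. That
# step, isolated as a standalone function (illustrative only):
import numpy as np

def weighted_topk_bin(logit_row, k=11):
    """Probability-weighted average of the top-k bin indices."""
    p = np.exp(logit_row - logit_row.max())
    p /= p.sum()
    top = p.argsort()[::-1][:k]
    w = p[top] / p[top].sum()
    return int(np.round(np.sum(w * top)))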
if not os.path.isfile(tfmodel + '.meta'):
    raise IOError(('{:s} not found.\nDid you run ./data/script/'
                   'fetch_faster_rcnn_models.sh?').format(tfmodel + '.meta'))

# set config
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True

# init session
sess = tf.Session(config=tfconfig)

# load network
if demonet == 'vgg16':
    net = vgg16(batch_size=1)
elif demonet == 'inception_v1':
    net = inception_v1(batch_size=1)
else:
    raise NotImplementedError
net.create_architecture(sess, "TEST", 8, tag='default',
                        anchor_scales=[1, 2, 3])
saver = tf.train.Saver()
saver.restore(sess, tfmodel)
print('Loaded network {:s}'.format(tfmodel))

output_graph_def = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph.as_graph_def(), [
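# The node-name list above is truncated in this excerpt. For reference, a
# typical freeze step looks like the following, where 'cls_prob' is a
# purely hypothetical output node name and frozen_model.pb a placeholder:
# output_graph_def = tf.graph_util.convert_variables_to_constants(
#     sess, sess.graph.as_graph_def(), ['cls_prob'])
# with tf.gfile.GFile('frozen_model.pb', 'wb') as f:
#     f.write(output_graph_def.SerializeToString())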
    features=(tf.float32, [224, 224, 3]),
    labels=(tf.int32, [1]),
    batch_size=options.batchSize,
    validation_dataset=val_data)
images, labels = dataset.tensors
# As the sequence file's labels are one-based, subtract 1 to get
# zero-based labels.
zero_based_label = labels - 1

is_training = tf.placeholder(dtype=tf.bool, shape=())

with slim.arg_scope(
        inception_v1.inception_v1_arg_scope(weight_decay=0.0,
                                            use_batch_norm=False)):
    logits, end_points = inception_v1.inception_v1(
        images,
        dropout_keep_prob=0.6,
        num_classes=1000,
        is_training=is_training)
loss = tf.reduce_mean(
    tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                           labels=zero_based_label))

iterationPerEpoch = int(ceil(float(1281167) / options.batchSize))
if options.maxEpoch:
    maxIteration = iterationPerEpoch * options.maxEpoch
else:
    maxIteration = options.maxIteration
warmup_iteration = options.warmupEpoch * iterationPerEpoch
if warmup_iteration == 0:
    warmupDelta = 0.0
else:
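# The else branch above is cut off in this excerpt. A common linear-warmup
# rule in such training scripts (an assumption, not the original code) ramps
# the learning rate from a base value to a peak over warmup_iteration steps:
#     warmupDelta = (peak_lr - base_lr) / warmup_iteration
# with the learning rate increased by warmupDelta at each warmup iteration.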
def main(_):
    # Images for the inception classifiers are normalized to the [-1, 1]
    # interval; eps is a difference between pixels, so it lies in [0, 2].
    # Renormalize epsilon from [0, 255] to [0, 2].
    tf.logging.set_verbosity(tf.logging.INFO)
    full_start = timer()

    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    all_images_taget_class, all_images_true_label = load_target_class(
        FLAGS.input_dir)

    if not os.path.exists(FLAGS.output_dir):
        os.mkdir(FLAGS.output_dir)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        target_class_input = tf.placeholder(tf.int32,
                                            shape=[FLAGS.batch_size])

        momentum = FLAGS.momentum
        eps = 2.0 * FLAGS.max_epsilon / 255.0
        alpha = 0.2
        num_classes = 1000
        num_classes_a = 1001

        # image = x_input
        image = input_diversity(x_input)
        # image = batch_dct2d(image)

        """ 224 input """
        processed_imgs_res_v1_50 = preprocess_for_model(image, 'resnet_v1_50')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
                processed_imgs_res_v1_50,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        processed_imgs_res_v1_101 = preprocess_for_model(
            image, 'resnet_v1_101')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_101, end_points_res_v1_101 = resnet_v1.resnet_v1_101(
                processed_imgs_res_v1_101,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        processed_res_v1 = preprocess_for_model(image, 'resnet_v1_152')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_152, end_points_res_v1 = resnet_v1.resnet_v1_152(
                processed_res_v1,
                num_classes=num_classes,
                is_training=False,
                scope='resnet_v1_152',
                reuse=tf.AUTO_REUSE)

        processed_imgs_vgg_16 = preprocess_for_model(image, 'vgg_16')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(
                processed_imgs_vgg_16,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_16')

        processed_imgs_vgg_19 = preprocess_for_model(image, 'vgg_19')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_19, end_points_vgg_19 = vgg.vgg_19(
                processed_imgs_vgg_19,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_19')

        logits_clean_a = (logits_res_v1_50 + logits_res_v1_101 +
                          logits_res_v1_152 + logits_vgg_16 +
                          logits_vgg_19) / 5.0

        processed_imgs_inception_v1 = preprocess_for_model(
            image, 'inception_v1')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            logits_inception_v1, end_points_inception_v1 = inception_v1.inception_v1(
                processed_imgs_inception_v1,
                num_classes=num_classes_a,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        """ 299 input """
        x_div = preprocess_for_model(image, 'inception_v3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_inc_v3, end_points_inc_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV3')

        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits_inc_v4, end_points_inc_v4 = inception_v4.inception_v4(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV4')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_inc_res_v2, end_points_inc_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionResnetV2')

        logits_clean_b = (logits_inc_v3 + logits_inc_v4 + logits_inc_res_v2 +
                          logits_inception_v1) / 4.0

        """ add adv model """
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionV3')

        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens3AdvInceptionV3')

        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens4AdvInceptionV3')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='EnsAdvInceptionResnetV2')

        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_adv_res_v2, end_points_adv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionResnetV2')

        logits_ens_a = (logits_adv_v3 + logits_ens3_adv_v3 +
                        logits_ens4_adv_v3 + logits_ensadv_res_v2 +
                        logits_adv_res_v2) / 5.0
        logits_ens_aux = (end_points_adv_v3['AuxLogits'] +
                          end_points_ens3_adv_v3['AuxLogits'] +
                          end_points_ens4_adv_v3['AuxLogits'] +
                          end_points_adv_res_v2['AuxLogits'] +
                          end_points_ensadv_res_v2['AuxLogits']) / 5.0
        label_test = tf.argmax(logits_adv_v3, axis=1)

        """ ensemble model loss """
        # The 1001-class models include a background class at index 0, so
        # slice [:, 1:1001] to align them with the 1000-class models.
        clean_logits = (logits_clean_a + logits_clean_b[:, 1:1001]) / 2.0
        adv_logits = logits_ens_a[:, 1:1001] + logits_ens_aux[:, 1:1001]
        logits = (clean_logits + adv_logits) / 2.0
        ens_labels = tf.argmax(logits, axis=1)

        one_hot = tf.one_hot(target_class_input, num_classes)

        loss_adv_v3 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_adv_v3[:, 1:1001],
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_ens3_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens3_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ens4_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens4_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ensadv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ensadv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_adv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_adv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_res_v1_101 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_101,
                                                          label_smoothing=0.0,
                                                          weights=1.0)
        loss_res_v1_50 = tf.losses.softmax_cross_entropy(one_hot,
                                                         logits_res_v1_50,
                                                         label_smoothing=0.0,
                                                         weights=1.0)
        loss_vgg_16 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_vgg_16,
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_res_v1_152 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_152,
                                                          label_smoothing=0.0,
                                                          weights=1.0)

        total_loss = tf.losses.softmax_cross_entropy(one_hot,
                                                     logits,
                                                     label_smoothing=0.0,
                                                     weights=1.0)

        noise = tf.gradients(total_loss, x_input)[0]

        # Smooth the gradient with a Gaussian kernel applied depthwise.
        kernel = gkern(15, FLAGS.sig).astype(np.float32)
        stack_kernel = np.stack([kernel, kernel, kernel]).swapaxes(2, 0)
        stack_kernel = np.expand_dims(stack_kernel, 3)
        # Output: [batch, out_height, out_width,
        #          in_channels * channel_multiplier]
        noise = tf.nn.depthwise_conv2d(noise,
                                       stack_kernel,
                                       strides=[1, 1, 1, 1],
                                       padding='SAME')

        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(
                tf.reshape(noise, [FLAGS.batch_size, -1]), axis=1),
            [FLAGS.batch_size, 1, 1, 1])
        # noise = momentum * grad + noise
        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(
                tf.reshape(noise, [FLAGS.batch_size, -1]), axis=1),
            [FLAGS.batch_size, 1, 1, 1])
        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV1'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='InceptionV4'))
        s4 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_50'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_101'))
        s6 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_152'))
        s7 = tf.train.Saver(slim.get_model_variables(scope='vgg_16'))
        s8 = tf.train.Saver(slim.get_model_variables(scope='vgg_19'))
        s9 = tf.train.Saver(
            slim.get_model_variables(scope='InceptionResnetV2'))
        s10 = tf.train.Saver(
            slim.get_model_variables(scope='AdvInceptionResnetV2'))
        s11 = tf.train.Saver(
            slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s12 = tf.train.Saver(
            slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s13 = tf.train.Saver(
            slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s14 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        print('Created Graph')

        with tf.Session() as sess:
            s1.restore(sess, FLAGS.checkpoint_path_inception_v1)
            s2.restore(sess, FLAGS.checkpoint_path_inception_v3)
            s3.restore(sess, FLAGS.checkpoint_path_inception_v4)
            s4.restore(sess, FLAGS.checkpoint_path_resnet_v1_50)
            s5.restore(sess, FLAGS.checkpoint_path_resnet_v1_101)
            s6.restore(sess, FLAGS.checkpoint_path_resnet_v1_152)
            s7.restore(sess, FLAGS.checkpoint_path_vgg_16)
            s8.restore(sess, FLAGS.checkpoint_path_vgg_19)
            s9.restore(sess, FLAGS.checkpoint_path_inception_resnet_v2)
            s10.restore(sess, FLAGS.checkpoint_path_adv_inception_resnet_v2)
            s11.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
            s12.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
            s13.restore(sess,
                        FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
            s14.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)
            print('Initialized Models')

            processed = 0.0
            defense, tgt, untgt, final = 0.0, 0.0, 0.0, 0.0
            idx = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                # Pad the per-image target and true labels up to batch size.
                target_class_for_batch = (
                    [all_images_taget_class[n[:-4]] for n in filenames] +
                    [0] * (FLAGS.batch_size - len(filenames)))
                true_label_for_batch = (
                    [all_images_true_label[n[:-4]] for n in filenames] +
                    [0] * (FLAGS.batch_size - len(filenames)))

                x_max = np.clip(images + eps, -1.0, 1.0)
                x_min = np.clip(images - eps, -1.0, 1.0)
                adv_img = np.copy(images)

                for i in range(FLAGS.iterations):
                    # Apply non-local-means denoising every 5 iterations.
                    adv_img = batch_NLM(adv_img) if i % 5 == 0 else adv_img

                    ens_loss, pred, grad, pred_adv_v3 = sess.run(
                        [total_loss, ens_labels, noise, label_test],
                        feed_dict={
                            x_input: adv_img,
                            target_class_input: target_class_for_batch
                        })
                    adv_img = adv_img - alpha * np.clip(np.round(grad), -2, 2)
                    adv_img = np.clip(adv_img, x_min, x_max)

                    print("{} \t total_loss {}".format(i, ens_loss))
                    print('prediction   :', pred)
                    print('target_label :', target_class_for_batch)
                    print('true_label   :', true_label_for_batch)

                print("final prediction {} \t target class {} \t true label {} \t "
                      .format(pred, target_class_for_batch,
                              true_label_for_batch))

                processed += FLAGS.batch_size
                tgt += sum(
                    np.equal(np.array(pred),
                             np.array(target_class_for_batch)))
                defense += sum(
                    np.equal(np.array(pred), np.array(true_label_for_batch)))
                untgt = processed - tgt - defense
                print("processed {} \t acc {} {} \t tgt {} {} \t untgt {} {} ".
                      format(processed, defense, defense / processed, tgt,
                             tgt / processed, untgt, untgt / processed))

                full_end = timer()
                print("DONE: Processed {} images in {} sec".format(
                    processed, full_end - full_start))
                save_images(adv_img, filenames, FLAGS.output_dir)

            print("DONE: Processed {} images in {} sec".format(
                processed, full_end - full_start))
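# The attack above smooths gradients with a Gaussian kernel built by gkern,
# which is not defined in the excerpt. A standard construction used in
# translation-invariant attack code (a sketch under that assumption):
import numpy as np
import scipy.stats as st

def gkern(kernlen=15, nsig=3):
    """Returns a normalized 2-D Gaussian kernel with side length kernlen."""
    x = np.linspace(-nsig, nsig, kernlen)
    kern1d = st.norm.pdf(x)
    kernel_raw = np.outer(kern1d, kern1d)
    return kernel_raw / kernel_raw.sum()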