import i3d # import skvideo import utils.pre_process_rgb_flow as img_tool #%% gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) tf_config = tf.ConfigProto() # tf_config.gpu_options.per_process_gpu_memory_fraction = 0.99 eval_type = 'rgb' cfg = ki3du.load_config(yml_path='run_config.yml') ATTACK_CFG = cfg.SINGLE_VIDEO_ATTACK kinetics_classes = ki3du.load_kinetics_classes(eval_type) k_i3d = ki3du.kinetics_i3d(ckpt_path=cfg.MODEL.CKPT_PATH, batch_size=ATTACK_CFG.BATCH_SIZE) if ATTACK_CFG.IMPROVE_ADV_LOSS: adversarial_loss = k_i3d.improve_adversarial_loss(margin=ATTACK_CFG.PROB_MARGIN, targeted = ATTACK_CFG.TARGETED_ATTACK, logits = ATTACK_CFG.USE_LOGITS) else: adversarial_loss = k_i3d.ce_adversarial_loss(targeted=ATTACK_CFG.TARGETED_ATTACK) beta_0_default = tf.constant(1, dtype=tf.float32) beta_0 = tf.placeholder_with_default(beta_0_default, name='beta_0', shape=beta_0_default.shape) beta_1_default = tf.constant(0.1, dtype=tf.float32) beta_1 = tf.placeholder_with_default(beta_1_default, name='beta_1', shape=beta_1_default.shape)
sys.path.insert(1, os.path.realpath(os.path.pardir)) # import skvideo from utils import pre_process_rgb_flow as img_tool from utils import kinetics_i3d_utils as ki3du _IMAGE_SIZE = 224 _BATCH_SIZE = 1 _SAMPLE_VIDEO_FRAMES = 90 #90 #79 # 79 #90 #90 #250 #90 #79 _BASE_PATCH_FRAMES = _SAMPLE_VIDEO_FRAMES #_SAMPLE_VIDEO_FRAMES #_SAMPLE_VIDEO_FRAMES # 1# _SAMPLE_VIDEO_FRAMES # 1:for sticker _SAMPLE_VIDEO_FRAMES # 1 _IND_START = 0 # 0 #50 _IND_END = _SAMPLE_VIDEO_FRAMES kinetics_classes = ki3du.load_kinetics_classes() #%% model loader ckpt_path = '/data/DL/Adversarial/kinetics-i3d/result/generalization/universal/val_test/all_cls_shuffle_t15000_v2000_/' ckpt_last = tf.train.latest_checkpoint(checkpoint_dir=ckpt_path) ckpt_last = '/data/DL/Adversarial/kinetics-i3d/result/generalization/model_gen_untargeted_ce_loss_reg/model_step_00000' model = ki3du.kinetics_i3d(ckpt_path=ckpt_last, batch_size=_BATCH_SIZE, init_pert_from_ckpt=True) inputs = model.rgb_input labels = model.labels perturbation = model.eps_rgb adversarial_inputs_rgb = model.adversarial_inputs_rgb
def main(argv, arc):
    """Convert per-class ``.mp4`` videos into TFRecord shards.

    For every requested class, the videos found directly inside
    ``<videos_base_path>/<class>/`` are read, trimmed to their last
    ``ki3du._SAMPLE_VIDEO_FRAMES`` frames and serialized (label id + raw
    frame bytes) into ``<tf_dst_folder>/<class>/kinetics_<class>_<NNNN>.tfrecords``.
    A new shard is started every 100 enumerated videos (skipped videos count
    towards that 100, so shards may hold fewer records).

    Args:
        argv: Command-line style sequence. ``argv[1]`` is the videos base
            folder, ``argv[2]`` is a class name or ``'all'`` (every entry of
            the base folder), ``argv[3]`` is the destination folder.
        arc: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If a class that has videos is not in the list returned
            by ``ki3du.load_kinetics_classes()``.

    Side effects:
        A video that cannot be decoded is deleted from disk (original
        behaviour, now reported on stdout and limited to ``Exception`` so
        Ctrl-C no longer deletes the file being read).
    """
    videos_base_path = argv[1]
    class_name = argv[2]
    tf_dst_folder = argv[3]

    if class_name == 'all':
        classes_list = os.listdir(videos_base_path)
    else:
        classes_list = [class_name]

    if not os.path.exists(tf_dst_folder):
        os.makedirs(tf_dst_folder)

    n_frames = ki3du._SAMPLE_VIDEO_FRAMES
    kinetics_classes = ki3du.load_kinetics_classes()

    for c in classes_list:
        class_dir = os.path.join(videos_base_path, c)
        if not os.path.exists(class_dir):
            print('{} not exist'.format(class_dir))
            continue

        # Match only this class folder. The previous pattern
        # (class_dir + '*/*.mp4') also picked up sibling folders whose name
        # merely starts with the class name and labelled them as this class.
        video_list_path = glob.glob(os.path.join(class_dir, '*.mp4'))
        if not video_list_path:
            continue

        cls_id = kinetics_classes.index(c)
        target_folder = os.path.join(tf_dst_folder, c)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        writer = None
        k = 0
        try:
            for i, vid_path in enumerate(video_list_path):
                if i % 100 == 0:
                    # Rotate to the next shard file.
                    if writer is not None:
                        writer.close()
                    k += 1
                    train_filename = os.path.join(
                        target_folder,
                        'kinetics_{}_{:04}.tfrecords'.format(c, k))
                    writer = tf.python_io.TFRecordWriter(train_filename)

                if not os.path.exists(vid_path):
                    continue

                try:
                    frames = skvideo.io.vread(vid_path)
                except Exception as err:
                    # Deliberate clean-up of undecodable files.
                    print('removing unreadable video {}: {}'.format(
                        vid_path, err))
                    os.remove(vid_path)
                    continue

                # Videos shorter than the sample length are skipped.
                if frames.shape[0] < n_frames:
                    continue
                frames = frames[-n_frames:]

                feature = {
                    'train/label': img_tool._int64_feature(cls_id),
                    'train/video': img_tool._bytes_feature(frames.tobytes())
                }
                example = tf.train.Example(
                    features=tf.train.Features(feature=feature))
                writer.write(example.SerializeToString())
        finally:
            # Always flush/close the last shard of this class, even on error.
            if writer is not None:
                writer.close()
def model_fn(features, labels, mode, params):
    """``tf.estimator`` model function for training an adversarial perturbation.

    Builds a ``ki3du.kinetics_i3d`` (or ``ki3du.kinetics_i3d_L12``) wrapper
    around the input batch and returns the ``EstimatorSpec`` for ``mode``:

    * PREDICT: predictions dict holding the ``'perturbation'`` tensor.
    * EVAL: total loss plus the ``'ACC: 1- FOOLING_RATIO'`` metric computed
      by ``ki3du.miss_cls_fn``; the metric is also logged once at the end of
      evaluation.
    * otherwise (TRAIN): an Adam step over the perturbation only, loss logging
      every 10 iterations and summaries saved every 50 steps to
      ``model/train``.

    Args:
        features: Input batch passed to the model wrapper as ``rgb_input``.
        labels: Labels passed to the model wrapper and used by the
            evaluation metric.
        mode: One of ``tf.estimator.ModeKeys``.
        params: Configuration object; reads ``LAMBDA``, ``BETA_1``,
            ``BETA_2``, ``CYCLIC_ATTACK``, ``CYCLIC_PERTURBATION_ATTACK``,
            ``TARGETED_ATTACK`` and, for the improved loss, ``PROB_MARGIN``
            and ``USE_LOGITS``.

    Returns:
        A ``tf.estimator.EstimatorSpec``.
    """
    # NOTE(review): FLICKERING_ATTACK and IMPROVE_ADV_LOSS are read from the
    # module-level ATTACK_CFG while every other setting comes from `params`;
    # confirm both refer to the same configuration.
    rgb_sample = features
    beta_0 = params.LAMBDA
    beta_1 = params.BETA_1
    beta_2 = params.BETA_2
    model_dir = 'model/'  # params.model_dir

    cyclic_flag = 1.0 if params.CYCLIC_ATTACK else 0.0
    cyclic_pert_flag = 1.0 if params.CYCLIC_PERTURBATION_ATTACK else 0.0
    adv_flag = 1.0

    if ATTACK_CFG.FLICKERING_ATTACK:
        k_i3d = ki3du.kinetics_i3d(ckpt_path='',
                                   batch_size=None,
                                   init_model=False,
                                   rgb_input=rgb_sample,
                                   labels=labels,
                                   cyclic_flag_default_c=cyclic_flag,
                                   cyclic_pert_flag_default_c=cyclic_pert_flag,
                                   default_adv_flag_c=adv_flag)
    else:
        # "Sparse Adversarial Perturbations for Videos"
        # https://arxiv.org/pdf/1803.02536.pdf
        k_i3d = ki3du.kinetics_i3d_L12(ckpt_path='',
                                       batch_size=None,
                                       init_model=False,
                                       rgb_input=rgb_sample,
                                       labels=labels,
                                       cyclic_flag_default_c=cyclic_flag,
                                       default_adv_flag_c=adv_flag)

    perturbation = k_i3d.eps_rgb
    eps_rgb = k_i3d.eps_rgb
    softmax = k_i3d.softmax
    softmax_clean = k_i3d.softmax_clean

    norm_reg = k_i3d.norm_reg
    diff_norm_reg = k_i3d.diff_norm_reg
    laplacian_norm_reg = k_i3d.laplacian_norm_reg
    L12_loss = k_i3d.loss_L12
    thickness_relative = k_i3d.thickness_relative
    roughness_relative = k_i3d.roughness_relative

    predictions = {
        # 'prob': softmax,
        'perturbation': tf.convert_to_tensor(perturbation)
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    if ATTACK_CFG.IMPROVE_ADV_LOSS:
        adversarial_loss = k_i3d.improve_adversarial_loss(
            margin=params.PROB_MARGIN,
            targeted=params.TARGETED_ATTACK,
            logits=params.USE_LOGITS)
    else:
        adversarial_loss = k_i3d.ce_adversarial_loss(
            targeted=params.TARGETED_ATTACK)

    if ATTACK_CFG.FLICKERING_ATTACK:
        regularizer_loss = (beta_1 * norm_reg + beta_2 * diff_norm_reg +
                            beta_2 * laplacian_norm_reg)  # +lab_reg
    else:
        regularizer_loss = beta_1 * L12_loss

    weighted_regularizer_loss = beta_0 * regularizer_loss
    loss = adversarial_loss + weighted_regularizer_loss

    if mode == tf.estimator.ModeKeys.EVAL:
        # For untargeted attacks the metric is weighted by whether the clean
        # prediction equals the label; targeted attacks use no weights.
        if params.TARGETED_ATTACK:
            valid_videos = None
        else:
            valid_videos = tf.equal(tf.argmax(softmax_clean, axis=-1), labels)

        # Define the metrics:
        miss_cls_metric, miss_cls_metric_update_op = ki3du.miss_cls_fn(
            predictions=tf.argmax(softmax, axis=-1),
            labels=labels,
            weights=valid_videos,
            targeted=params.TARGETED_ATTACK)

        logging_hook = tf.train.LoggingTensorHook({"ACC": miss_cls_metric},
                                                  at_end=True)
        eval_metric_ops = {
            'ACC: 1- FOOLING_RATIO':
            (miss_cls_metric, miss_cls_metric_update_op)
        }
        # The hook must be registered as an *evaluation* hook: hooks passed
        # as prediction_hooks are not run in EVAL mode.
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops,
                                          evaluation_hooks=[logging_hook])

    prob_to_min = k_i3d.to_min_prob
    prob_to_max = k_i3d.to_max_prob

    # Learning rate defaults to 0.001 but can be overridden through feed_dict.
    learning_rate_default = tf.constant(0.001, dtype=tf.float32)
    learning_rate = tf.placeholder_with_default(
        learning_rate_default,
        name='learning_rate',
        shape=learning_rate_default.shape)

    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # Only the perturbation is optimized.
    gradients = optimizer.compute_gradients(loss=loss, var_list=perturbation)
    train_op = optimizer.apply_gradients(gradients, global_step)

    logging_hook = tf.train.LoggingTensorHook({"loss": loss}, every_n_iter=10)

    tf.summary.scalar('Loss/total', loss)
    tf.summary.scalar('Loss/adversarial_loss', adversarial_loss)
    tf.summary.scalar('Loss/regularizer_loss', regularizer_loss)
    tf.summary.scalar('Loss/regularizer_loss_weighted',
                      weighted_regularizer_loss)
    tf.summary.scalar('Loss/thickness', norm_reg)
    tf.summary.scalar('Loss/L12', L12_loss)
    tf.summary.scalar('Loss/first_order_temporal_diff', diff_norm_reg)
    tf.summary.scalar('Loss/second_order_temporal_diff', laplacian_norm_reg)
    tf.summary.scalar('Perturbation/thickness_%%', thickness_relative)
    tf.summary.scalar('Perturbation/roughness_%%', roughness_relative)
    tf.summary.scalar('Perturbation/max', tf.reduce_max(eps_rgb))
    tf.summary.scalar('Perturbation/min', tf.reduce_min(eps_rgb))
    tf.summary.scalar('Probability/prob_to_min', tf.reduce_mean(prob_to_min))
    tf.summary.scalar('Probability/prob_to_max', tf.reduce_mean(prob_to_max))

    summary_op = tf.summary.merge_all()
    train_summary_hook = tf.train.SummarySaverHook(
        save_steps=50,
        output_dir=os.path.join(model_dir, "train"),
        summary_op=summary_op)

    return tf.estimator.EstimatorSpec(
        mode,
        loss=loss,
        train_op=train_op,
        training_hooks=[train_summary_hook, logging_hook])
def main_random_videos(database_path, class_name, tf_dst_folder):
    """Write all videos of a database, shuffled, into fixed-size TFRecord shards.

    Every ``<database_path>/<class>/*.mp4`` file is collected, the list is
    shuffled, and each usable video (trimmed to its last
    ``ki3du._SAMPLE_VIDEO_FRAMES`` frames) is serialized as label id + raw
    frame bytes. Shards are named
    ``kinetics_N_<videos per shard>_<NNNN>.tfrecords`` and hold 50 videos
    each (the last one may hold fewer).

    A shard file is only opened once there is a video to put in it, so
    skipped videos no longer leave empty shard files behind and an empty
    database no longer fails on an unbound writer.

    Args:
        database_path: Folder containing one sub-folder per class. A trailing
            path separator is accepted but no longer required.
        class_name: Unused; kept so existing callers keep working.
        tf_dst_folder: Destination folder, created if missing.

    Raises:
        ValueError: If a video's parent folder name is not in the list
            returned by ``ki3du.load_kinetics_classes()``.

    Side effects:
        A video that cannot be decoded is deleted from disk (original
        behaviour, now reported on stdout and limited to ``Exception`` so
        Ctrl-C no longer deletes the file being read).
    """
    num_videos_in_single_tfrecord = 50

    if not os.path.exists(tf_dst_folder):
        os.makedirs(tf_dst_folder)

    n_frames = ki3du._SAMPLE_VIDEO_FRAMES
    kinetics_classes = ki3du.load_kinetics_classes()

    videos_list_path = glob.glob(os.path.join(database_path, '*', '*.mp4'))
    random.shuffle(videos_list_path)

    writer = None
    k = 0  # index of the next shard file
    i = 0  # number of videos written so far
    try:
        for vid_path in videos_list_path:
            # The class is the name of the folder that contains the video.
            cls = os.path.basename(os.path.dirname(vid_path))
            cls_id = kinetics_classes.index(cls)

            if not os.path.exists(vid_path):
                continue

            try:
                frames = skvideo.io.vread(vid_path)
            except Exception as err:
                # Deliberate clean-up of undecodable files.
                print('removing unreadable video {}: {}'.format(
                    vid_path, err))
                os.remove(vid_path)
                continue

            # Videos shorter than the sample length are skipped.
            if frames.shape[0] < n_frames:
                continue
            frames = frames[-n_frames:]

            # Rotate shards only when a record is about to be written.
            if i % num_videos_in_single_tfrecord == 0:
                if writer is not None:
                    writer.close()
                train_filename = os.path.join(
                    tf_dst_folder,
                    'kinetics_N_{}_{:04}.tfrecords'.format(
                        num_videos_in_single_tfrecord, k))
                writer = tf.python_io.TFRecordWriter(train_filename)
                k += 1

            feature = {
                'train/label': img_tool._int64_feature(cls_id),
                'train/video': img_tool._bytes_feature(frames.tobytes())
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
            i += 1
    finally:
        # Always flush/close the last shard, even on error.
        if writer is not None:
            writer.close()
def main(argv, arc):
    """Animate a saved attack result: clean video + perturbation = adversarial.

    Loads a pickled result dictionary from ``argv[1]`` and shows a matplotlib
    animation with three image panels (clean video, amplified perturbation,
    adversarial video) and a graph of the per-channel perturbation that
    scrolls with the current frame.

    Args:
        argv: Command-line style sequence; ``argv[1]`` is the path of the
            ``.pkl`` result file. The dictionary must provide the keys read
            below: ``softmax``, ``adv_video``, ``rgb_sample``,
            ``perturbation``, ``correct_cls_id``, ``smoothness``,
            ``fatness``, ``beta_1``, ``beta_2`` and optionally ``beta_3``.
        arc: Unused; kept so existing callers keep working.

    Notes:
        The frame count (90) and frame size (224) are hard-coded, matching
        the constants used throughout this function.
    """
    kinetics_classes = ki3du.load_kinetics_classes()
    path = argv[1]
    save_to_vid = 0  # set to a truthy value to also export an .mp4

    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only open result files produced by a trusted run.
    with open(path, 'rb') as handle:
        tmp_dict = pickle.load(handle)

    tmp_dict['softmax'] = np.concatenate(tmp_dict['softmax'], axis=0)
    # Expose the dictionary entries as attributes.
    tmp_struct = namedtuple("dict", tmp_dict.keys())(*tmp_dict.values())

    # First set up the figure, the axes, and the plot elements to animate.
    fig = plt.figure(facecolor='black')  # ,constrained_layout=True)
    ax_pert_graph = fig.add_subplot(2, 3, 5, facecolor='k')
    ax_pert_graph.set_xlim((0, 90))
    ax_adv_vid = fig.add_subplot(2, 3, 3)
    ax_pert_vid = fig.add_subplot(2, 3, 2)
    ax_cln_vid = fig.add_subplot(2, 3, 1)

    ax_adv_vid.axis('OFF')
    ax_pert_vid.axis('OFF')
    ax_cln_vid.axis('OFF')

    # Map the stored videos to uint8 via (x + 1) * 127.5
    # (assumes they are stored in [-1, 1] -- TODO confirm).
    adv_video = ((tmp_struct.adv_video[0] + 1.0) * 127.5).astype(np.uint8)
    dummy_img = adv_video[0]
    cln_video = ((tmp_struct.rgb_sample[0] + 1.0) * 127.5).astype(np.uint8)

    # Stretch the last perturbation to the full 0..255 range for display and
    # remember the amplification factor for the panel title.
    pert_raw = (tmp_struct.perturbation[-1].copy() -
                tmp_struct.perturbation[-1].min())
    scale_factor = int(2 / pert_raw.max())
    pert_raw /= pert_raw.max()
    pert_raw *= 255
    pert_raw = pert_raw.astype(np.uint8)
    # Tile along axes 1 and 2 to get an image-sized frame
    # (assumes those axes have length 1 -- TODO confirm).
    pert_video = np.repeat(pert_raw, 224, axis=1)
    pert_video = np.repeat(pert_video, 224, axis=2)

    # Perturbation expressed in percent of the full scale (range of 2).
    pert = tmp_struct.perturbation[-1].squeeze() / 2.0 * 100

    font = {
        'family': 'serif',
        'color': 'white',
        'weight': 'normal',
        'size': 16,
    }

    ax_cln_vid.set_title(
        'Clean video\n top-1 class: {}'.format(
            kinetics_classes[tmp_struct.correct_cls_id]), font)
    ax_pert_vid.set_title(
        'Perturbation\n' +
        r'(amplified $\times${} for visualization)'.format(scale_factor),
        font)
    ax_adv_vid.set_title(
        'Adversarial video\n top-1 class: {}'.format(
            kinetics_classes[tmp_struct.softmax[-1].argmax()]), font)
    ax_pert_graph.set_title(
        'RGB Perturbation\n percents from the full scale of the image', font)
    ax_pert_graph.set_ylabel('Amplitude from full scale[%]', font)

    font2 = {
        'family': 'serif',
        'color': 'y',
        'weight': 'normal',
        'size': 16,
    }
    ax_pert_graph.set_xlabel('Current\nperturbation', font2)

    y_top = 1.2 * np.abs(pert).max()
    ax_pert_graph.set_ylim(-y_top, y_top)
    ax_pert_graph.tick_params(axis='y', labelcolor='w')
    ax_pert_graph.tick_params(axis='x', colors='k')
    ax_pert_graph.grid(True)

    # Two arrows at x=45 (the centre of the graph) mark the current frame.
    pp = y_top - np.abs(pert).max()
    ax_pert_graph.arrow(45,
                        -y_top,
                        0,
                        0.5 * pp,
                        head_width=2,
                        head_length=0.5 * pp,
                        fc='y',
                        ec='y')
    ax_pert_graph.arrow(45,
                        y_top,
                        0,
                        -0.5 * pp,
                        head_width=2,
                        head_length=0.5 * pp,
                        fc='y',
                        ec='y')

    fig.set_size_inches(19, 11)

    img_adv = ax_adv_vid.imshow(np.zeros_like(dummy_img, dtype=np.uint8),
                                zorder=1)
    img_cln = ax_cln_vid.imshow(np.zeros_like(dummy_img, dtype=np.uint8),
                                zorder=1)
    img_pert = ax_pert_vid.imshow(np.zeros_like(dummy_img, dtype=np.uint8),
                                  zorder=1)

    # '+' and '=' signs placed midway between neighbouring image panels.
    plus_pos = [
        (ax_cln_vid.get_position().x1 + ax_pert_vid.get_position().x0) / 2,
        (ax_cln_vid.get_position().y1 + ax_cln_vid.get_position().y0) / 2
    ]
    fig.text(plus_pos[0],
             plus_pos[1],
             '$+$',
             horizontalalignment='center',
             verticalalignment='center',
             fontsize=18,
             color='white')
    equal_pos = [
        (ax_pert_vid.get_position().x1 + ax_adv_vid.get_position().x0) / 2,
        (ax_pert_vid.get_position().y1 + ax_pert_vid.get_position().y0) / 2
    ]
    fig.text(equal_pos[0],
             equal_pos[1],
             '$=$',
             horizontalalignment='center',
             verticalalignment='center',
             fontsize=18,
             color='white')

    plotcols = ["red", "green", "blue"]
    roughness = tmp_struct.smoothness[-1]
    thickness = tmp_struct.fatness[-1]
    beta1 = tmp_struct.beta_1
    if hasattr(tmp_struct, 'beta_3'):
        beta2 = tmp_struct.beta_2 + tmp_struct.beta_3
    else:
        beta2 = tmp_struct.beta_2 * 2

    fig.suptitle(
        'Adversarial example: ' +
        r'$\beta_1$={},$\beta_2$={},'.format(beta1, beta2) +
        ' Thickness={:.2f}%, Roughness={:.2f}%'.format(thickness, roughness),
        color='w',
        fontsize=16)
    fig.subplots_adjust(hspace=0.22)

    # One curve per colour channel.
    lines = [
        ax_pert_graph.plot([], [], lw=2, color=col)[0] for col in plotcols
    ]
    x = np.linspace(0, 89, 90)  # frame axis, identical for every frame

    # initialization function: plot the background of each frame
    def init():
        # Clear every channel curve (the original loop reset an unrelated
        # line object instead of the loop variable).
        for channel_line in lines:
            channel_line.set_data([], [])
        blank = np.zeros_like(dummy_img, dtype=np.uint8)
        img_adv.set_data(blank)
        img_cln.set_data(blank)
        img_pert.set_data(blank)
        # With blit=True every artist touched here must be returned.
        return lines + [img_adv, img_cln, img_pert]

    # animation function. This is called sequentially
    def animate(i):
        ii = i % 90
        # Roll so that the current frame sits at x=45, under the arrows.
        y = np.roll(pert, -ii - 45, 0)
        img_adv.set_data(adv_video[ii])
        img_cln.set_data(cln_video[ii])
        img_pert.set_data(pert_video[ii])
        for lnum, channel_line in enumerate(lines):
            # set data for each line separately.
            channel_line.set_data(x, y[..., lnum])
        return lines[0], lines[1], lines[2], img_adv, img_cln, img_pert

    # call the animator. blit=True means only re-draw the parts that have
    # changed.
    anim = animation.FuncAnimation(fig,
                                   animate,
                                   init_func=init,
                                   save_count=900,
                                   frames=90 * 3,
                                   interval=100,
                                   blit=True,
                                   repeat=True)

    # save the animation as an mp4. This requires ffmpeg or mencoder to be
    # installed. The extra_args ensure that the x264 codec is used, so that
    # the video can be embedded in html5. You may need to adjust this for
    # your system: for more information, see
    # http://matplotlib.sourceforge.net/api/animation_api.html
    if save_to_vid:
        anim.save('{}_beta1_{}_th_{:.2f}%_rg_{:.2f}%.mp4'.format(
            kinetics_classes[tmp_struct.correct_cls_id].replace(' ', '_'),
            tmp_struct.beta_1, thickness, roughness),
                  fps=12,
                  dpi=100,
                  extra_args=['-vcodec', 'libx264', '-crf', '5'],
                  savefig_kwargs={
                      'bbox_inches': 'tight',
                      'quality': 100,
                      'facecolor': 'black'
                  })

    plt.show()