def main(argv):
    """Evaluate a trained classifier on the test split and dump top-1/3/5 reports.

    Builds the test input pipeline, restores the latest checkpoint from
    ``cfg.checkpoint_dir``, runs one full pass over the test split, and writes
    classification-report CSVs (exact top-1 plus relaxed top-3/top-5 variants)
    into the model directory.

    NOTE(review): this block was re-formatted from a collapsed one-line dump;
    statement nesting was reconstructed -- confirm against version history.
    """
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    # Resolve the tuple-loader class from its dotted path in the config.
    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False  # deterministic order for evaluation
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['batch_size'] = cfg.batch_size  # NOTE(review): duplicate of the assignment above
    args['csv_file'] = cfg.test_csv_file
    test_iter = img_generator_class(args)
    test_imgs, test_lbls = test_iter.imgs_and_lbls()
    # NOTE(review): cfg was already parsed and CUDA_VISIBLE_DEVICES already set
    # above -- this re-parse is redundant.
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    model_dir = cfg.checkpoint_dir
    print(model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '_test.txt')
    logger = log_utils.create_logger(log_file)
    with tf.Graph().as_default():
        #meta_file = os.path.join(model_dir,'model.ckptbest.meta')
        #saver = tf.train.import_meta_graph(meta_file)
        #ckpt_file = os.path.join(model_dir,'model.ckptbest')
        #saver.restore(sess,ckpt_file)
        #print('Model Path {}'.format(ckpt_file))
        #load_model_msg = model.load_model(model_dir, ckpt_file, sess, saver, load_logits=True)
        #logger.info(load_model_msg)
        #graph = tf.get_default_graph()
        #print(graph.get_operations())

        # Single pass over the test split: no shuffling, no repeat.
        test_dataset = QuickTupleLoader(test_imgs, test_lbls, cfg,
                                        is_training=False, repeat=False).dataset
        # Feedable-iterator pattern: a string handle selects which dataset
        # feeds the graph at session-run time.
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, test_dataset.output_types, test_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)
        validation_iterator = test_dataset.make_initializable_iterator()
        sess = tf.InteractiveSession()
        validation_handle = sess.run(validation_iterator.string_handle())
        ckpt_file = tf.train.latest_checkpoint(model_dir)
        print(ckpt_file)
        # Initialize all variables first, then overwrite with checkpoint weights.
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        load_model_msg = model.load_model(model_dir, ckpt_file, sess, saver,
                                          load_logits=True)
        print(load_model_msg)
        ckpt_file = os.path.join(model_dir, cfg.checkpoint_filename)
        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)
        run_metadata = tf.RunMetadata()
        tf.local_variables_initializer().run()
        sess.run(validation_iterator.initializer)
        _val_acc_op = 0
        gts = []     # ground-truth labels accumulated over the whole split
        preds = []   # exact top-1 predictions
        pred_3 = []  # relaxed top-3: gt if it appears in the top 3, else argmax
        pred_5 = []  # relaxed top-5: gt if it appears in the top 5, else argmax
        while True:
            try:
                # Eval network on validation/testing split
                feed_dict = {handle: validation_handle}
                # NOTE(review): model.val_per_class_acc_acc is fetched twice
                # here (bound to both acc_per_class and macro_acc).
                gt, preds_raw, predictions, acc_per_class, val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat, macro_acc = sess.run(
                    [
                        model.val_gt, model.val_preds,
                        model.val_class_prediction,
                        model.val_per_class_acc_acc, val_loss,
                        model.val_accuracy, model_acc_op, val_acc_op,
                        model.val_accumulated_accuracy,
                        model.val_confusion_mat, model.val_per_class_acc_acc
                    ], feed_dict)
                gts += list(gt)
                preds += list(predictions)
                for g, p in zip(gt, preds_raw):
                    # Indices of the 3 / 5 highest-scoring classes (ascending,
                    # so the last element is the argmax).
                    preds_sort_3 = np.argsort(p)[-3:]
                    preds_sort_5 = np.argsort(p)[-5:]
                    # Credit the ground truth when it is inside the top-k,
                    # otherwise record the top-1 prediction.
                    if g in preds_sort_3:
                        pred_3 += [g]
                    else:
                        pred_3 += [preds_sort_3[-1]]
                    if g in preds_sort_5:
                        pred_5 += [g]
                    else:
                        pred_5 += [preds_sort_5[-1]]
                #print('Acc per class:',acc_per_class)
                #print('batch:',batch_accuracy)
                #print('Confusion Matrix:',c_cnf_mat)
                #print('gt:',gt)
                #print('preds:',preds_raw)
                #print('predictions:',predictions)
                #logger.info('Val Acc {0}, Macro Acc: {1}'.format(_val_acc,macro_acc))
            except tf.errors.OutOfRangeError:
                # Dataset exhausted: emit the three classification reports.
                # logger.info('problem:')
                # logger.info('Val Acc {0}, Macro Acc: {1}'.format(_val_acc,macro_acc))
                logger.info('____ Clasification Report Top 1 ____')
                report = classification_report(gts, preds, output_dict=True)
                csv_pd = classification_report_csv(report)
                csv_pd.to_csv(
                    os.path.join(model_dir, 'Classification_Report_top1.csv'))
                logger.info(report)
                # NOTE(review): headers/filenames say "Top 2"/"Top 3" but the
                # data are the relaxed top-3/top-5 predictions above.
                logger.info('____ Clasification Report Top 2 ____')
                report = classification_report(gts, pred_3, output_dict=True)
                csv_pd = classification_report_csv(report)
                csv_pd.to_csv(
                    os.path.join(model_dir, 'Classification_Report_top2.csv'))
                logger.info(report)
                logger.info('____ Clasification Report Top 3 ____')
                report = classification_report(gts, pred_5, output_dict=True)
                csv_pd = classification_report_csv(report)
                csv_pd.to_csv(
                    os.path.join(model_dir, 'Classification_Report_top3.csv'))
                logger.info(report)
                break
def main(cfg):
    """Optimize a class-specific attention filter for one input image.

    Loads a pretrained network plus a replicated sub-network that starts at
    the feature map named by ``cfg.replicate_net_at``, then, for each of the
    top-k predicted classes, gradient-descends an attention-filter variable so
    that the sub-network's logit for that class is maximized
    (``per_class_maximization`` puts -1 on the target class, so minimizing the
    summed product maximizes that logit). Saves heatmap PNGs and, optionally,
    a GIF of the optimization trajectory.

    NOTE(review): this block was re-formatted from a collapsed one-line dump;
    statement nesting (especially inside the optimization loop) was
    reconstructed -- confirm against version history.
    """
    # cfg.num_classes = 1001
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    output_dir = cfg.output_dir
    os_utils.touch_dir(output_dir)
    # Persist the run's arguments next to its outputs for reproducibility.
    args_file = os.path.join(cfg.output_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    log_file = os.path.join(cfg.output_dir, cfg.log_filename + '.txt')
    logger = log_utils.create_logger(log_file)
    img_name_ext = cfg.img_name
    img_name, _ = os.path.splitext(img_name_ext)
    datasets_dir = './input_imgs'
    test_img = imageio.imread('{}/{}'.format(datasets_dir, img_name_ext))
    # Square resize to the network's expected input resolution.
    test_img = cv2.resize(test_img, (const.frame_height, const.frame_height))
    with tf.Graph().as_default():
        images_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=(None, const.frame_height, const.frame_height,
                   const.num_channels),
            name='input_img')
        lbls_ph = tf.compat.v1.placeholder(
            tf.int32, shape=(None, cfg.num_classes), name='class_lbls')
        logits_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, cfg.num_classes), name='logits_lbls')
        # NOTE(review): same tf name 'logits_lbls' as logits_ph above.
        per_class_logits_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, cfg.num_classes), name='logits_lbls')
        input_ph = nn_utils.adjust_color_space(images_ph, cfg.preprocess_func)
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=input_ph, lbls_ph=lbls_ph)
        # Tensor at which the sub-network replica is attached.
        pre_atten_feat_map_tf = tf.compat.v1.get_default_graph(
        ).get_tensor_by_name(cfg.replicate_net_at)
        pre_atten_feat_map_tf_shape = pre_atten_feat_map_tf.shape
        sub_feat_map_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=[
                None, pre_atten_feat_map_tf_shape[1],
                pre_atten_feat_map_tf_shape[2], pre_atten_feat_map_tf_shape[3]
            ],
            name='feat_map_input')
        sub_network_class = locate(cfg.sub_network_name)
        sub_model = sub_network_class(cfg, images_ph=sub_feat_map_ph,
                                      lbls_ph=lbls_ph)
        sub_logits = sub_model.val_logits
        logits = model.val_logits
        sess = tf.compat.v1.InteractiveSession()
        atten_filter_position = cfg.atten_filter_position
        # Pick the last matching global variable: the attention filter itself
        # and its boolean gate (which switches attention on/off).
        tf_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('atten') in v.name
        ][-1]
        ## Didn't make a difference for tf_atten_var becuase tf_atten_var is created using get_varibale, i.e., shared
        tf_gate_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('gate') in v.name
        ][-1]
        # print(tf_gate_atten_var)
        # optimizer = tf.train.AdamOptimizer(0.01)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        logger.info('Learning rate {} {}'.format(cfg.learning_rate,
                                                 cfg.max_iters))
        learning_rate = tf_utils.poly_lr(global_step, cfg)
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
        class_specific = True if cfg.caf_variant == 'cls_specific' else False
        if class_specific:
            logger.info(
                'Solving class specific optimization problem -- classification network'
            )
            # Weighted sum of sub-network logits; only the attention variable
            # is optimized (var_list=[tf_atten_var]).
            mult_logits_2 = per_class_logits_ph * sub_logits
            loss_sub = tf.reduce_sum(mult_logits_2)
            grads = optimizer.compute_gradients(loss_sub,
                                                var_list=[tf_atten_var])
            train_op = optimizer.apply_gradients(grads,
                                                 global_step=global_step)
        else:
            raise NotImplementedError('cls_oblivious version implemented yet')
            # train_op = optimizer.minimize(loss, var_list=[tf_atten_var])
        # Initialize everything, then restore pretrained weights on top.
        tf.compat.v1.global_variables_initializer().run()
        ckpt_file = tf.train.latest_checkpoint(output_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        saver = tf.compat.v1.train.Saver(
        )  # saves variables learned during training
        load_model_msg = model.load_model(output_dir, ckpt_file, sess, saver,
                                          load_logits=True)
        logger.info(load_model_msg)
        # Forward pass on the raw image to find the top-k classes to explain.
        class_predictions, ground_logits = sess.run(
            [model.val_class_prediction, logits],
            feed_dict={images_ph: np.expand_dims(test_img, 0)})
        class_predictions = class_predictions[0]
        # print('Class Prediction {}'.format(imagenet_lbls[class_predictions]))
        k = 1
        top_k = np.argsort(np.squeeze(ground_logits))[::-1][:k]
        # top_k = [235,282,94,1,225]
        logger.info('Top K={} {}'.format(k, [imagenet_lbls[i] for i in top_k]))
        filter_type = cfg.filter_type
        # Gaussian filters are 2-D; other filter types carry a trailing
        # singleton channel dimension.
        if filter_type == 'gauss':
            rand_initilzalier = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1]))
        else:
            rand_initilzalier = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1], 1))
        # Ops to toggle the attention gate, randomize the filter, and reset
        # the learning-rate schedule between classes.
        close_gate = tf.compat.v1.assign(tf_gate_atten_var, False)
        open_gate = tf.compat.v1.assign(tf_gate_atten_var, True)
        random_init = tf.compat.v1.assign(tf_atten_var, rand_initilzalier)
        lr_reset = tf.compat.v1.assign(global_step, 0)
        MAX_INT = np.iinfo(np.int16).max  # sentinel "previous loss"
        # output_dir = cfg.output_dir
        for top_i in top_k:
            # top_i = 207 # To control which top_i to work on directly
            sess.run([open_gate, random_init, lr_reset])
            # sess.run(open_gate)
            iteration = 0
            prev_loss = MAX_INT
            event_gif_images = []
            # -1 on the target class so that minimizing loss_sub maximizes
            # that class's logit; +1 everywhere else suppresses the rest.
            per_class_maximization = np.ones((1, cfg.num_classes))
            per_class_maximization[0, top_i] = -1
            while iteration < cfg.max_iters:
                if iteration == 0:
                    # NOTE(review): reconstructed nesting -- at iteration 0
                    # the gate is closed, the un-attended feature map is
                    # extracted once, then the gate is re-opened; confirm.
                    sess.run([close_gate])
                    _pre_atten_feat_map_tf, _atten_var = sess.run(
                        [pre_atten_feat_map_tf, tf_atten_var],
                        feed_dict={
                            # sub_feat_map_ph: _pre_atten_feat_map_tf,
                            images_ph: np.expand_dims(test_img, 0),
                            per_class_logits_ph: per_class_maximization
                        })
                    sess.run([open_gate])
                # One optimization step on the attention filter, feeding the
                # cached feature map into the sub-network replica.
                _atten_var, _sub_logits, _loss, _ = sess.run(
                    [tf_atten_var, sub_logits, loss_sub, train_op],
                    feed_dict={
                        sub_feat_map_ph: _pre_atten_feat_map_tf,
                        # images_ph:np.expand_dims(img_crops[crop_idx,:,:,:],0),
                        per_class_logits_ph: per_class_maximization
                    })
                if iteration % 50 == 0:
                    logger.info('Iter {0:2d}: {1:.5f} Top {2:3d} {3}'.format(
                        iteration, _loss, top_i, imagenet_lbls[top_i]))
                    # print(np.round(np.reshape(_atten_var,(7,7)),2))
                    # NOTE(review): gif-frame capture placed inside the
                    # every-50-iterations branch -- confirm nesting.
                    if cfg.save_gif:
                        frame_mask = normalize_filter(filter_type, _atten_var,
                                                      tf_atten_var.shape[0],
                                                      tf_atten_var.shape[1])
                        if class_specific:
                            # # heatmap_utils.save_heatmap(frame_mask,save=output_dir + img_name +'_msk_cls_{}_{}.png'.format(top_i,filter_type))
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_cls_{}_{}.png'.format(top_i, filter_type),
                                axis='off',
                                cmap='bwr')
                        else:
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_{}.png'.format(filter_type),
                                axis='off',
                                cmap='bwr')
                        # Rasterize the matplotlib canvas into an RGB frame
                        # for the output GIF.
                        fig = plt.gcf()
                        data = np.fromstring(fig.canvas.tostring_rgb(),
                                             dtype=np.uint8,
                                             sep='')
                        w, h = fig.canvas.get_width_height()
                        data_img = data.reshape((h, w, 3))
                        event_gif_images.append(data_img)
                        # imageio.imwrite(dump_dir + '{}_test.jpg'.format(iteration),data_img)
                        plt.close()
                # Early stop once the loss plateaus (10e-5 == 1e-4).
                if np.abs(_loss - prev_loss) < 10e-5:
                    break
                prev_loss = _loss
                iteration += 1
            # Final heatmap for this class after optimization converged.
            frame_mask = normalize_filter(filter_type, _atten_var,
                                          tf_atten_var.shape[0],
                                          tf_atten_var.shape[1])
            if class_specific:
                # imageio.imwrite(output_dir + img_name + '_msk_cls_{}_{}.png'.format(top_i, filter_type), frame_mask)
                heatmap_utils.apply_heatmap(
                    test_img / 255.0,
                    frame_mask,
                    alpha=0.6,
                    save=output_dir + img_name +
                    '_cls_{}_{}.png'.format(top_i, filter_type),
                    axis='off',
                    cmap='bwr')
            else:
                heatmap_utils.apply_heatmap(test_img / 255.0,
                                            frame_mask,
                                            alpha=0.6,
                                            save=output_dir + img_name +
                                            '_{}.png'.format(filter_type),
                                            axis='off',
                                            cmap='bwr')
            if cfg.save_gif:
                if class_specific:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            top_i,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
                else:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            filter_type,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
print( red('[Error] Select at least one device to run: --p20 / --zed / --d5' )) exit(-1) print(cyan('============== Selected Devices ================')) print(cyan('{}'.format(processes))) print(cyan('==================== Guide =====================')) print(cyan('Mouse LEFT click to record videos.')) print(cyan('Mouse RIGHT click to exit the program.')) make_dir_if_not_exists(__ZED_IMG_PATH__) make_dir_if_not_exists(__P20_IMG_PATH__) make_dir_if_not_exists(__D5_IMG_PATH__) # Create logger img_logger = logutils.create_logger(__VID_LOGGING_FILE__) # Kill the process which blocks the camera connection canon.killGphoto2Process() print( yellow('Make sure that all the devices are on and at the right mode!')) # Collect events until released with Listener(on_click=on_click_run) as listener: listener.join() # Alarm that this program is closed. beep(3) print('Done!')
def main(argv):
    """Train a classifier, optionally with a metric-learning auxiliary loss.

    ``cfg.train_mode`` selects the loss: 'vanilla' (softmax only),
    'semi_hard' / 'hard' (softmax + semi-hard / batch-hard triplet loss on an
    embedding head), or 'cntr' (softmax + center loss). Supports caffe-style
    gradient accumulation (``cfg.caffe_iter_size``), periodic validation, and
    best-checkpoint tracking.

    NOTE(review): this block was re-formatted from a collapsed one-line dump;
    statement nesting was reconstructed -- confirm against version history.
    """
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)
    # Persist the run's arguments next to the checkpoints.
    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)
    # Resolve the tuple-loader class and build train/val iterators; the same
    # args dict is reused with only csv_file swapped for the val split.
    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['csv_file'] = cfg.train_csv_file
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    train_iter = img_generator_class(args)
    args['batch_size'] = cfg.batch_size  # NOTE(review): redundant re-assignment
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)
    trn_images, trn_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()
    with tf.Graph().as_default():
        # Metric-learning modes need triplet-friendly batches; vanilla mode
        # uses the plain shuffled/repeated loader.
        if cfg.train_mode == 'semi_hard' or cfg.train_mode == 'hard' or cfg.train_mode == 'cntr':
            train_dataset = TripletTupleLoader(trn_images, trn_lbls,
                                               cfg).dataset
        elif cfg.train_mode == 'vanilla':
            train_dataset = QuickTupleLoader(trn_images,
                                             trn_lbls,
                                             cfg,
                                             is_training=True,
                                             shuffle=True,
                                             repeat=True).dataset
        else:
            raise NotImplementedError('{} is not a valid train mode'.format(
                cfg.train_mode))
        val_dataset = QuickTupleLoader(val_imgs, val_lbls, cfg,
                                       is_training=False, repeat=False).dataset
        # Feedable-iterator pattern: one graph, two datasets.
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)
        # Which loss fn to impose. For example, softmax only is applied in
        # vanilla mode, while softmax + semi-hard triplet is applied in
        # semi_hard mode.
        if cfg.train_mode == 'semi_hard':
            # Embedding head on the pre-logits feature map, L2-normalized
            # before the triplet loss.
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_semi.triplet_semihard_loss(
                gt_lbls, embedding, margin)
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'hard':
            # Same embedding head, but batch-hard triplet mining.
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'cntr':
            # Center loss: pulls embeddings toward running per-class centroids.
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            CENTER_LOSS_LAMBDA = 0.003
            CENTER_LOSS_ALPHA = 0.5
            num_fg_classes = cfg.num_classes
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            center_loss_order, centroids, centers_update_op, appear_times, diff = center_loss.get_center_loss(
                embedding, gt_lbls, CENTER_LOSS_ALPHA, num_fg_classes)
            # sample_centroid = tf.reshape(tf.gather(centroids, gt_lbls), [-1, config.emb_dim])
            # center_loss_order = center_loss.center_loss(sample_centroid , embedding)
            logger.info('Center loss lambda {}'.format(CENTER_LOSS_LAMBDA))
            total_loss = model.train_loss + CENTER_LOSS_LAMBDA * tf.reduce_mean(
                center_loss_order)
        elif cfg.train_mode == 'vanilla':
            total_loss = model.train_loss
        logger.info('Train Mode {}'.format(cfg.train_mode))
        # variables_to_train = model.var_2_train();
        # logger.info('variables_to_train ' + str(variables_to_train))
        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Center mode must also run the centroid-update op each step.
        if cfg.train_mode == const.Train_Mode.CNTR:
            update_ops.append(centers_update_op)
        # print(update_ops)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                # grads = tf.Print(grads,[grads],'Grad Print');
                grads = optimizer.compute_gradients(total_loss, trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)
            else:
                grads = optimizer.compute_gradients(total_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)
        sess = tf.InteractiveSession()
        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())
        tb_path = save_model_dir
        logger.info(tb_path)
        # Resume from the latest logged iteration (0 on a fresh run).
        start_iter = tb_utils.get_latest_iteration(tb_path)
        train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        # Initialize first, then restore checkpoint weights on top.
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()  # saves variables learned during training
        ckpt_file = tf.train.latest_checkpoint(save_model_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess,
                                          saver, load_logits=False)
        logger.info(load_model_msg)
        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)
        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)
        best_model_step = 0
        best_acc = 0
        logger.info('Start Training from {}, till {}'.format(
            start_iter, cfg.train_iters))
        # Start Training
        for step in range(start_iter + 1, cfg.train_iters + 1):
            start_time_train = time.time()
            # Update network weights while supporting caffe_iter_size:
            # accumulate gradients for (iter_size - 1) mini-batches, then the
            # final sess.run applies the averaged update.
            for mini_batch in range(cfg.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                model_loss_value, accuracy_value, _ = sess.run(
                    [model.train_loss, model.train_accuracy, accum_ops],
                    feed_dict)
            feed_dict = {handle: training_handle}
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)
            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)  # reset the gradient accumulators
            train_time = time.time() - start_time_train
            if (step == 1 or step % cfg.logging_threshold == 0):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                    format(step, model_loss_value, accuracy_value, train_time))
                # NOTE(review): validation nested inside the logging branch as
                # reconstructed from the collapsed source -- confirm.
                if (step % cfg.test_interval == 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)
                    _val_acc_op = 0
                    # Drain the validation split once; OutOfRangeError marks
                    # the end of the epoch.
                    while True:
                        try:
                            # Eval network on validation/testing split
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat, macro_acc = sess.run(
                                [
                                    val_loss, model.val_accuracy, model_acc_op,
                                    val_acc_op,
                                    model.val_accumulated_accuracy,
                                    model.val_confusion_mat,
                                    model.val_per_class_acc_acc
                                ], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}, Macro Acc: {1}'.format(
                                _val_acc, macro_acc))
                            break
                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % step)
                    train_writer.add_summary(val_loss_op, step)
                    train_writer.add_summary(_val_acc_op, step)
                    train_writer.add_summary(accuracy_op, step)
                    train_writer.flush()
                    # Periodic checkpoint plus a separate 'best' checkpoint
                    # whenever validation accuracy improves.
                    if (step % 100 == 0):
                        saver.save(sess, ckpt_file)
                    if best_acc < _val_acc:
                        saver.save(sess, ckpt_file + 'best')
                        best_acc = _val_acc
                        best_model_step = step
                    logger.info('Best Acc {0} at {1} == {2}'.format(
                        best_acc, best_model_step, model_basename))
        logger.info('Triplet loss lambda {}'.format(cfg.triplet_loss_lambda))
        logger.info('Mode {}'.format(cfg.train_mode))
        logger.info('Loop complete')
        sess.close()
def main(argv):
    """Finetune a pretrained classifier on a train/val split.

    Builds tf.data pipelines for both splits, restores a pretrained network
    (``is_finetuning=True``), trains with momentum SGD and optional
    caffe-style gradient accumulation, validates every ``cfg.test_interval``
    iterations, and keeps a separate 'best' checkpoint.

    NOTE(review): this block was re-formatted from a collapsed one-line dump;
    statement nesting was reconstructed -- confirm against version history.
    """
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    # Resolve the tuple-loader class; the same args dict is reused for the
    # val split with only csv_file swapped.
    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['csv_file'] = cfg.train_csv_file
    train_iter = img_generator_class(args)
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)
    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()
    # Where to save the trained model
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)
    ## Log experiment
    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)
    with tf.Graph().as_default():
        # Create train and val dataset following tensorflow Data API
        ## A dataset element has an image and lable
        train_dataset = TensorflowTupleLoader(train_imgs, train_lbls, cfg,
                                              is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs, val_lbls, cfg,
                                            is_training=False,
                                            batch_size=cfg.batch_size,
                                            repeat=False).dataset
        # Feedable-iterator pattern: one graph, two datasets.
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()
        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()
        ## Load a pretrained network {resnet_v2 or densenet161} based on config.network_name configuration
        network_class = locate(cfg.network_name)
        model = network_class(cfg, is_training=True, images_ph=images_ph,
                              lbls_ph=lbls_ph)
        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]
        # Run batch-norm (and similar) update ops alongside each train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                grads = optimizer.compute_gradients(model.train_loss,
                                                    trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)
            else:  # If accumulated gradient disabled, do regular training
                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)
        # logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     mprint('trainable_variables: {0} \t {1}'.format(str(v.name),str(v.shape)))
        sess = tf.InteractiveSession()
        # Initialize first, then restore pretrained weights on top.
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())
        # now = datetime.now()
        # if (config.tensorbaord_file == None):
        #     tb_path = config.tensorbaord_dir + now.strftime("%Y%m%d-%H%M%S")
        # else:
        #     tb_path = config.tensorbaord_dir + config.tensorbaord_file
        start_iter = 1  # No Resume in this code version
        # train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        saver = tf.train.Saver()  # saves variables learned during training
        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)
        print('Model Path ', ckpt_file)
        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess,
                                          saver, is_finetuning=True)
        logger.info(load_model_msg)
        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)
        logger.info('Start Training ***********')
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, cfg.train_iters + 1):
            start_time_train = time.time()
            feed_dict = {handle: training_handle}
            ## Here is where training and backpropagation start
            # In case accumulated gradient enabled, i.e. config.caffe_iter_size > 1
            for mini_batch in range(cfg.caffe_iter_size - 1):
                sess.run(accum_ops, feed_dict)
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)
            # In case accumulated gradient enabled, reset shadow variables
            if cfg.caffe_iter_size > 1:
                sess.run(zero_ops)
            ## Here is where training and backpropagation end
            train_time = time.time() - start_time_train
            if (current_iter % cfg.logging_threshold == 0
                    or current_iter == 1):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                    format(current_iter, model_loss_value, accuracy_value,
                           train_time))
                # NOTE(review): validation nested inside the logging branch
                # as reconstructed from the collapsed source -- confirm.
                if (current_iter % cfg.test_interval == 0):
                    # run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)
                    # Drain the validation split once; OutOfRangeError marks
                    # the end of the epoch.
                    while True:
                        try:
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat = sess.run(
                                [
                                    val_loss, model.val_accuracy, model_acc_op,
                                    val_acc_op,
                                    model.val_accumulated_accuracy,
                                    model.val_confusion_mat
                                ], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}'.format(_val_acc))
                            break
                    # train_writer.add_run_metadata(run_metadata, 'step%03d' % current_iter)
                    # train_writer.add_summary(val_loss_op, current_iter)
                    # train_writer.add_summary(_val_acc_op, current_iter)
                    # train_writer.add_summary(accuracy_op, current_iter)
                    #
                    # train_writer.flush()
                    if (current_iter % cfg.logging_threshold == 0):
                        saver.save(sess, ckpt_file)
                    if best_acc < _val_acc:
                        saver.save(sess, ckpt_file + 'best')
                        best_acc = _val_acc
                        best_model_step = current_iter
        ## Early dropping style.
        logger.info('Best Acc {0} at {1} == {2}'.format(
            best_acc, best_model_step, model_basename))
        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        sess.close()
def logger(self):
    """Return this data source's logger, creating and caching it on first use."""
    if self._logger is not None:
        return self._logger
    # First access: build the logger once and keep it for later calls.
    self._logger = log_utils.create_logger(self.data_source_name,
                                           self.log_files_directory())
    return self._logger
def main(argv):
    """Extract deep features for the test split and save them as .npy files.

    Restores the latest checkpoint from ``cfg.checkpoint_dir``, runs one pass
    over the test split, collects the 'resnet_v2_50/block4' feature maps, the
    'global_pool' features, and the labels, then writes features.npy,
    pooling.npy, and labels.npy under ``<checkpoint_dir>/features``.

    NOTE(review): this block was re-formatted from a collapsed one-line dump;
    statement nesting was reconstructed -- confirm against version history.
    """
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    # Resolve the tuple-loader class from its dotted path in the config.
    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False  # deterministic order so features align with labels
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['batch_size'] = cfg.batch_size  # NOTE(review): duplicate of the assignment above
    args['csv_file'] = cfg.test_csv_file
    test_iter = img_generator_class(args)
    test_imgs, test_lbls = test_iter.imgs_and_lbls()
    # NOTE(review): cfg was already parsed and CUDA_VISIBLE_DEVICES already set
    # above -- this re-parse is redundant.
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    model_dir = cfg.checkpoint_dir
    print(model_dir)
    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '_test.txt')
    logger = log_utils.create_logger(log_file)
    with tf.Graph().as_default():
        #meta_file = os.path.join(model_dir,'model.ckptbest.meta')
        #saver = tf.train.import_meta_graph(meta_file)
        #ckpt_file = os.path.join(model_dir,'model.ckptbest')
        #saver.restore(sess,ckpt_file)
        #print('Model Path {}'.format(ckpt_file))
        #load_model_msg = model.load_model(model_dir, ckpt_file, sess, saver, load_logits=True)
        #logger.info(load_model_msg)
        #graph = tf.get_default_graph()
        #print(graph.get_operations())

        # Single pass over the test split: no shuffling, no repeat.
        test_dataset = QuickTupleLoader(test_imgs, test_lbls, cfg,
                                        is_training=False, repeat=False).dataset
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, test_dataset.output_types, test_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)
        validation_iterator = test_dataset.make_initializable_iterator()
        sess = tf.InteractiveSession()
        validation_handle = sess.run(validation_iterator.string_handle())
        ckpt_file = tf.train.latest_checkpoint(model_dir)
        print(ckpt_file)
        # Initialize all variables first, then overwrite with checkpoint weights.
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        load_model_msg = model.load_model(model_dir, ckpt_file, sess, saver,
                                          load_logits=True)
        print(load_model_msg)
        ckpt_file = os.path.join(model_dir, cfg.checkpoint_filename)
        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)
        run_metadata = tf.RunMetadata()
        tf.local_variables_initializer().run()
        sess.run(validation_iterator.initializer)
        _val_acc_op = 0
        feat = []     # per-batch 'resnet_v2_50/block4' feature maps
        label = []    # per-batch label arrays
        pooling = []  # per-batch 'global_pool' features
        while True:
            try:
                # Eval network on validation/testing split
                feed_dict = {handle: validation_handle}
                features, labels = sess.run(
                    [model.val_end_features, model.val_features_labels],
                    feed_dict)
                print(labels.shape)
                # NOTE(review): keys assume a resnet_v2_50 backbone's
                # end-points dict -- confirm for other networks.
                feat.append(features['resnet_v2_50/block4'])
                pooling.append(features['global_pool'])
                label.append(labels)
                print('___________________')
            except tf.errors.OutOfRangeError:
                # Dataset exhausted: flush everything to .npy files.
                path = model_dir
                f_folder = os.path.join(model_dir, 'features')
                os.makedirs(f_folder, exist_ok=True)
                p_file = os.path.join(f_folder, 'pooling.npy')
                f_file = os.path.join(f_folder, 'features.npy')
                l_file = os.path.join(f_folder, 'labels.npy')
                print('pooling')
                # Flatten pooled features to (num_samples, feat_dim).
                pooling = np.concatenate(pooling)
                pooling = pooling.reshape(pooling.shape[0], -1)
                np.save(p_file, pooling)
                print('7x7')
                np.save(f_file, feat)
                print('labels')
                np.save(l_file, np.array(label))
                break