def pairwise_stats():
    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(True)
    # dataset.set_shuffle(False)
    dataset.set_pairwise_stats_collect(True)

    num_images = dataset.num_images
    all_pairwise_differences = {}

    if cfg.mirror:
        num_images *= 2

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()
        batch_stats = batch[Batch.data_item].pairwise_stats
        for joint_pair in batch_stats:
            if joint_pair not in all_pairwise_differences:
                all_pairwise_differences[joint_pair] = []
            all_pairwise_differences[joint_pair] += batch_stats[joint_pair]

    stats = {}
    for joint_pair in all_pairwise_differences:
        stats[joint_pair] = {}
        stats[joint_pair]["mean"] = np.mean(all_pairwise_differences[joint_pair], axis=0)
        stats[joint_pair]["std"] = np.std(all_pairwise_differences[joint_pair], axis=0)

    save_stats(stats, cfg)

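# Note: the per-joint-pair mean/std collected here appears to be what the
# multi-person code further down reads back as `dataset.pairwise_stats` and
# passes to extract_cnn_output along with the raw network outputs.
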
def train():
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']
    print("total_loss:", total_loss)

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()
    print("merged_summaries:", merged_summaries)

    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0

    lr_gen = LearningRate(cfg)

    for it in range(max_iter + 1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])

def main(option):
    start_time = time.time()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0

    # Read image from file
    slopes = {}
    k = 0
    cap = cv2.VideoCapture("http://192.168.43.31:8081")
    cap_user = cv2.VideoCapture('/dev/video0')
    cap = cap_user
    i = 0
    while True:
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:
            # frame = orig_frame
            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, outputs, inputs, cfg, dataset, sm, draw_multi)
            print("CO1 ", co1)
            user_co1 = run_predict(user_frame, sess, outputs, inputs, cfg, dataset, sm, draw_multi)
            print("USER_CO1 ", user_co1)
            print("CO1 ", co1)
            k = None
            try:
                slope_reqd, slope_user = slope_calc(co1, user_co1)
                k, s = compare_images(slope_reqd, slope_user, 0.75)
            except IndexError:
                # if len(co1) != len(user_co1):
                print("Except condition")
                pass
            vibrate(k)
            frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
            user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
            cv2.putText(user_frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
            # visualize.waitforbuttonpress()
            if cv2.waitKey(10) == ord('q'):
                break
        i += 1  # advance the frame counter; without this the `i % 25` frame-skip check never takes effect

    elapsed = time.time() - start_time
    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()

def test_net(visualise, cache_scoremaps):
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)
    dataset.set_test_mode(True)

    sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    num_images = dataset.num_images
    predictions = np.zeros((num_images,), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()

        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        scmap, locref, pairwise_diff = extract_cnn_output(outputs_np, cfg)

        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            visualize.show_heatmaps(cfg, img, scmap, pose)
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn, mdict={'scoremaps': scmap.astype('float32')})

            out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
            if cfg.location_refinement:
                scipy.io.savemat(out_fn, mdict={'locreg_pred': locref.astype('float32')})

    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()

def main(option):
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0

    # Read image from file
    cap = cv2.VideoCapture('msgifs/icon4.gif')
    cap_user = cv2.VideoCapture('user.mp4')
    i = 0
    while True:
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:
            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, inputs, outputs, cfg, dataset, sm, draw_multi)
            user_co1 = run_predict(user_frame, sess, inputs, outputs, cfg, dataset, sm, draw_multi)
            try:
                slope_reqd = slope_calc(co1)
                slope_user = slope_calc(user_co1)
                compare_images(slope_reqd, slope_user, 0.1)
            except IndexError:
                # if len(co1) != len(user_co1):
                # messagebox.showinfo("Title", "Please adjust camera to show your keypoints")
                pass
            # frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
            # user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
            cv2.putText(user_frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
            # visualize.waitforbuttonpress()
            if cv2.waitKey(10) == ord('q'):
                break
        i += 1  # advance the frame counter so the `i % 25` frame-skip check has an effect

    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()

def test_net(visualise, cache_scoremaps, development):
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    from_cache = "cached_scoremaps" in cfg
    if not from_cache:
        sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    pairwise_stats = dataset.pairwise_stats
    num_images = dataset.num_images if not development else min(10, dataset.num_images)

    coco_results = []

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()

        cache_name = "{}.mat".format(batch[Batch.data_item].coco_id)

        if not from_cache:
            outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

            scmap, locref, pairwise_diff = extract_cnn_output(outputs_np, cfg, pairwise_stats)

            if cache_scoremaps:
                if visualise:
                    img = np.squeeze(batch[Batch.inputs]).astype('uint8')
                    pose = argmax_pose_predict(scmap, locref, cfg.stride)
                    arrows = argmax_arrows_predict(scmap, locref, pairwise_diff, cfg.stride)
                    visualize.show_arrows(cfg, img, pose, arrows)
                    visualize.waitforbuttonpress()
                    continue

                out_fn = os.path.join(out_dir, cache_name)
                out_dict = {'scoremaps': scmap.astype('float32'),
                            'locreg_pred': locref.astype('float32'),
                            'pairwise_diff': pairwise_diff.astype('float32')}
                scipy.io.savemat(out_fn, mdict=out_dict)
                continue
        else:
            # cache_name = '1.mat'
            full_fn = os.path.join(cfg.cached_scoremaps, cache_name)
            mlab = scipy.io.loadmat(full_fn)
            scmap = mlab["scoremaps"]
            locref = mlab["locreg_pred"]
            pairwise_diff = mlab["pairwise_diff"]

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            # visualize.show_heatmaps(cfg, img, scmap, pose)

            """
            # visualize part detections after NMS
            visim_dets = visualize_detections(cfg, img, detections)
            plt.imshow(visim_dets)
            plt.show()
            visualize.waitforbuttonpress()
            """

            # """
            visim_multi = img.copy()
            draw_multi.draw(visim_multi, dataset, person_conf_multi)
            plt.imshow(visim_multi)
            plt.show()
            visualize.waitforbuttonpress()
            # """

        if cfg.use_gt_segm:
            coco_img_results = pose_predict_with_gt_segm(scmap, locref, cfg.stride,
                                                         batch[Batch.data_item].gt_segm,
                                                         batch[Batch.data_item].coco_id)
            coco_results += coco_img_results
            if len(coco_img_results):
                dataset.visualize_coco(coco_img_results, batch[Batch.data_item].visibilities)

    if cfg.use_gt_segm:
        with open('predictions_with_segm.json', 'w') as outfile:
            json.dump(coco_results, outfile)

    sess.close()

# sys and imread are used further down in this snippet
import sys

from scipy.misc import imread

from config import load_config
from dataset.factory import create as create_dataset
from nnet import predict
from util import visualize
from dataset.pose_dataset import data_to_input

from multiperson.detections import extract_detections
from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
from multiperson.visualize import PersonDraw, visualize_detections

import matplotlib.pyplot as plt

cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read image from file
file_name_ext = sys.argv[1]              # example: test_single_03.png
file_name = file_name_ext.split('.')[0]  # example: test_single_03
file_name_input = 'testset/' + file_name_ext
image = imread(file_name_input, mode='RGB')

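# The image loaded above would typically be pushed through the multi-person
# pipeline the same way the video helpers further down do it. A minimal sketch
# of that forward pass, assuming the predict / multiperson API imported above
# (this continuation is not part of the original snippet):
image_batch = data_to_input(image)

# Compute prediction with the CNN and assemble per-person keypoints
outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
detections = extract_detections(cfg, scmap, locref, pairwise_diff)
unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

# Draw the estimated poses on a copy of the input and display them
visim_multi = image.copy()
draw_multi.draw(visim_multi, dataset, person_conf_multi)
plt.imshow(visim_multi)
plt.show()
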
def train():
    # Set up logging
    setup_logging()

    # Load the training configuration file pose_cfg.yaml
    cfg = load_config()

    # Create an instance of the dataset class
    dataset = create_dataset(cfg)

    # Get the batch spec, which contains the sizes of the input image,
    # the joint heatmaps, the joint weights, the location-refinement
    # heatmaps and the location-refinement masks
    batch_spec = get_batch_spec(cfg)

    # Build the enqueue op, the placeholders and the batch tensors from the batch spec
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    # Build the network and create the loss ops
    # (losses contains several kinds of loss)
    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    # Combine the individual losses
    for k, t in losses.items():
        # returns a scalar Tensor of type string which contains a Summary protobuf
        tf.summary.scalar(k, t)
    # returns a scalar Tensor of type string containing the serialized
    # Summary protocol buffer resulting from the merging
    merged_summaries = tf.summary.merge_all()

    # Get all variables under resnet_v1
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])

    # Create the saver which will be used to restore the resnet_v1 weights
    restorer = tf.train.Saver(variables_to_restore)
    # Create the saver that snapshots the training state
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    # Start a thread that reads data and feeds it into the queue
    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    # Open a summary writer for training
    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    # Get the train op and the learning-rate placeholder
    learning_rate, train_op = get_optimizer(total_loss, cfg)

    # Initialize global and local variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk, i.e. load the pretrained weights into memory
    restorer.restore(sess, cfg.init_weights)

    # Maximum number of iterations, taken from the config
    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0

    # Create a learning-rate schedule instance
    lr_gen = LearningRate(cfg)

    for it in range(max_iter + 1):
        # Compute the learning rate for the current iteration
        current_lr = lr_gen.get_lr(it)

        # Run one training step
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        # Accumulate the loss
        cum_loss += loss_val
        # Write the summary for this iteration
        train_writer.add_summary(summary, it)

        # Report the loss every display_iters iterations
        if it % display_iters == 0:
            average_loss = cum_loss / display_iters  # average loss
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save a snapshot every cfg.save_iters iterations (and at the final iteration)
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            # Name of the model snapshot
            model_name = cfg.snapshot_prefix
            # Save the model
            saver.save(sess, model_name, global_step=it)

    sess.close()
    # Ask the data-loading thread to stop
    coord.request_stop()
    # Wait for the data-loading thread to finish
    coord.join([thread])

def video2posevideo(video_name):
    time_start = time.clock()

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  # duration: seconds / fps: frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # e.g. 720 -> 3

    pose_frame_list = []

    point_r = 3     # radius of points
    point_min = 10  # threshold: a person with more than point_min points is counted as a real person
    part_min = 3    # threshold: a person with more than part_min parts (head, arms, legs) is counted as a real person
    point_num = 17  # there are 17 points per person

    def ellipse_set(person_conf_multi, people_i, point_i):
        return (person_conf_multi[people_i][point_i][0] - point_r,
                person_conf_multi[people_i][point_i][1] - point_r,
                person_conf_multi[people_i][point_i][0] + point_r,
                person_conf_multi[people_i][point_i][1] + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        return (person_conf_multi[people_i][point_i][0],
                person_conf_multi[people_i][point_i][1],
                person_conf_multi[people_i][point_j][0],
                person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        draw.ellipse(ellipse_set(person_conf_multi, people_i, a), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, b), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, c), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # print('person_conf_multi: ')
        # print(type(person_conf_multi))
        # print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        people_real_num = 0
        people_part_num = 0

        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_list = []
            point_count = 0
            point_i = 0     # index of points
            part_count = 0  # count of parts in THAT person

            # To find the rectangle which encloses that person - lists of point x, y coordinates
            people_x = []
            people_y = []

            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                    # A point at (0, 0) means missing data
                    point_count = point_count + 1
                    point_list.append(point_i)

            # Draw each part
            if (5 in point_list) and (7 in point_list) and (9 in point_list):  # Draw left arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 5, 7, 9, point_color)
                part_count = part_count + 1
            if (6 in point_list) and (8 in point_list) and (10 in point_list):  # Draw right arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 6, 8, 10, point_color)
                part_count = part_count + 1
            if (11 in point_list) and (13 in point_list) and (15 in point_list):  # Draw left leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 11, 13, 15, point_color)
                part_count = part_count + 1
            if (12 in point_list) and (14 in point_list) and (16 in point_list):  # Draw right leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 12, 14, 16, point_color)
                part_count = part_count + 1

            if point_count >= point_min:
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                        # A point at (0, 0) means missing data
                        draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                        people_x.append(person_conf_multi[people_i][point_i][0])
                        people_y.append(person_conf_multi[people_i][point_i][1])
                # Draw the rectangle which encloses that person
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)],
                               fill=point_color, outline=5)

            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0, 0, 0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.clock() - time_start, 1)) + 'sec', (0, 0, 0))

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)

        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)

    print("Time(s): " + str(time.clock() - time_start))

def train_gan(arguments):
    """ Setup result directory and enable logging to file in it """
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))

    """ Initialize Tensorboard """
    tensorboard_writer = initialize_tensorboard(outdir)

    """ Set random seed throughout python, pytorch and numpy """
    logger.info('Using Random Seed value as: %d' % arguments['random_seed'])
    torch.manual_seed(arguments['random_seed'])  # Set for pytorch, used for cuda as well.
    random.seed(arguments['random_seed'])        # Set for python
    np.random.seed(arguments['random_seed'])     # Set for numpy

    """ Set device - cpu or gpu """
    device = torch.device(f"cuda:{opt.gpu}" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')

    """ Load Model with weights(if available) """
    G: torch.nn.Module = get_model(arguments.get('generator_model_args')).to(device)
    D: torch.nn.Module = get_model(arguments.get('discriminator_model_args')).to(device)
    if arguments['mode'] == 'dcgan':
        G.apply(weights_init)
        D.apply(weights_init)

    """ Create optimizer """
    G_optimizer = create_optimizer(G.parameters(), arguments['generator_optimizer_args'])
    D_optimizer = create_optimizer(D.parameters(), arguments['discriminator_optimizer_args'])

    """ Create Loss """
    loss = torch.nn.BCELoss().to(device=device)  # GAN

    """ Load parameters for the Dataset """
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])

    """ Generate all callbacks """
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device, outdir)

    # """ Create loss function """
    # criterion = create_loss(arguments['loss_args'])

    """ Debug the inputs to model and save graph to tensorboard """
    dataset.debug()

    # Only One model is allowed
    # G_dummy_input = torch.rand(size=(1, arguments['generator_model_args']['model_constructor_args']['latent_dim']))
    # D_dummy_input = (torch.rand(1,
    #                             arguments['dataset_args']['name'].value['channels'],
    #                             32, 32
    #                             *arguments['dataset_args']['name'].value['image_size']  # ToDo Fix this
    #                             ))
    # tensorboard_writer.save_graph('Generator', G, G_dummy_input.to(device))
    # tensorboard_writer.save_graph('Discriminator', D, D_dummy_input.to(device))
    logger.info(G)
    logger.info(D)

    def reset_grad():
        G.zero_grad()
        D.zero_grad()

    batch_size = arguments['train_data_args']['batch_size']
    z_dim = arguments['generator_model_args']['model_constructor_args']['nz']
    generator = infinite_train_gen(dataset.train_dataloader)
    interval_length = 10 if is_debug_mode() else 400
    num_intervals = 1 if is_debug_mode() else int(arguments['num_iterations'] / interval_length)

    global_step = 0
    # To allocate memory required for the GPU during training and validation
    run_callbacks(callbacks,
                  model=(G, D),
                  optimizer=(G_optimizer, D_optimizer),  # To save optimizer dict for retraining.
                  mode=CallbackMode.ON_NTH_ITERATION,
                  iteration=global_step)
    reset_grad()

    for it in range(num_intervals):
        logger.info(f'Interval {it + 1}/{num_intervals}')

        # Set model in train mode
        G.train()
        D.train()

        t = trange(interval_length)
        for _ in t:
            if arguments['mode'] == 'dcgan':
                D_loss, G_loss = train_gan_iter(D, D_optimizer, G, G_optimizer, loss, device,
                                                generator, batch_size, reset_grad, z_dim,
                                                tensorboard_writer, global_step)
            elif arguments['mode'] == 'wgan-wp':
                D_loss, G_loss = train_wgan_iter(D, D_optimizer, G, G_optimizer, device,
                                                 generator, batch_size, reset_grad, z_dim,
                                                 tensorboard_writer, global_step)
            elif arguments['mode'] == 'wgan-noise-adversarial':
                D_loss, G_loss = train_noisy_wgan_iter(D, D_optimizer, G, G_optimizer, device,
                                                       generator, batch_size, reset_grad, z_dim,
                                                       tensorboard_writer, global_step,
                                                       contamination_loss_weight=arguments['contamination_loss_weight'])

            # Log D_Loss and G_Loss in progress bar
            t.set_postfix(D_Loss=D_loss.data.cpu().item(), G_Loss=G_loss.data.cpu().item())

            # Save Loss In Tensorboard
            tensorboard_writer.save_scalars(
                f'{arguments["mode"].upper()}_Loss',
                {'Discriminator' if arguments['mode'] == 'dcgan' else 'Critic': D_loss.data.cpu().item(),
                 'Generator': G_loss.data.cpu().item()},
                global_step)
            global_step += 1

        print(f'Discriminator Loss: {D_loss.data.cpu().item()}, Generator Loss: {G_loss.data.cpu().item()}')

        run_callbacks(callbacks,
                      model=(G, D),
                      optimizer=(G_optimizer, D_optimizer),  # To save optimizer dict for retraining.
                      mode=CallbackMode.ON_NTH_ITERATION,
                      iteration=global_step)
        reset_grad()

def train():
    setup_logging()

    cfg = load_config()

    # load newest snapshot
    snapshots = [fn.split('.')[0] for fn in os.listdir(os.getcwd()) if "index" in fn]
    if len(snapshots) > 0:
        iters = np.array([int(fn.split('-')[1]) for fn in snapshots])
        cfg['init_weights'] = snapshots[iters.argmax()]
        start = iters.max()
    else:
        start = 0

    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    if start == 0:
        variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
        restorer = tf.train.Saver(variables_to_restore)
    else:
        restorer = tf.train.Saver()
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0

    lr_gen = LearningRate(cfg, start)

    startTime = time.time()
    for it in range(start, max_iter + 1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            elapsed = timedelta(seconds=(time.time() - startTime))
            logging.info("iteration: {} loss: {} lr: {} time: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr, elapsed))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != start) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])

def video2poseframe(video_name):
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  # duration: seconds / fps: frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # e.g. 720 -> 3

    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        point_num = 17
        print('person_conf_multi.size: ')
        print(person_conf_multi.size)
        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ')
        print(people_num)

        point_i = 0  # index of points
        point_r = 5  # radius of points

        people_real_num = 0
        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_count = 0
            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                    # A point at (0, 0) means missing data
                    point_count = point_count + 1
            if point_count > 5:  # A person with more than 5 points is counted as a real person
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    draw.ellipse((person_conf_multi[people_i][point_i][0] - point_r,
                                  person_conf_multi[people_i][point_i][1] - point_r,
                                  person_conf_multi[people_i][point_i][0] + point_r,
                                  person_conf_multi[people_i][point_i][1] + point_r),
                                 fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = 'testset/' + video_name + '/frame_pose_' + str(i).zfill(video_frame_ciphers) + '.jpg'
        # Pillow expects the format name "JPEG"; passing "JPG" raises an error
        image_img.save(video_name_result, "JPEG")

def test_net(visualise, cache_scoremaps):
    # Enable Python logging
    logging.basicConfig(level=logging.INFO)

    # Load the configuration file
    cfg = load_config()

    # Create the data-reading class instance from the configuration
    dataset = create_dataset(cfg)

    # Do not shuffle the data
    dataset.set_shuffle(False)

    # Tell the data reader that there are no labels, i.e. we are in test mode
    dataset.set_test_mode(True)

    # This returns the session, the input op and the output ops
    sess, inputs, outputs = setup_pose_prediction(cfg)

    # Whether to cache the heatmaps produced during testing
    if cache_scoremaps:
        # Directory where the heatmaps are stored
        out_dir = cfg.scoremap_dir
        # Create the directory if it does not exist
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Number of images
    num_images = dataset.num_images

    # The predicted joint coordinates are collected here
    predictions = np.zeros((num_images,), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        # Fetch one batch of data
        batch = dataset.next_batch()

        # Run the prediction
        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        # Get the heatmaps and the location-refinement maps
        scmap, locref = extract_cnn_output(outputs_np, cfg)

        # Get the final joint coordinates
        '''
        pose = [
                 [ pos_f8[::-1], [scmap[maxloc][joint_idx]] ]
                 ...
               ]
        In other words, the structure is:
        pose = [
                 [joint coordinates, confidence of the joint coordinates]
                 ...
               ]
        '''
        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        # Divide by the scale to map the coordinates back to the unscaled image.
        # Note that the slice 0:2 is half-open, so only columns 0 and 1 are taken.
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            # Get the image
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            # Show the heatmaps
            visualize.show_heatmaps(cfg, img, scmap, pose)
            # Wait for a key press
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            # Cache the heatmaps
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn, mdict={'scoremaps': scmap.astype('float32')})

            # Cache the location-refinement heatmaps
            out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
            if cfg.location_refinement:
                scipy.io.savemat(out_fn, mdict={'locreg_pred': locref.astype('float32')})

    # Save the final predicted joint coordinates
    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()

def main():
    start_time = time.time()
    print("main hai")
    tf.reset_default_graph()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Read image from file
    dir = os.listdir("stick")
    k = 0
    cap = cv2.VideoCapture(0)
    i = 0
    while cap.isOpened():
        if i % 20 == 0:
            ret, orig_frame = cap.read()
            if ret == True:
                frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
                image = frame
                sse = 0
                mse = 0
                image_batch = data_to_input(frame)

                # Compute prediction with the CNN
                outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
                scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
                detections = extract_detections(cfg, scmap, locref, pairwise_diff)
                unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
                person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

                img = np.copy(image)
                # coor = PersonDraw.draw()
                visim_multi = img.copy()

                co1 = draw_multi.draw(visim_multi, dataset, person_conf_multi)

                plt.imshow(visim_multi)
                plt.show()
                visualize.waitforbuttonpress()
                # print("this is draw : ", co1)

                if k == 1:
                    qwr = np.zeros((1920, 1080, 3), np.uint8)
                    cv2.line(qwr, co1[5][0], co1[5][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[7][0], co1[7][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[6][0], co1[6][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[4][0], co1[4][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[9][0], co1[9][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[11][0], co1[11][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[8][0], co1[8][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[10][0], co1[10][1], (255, 0, 0), 3)

                    # In[9]:
                    cv2.imshow('r', qwr)

                    qwr2 = "stick/frame" + str(k) + ".jpg"
                    qw1 = cv2.cvtColor(qwr, cv2.COLOR_BGR2GRAY)
                    # Read the reference stick image from disk before converting;
                    # the original passed the file path straight to cvtColor.
                    qw2 = cv2.cvtColor(cv2.imread(qwr2), cv2.COLOR_BGR2GRAY)

                    fig = plt.figure("Images")
                    images = ("Original", qw1), ("Contrast", qw2)
                    for (idx, (name, image)) in enumerate(images):
                        ax = fig.add_subplot(1, 3, idx + 1)
                        ax.set_title(name)
                        # Show the grayscale image itself; the original called
                        # plt.imshow(hash(tuple(image))), which is not displayable.
                        plt.imshow(image, cmap="gray")

                    # compare the images
                    s, m = compare_images(qw1, qw2, "Image1 vs Image2")
                    k += 1
                    sse = s
                    mse = m
            else:
                break

    elapsed = time.time() - start_time
    # print("sse score : ", sse)
    # Note: this reports elapsed wall-clock time divided by 100, not a mean squared error.
    print("Mean squared error : ", elapsed / 100)
    cap.release()
    cv2.destroyAllWindows()

)

train_data_args = dict(
    batch_size=64,
    shuffle=True,
    to_train=True,
)

val_data_args = dict(
    batch_size=train_data_args['batch_size'] * 4,
    shuffle=False,
    validate_step_size=1,
)

dataset: BaseDataset = create_dataset(dataset_args, train_data_args, val_data_args)

device = torch.device(f"cuda:0" if torch.cuda.is_available() else "cpu")

eval_model.load_state_dict(torch.load(
    './logs/2019-12-22T02:24:08.329024_mode_classification_model_ConvNetSimple_dataset_MNIST_subset_1.0_bs_64_name_Adam_lr_0.001/epoch_0032-model-val_accuracy_99.11754911754912.pth'))
eval_model = torch.nn.Sequential(*list(eval_model.children())[:4])

start = time.time()
outdir = './logs/fretchet_score'
transform = None
callback = FrechetInceptionScoreCallback(outdir='./logs/frechet_score',
                                         device=device,
                                         classifier=eval_model,

from PIL import Image, ImageDraw, ImageFont
font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 12)

import random

# for object-tracker
import dlib

# import video_pose

####################

cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

##########

## Get the source of video
parser = ap.ArgumentParser()
parser.add_argument('-f', "--videoFile", help="Path to Video File")
parser.add_argument('-w', "--videoWidth", help="Width of Output Video")

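# A minimal, hypothetical sketch of how the parser set up above is typically
# consumed; the original script is truncated here and likely defines more
# options. `args`, `video_file` and `cap` are illustrative names, and cv2 is
# assumed to be imported as in the other demos in this file.
args = parser.parse_args()
video_file = args.videoFile
cap = cv2.VideoCapture(video_file)
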
def objective(arguments):
    """
    Main Pipeline for training and cross-validation.
    ToDo - Testing will be done separately in test.py.
    """

    """ Setup result directory and enable logging to file in it """
    outdir = make_results_dir(arguments)
    logger.init(outdir, logging.INFO)
    logger.info('Arguments:\n{}'.format(pformat(arguments)))

    """ Initialize Tensorboard """
    tensorboard_writer = initialize_tensorboard(outdir)

    """ Set random seed throughout python, pytorch and numpy """
    logger.info('Using Random Seed value as: %d' % arguments['random_seed'])
    torch.manual_seed(arguments['random_seed'])  # Set for pytorch, used for cuda as well.
    random.seed(arguments['random_seed'])        # Set for python
    np.random.seed(arguments['random_seed'])     # Set for numpy

    """ Set device - cpu or gpu """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f'Using device - {device}')

    """ Load Model with weights(if available) """
    model: torch.nn.Module = get_model(arguments.get('model_args')).to(device)

    """ Create loss function """
    criterion = create_loss(arguments['loss_args'])

    """ Create optimizer """
    optimizer = create_optimizer(model.parameters(), arguments['optimizer_args'])

    """ Load parameters for the Dataset """
    dataset: BaseDataset = create_dataset(arguments['dataset_args'],
                                          arguments['train_data_args'],
                                          arguments['val_data_args'])

    """ Generate all callbacks """
    callbacks: List[Callbacks] = generate_callbacks(arguments, dataset, device, outdir)

    """ Debug the inputs to model and save graph to tensorboard """
    dataset.debug()
    dummy_input = (torch.rand(1,
                              arguments['dataset_args']['name'].value['channels'],
                              *arguments['dataset_args']['name'].value['image_size'],
                              )).to(device)
    tensorboard_writer.save_graph(model, dummy_input)

    """ Pipeline - loop over the dataset multiple times """
    max_validation_accuracy = 0
    itr = 0

    best_model_path = None
    delete_old_models = True

    run_callbacks(callbacks, model=model, optimizer=optimizer, mode=CallbackMode.ON_TRAIN_BEGIN)
    for epoch in range(arguments['nb_epochs']):
        """ Train the model """
        train_data_args = arguments['train_data_args']
        if train_data_args['to_train']:
            train_dataloader = dataset.train_dataloader
            progress_bar = ProgressBar(target=len(train_dataloader),
                                       clear=True,
                                       description=f"Training {epoch + 1}/{arguments['nb_epochs']}: ")
            loss_running_average = RunningAverage()

            run_callbacks(callbacks, model=model, optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_BEGIN, epoch=epoch)
            model.train()
            for i, data in enumerate(train_dataloader, 0):
                # get the inputs
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Forward Pass
                outputs = model(inputs)
                classification_loss = criterion(outputs, labels)
                tensorboard_writer.save_scalar('Classification_Loss', classification_loss.item(), itr)
                classification_loss.backward()
                optimizer.step()

                # Compute running loss. Not exact but efficient.
                running_loss = loss_running_average.add_new_sample(classification_loss.item())
                progress_bar.update(i + 1, [
                    ('current loss', classification_loss.item()),
                    ('running loss', running_loss),
                ])

                tensorboard_writer.save_scalar('Training_Loss', classification_loss, itr)
                itr += 1

            # Callbacks ON_EPOCH_END should be run only when training is enabled. Thus call here.
            run_callbacks(callbacks, model=model, optimizer=optimizer,
                          mode=CallbackMode.ON_EPOCH_END, epoch=epoch)

        """ Validate the model """
        val_data_args = arguments['val_data_args']
        if val_data_args['validate_step_size'] > 0 and \
                epoch % val_data_args['validate_step_size'] == 0:
            correct, total = 0, 0
            validation_dataloader = dataset.validation_dataloader
            progress_bar = ProgressBar(target=len(validation_dataloader),
                                       clear=True,
                                       description=f"Validating {epoch + 1}/{arguments['nb_epochs']}: ")
            model.eval()
            with torch.no_grad():
                for i, data in enumerate(validation_dataloader, 0):
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    progress_bar.update(i + 1, [
                        ('Batch Accuracy', 100 * correct / total),
                    ])

            val_accuracy = 100 * correct / total
            tensorboard_writer.save_scalar('Validation_Accuracy', val_accuracy, itr)
            logger.info(
                f'Accuracy of the network on the {dataset.get_val_dataset_size} validation images: {val_accuracy} %')

            """ Save Model """
            if val_accuracy > max_validation_accuracy:
                if delete_old_models and best_model_path:
                    delete_old_file(best_model_path)
                best_model_path = os.path.join(
                    outdir, f'epoch_{epoch:04}-model-val_accuracy_{val_accuracy}.pth')
                torch.save(model.state_dict(), best_model_path)
                max_validation_accuracy = val_accuracy

        tensorboard_writer.flush()

        # Exit loop if training not needed
        if not train_data_args['to_train']:
            break

    run_callbacks(callbacks, model=model, optimizer=optimizer, mode=CallbackMode.ON_TRAIN_END)
    logger.info('Finished Training')

    close_tensorboard()
    logger.info(f'Max Validation accuracy is {max_validation_accuracy}')
    return max_validation_accuracy  # Return in case you later want to add hyperopt.