def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
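# A minimal, hedged usage sketch for the entry point above. The config dicts
# are illustrative assumptions only (call_obj is expected to instantiate the
# class named by the 'type' key); the class names and checkpoint path below
# are placeholders, not values from this repo.
def _example_test_invocation():
    model_cfg = dict(type='models.backbones.ST_GCN_18',  # hypothetical type
                     in_channels=3,
                     num_class=60)
    dataset_cfg = dict(type='datasets.SkeletonFeeder',   # hypothetical type
                       data_path='data/val_data.npy')
    test(model_cfg, dataset_cfg,
         checkpoint='checkpoints/st_gcn.pth',
         batch_size=32,
         gpus=1,
         workers=4)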
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=2):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    total_time = 0
    for data, label in data_loader:
        with torch.no_grad():
            start = time.time()
            output = model(data).data.cpu().numpy()
            if torch.cuda.is_available():
                # ensure all queued kernels finish before reading the clock
                torch.cuda.synchronize()
            total_time += time.time() - start
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print("Average infer time: ", total_time / len(data_loader))
    print("Total infer time: ", total_time)
    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
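# For finer-grained GPU timing than wall-clock time.time(), CUDA events can be
# used instead. This is a standalone sketch, not part of the original
# pipeline; it assumes only that `model` and `data` already live on the GPU.
def _time_forward_with_cuda_events(model, data):
    start_evt = torch.cuda.Event(enable_timing=True)
    end_evt = torch.cuda.Event(enable_timing=True)
    with torch.no_grad():
        start_evt.record()
        output = model(data)
        end_evt.record()
    torch.cuda.synchronize()  # wait until both recorded events have completed
    elapsed_ms = start_evt.elapsed_time(end_evt)  # milliseconds
    return output, elapsed_ms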
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4,
         seed=0):
    # confusion-matrix accumulator (assumes model_cfg carries num_class)
    conf_matrix = torch.zeros(model_cfg.num_class, model_cfg.num_class)
    set_determined_seed(seed)
    torch.multiprocessing.set_sharing_strategy('file_system')

    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=get_gpus(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    # accumulate and save the confusion matrix
    conf_matrix = confusion_matrix(
        torch.max(torch.from_numpy(results), 1)[1], labels, conf_matrix)
    np.save('/home/computer/WBH/GCN/INTERGCN/conf.npy', conf_matrix)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
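# The confusion_matrix helper called above is not shown in this file; it is
# assumed to accumulate prediction/label counts into the pre-allocated matrix.
# A minimal sketch consistent with that call signature (an assumption, not the
# actual implementation):
def _confusion_matrix_sketch(preds, labels, conf_matrix):
    # preds: 1-D tensor of predicted class indices
    # labels: 1-D tensor or array of ground-truth class indices
    for p, t in zip(preds, labels):
        conf_matrix[int(t), int(p)] += 1  # rows: ground truth, cols: predicted
    return conf_matrix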
def detect(inputs, results, model_cfg, dataset_cfg, checkpoint, video_dir,
           batch_size=64, gpus=1, workers=4):
    print('detect start')

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    video_file_list = os.listdir(video_dir)
    prog_bar = ProgressBar(len(video_file_list))
    for video_file in video_file_list:
        data = inputs.get()
        data, label = data_parse(data, dataset_cfg.pipeline,
                                 dataset_cfg.data_source.num_track)
        with torch.no_grad():
            data = torch.from_numpy(data)
            # add a leading dimension for batch_size
            data = data.unsqueeze(0)
            data = data.float().to("cuda:0").detach()
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(torch.tensor([label]))
        for i in range(len(data)):
            prog_bar.update()
    print('--------', results, labels, '--------------')
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
def init_twodimestimator(config, checkpoint=None, device='cpu'):
    if isinstance(config, str):
        config = Config.fromfile(config)
        config = config.processor_cfg
    elif isinstance(config, OrderedDict):
        pass
    else:
        raise ValueError(
            'Input config type is: {}, expected "str" or "OrderedDict".'.format(
                type(config)))

    model_cfg = config.model_cfg
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location=device)
    model.to(device)
    model = model.eval()
    return model
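# Hedged usage sketch: init_twodimestimator accepts either a config file path
# or an already-parsed OrderedDict. The config and checkpoint paths below are
# placeholders, not files shipped with this repo.
def _example_estimator_init():
    estimator = init_twodimestimator('configs/pose_estimation/hrnet.yaml',
                                     checkpoint='weights/pose_hrnet.pth',
                                     device='cuda:0')
    return estimator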
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # import the OpenPose Python API built under ./openpose
    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))
    try:
        from openpose import pyopenpose as op
    except ImportError:
        print('Cannot find the OpenPose Python API.')
        return
    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True
    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    video_capture = cv2.VideoCapture("fall01.mp4")
    pose_tracker = naive_pose_tracker()

    # start recognition
    start_time = time.time()
    frame_index = 0
    gt_labels = []
    with open(
            'mmskeleton/deprecated/st_gcn/resource/kinetics_skeleton/label_name.txt',
            'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while True:
        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        H, W, _ = orig_image.shape

        # pose estimation
        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])
        multi_pose = datum.poseKeypoints  # (num_person, num_joint, 3)
        if len(multi_pose.shape) != 3:
            continue

        # normalization
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()
        data = torch.from_numpy(data_numpy)
        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()

        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')

        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))
        print('voting_label_index:{}'.format(voting_label))
        print(len(gt_labels))
        print(gt_labels[voting_label])
        print(output[0][voting_label])

        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, gt_labels[voting_label],
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def init_recognizer(recognition_cfg, device):
    model = call_obj(**(recognition_cfg.model_cfg))
    load_checkpoint(model,
                    recognition_cfg.checkpoint_file,
                    map_location=device)
    return model
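# Hedged usage sketch: unlike init_twodimestimator above, init_recognizer does
# not move the model to a device or switch it to eval mode, so the caller is
# expected to do both. The device string is a placeholder.
def _example_recognizer_init(recognition_cfg):
    model = init_recognizer(recognition_cfg, device='cpu')
    model = model.to('cuda:0').eval()
    return model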
def realtime_detect(detection_cfg, estimation_cfg, model_cfg, dataset_cfg,
                    tracker_cfg, video_dir, category_annotation, checkpoint,
                    batch_size=64, gpus=1, workers=4):
    """Initialization."""
    # initialize the models
    pose_estimators = init_pose_estimator(detection_cfg,
                                          estimation_cfg,
                                          device=0)
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # fetch the video frames
    video_file = 'train/clean/clean10.avi'
    reader = mmcv.VideoReader(os.path.join(video_dir, video_file))
    video_frames = reader[:10000]

    if category_annotation is None:
        video_categories = dict()
    else:
        with open(category_annotation) as f:
            json_file = json.load(f)
            video_categories = json_file['annotations']
            action_class = json_file['categories']

    annotations = []
    num_keypoints = -1
    for i, image in enumerate(video_frames):
        res = inference_pose_estimator(pose_estimators, image)
        res['frame_index'] = i
        if not res['has_return']:
            continue
        num_person = len(res['joint_preds'])
        assert len(res['person_bbox']) == num_person

        for j in range(num_person):
            keypoints = [[p[0], p[1], round(s[0], 2)] for p, s in zip(
                res['joint_preds'][j].round().astype(int).tolist(),
                res['joint_scores'][j].tolist())]
            num_keypoints = len(keypoints)
            person_info = dict(
                person_bbox=res['person_bbox'][j].round().astype(int).tolist(),
                frame_index=res['frame_index'],
                id=j,
                person_id=None,
                keypoints=keypoints)
            annotations.append(person_info)

        category_id = video_categories[video_file][
            'category_id'] if video_file in video_categories else -1
        info = dict(video_name=video_file,
                    resolution=reader.resolution,
                    num_frame=len(video_frames),
                    num_keypoints=num_keypoints,
                    keypoint_channels=['x', 'y', 'score'],
                    version='1.0')
        video_info = dict(info=info,
                          category_id=category_id,
                          annotations=annotations)
        data, label = data_parse(video_info, dataset_cfg.pipeline,
                                 dataset_cfg.data_source.num_track)
        with torch.no_grad():
            data = torch.from_numpy(data)
            # add a leading dimension for batch_size
            data = data.unsqueeze(0)
            data = data.float().to("cuda:0").detach()
            output = model(data).data.cpu().numpy()
        top1 = output.argmax()
        if output[:, top1] > 3:
            label = action_class[top1]
        else:
            label = 'unknown'
        print("result:", output)
        res['render_image'] = render(image, res['joint_preds'], label,
                                     res['person_bbox'],
                                     detection_cfg.bbox_thre)
        cv2.imshow('image', res['render_image'])
        cv2.waitKey(10)
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # import the OpenPose Python API built under ./openpose
    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))
    try:
        from openpose import pyopenpose as op
    except ImportError:
        print('Cannot find the OpenPose Python API.')
        return
    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True
    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    video_capture = cv2.VideoCapture(
        "mmskeleton/deprecated/st_gcn/resource/media/ta_chi.mp4")
    pose_tracker = naive_pose_tracker()

    # start recognition
    start_time = time.time()
    frame_index = 0
    gt_labels = []
    with open('configs/recognition/st_gcn/xview/label.txt', 'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while True:
        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        H, W, _ = orig_image.shape

        # pose estimation
        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])

        # remap OpenPose COCO body + hand keypoints to the 25-joint NTU layout
        body_ntu = dict()
        body_ntu_list = []
        left_hand = datum.handKeypoints[0]  # (num_person, num_joint, 3)
        right_hand = datum.handKeypoints[1]
        body_ntu["1"] = datum.poseKeypoints[0][8]
        body_ntu["2"] = np.array([
            datum.poseKeypoints[0][8][0],
            (datum.poseKeypoints[0][8][1] + datum.poseKeypoints[0][1][1]) / 2,
            datum.poseKeypoints[0][8][2]
        ])
        body_ntu["3"] = np.array([
            datum.poseKeypoints[0][0][0],
            (datum.poseKeypoints[0][0][1] + datum.poseKeypoints[0][1][1]) / 2,
            datum.poseKeypoints[0][0][2]
        ])
        body_ntu["4"] = datum.poseKeypoints[0][0]
        body_ntu["5"] = datum.poseKeypoints[0][5]
        body_ntu["6"] = datum.poseKeypoints[0][6]
        body_ntu["7"] = datum.poseKeypoints[0][7]
        body_ntu["8"] = left_hand[0][0]
        body_ntu["9"] = datum.poseKeypoints[0][2]
        body_ntu["10"] = datum.poseKeypoints[0][3]
        body_ntu["11"] = datum.poseKeypoints[0][4]
        body_ntu["12"] = right_hand[0][0]
        body_ntu["13"] = datum.poseKeypoints[0][12]
        body_ntu["14"] = datum.poseKeypoints[0][13]
        body_ntu["15"] = datum.poseKeypoints[0][14]
        body_ntu["16"] = datum.poseKeypoints[0][19]
        body_ntu["17"] = datum.poseKeypoints[0][9]
        body_ntu["18"] = datum.poseKeypoints[0][10]
        body_ntu["19"] = datum.poseKeypoints[0][11]
        body_ntu["20"] = datum.poseKeypoints[0][22]
        body_ntu["21"] = datum.poseKeypoints[0][1]
        body_ntu["22"] = left_hand[0][12]
        body_ntu["23"] = left_hand[0][4]
        body_ntu["24"] = right_hand[0][12]
        body_ntu["25"] = right_hand[0][4]
        for key in body_ntu:
            x, y, z = body_ntu[key]
            body_ntu_list.append([x, y, z])
        multi_pose = np.asarray([body_ntu_list])
        if len(multi_pose.shape) != 3:
            continue

        # normalization
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()
        data = torch.from_numpy(data_numpy)
        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()

        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')

        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))
        print('voting_label_index:{}'.format(voting_label))
        print(gt_labels[voting_label])
        print(output[0][voting_label])

        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, "fall_down",
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def test(test_cfg, model_cfg, dataset_cfg, checkpoint, batch_size, work_dir,
         gpus=1, workers=4):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = call_obj(**dataset_cfg,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           normalize,
                       ]))
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size * gpus,
                                              shuffle=False,
                                              num_workers=workers * gpus)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # prepare for evaluation
    num_samples = len(dataset)
    prog_bar = ProgressBar(num_samples // (batch_size * gpus) + 1)
    all_preds = np.zeros((num_samples, model_cfg.skeleton_head.num_joints, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    filenames = []
    imgnums = []
    image_path = []
    idx = 0

    # adapted from HRNet
    with torch.no_grad():
        for i, (input, meta, target, target_weight) in enumerate(data_loader):
            # get prediction
            outputs = model.forward(input, return_loss=False)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            # flip test
            if test_cfg.flip:
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped, return_loss=False)
                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # features are not aligned; shift the flipped heatmap for
                # higher accuracy
                if test_cfg.shift_heatmap:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                output = (output + output_flipped) * 0.5

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()
            num_images = input.size(0)

            preds, maxvals = get_final_preds(test_cfg.post_process,
                                             output.detach().cpu().numpy(),
                                             c, s)
            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes part
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])
            idx += num_images
            prog_bar.update()

        name_values, perf_indicator = dataset.evaluate(test_cfg, all_preds,
                                                       work_dir, all_boxes,
                                                       image_path, filenames,
                                                       imgnums)
    return perf_indicator
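# flip_back (imported from HRNet-style utilities) is assumed to undo the
# horizontal flip on the predicted heatmaps and swap left/right joint
# channels. A minimal sketch of that standard behavior, not the imported
# implementation itself:
def _flip_back_sketch(output_flipped, flip_pairs):
    # output_flipped: (batch, num_joints, height, width) numpy array
    output = output_flipped[:, :, :, ::-1].copy()  # un-mirror along width
    for left, right in flip_pairs:
        tmp = output[:, left].copy()  # swap paired joint channels
        output[:, left] = output[:, right]
        output[:, right] = tmp
    return output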