def test_transforms_presets_ssd():
    """Smoke-test the SSD preset transforms end to end.

    Checks that ``load_test`` (from file) and ``transform_test`` (from an
    already-decoded image) produce identical tensors, then — only if the
    full VOC dataset is present locally — builds train/val DataLoaders with
    the SSD default transforms and pulls a couple of batches from each.

    NOTE(review): an identically-named function is defined again later in
    this file; at import time that later definition shadows this one.
    """
    # Download a known test image (cached by gcv.utils.download if present).
    im_fname = gcv.utils.download('https://github.com/dmlc/web-data/blob/master/' + 'gluoncv/detection/biking.jpg?raw=true', path='biking.jpg')
    # Two entry points to the same preset transform must agree exactly.
    x, orig_img = ssd.load_test(im_fname, short=512)
    x1, orig_img1 = ssd.transform_test(mx.image.imread(im_fname), short=512)
    np.testing.assert_allclose(x.asnumpy(), x1.asnumpy())
    np.testing.assert_allclose(orig_img, orig_img1)
    # The DataLoader part needs the real VOC dataset; skip silently if absent.
    if not osp.isdir(osp.expanduser('~/.mxnet/datasets/voc')):
        return
    train_dataset = gcv.data.VOCDetection(splits=((2007, 'trainval'), (2012, 'trainval')))
    val_dataset = gcv.data.VOCDetection(splits=[(2007, 'test')])
    width, height = (512, 512)
    # Untrained network is fine: only its anchor outputs are needed below.
    net = gcv.model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=False, pretrained_base=False)
    net.initialize()
    num_workers = 0
    batch_size = 4
    # In train mode the SSD net returns (cls_preds, box_preds, anchors);
    # the anchors parameterize the training target transform.
    with autograd.train_mode():
        _, _, anchors = net(mx.nd.zeros((1, 3, height, width)))
    batchify_fn = Tuple(Stack(), Stack(), Stack())  # stack image, cls_targets, box_targets
    train_loader = gluon.data.DataLoader(
        train_dataset.transform(ssd.SSDDefaultTrainTransform(width, height, anchors)),
        batch_size, True, batchify_fn=batchify_fn, last_batch='rollover', num_workers=num_workers)
    # Val targets are variable-length label lists, hence Pad instead of Stack.
    val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
    val_loader = gluon.data.DataLoader(
        val_dataset.transform(ssd.SSDDefaultValTransform(width, height)),
        batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep', num_workers=num_workers)
    # Train transform without anchors: returns (img, label) like the val path.
    train_loader2 = gluon.data.DataLoader(
        train_dataset.transform(ssd.SSDDefaultTrainTransform(width, height)),
        batch_size, True, batchify_fn=val_batchify_fn, last_batch='rollover', num_workers=num_workers)
    # Pull two batches from each loader just to exercise the pipelines.
    for loader in [train_loader, val_loader, train_loader2]:
        for i, batch in enumerate(loader):
            if i > 1:
                break
            pass
def test_transforms_presets_ssd():
    """Smoke-test the SSD preset transforms end to end.

    Verifies that ``load_test`` and ``transform_test`` agree on the same
    image, then — when the local VOC directory exists — exercises train and
    val DataLoaders built from the SSD default transforms on the tiny
    motorbike dataset, reading two batches from each.
    """
    image_path = gcv.utils.download('https://github.com/dmlc/web-data/blob/master/' + 'gluoncv/detection/biking.jpg?raw=true', path='biking.jpg')
    # Both preset entry points must yield identical tensors and images.
    tensor_a, img_a = ssd.load_test(image_path, short=512)
    tensor_b, img_b = ssd.transform_test(mx.image.imread(image_path), short=512)
    np.testing.assert_allclose(tensor_a.asnumpy(), tensor_b.asnumpy())
    np.testing.assert_allclose(img_a, img_b)
    # DataLoader portion requires the dataset on disk; bail out otherwise.
    if not osp.isdir(osp.expanduser('~/.mxnet/datasets/voc')):
        return
    dataset_train = VOCDetectionTiny()
    dataset_val = VOCDetectionTiny(splits=[('tiny_motorbike', 'test')])
    width, height = 512, 512
    num_workers, batch_size = 0, 4
    # An uninitialized-weights model suffices: we only need its anchors.
    model = gcv.model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=False, pretrained_base=False)
    model.initialize()
    # Train-mode forward returns (cls_preds, box_preds, anchors).
    with autograd.train_mode():
        _, _, anchors = model(mx.nd.zeros((1, 3, height, width)))
    stack_all = Tuple(Stack(), Stack(), Stack())  # stack image, cls_targets, box_targets
    loader_train = gluon.data.DataLoader(
        dataset_train.transform(ssd.SSDDefaultTrainTransform(width, height, anchors)),
        batch_size,
        shuffle=True,
        batchify_fn=stack_all,
        last_batch='rollover',
        num_workers=num_workers)
    # Validation labels vary in length per image, so pad them with -1.
    stack_and_pad = Tuple(Stack(), Pad(pad_val=-1))
    loader_val = gluon.data.DataLoader(
        dataset_val.transform(ssd.SSDDefaultValTransform(width, height)),
        batch_size,
        shuffle=False,
        batchify_fn=stack_and_pad,
        last_batch='keep',
        num_workers=num_workers)
    # Train transform without anchors emits (img, label) pairs like val.
    loader_train_raw = gluon.data.DataLoader(
        dataset_train.transform(ssd.SSDDefaultTrainTransform(width, height)),
        batch_size,
        shuffle=True,
        batchify_fn=stack_and_pad,
        last_batch='rollover',
        num_workers=num_workers)
    # Drain two batches from each pipeline to confirm they iterate cleanly.
    for loader in (loader_train, loader_val, loader_train_raw):
        for batch_idx, _batch in enumerate(loader):
            if batch_idx > 1:
                break
def get_full_frame_info(a_frame):
    """Run detection + pose estimation on one frame and return everything.

    Parameters
    ----------
    a_frame : mx.nd.NDArray
        Raw frame accepted by ``transform_test`` (presumably HWC uint8 RGB
        — TODO confirm against the caller).

    Returns
    -------
    tuple
        ``(class_IDs, scores, upscale_bbox, pred_coords, confidence,
        bounding_boxs)``.  When no person is detected, ``pred_coords`` and
        ``confidence`` are ``None``.
    """
    x, frame = transform_test(a_frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs)
    # Bug fix: the original only assigned pred_coords/confidence inside the
    # if-branch, so the return raised UnboundLocalError whenever no person
    # was detected.  Default them to None instead.
    pred_coords, confidence = None, None
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        pred_coords = pred_coords.asnumpy()
    return class_IDs, scores, upscale_bbox, pred_coords, confidence, bounding_boxs
def detection(net, image, use_gpu):
    """Run person detection followed by pose estimation on one image.

    Parameters
    ----------
    net : dict
        Holds the models under keys ``'detector'`` and ``'pose_net'``.
    image : numpy.ndarray
        Input image (converted to an MXNet NDArray internally).
    use_gpu : bool
        Whether to move tensors to the GPU context.

    Returns
    -------
    tuple
        ``(pred, img)`` where ``pred`` is a dict with detection/pose
        results, or ``(None, img)`` when no person is detected.
    """
    ctx = mx.gpu() if use_gpu else mx.cpu()
    # Preset SSD-style test transform with ImageNet normalization stats.
    x, img = transform_test(
        nd.array(image),
        short=512,
        max_size=1024,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225))
    if use_gpu:
        # Move the input tensor to the GPU.
        x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = net['detector'](x)
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)
    if len(upscale_bbox) == 0:
        # No person found in the image.
        return None, img
    if use_gpu:
        # Move the pose-network input to the GPU.
        pose_input = pose_input.as_in_context(ctx)
    predicted_heatmap = net['pose_net'](pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    if use_gpu:
        # Keep the keypoint coordinates on the GPU as well.
        pred_coords = pred_coords.as_in_context(ctx)
    pred = {
        'class_IDs': class_IDs,
        'scores': scores,
        'bounding_boxs': bounding_boxs,
        'pred_coords': pred_coords,
        'confidence': confidence
    }
    return pred, img
def preprocessing(input_img, **kwargs):
    """Prepare a single image for the detector.

    Heuristically rescales [0, 1]-range inputs to [0, 255], resizes to the
    module-level ``input_shape``, and applies ``transform_test``.  A 4-D
    batch input is reduced to its first image.  Extra keyword arguments are
    accepted but ignored.
    """
    # If the 98th percentile is <= 1.0 the image is assumed to be in [0, 1]
    # and must be scaled up to [0, 255].
    needs_rescale = not (np.percentile(input_img, 98) > 1.0)
    factor = 255.0 if needs_rescale else 1.0
    if len(input_img.shape) == 4:
        print(
            "Only preprocessing single image, we will consider the first one of the batch"
        )
        image = input_img[0] * factor
    else:
        image = input_img * factor
    # NOTE(review): input_shape is a module-level global; cv2.resize expects
    # (width, height) order — confirm it is defined accordingly.
    image = cv2.resize(image, input_shape)
    x, _ = transform_test(mx.nd.array(image), min(input_shape))
    return x
def get_skeleton_from_frame(a_frame):
    """Extract a single subject's skeleton keypoints from one frame.

    Returns
    -------
    tuple
        ``(ok_flag, extra_person_flag, b_coords)`` — whether a skeleton was
        found, whether more than two people were detected, and the chosen
        subject's keypoint coordinates (``0`` when nothing was found).
    """
    x, frame = transform_test(a_frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs)
    ok_flag = False
    extra_person_flag = False
    b_coords = 0
    if len(upscale_bbox) > 0:
        heatmap = pose_net(pose_input)
        coords, confidence = heatmap_to_coord(heatmap, upscale_bbox)
        coords = coords.asnumpy()
        b_coords = coords[0]
        person_count = coords.shape[0]
        if person_count > 2:
            extra_person_flag = True
        if person_count == 2:
            # doing best guest when two boxes ( subject and background are similar)
            if upscale_bbox[0][3] == 512:
                b_coords = coords[1]
        ok_flag = True
    return ok_flag, extra_person_flag, b_coords
# Top-level script chunk: compare poses between an input video and a demo
# (reference) video using a detector + pose estimator.
cap1 = cv.VideoCapture(args.input)
cap2 = cv.VideoCapture(args.demo)
# Reference features / angle calculator loaded from the data file.
angeleCal = AngeleCal(args.data)
ret1, frame1 = cap1.read()
ret2, frame2 = cap2.read()
# Process frames as long as both videos still produce them.
while ret1 and ret2:
    # OpenCV decodes BGR; the models expect RGB.  frame1 also becomes an
    # MXNet NDArray because it feeds transform_test directly.
    frame1 = mx.nd.array(cv.cvtColor(frame1, cv.COLOR_BGR2RGB)).astype('uint8')
    frame2 = cv.cvtColor(frame2, cv.COLOR_BGR2RGB)
    # Object detection
    x, img = transform_test(frame1, short=512, max_size=680)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs, ctx=ctx)
    # Pose estimation
    results = None
    if len(upscale_bbox) > 0:
        predicted_heatmap = estimator(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        # NOTE(review): the loop body appears truncated here — the frames
        # are never re-read in the visible code, so the remainder of this
        # loop must live outside this chunk.
# Top-level script chunk: read a video, run detection + pose estimation per
# frame, and accumulate per-frame features.
parser = argparse.ArgumentParser()
parser.add_argument('--input')
parser.add_argument('--output', required=True)
args = parser.parse_args()
# Open the input video.
cap = cv.VideoCapture(args.input)
ret, frame = cap.read()
features = []
while ret:
    # Convert BGR (OpenCV) to RGB and wrap as an MXNet NDArray.
    frame = mx.nd.array(cv.cvtColor(frame, cv.COLOR_BGR2RGB)).astype('uint8')
    # Object detection
    x, img = transform_test(frame, short=512)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs, ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = estimator(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        # Draw the keypoints onto the frame for visualization/output.
        img = cv_plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores)
        # NOTE(review): the loop body appears truncated here — `ret`/`frame`
        # are never re-read in the visible code, so the rest of this loop
        # must live outside this chunk.