def parse_image(cv_image):
    """Estimate body and hand poses in an image and draw them on a copy.

    Uses the module-level ``body_estimation`` and ``hand_estimation``
    callables together with the ``util`` detection/drawing helpers.

    Args:
        cv_image: Image array indexed as ``[row, column, channel]``. Drawing
            is done on a deep copy of it.

    Returns:
        A 3-tuple ``(image, skeleton, hands)``:

        * ``image`` -- the copy with body and hand poses drawn on it.
        * ``skeleton`` -- a list of 18 ``(x, y)`` tuples, one per body joint
          index. A joint that no detected person has stays ``(-1, -1)``;
          when several people have it, the last person in ``subset`` wins.
        * ``hands`` -- one nested list (``ndarray.tolist()``) of peaks per
          detected hand, expressed in full-image coordinates.
    """
    body_candidates, people = body_estimation(cv_image)
    annotated = util.draw_bodypose(copy.deepcopy(cv_image), body_candidates, people)

    # Run the hand estimator on every square hand crop proposed from the body pose.
    hand_peaks = []
    for left, top, side, _is_left in util.handDetect(body_candidates, people, cv_image):
        found = hand_estimation(cv_image[top:top + side, left:left + side, :])
        # Move crop-relative coordinates into the full image. Entries equal
        # to 0 are left untouched (presumably "no peak found" -- confirm
        # against the hand estimator).
        for axis, offset in ((0, left), (1, top)):
            column = found[:, axis]
            found[:, axis] = np.where(column == 0, column, column + offset)
        hand_peaks.append(found)
    annotated = util.draw_handpose(annotated, hand_peaks)

    # Flatten the per-person joint table into a single 18-joint skeleton.
    skeleton = [(-1, -1)] * 18
    for joint in range(18):
        for person in people:
            candidate_index = int(person[joint])
            if candidate_index != -1:
                joint_x, joint_y = body_candidates[candidate_index][0:2]
                skeleton[joint] = (joint_x, joint_y)

    return annotated, skeleton, [found.tolist() for found in hand_peaks]
# Demo script: run body-pose and hand-pose estimation on a single test image.
from hand import Hand
from body import Body
import matplotlib.pyplot as plt
import copy
import numpy as np

# NOTE(review): `cv2` and `util` are used below but are not imported in the
# visible part of this file -- confirm they are brought into scope elsewhere.

body_estimation = Body('model/body_pose_model.pth')
hand_estimation = Hand('model/hand_pose_model.pth')

test_image = 'images/demo.jpg'
oriImg = cv2.imread(test_image)  # B,G,R order

# Body pose first; it is drawn on a deep copy so `oriImg` is kept for cropping.
candidate, subset = body_estimation(oriImg)
canvas = util.draw_bodypose(copy.deepcopy(oriImg), candidate, subset)

# Square hand regions (x, y, side length, left/right flag) derived from the body pose.
hands_list = util.handDetect(candidate, subset, oriImg)

all_hand_peaks = []
for x, y, w, is_left in hands_list:
    hand_crop = oriImg[y:y + w, x:x + w, :]
    peaks = hand_estimation(hand_crop)
    # Shift crop-relative coordinates into the full image; entries equal to 0
    # are left as they are.
    for axis, shift in ((0, x), (1, y)):
        axis_values = peaks[:, axis]
        peaks[:, axis] = np.where(axis_values == 0, axis_values, axis_values + shift)
    # Disabled in the original: drawing the hand rectangle and a left/right
    # label on `canvas`, previewing left-hand crops with matplotlib, and a
    # separate right-hand path that estimated on a horizontally flipped crop
    # and mapped x back with `w - peaks[:, 0] - 1 + x`.
    # NOTE(review): within the visible fragment `peaks` is not appended to
    # `all_hand_peaks`; the script appears to continue beyond this span.
def detect(oriImg, image_name, mode, model, hand_estimation, body_estimation, rpn, labels):
    """Annotate hands (and optionally body pose and objects) on an image and save it.

    The annotated image is written to ``demo/output/<image_name>``.

    Args:
        oriImg: Image array indexed as ``[row, column, channel]``; the first
            two entries of its shape are used as height and width.
        image_name: File name of the result inside ``demo/output``.
        mode: ``"openpose"`` derives hand boxes from the body pose returned by
            ``body_estimation``; ``"handpose"`` takes them from
            ``inference_detector(model, oriImg)``.
        model: Detector handed to ``inference_detector`` (``"handpose"`` only).
        hand_estimation: Callable mapping a cropped image to a peak array
            whose columns 0 and 1 are x and y relative to the crop.
        body_estimation: Callable returning ``(candidate, subset)``
            (``"openpose"`` only).
        rpn: Object detector handed to ``detect_object.detect``.
        labels: Labels handed to ``detect_object.draw_ssd_bbox``. Object
            detection runs only when both ``rpn`` and ``labels`` are truthy.

    Raises:
        ValueError: If ``mode`` is neither ``"openpose"`` nor ``"handpose"``.
    """
    if mode not in ("openpose", "handpose"):
        # An unsupported mode used to skip both branches and fail later with
        # an UnboundLocalError on `all_hand_peaks`; fail early and clearly.
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(mode))

    height, width = oriImg.shape[:2]
    canvas = copy.deepcopy(oriImg)
    with_objects = bool(rpn and labels)

    # Step 1: collect hand crop boxes as (left, top, right, bottom).
    hand_boxes = []
    if mode == "openpose":
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(canvas, candidate, subset)
        for x, y, w, _is_left in util.handDetect(candidate, subset, oriImg):
            hand_boxes.append((x, y, x + w, y + w))
        box_color = (0, 255, 0)
        box_style = {"lineType": cv2.LINE_AA}
    else:
        for xmin, ymin, xmax, ymax, prob in inference_detector(model, oriImg)[0]:
            if prob < 0.4:
                continue
            # Pad the detector box by 50 px and clamp it to [1, size - 1].
            left = max(int(xmin) - 50, 1)
            right = min(int(xmax) + 50, width - 1)
            top = max(int(ymin) - 50, 1)
            bottom = min(int(ymax) + 50, height - 1)
            if right <= left or bottom <= top:
                # The crop would be empty; there is nothing to estimate on.
                continue
            hand_boxes.append((left, top, right, bottom))
        box_color = (0, 0, 255)
        box_style = {}

    # Step 2: optional object detection, run once on the untouched input image.
    if with_objects:
        detections, scale = detect_object.detect(rpn, oriImg)

    # Step 3: estimate hand peaks per box and move them to image coordinates.
    all_hand_peaks = []
    for left, top, right, bottom in hand_boxes:
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2, **box_style)
        peaks = hand_estimation(oriImg[top:bottom, left:right, :])
        # Entries equal to 0 are left untouched (presumably "no peak found" --
        # confirm against the hand estimator).
        peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + left)
        peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + top)
        all_hand_peaks.append(peaks)

    # Step 4: draw overlays and save.
    if with_objects:
        canvas = detect_object.draw_ssd_bbox(canvas, detections, scale, labels)
    if all_hand_peaks:
        canvas = util.draw_handpose(canvas, all_hand_peaks)
    canvas = cv2.resize(canvas, (width, height))
    cv2.imwrite(os.path.join('demo', 'output', image_name), canvas)
def main(webcamera=False):
    """Annotate every frame of a video with hand poses and optional object boxes.

    Options come from ``get_option()``. The source is the default camera when
    ``args.video_name == "webcam"``, otherwise ``demo/input/<video_name>``.
    The result is written to ``demo/output/<name before the first dot>.mp4``.
    The running frame count is printed per frame and the elapsed time at the
    end.

    Args:
        webcamera: Not used by this function; kept so existing callers keep
            working.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``.
    """
    args = get_option()
    if args.mode not in ("openpose", "handpose"):
        # An unsupported mode used to fail on the first frame with a
        # NameError, after the models were loaded and the output was created.
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(args.mode))

    # Object detection is optional. `rpn` and `labels` used to be bound only
    # when it was enabled but were used unconditionally in the frame loop,
    # which raised a NameError whenever it was disabled.
    rpn = None
    labels = None
    if args.object_detection:
        rpn = ssd_setting.build_ssd('test', 300, 21)
        rpn.load_weights(args.ssd_weight_path)
        from ssd.data import VOC_CLASSES as labels

    # bounding box model
    model = init_detector(args.config_file, args.weight_file, device="cuda:0")
    hand_estimation = Hand(args.hand_weight_path)
    body_estimation = Body(args.body_weight_path) if args.mode == "openpose" else None

    if args.video_name == "webcam":
        video = cv2.VideoCapture(0)
    else:
        video = cv2.VideoCapture(os.path.join('demo', 'input', args.video_name))

    new_video = None
    try:
        fps = int(video.get(cv2.CAP_PROP_FPS))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
        new_video = cv2.VideoWriter(
            os.path.join('demo', 'output', args.video_name.split('.')[0] + ".mp4"),
            fourcc, fps, (width, height))

        i = 0
        start = time.perf_counter()
        while True:
            ret, frame = video.read()
            if not ret:
                break
            canvas = copy.deepcopy(frame)

            # Step 1: collect hand crop boxes as (left, top, right, bottom).
            hand_boxes = []
            if args.mode == "openpose":
                candidate, subset = body_estimation(frame)
                canvas = util.draw_bodypose(canvas, candidate, subset)
                for x, y, w, _is_left in util.handDetect(candidate, subset, frame):
                    hand_boxes.append((x, y, x + w, y + w))
                box_color = (0, 255, 0)
                box_style = {"lineType": cv2.LINE_AA}
            else:
                for xmin, ymin, xmax, ymax, prob in inference_detector(model, frame)[0]:
                    if prob < 0.5:
                        continue
                    # Pad the detector box by 50 px and clamp to [1, size - 1].
                    left = max(int(xmin) - 50, 1)
                    right = min(int(xmax) + 50, width - 1)
                    top = max(int(ymin) - 50, 1)
                    bottom = min(int(ymax) + 50, height - 1)
                    if right <= left or bottom <= top:
                        # The crop would be empty; nothing to estimate on.
                        continue
                    hand_boxes.append((left, top, right, bottom))
                box_color = (0, 0, 255)
                box_style = {}

            # Step 2: optional object detection on the untouched frame.
            if args.object_detection:
                detections, scale = detect_object.detect(rpn, frame)

            # Step 3: estimate hand peaks and move them to frame coordinates.
            all_hand_peaks = []
            for left, top, right, bottom in hand_boxes:
                cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2, **box_style)
                peaks = hand_estimation(frame[top:bottom, left:right, :])
                # Entries equal to 0 are left untouched (presumably "no peak
                # found" -- confirm against the hand estimator).
                peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + left)
                peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + top)
                all_hand_peaks.append(peaks)

            # Step 4: draw overlays and append the frame to the output video.
            if args.object_detection:
                canvas = detect_object.draw_ssd_bbox(canvas, detections, scale, labels)
            if all_hand_peaks:
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            canvas = cv2.resize(canvas, (width, height))
            new_video.write(canvas)
            i = i + 1
            print(i)

        end = time.perf_counter()
        print(end - start)
    finally:
        # Release the capture (previously never released) and the writer even
        # when a frame fails to process.
        video.release()
        if new_video is not None:
            new_video.release()
def main():
    """Annotate hand poses (and, in openpose mode, body pose) on one image.

    Options come from ``get_option()``. The image is read from
    ``demo/input/<image_name>`` and the annotated result is written to
    ``demo/output/<image_name>``.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``.
        OSError: If the input image cannot be read.
    """
    args = get_option()
    image_name = args.image_name
    config_file = args.config_file
    weight_file = args.weight_file

    if args.mode not in ("openpose", "handpose"):
        # An unsupported mode used to skip both branches and fail later with
        # an UnboundLocalError on `all_hand_peaks`.
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(args.mode))

    # Read and validate the input before loading any model so that a bad path
    # fails fast with a clear message instead of an AttributeError on `.shape`.
    test_image = os.path.join('demo', 'input', image_name)
    oriImg = cv2.imread(test_image)  # B,G,R order
    if oriImg is None:
        raise OSError("could not read image: {}".format(test_image))
    height, width = oriImg.shape[:2]
    canvas = copy.deepcopy(oriImg)

    # model load
    model = init_detector(config_file, weight_file, device="cuda:0")
    hand_estimation = Hand('model/hand_pose_model.pth')

    # Step 1: collect hand crop boxes as (left, top, right, bottom).
    hand_boxes = []
    if args.mode == "openpose":
        body_estimation = Body('model/body_pose_model.pth')
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(canvas, candidate, subset)
        for x, y, w, _is_left in util.handDetect(candidate, subset, oriImg):
            hand_boxes.append((x, y, x + w, y + w))
        box_color = (0, 255, 0)
        box_style = {"lineType": cv2.LINE_AA}
    else:
        for xmin, ymin, xmax, ymax, prob in inference_detector(model, oriImg)[0]:
            if prob < 0.4:
                continue
            # Pad the detector box by 50 px and clamp it to [1, size - 1].
            left = max(int(xmin) - 50, 1)
            right = min(int(xmax) + 50, width - 1)
            top = max(int(ymin) - 50, 1)
            bottom = min(int(ymax) + 50, height - 1)
            if right <= left or bottom <= top:
                # The crop would be empty; there is nothing to estimate on.
                continue
            hand_boxes.append((left, top, right, bottom))
        box_color = (0, 0, 255)
        box_style = {}

    # Step 2: estimate hand peaks per box and move them to image coordinates.
    all_hand_peaks = []
    for left, top, right, bottom in hand_boxes:
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2, **box_style)
        peaks = hand_estimation(oriImg[top:bottom, left:right, :])
        # Entries equal to 0 are left untouched (presumably "no peak found" --
        # confirm against the hand estimator).
        peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + left)
        peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + top)
        all_hand_peaks.append(peaks)

    # Step 3: draw the hand skeletons and save.
    if all_hand_peaks:
        canvas = util.draw_handpose(canvas, all_hand_peaks)
    canvas = cv2.resize(canvas, (width, height))
    cv2.imwrite(os.path.join('demo', 'output', image_name), canvas)
def main(webcamera=False):
    """Annotate every frame of a video with hand poses (and body pose in openpose mode).

    Options come from ``get_option()``. The source is the default camera when
    ``args.video_name == "webcam"``, otherwise ``demo/input/<video_name>``.
    The result is written to ``demo/output/<video_name>``; a name without a
    file extension (such as ``"webcam"``) gets ``.mp4`` appended. The running
    frame count is printed per frame and the elapsed time at the end.

    Args:
        webcamera: Not used by this function; kept so existing callers keep
            working.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``.
    """
    args = get_option()
    video_name = args.video_name
    config_file = args.config_file
    weight_file = args.weight_file

    if args.mode not in ("openpose", "handpose"):
        # An unsupported mode used to fail on the first frame with an
        # UnboundLocalError on `all_hand_peaks`, after the models were loaded.
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(args.mode))

    # bounding box model
    model = init_detector(config_file, weight_file, device="cuda:0")
    hand_estimation = Hand('model/hand_pose_model.pth')
    body_estimation = Body('model/body_pose_model.pth')

    if video_name == "webcam":
        video = cv2.VideoCapture(0)
    else:
        video = cv2.VideoCapture(os.path.join('demo', 'input', video_name))

    # Names that already carry an extension are kept unchanged.
    # NOTE(review): assumes the writer needs a file extension to choose a
    # container, so "webcam" alone would not produce a usable file -- confirm.
    output_name = video_name
    if not os.path.splitext(output_name)[1]:
        output_name = output_name + ".mp4"

    new_video = None
    try:
        fps = int(video.get(cv2.CAP_PROP_FPS))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
        new_video = cv2.VideoWriter(
            os.path.join('demo', 'output', output_name), fourcc, fps, (width, height))

        i = 0
        start = time.perf_counter()
        while True:
            ret, frame = video.read()
            if not ret:
                break
            canvas = copy.deepcopy(frame)

            # Step 1: collect hand crop boxes as (left, top, right, bottom).
            hand_boxes = []
            if args.mode == "openpose":
                candidate, subset = body_estimation(frame)
                canvas = util.draw_bodypose(canvas, candidate, subset)
                for x, y, w, _is_left in util.handDetect(candidate, subset, frame):
                    hand_boxes.append((x, y, x + w, y + w))
                box_color = (0, 255, 0)
                box_style = {"lineType": cv2.LINE_AA}
            else:
                for xmin, ymin, xmax, ymax, prob in inference_detector(model, frame)[0]:
                    if prob < 0.5:
                        continue
                    # Pad the detector box by 50 px and clamp to [1, size - 1].
                    left = max(int(xmin) - 50, 1)
                    right = min(int(xmax) + 50, width - 1)
                    top = max(int(ymin) - 50, 1)
                    bottom = min(int(ymax) + 50, height - 1)
                    if right <= left or bottom <= top:
                        # The crop would be empty; nothing to estimate on.
                        continue
                    hand_boxes.append((left, top, right, bottom))
                box_color = (0, 0, 255)
                box_style = {}

            # Step 2: estimate hand peaks and move them to frame coordinates.
            all_hand_peaks = []
            for left, top, right, bottom in hand_boxes:
                cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2, **box_style)
                peaks = hand_estimation(frame[top:bottom, left:right, :])
                # Entries equal to 0 are left untouched (presumably "no peak
                # found" -- confirm against the hand estimator).
                peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + left)
                peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + top)
                all_hand_peaks.append(peaks)

            # Step 3: draw the hand skeletons and append the frame to the output.
            if all_hand_peaks:
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            canvas = cv2.resize(canvas, (width, height))
            new_video.write(canvas)
            i = i + 1
            print(i)

        end = time.perf_counter()
        print(end - start)
    finally:
        # Release the capture (previously never released) and the writer even
        # when a frame fails to process.
        video.release()
        if new_video is not None:
            new_video.release()