def __init__(self, input_source, cfg, opt, queueSize=128):
    self.cfg = cfg
    self.opt = opt
    self.bbox_file = input_source

    self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
    self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
    self._sigma = cfg.DATA_PRESET.SIGMA

    if cfg.DATA_PRESET.TYPE == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False)

    # initialize the det file list
    boxes = None
    with open(self.bbox_file, 'r') as f:
        boxes = json.load(f)
    assert boxes is not None, 'Load %s fail!' % self.bbox_file

    self.all_imgs = []
    self.all_boxes = {}
    self.all_scores = {}
    self.all_ids = {}
    num_boxes = 0
    for k_img in range(0, len(boxes)):
        det_res = boxes[k_img]
        img_name = det_res['image_id']
        if img_name not in self.all_imgs:
            self.all_imgs.append(img_name)
            self.all_boxes[img_name] = []
            self.all_scores[img_name] = []
            self.all_ids[img_name] = []
        x1, y1, w, h = det_res['bbox']
        bbox = [x1, y1, x1 + w, y1 + h]
        score = det_res['score']
        self.all_boxes[img_name].append(bbox)
        self.all_scores[img_name].append(score)
        if 'idx' in det_res.keys():
            self.all_ids[img_name].append(int(det_res['idx']))
        else:
            self.all_ids[img_name].append(0)

    # initialize the queue used to store data
    """
    pose_queue: the buffer storing post-processed cropped human image for pose estimation
    """
    if opt.sp:
        self._stopped = False
        self.pose_queue = Queue(maxsize=queueSize)
    else:
        self._stopped = mp.Value('b', False)
        self.pose_queue = mp.Queue(maxsize=queueSize)
def __init__(self, input_source, detector, cfg, opt, mode='image', batchSize=1, queueSize=128):
    self.cfg = cfg
    self.opt = opt
    self.mode = mode
    self.device = opt.device

    if mode == 'image':
        self.img_dir = opt.inputpath
        self.imglist = [os.path.join(self.img_dir, im_name.rstrip('\n').rstrip('\r'))
                        for im_name in input_source]
        self.datalen = len(input_source)
    elif mode == 'video':
        stream = cv2.VideoCapture(input_source)
        assert stream.isOpened(), 'Cannot capture source'
        self.path = input_source
        self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
        self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
        self.fps = stream.get(cv2.CAP_PROP_FPS)
        self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.videoinfo = {'fourcc': self.fourcc, 'fps': self.fps, 'frameSize': self.frameSize}
        stream.release()

    self.detector = detector
    self.batchSize = batchSize
    leftover = 0
    if (self.datalen) % batchSize:
        leftover = 1
    self.num_batches = self.datalen // batchSize + leftover

    self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
    self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
    self._sigma = cfg.DATA_PRESET.SIGMA

    if cfg.DATA_PRESET.TYPE == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False,
            gpu_device=self.device)

    # initialize the queue used to store data
    """
    image_queue: the buffer storing pre-processed images for object detection
    det_queue: the buffer storing human detection results
    pose_queue: the buffer storing post-processed cropped human image for pose estimation
    """
    if opt.sp:
        self._stopped = False
        self.image_queue = Queue(maxsize=queueSize)
        self.det_queue = Queue(maxsize=10 * queueSize)
        self.pose_queue = Queue(maxsize=10 * queueSize)
    else:
        self._stopped = mp.Value('b', False)
        self.image_queue = mp.Queue(maxsize=queueSize)
        self.det_queue = mp.Queue(maxsize=10 * queueSize)
        self.pose_queue = mp.Queue(maxsize=10 * queueSize)
def __init__(self, opt, root_dir, save_dir, pose_opt, queueSize=1024):
    self.opt = opt
    self.root_dir = root_dir
    self.dir_list = [
        d for d in os.listdir(self.root_dir)
        if os.path.isdir(os.path.join(self.root_dir, d))
    ]
    self.dir_list = sorted(self.dir_list, key=lambda x: int(x))
    # logger.info('The target directory is {}'.format(self.root_path))
    self.datalen = len(self.dir_list)
    self.start = 0

    # load the pose estimator
    self.device = torch.device('cuda')
    self.batchSize = 8
    self.ReID_BatchSize = 50
    self.gpus = opt.gpus
    self.pose_model = build_poser(pose_opt, self.gpus)

    # # load the ReID model
    # self.ReIDCfg = ReIDCfg
    # self.ReID = ReID_Model(self.ReIDCfg)
    # self.ReID.cuda()

    # ReID model parameters
    self.distance_threshold = 1
    self.height_threshold = 40
    self.width_threshold = 20

    self._input_size = pose_opt.DATA_PRESET.IMAGE_SIZE
    self._output_size = pose_opt.DATA_PRESET.HEATMAP_SIZE
    self._sigma = pose_opt.DATA_PRESET.SIGMA
    self.aspect_ratio = 0.45

    if pose_opt.DATA_PRESET.TYPE == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False,
            gpu_device=self.device)

    # queues used to pre-process the coordinate-transformed crops before keypoint detection
    self.Posing_Q = Queue(maxsize=queueSize)
    self.PostProcess_Q = Queue(maxsize=queueSize)

    self.save_dir = save_dir
    os.makedirs(self.save_dir, exist_ok=True)
def __init__(self, input_source, detector, cfg, opt, queueSize=1):
    self.cfg = cfg
    self.opt = opt

    # pipeline = rs.pipeline()
    # config = rs.config()
    # # config.enable_stream(rs.stream.color, 848, 480, rs.format.bgr8, 15)
    # # Start streaming
    # pipeline.start(config)

    # stream = cv2.VideoCapture(int(input_source))
    # assert stream.isOpened(), 'Cannot capture source'
    self.path = input_source
    # self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
    self.fps = 15
    self.frameSize = (848, 480)
    self.videoinfo = {'fps': self.fps, 'frameSize': self.frameSize}
    # stream.release()

    self.detector = detector
    self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
    self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
    self._sigma = cfg.DATA_PRESET.SIGMA

    if cfg.DATA_PRESET.TYPE == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False)

    # initialize the queue used to store data
    """
    pose_queue: the buffer storing post-processed cropped human image for pose estimation
    """
    if opt.sp:
        self._stopped = False
        self.pose_queue = Queue(maxsize=queueSize)
    else:
        self._stopped = mp.Value('b', False)
        self.pose_queue = mp.Queue(maxsize=queueSize)
def __init__(self, input_source, detector, cfg, opt, queueSize=1):
    self.cfg = cfg
    self.opt = opt

    stream = cv2.VideoCapture(int(input_source))
    assert stream.isOpened(), 'Cannot capture source'
    self.path = input_source
    self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
    self.fps = stream.get(cv2.CAP_PROP_FPS)
    self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    self.videoinfo = {
        'fourcc': self.fourcc,
        'fps': self.fps,
        'frameSize': self.frameSize
    }
    stream.release()

    self.detector = detector
    self._input_size = cfg.MODEL.IMAGE_SIZE
    self._output_size = cfg.MODEL.HEATMAP_SIZE
    self._crop = cfg.MODEL.EXTRA.CROP
    self._sigma = cfg.MODEL.EXTRA.SIGMA

    if cfg.MODEL.EXTRA.PRESET == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False)

    # initialize the queue used to store data
    """
    pose_queue: the buffer storing post-processed cropped human image for pose estimation
    """
    if opt.sp:
        self._stopped = False
        self.pose_queue = Queue(maxsize=queueSize)
    else:
        self._stopped = mp.Value('b', False)
        self.pose_queue = mp.Queue(maxsize=queueSize)
def __init__(self, train=True, dpg=False, skip_empty=True, lazy_import=False, **cfg):
    self._cfg = cfg
    self._preset_cfg = cfg['PRESET']
    self._root = cfg['ROOT']
    self._img_prefix = cfg['IMG_PREFIX']
    self._ann_file = os.path.join(self._root, cfg['ANN'])

    self._lazy_import = lazy_import
    self._skip_empty = skip_empty
    self._train = train
    self._dpg = dpg

    if 'AUG' in cfg.keys():
        self._scale_factor = cfg['AUG']['SCALE_FACTOR']
        self._rot = cfg['AUG']['ROT_FACTOR']
        self.num_joints_half_body = cfg['AUG']['NUM_JOINTS_HALF_BODY']
        self.prob_half_body = cfg['AUG']['PROB_HALF_BODY']
    else:
        self._scale_factor = 0
        self._rot = 0
        self.num_joints_half_body = -1
        self.prob_half_body = -1

    self._input_size = self._preset_cfg['IMAGE_SIZE']
    self._output_size = self._preset_cfg['HEATMAP_SIZE']
    self._sigma = self._preset_cfg['SIGMA']
    self._check_centers = False

    self.num_class = len(self.CLASSES)
    self._loss_type = self._preset_cfg.get('LOSS_TYPE', 'MSELoss')

    self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    self.lower_body_ids = (11, 12, 13, 14, 15, 16)

    if self._preset_cfg['TYPE'] == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=self._scale_factor,
            input_size=self._input_size, output_size=self._output_size,
            rot=self._rot, sigma=self._sigma,
            train=self._train, add_dpg=self._dpg,
            loss_type=self._loss_type)
    else:
        raise NotImplementedError

    self._items, self._labels = self._lazy_load_json()
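# A minimal sketch of the keyword configuration this kind of dataset constructor
# expects. The class name `CustomDataset` and every path/size below are
# illustrative assumptions, not values taken from the snippets in this section.
dataset = CustomDataset(
    train=False,
    PRESET=dict(IMAGE_SIZE=[256, 192], HEATMAP_SIZE=[64, 48], SIGMA=2, TYPE='simple'),
    ROOT='./data/coco/',
    IMG_PREFIX='val2017',
    ANN='annotations/person_keypoints_val2017.json')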
class DetectionLoader():
    def __init__(self, cfg, opt, device):
        self.cfg = cfg
        self.opt = opt
        self.device = device

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        pose_dataset = builder.retrieve_dataset(self.cfg.DATASET.TRAIN)
        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                pose_dataset, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False,
                gpu_device=self.device)

        self.pose = (None, None, None, None, None)

    def process(self, trackers, im0):
        '''
        Function that prepares YOLOv5 outputs in a format suitable for AlphaPose
        '''
        ids = torch.zeros(len(trackers), 1)      # ID numbers
        scores = torch.ones(len(trackers), 1)    # confidence scores
        boxes = torch.zeros(len(trackers), 4)    # bounding boxes
        inps = torch.zeros(len(trackers), 3, *self._input_size)
        cropped_boxes = torch.zeros(len(trackers), 4)  # cropped boxes

        for i, d in enumerate(trackers):
            # AlphaPose: prepare data in the required format and feed it to the pose estimator
            inps[i], cropped_box = self.transformation.test_transform(im0, d[:-1])
            cropped_boxes[i] = torch.FloatTensor(cropped_box)
            ids[i, 0] = int(d[-1])
            boxes[i, :] = torch.from_numpy(d[:-1])

        self.pose = (inps, boxes, scores, ids, cropped_boxes)
        return self.pose
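# A minimal usage sketch for the loader above, assuming `trackers` is an
# (N, 5) NumPy array of [x1, y1, x2, y2, track_id] rows coming from a
# YOLOv5 + tracker pipeline. The array layout is inferred from how d[:-1]
# and d[-1] are used in process(); `cfg`, `opt` and `im0` are assumed to
# exist as in the surrounding project.
import numpy as np

trackers = np.array([[ 50.,  60., 210., 400., 1.],
                     [300.,  80., 420., 390., 2.]], dtype=np.float32)

loader = DetectionLoader(cfg, opt, device=opt.device)
inps, boxes, scores, ids, cropped_boxes = loader.process(trackers, im0)
# `inps` (N, 3, H, W) can now be batched through the SPPE pose model;
# `cropped_boxes` is needed to map heatmaps back to image coordinates.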
class Mscoco_det(data.Dataset):
    """ COCO human detection box dataset. """
    EVAL_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]

    def __init__(self, det_file=None, opt=None, **cfg):
        self._cfg = cfg
        self._opt = opt
        self._preset_cfg = cfg['PRESET']
        self._detector_cfg = cfg['DETECTOR']
        self._root = cfg['ROOT']
        self._img_prefix = cfg['IMG_PREFIX']
        if not det_file:
            det_file = cfg['DET_FILE']
        self._ann_file = os.path.join(self._root, cfg['ANN'])

        if os.path.exists(det_file):
            print("Detection results exist, will use it")
        else:
            print("Will create detection results to {}".format(det_file))
            self.write_coco_json(det_file)

        assert os.path.exists(det_file), "Error: no detection results found"
        with open(det_file, 'r') as fid:
            self._det_json = json.load(fid)

        self._input_size = self._preset_cfg['IMAGE_SIZE']
        self._output_size = self._preset_cfg['HEATMAP_SIZE']
        self._sigma = self._preset_cfg['SIGMA']

        if self._preset_cfg['TYPE'] == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False)

    def __getitem__(self, index):
        det_res = self._det_json[index]
        if not isinstance(det_res['image_id'], int):
            img_id, _ = os.path.splitext(os.path.basename(det_res['image_id']))
            img_id = int(img_id)
        else:
            img_id = det_res['image_id']
        img_path = './data/coco/val2017/%012d.jpg' % img_id

        # Load image
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)  # scipy.misc.imread(img_path, mode='RGB') is deprecated
        imght, imgwidth = image.shape[0], image.shape[1]  # image is (H, W, C)
        x1, y1, w, h = det_res['bbox']
        bbox = [x1, y1, x1 + w, y1 + h]
        inp, bbox = self.transformation.test_transform(image, bbox)
        return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), \
            torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), \
            torch.Tensor([imght]), torch.Tensor([imgwidth])

    def __len__(self):
        return len(self._det_json)

    def write_coco_json(self, det_file):
        from pycocotools.coco import COCO
        import pathlib

        _coco = COCO(self._ann_file)
        image_ids = sorted(_coco.getImgIds())
        det_model = get_detector(self._opt, cfg=self._detector_cfg)
        dets = []
        for entry in tqdm(_coco.loadImgs(image_ids)):
            abs_path = os.path.join(self._root, self._img_prefix, entry['file_name'])
            det = det_model.detect_one_img(entry['id'], abs_path)
            if det:
                dets += det
        pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True)
        json.dump(dets, open(det_file, 'w'))

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8],
                [9, 10], [11, 12], [13, 14], [15, 16]]
class DetectionLoader():
    def __init__(self, input_source, detector, cfg, opt, mode='image', batchSize=1, queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.mode = mode
        self.device = opt.device

        if mode == 'image':
            self.img_dir = opt.inputpath
            self.imglist = [
                os.path.join(self.img_dir, im_name.rstrip('\n').rstrip('\r'))
                for im_name in input_source
            ]
            self.datalen = len(input_source)
        elif mode == 'video':
            # video input
            stream = cv2.VideoCapture(input_source)
            assert stream.isOpened(), 'Cannot capture source'
            self.path = input_source
            # total number of frames in the video
            self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
            self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
            self.fps = stream.get(cv2.CAP_PROP_FPS)
            self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                              int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.videoinfo = {
                'fourcc': self.fourcc,
                'fps': self.fps,
                'frameSize': self.frameSize
            }
            stream.release()

        # detector that locates people in the frame
        self.detector = detector
        self.batchSize = batchSize
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        # pose dataset
        pose_dataset = builder.retrieve_dataset(self.cfg.DATASET.TRAIN)
        if cfg.DATA_PRESET.TYPE == 'simple':  # this branch is taken
            self.transformation = SimpleTransform(
                pose_dataset, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False,
                gpu_device=self.device)

        # initialize the queue used to store data
        """
        image_queue: the buffer storing pre-processed images for object detection
        det_queue: the buffer storing human detection results
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.image_queue = Queue(maxsize=queueSize)
            self.det_queue = Queue(maxsize=10 * queueSize)
            self.pose_queue = Queue(maxsize=10 * queueSize)
        else:  # this branch is taken
            self._stopped = mp.Value('b', False)
            # image_queue: images waiting for object detection
            self.image_queue = mp.Queue(maxsize=queueSize)
            # det_queue: human detection results
            self.det_queue = mp.Queue(maxsize=10 * queueSize)
            # pose_queue: inputs/results for pose estimation
            self.pose_queue = mp.Queue(maxsize=10 * queueSize)

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    # start detection
    def start(self):
        # start a thread to pre process images for object detection
        if self.mode == 'image':
            image_preprocess_worker = self.start_worker(self.image_preprocess)
        elif self.mode == 'video':
            # this branch is taken (spawn workers for detection and pose pre-processing)
            image_preprocess_worker = self.start_worker(self.frame_preprocess)
        # start a thread to detect human in images
        # detects person locations in the whole image
        image_detection_worker = self.start_worker(self.image_detection)
        # start a thread to post process cropped human image for pose estimation
        # crops the detected people out of the full image
        image_postprocess_worker = self.start_worker(self.image_postprocess)
        # return the worker threads/processes
        return [
            image_preprocess_worker, image_detection_worker,
            image_postprocess_worker
        ]

    def stop(self):
        # clear queues
        self.clear_queues()

    def terminate(self):
        if self.opt.sp:
            self._stopped = True
        else:
            self._stopped.value = True
        self.stop()

    def clear_queues(self):
        self.clear(self.image_queue)
        self.clear(self.det_queue)
        self.clear(self.pose_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def wait_and_put(self, queue, item):
        queue.put(item)

    def wait_and_get(self, queue):
        return queue.get()

    def image_preprocess(self):
        for i in range(self.num_batches):
            imgs = []
            orig_imgs = []
            im_names = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                if self.stopped:
                    self.wait_and_put(self.image_queue, (None, None, None, None))
                    return
                im_name_k = self.imglist[k]

                # expected image shape like (1,3,h,w) or (3,h,w)
                img_k = self.detector.image_preprocess(im_name_k)

                if isinstance(img_k, np.ndarray):
                    img_k = torch.from_numpy(img_k)
                # add one dimension at the front for batch if image shape (3,h,w)
                if img_k.dim() == 3:
                    img_k = img_k.unsqueeze(0)

                orig_img_k = cv2.cvtColor(cv2.imread(im_name_k), cv2.COLOR_BGR2RGB)  # scipy.misc.imread(im_name_k, mode='RGB') is deprecated
                im_dim_list_k = orig_img_k.shape[1], orig_img_k.shape[0]

                imgs.append(img_k)
                orig_imgs.append(orig_img_k)
                im_names.append(os.path.basename(im_name_k))
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Human Detection
                imgs = torch.cat(imgs)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                # im_dim_list_ = im_dim_list

            self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))

    def frame_preprocess(self):
        stream = cv2.VideoCapture(self.path)
        assert stream.isOpened(), 'Cannot capture source'

        for i in range(self.num_batches):
            imgs = []
            orig_imgs = []
            im_names = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed or self.stopped:
                    # put the rest pre-processed data to the queue
                    if len(imgs) > 0:
                        with torch.no_grad():
                            # Record original image resolution
                            imgs = torch.cat(imgs)
                            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                        self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))
                    self.wait_and_put(self.image_queue, (None, None, None, None))
                    print('===========================> This video get ' + str(k) + ' frames in total.')
                    sys.stdout.flush()
                    stream.release()
                    return

                # expected frame shape like (1,3,h,w) or (3,h,w)
                img_k = self.detector.image_preprocess(frame)

                if isinstance(img_k, np.ndarray):
                    img_k = torch.from_numpy(img_k)
                # add one dimension at the front for batch if image shape (3,h,w)
                if img_k.dim() == 3:
                    img_k = img_k.unsqueeze(0)

                im_dim_list_k = frame.shape[1], frame.shape[0]

                imgs.append(img_k)
                orig_imgs.append(frame[:, :, ::-1])
                im_names.append(str(k) + '.jpg')
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Record original image resolution
                imgs = torch.cat(imgs)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                # im_dim_list_ = im_dim_list

            self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))

        stream.release()

    # human detection on whole images
    def image_detection(self):
        for i in range(self.num_batches):
            imgs, orig_imgs, im_names, im_dim_list = self.wait_and_get(self.image_queue)
            if imgs is None or self.stopped:
                self.wait_and_put(self.det_queue, (None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # pad useless images to fill a batch, else there will be a bug
                for pad_i in range(self.batchSize - len(imgs)):
                    imgs = torch.cat((imgs, torch.unsqueeze(imgs[0], dim=0)), 0)
                    im_dim_list = torch.cat((im_dim_list, torch.unsqueeze(im_dim_list[0], dim=0)), 0)

                dets = self.detector.images_detection(imgs, im_dim_list)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_imgs)):
                        self.wait_and_put(self.det_queue,
                                          (orig_imgs[k], im_names[k], None, None, None, None, None))
                    continue
                if isinstance(dets, np.ndarray):
                    dets = torch.from_numpy(dets)
                dets = dets.cpu()
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]
                if self.opt.tracking:
                    ids = dets[:, 6:7]
                else:
                    ids = torch.zeros(scores.shape)

            for k in range(len(orig_imgs)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    self.wait_and_put(self.det_queue,
                                      (orig_imgs[k], im_names[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, *self._input_size)
                cropped_boxes = torch.zeros(boxes_k.size(0), 4)
                self.wait_and_put(self.det_queue,
                                  (orig_imgs[k], im_names[k], boxes_k,
                                   scores[dets[:, 0] == k], ids[dets[:, 0] == k],
                                   inps, cropped_boxes))

    def image_postprocess(self):
        for i in range(self.datalen):
            with torch.no_grad():
                (orig_img, im_name, boxes, scores, ids, inps, cropped_boxes) = self.wait_and_get(self.det_queue)
                if orig_img is None or self.stopped:
                    self.wait_and_put(self.pose_queue, (None, None, None, None, None, None, None))
                    return
                if boxes is None or boxes.nelement() == 0:
                    self.wait_and_put(self.pose_queue, (None, orig_img, im_name, boxes, scores, ids, None))
                    continue
                # imght = orig_img.shape[0]
                # imgwidth = orig_img.shape[1]
                for i, box in enumerate(boxes):
                    inps[i], cropped_box = self.transformation.test_transform(orig_img, box)
                    cropped_boxes[i] = torch.FloatTensor(cropped_box)

                # inps, cropped_boxes = self.transformation.align_transform(orig_img, boxes)
                self.wait_and_put(self.pose_queue,
                                  (inps, orig_img, im_name, boxes, scores, ids, cropped_boxes))

    def read(self):
        return self.wait_and_get(self.pose_queue)

    @property
    def stopped(self):
        if self.opt.sp:
            return self._stopped
        else:
            return self._stopped.value

    @property
    def length(self):
        return self.datalen
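# A minimal sketch of how this threaded loader is typically driven. The names
# `input_source`, `detector`, `cfg` and `opt` are assumed to be set up as in
# the surrounding project; the consumer loop itself is illustrative.
det_loader = DetectionLoader(input_source, detector, cfg, opt, mode='video', batchSize=8)
det_workers = det_loader.start()

for _ in range(det_loader.length):
    inps, orig_img, im_name, boxes, scores, ids, cropped_boxes = det_loader.read()
    if orig_img is None:
        break  # end-of-stream sentinel
    if boxes is None or boxes.nelement() == 0:
        continue  # no person detected in this frame
    # `inps` (N, 3, H, W) is ready to be fed to the SPPE pose network,
    # and `cropped_boxes` maps the heatmaps back to image coordinates.

det_loader.stop()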
self.CLASSES = ['person']
self.EVAL_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
self.joint_pairs = [[1, 2], [3, 4], [5, 6], [7, 8],
                    [9, 10], [11, 12], [13, 14], [15, 16]]
self.lower_body_ids = (11, 12, 13, 14, 15, 16, 20, 21, 22, 23, 24, 25)
self.num_joint = 17

pose_dataset = poseDataset()
transformation = SimpleTransform(pose_dataset, scale_factor=0,
                                 input_size=[256, 192], output_size=[64, 48],
                                 rot=0, sigma=2,
                                 train=False, add_dpg=False, gpu_device='cuda:0')


def load_coco_data(root_dir=root_dir, json_path=json_path, img_dir=img_dir):
    coco_data = COCO(root_dir + json_path)
    img_ids = coco_data.getImgIds()
    imgs = coco_data.imgs
    anns = coco_data.imgToAnns

    num = 0
    inputs = []
    for id in img_ids:
        num += 1
class coco_wholebody_det(data.Dataset):
    """ Halpe_136 human detection box dataset. """
    EVAL_JOINTS = list(range(133))

    def __init__(self, det_file=None, opt=None, **cfg):
        self._cfg = cfg
        self._opt = opt
        self._preset_cfg = cfg['PRESET']
        self._root = cfg['ROOT']
        self._img_prefix = cfg['IMG_PREFIX']
        if not det_file:
            det_file = cfg['DET_FILE']
        self._ann_file = os.path.join(self._root, cfg['ANN'])

        if os.path.exists(det_file):
            print("Detection results exist, will use it")
        else:
            print("Will create detection results to {}".format(det_file))
            self.write_coco_json(det_file)

        assert os.path.exists(det_file), "Error: no detection results found"
        with open(det_file, 'r') as fid:
            self._det_json = json.load(fid)

        self._input_size = self._preset_cfg['IMAGE_SIZE']
        self._output_size = self._preset_cfg['HEATMAP_SIZE']
        self._sigma = self._preset_cfg['SIGMA']

        if self._preset_cfg['TYPE'] == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False)

    def __getitem__(self, index):
        det_res = self._det_json[index]
        if not isinstance(det_res['image_id'], int):
            img_id, _ = os.path.splitext(os.path.basename(det_res['image_id']))
            img_id = int(img_id)
        else:
            img_id = det_res['image_id']
        img_path = '/ssd3/Benchmark/coco/val2017/%012d.jpg' % img_id

        # Load image
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)  # scipy.misc.imread(img_path, mode='RGB') is deprecated
        imght, imgwidth = image.shape[0], image.shape[1]  # image is (H, W, C)
        x1, y1, w, h = det_res['bbox']
        bbox = [x1, y1, x1 + w, y1 + h]
        inp, bbox = self.transformation.test_transform(image, bbox)
        return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), \
            torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), \
            torch.Tensor([imght]), torch.Tensor([imgwidth])

    def __len__(self):
        return len(self._det_json)

    def write_coco_json(self, det_file):
        from xtcocotools.coco import COCO
        # from pycocotools.coco import COCO
        import pathlib

        _coco = COCO(self._ann_file)
        image_ids = sorted(_coco.getImgIds())
        det_model = get_detector(self._opt)
        dets = []
        for entry in tqdm(_coco.loadImgs(image_ids)):
            abs_path = os.path.join('/ssd3/Benchmark/coco', self._img_prefix, entry['file_name'])
            det = det_model.detect_one_img(abs_path)
            if det:
                dets += det
        pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True)
        json.dump(dets, open(det_file, 'w'))

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [
            [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16],  # 17 body keypoints
            [20 - 3, 23 - 3], [21 - 3, 24 - 3], [22 - 3, 25 - 3],
            [26 - 3, 42 - 3], [27 - 3, 41 - 3], [28 - 3, 40 - 3], [29 - 3, 39 - 3],
            [30 - 3, 38 - 3], [31 - 3, 37 - 3], [32 - 3, 36 - 3], [33 - 3, 35 - 3],
            [43 - 3, 52 - 3], [44 - 3, 51 - 3], [45 - 3, 50 - 3], [46 - 3, 49 - 3], [47 - 3, 48 - 3],
            [62 - 3, 71 - 3], [63 - 3, 70 - 3], [64 - 3, 69 - 3], [65 - 3, 68 - 3],
            [66 - 3, 73 - 3], [67 - 3, 72 - 3], [57 - 3, 61 - 3], [58 - 3, 60 - 3],
            [74 - 3, 80 - 3], [75 - 3, 79 - 3], [76 - 3, 78 - 3],
            [87 - 3, 89 - 3], [93 - 3, 91 - 3], [86 - 3, 90 - 3], [85 - 3, 81 - 3], [84 - 3, 82 - 3],
            [94 - 3, 115 - 3], [95 - 3, 116 - 3], [96 - 3, 117 - 3], [97 - 3, 118 - 3],
            [98 - 3, 119 - 3], [99 - 3, 120 - 3], [100 - 3, 121 - 3], [101 - 3, 122 - 3],
            [102 - 3, 123 - 3], [103 - 3, 124 - 3], [104 - 3, 125 - 3], [105 - 3, 126 - 3],
            [106 - 3, 127 - 3], [107 - 3, 128 - 3], [108 - 3, 129 - 3], [109 - 3, 130 - 3],
            [110 - 3, 131 - 3], [111 - 3, 132 - 3], [112 - 3, 133 - 3], [113 - 3, 134 - 3],
            [114 - 3, 135 - 3]
        ]
class DetectedImgsLoader():
    def __init__(self, input_source, cfg, opt, mode='image', batchSize=1, queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.mode = mode
        self.device = opt.device

        self.img_dir = input_source
        self.imglist = [
            img_name for img_name in os.listdir(self.img_dir)
            if img_name.split('.')[-1] == 'jpg'
        ]
        self.datalen = len(self.imglist)

        self.batchSize = batchSize
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False,
                gpu_device=self.device)

        # initialize the queue used to store data
        """
        image_queue: the buffer storing pre-processed images for object detection
        det_queue: the buffer storing human detection results
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.pose_queue = Queue(maxsize=10 * queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.pose_queue = mp.Queue(maxsize=10 * queueSize)

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to detect human in images
        img_preprocess_for_PoseEstimate_worker = self.start_worker(
            self.img_preprocess_for_PoseEstimate)
        return [img_preprocess_for_PoseEstimate_worker]

    def stop(self):
        # clear queues
        self.clear_queues()

    def terminate(self):
        if self.opt.sp:
            self._stopped = True
        else:
            self._stopped.value = True
        self.stop()

    def clear_queues(self):
        self.clear(self.pose_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def wait_and_put(self, queue, item):
        queue.put(item)

    def wait_and_get(self, queue):
        return queue.get()

    def img_preprocess_for_PoseEstimate(self):
        for i in range(self.datalen):
            with torch.no_grad():
                orig_img = cv2.imread(os.path.join(self.img_dir, self.imglist[i]))
                # print(os.path.join(self.img_dir, self.imglist[i]))
                # print('orig_img', orig_img)
                height, width, _ = orig_img.shape
                box = [0, 0, width - 1, height - 1]
                inp, cropped_box = self.transformation.test_transform(orig_img, box)
                self.wait_and_put(self.pose_queue,
                                  (inp, orig_img, cropped_box, self.imglist[i]))

    def read(self):
        return self.wait_and_get(self.pose_queue)

    @property
    def stopped(self):
        if self.opt.sp:
            return self._stopped
        else:
            return self._stopped.value

    @property
    def length(self):
        return self.datalen

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8],
                [9, 10], [11, 12], [13, 14], [15, 16]]
class FileDetectionLoader():
    def __init__(self, input_source, cfg, opt, queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.bbox_file = input_source

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False)

        # initialize the det file list
        boxes = None
        if isinstance(self.bbox_file, list):
            boxes = self.bbox_file
        else:
            with open(self.bbox_file, 'r') as f:
                boxes = json.load(f)
        assert boxes is not None, 'Load %s fail!' % self.bbox_file

        self.all_imgs = []
        self.all_boxes = {}
        self.all_scores = {}
        self.all_ids = {}
        num_boxes = 0
        for k_img in range(0, len(boxes)):
            det_res = boxes[k_img]
            img_name = det_res['image_id']
            if img_name not in self.all_imgs:
                self.all_imgs.append(img_name)
                self.all_boxes[img_name] = []
                self.all_scores[img_name] = []
                self.all_ids[img_name] = []
            x1, y1, w, h = det_res['bbox']
            bbox = [x1, y1, x1 + w, y1 + h]
            score = det_res['score']
            self.all_boxes[img_name].append(bbox)
            self.all_scores[img_name].append(score)
            if 'idx' in det_res.keys():
                self.all_ids[img_name].append(int(det_res['idx']))
            else:
                self.all_ids[img_name].append(0)

        # initialize the queue used to store data
        """
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.pose_queue = Queue(maxsize=queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.pose_queue = mp.Queue(maxsize=queueSize)

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to pre process images for object detection
        image_preprocess_worker = self.start_worker(self.get_detection)
        return [image_preprocess_worker]

    def stop(self):
        # clear queues
        self.clear_queues()

    def terminate(self):
        if self.opt.sp:
            self._stopped = True
        else:
            self._stopped.value = True
        self.stop()

    def clear_queues(self):
        self.clear(self.pose_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def wait_and_put(self, queue, item):
        if not self.stopped:
            queue.put(item)

    def wait_and_get(self, queue):
        if not self.stopped:
            return queue.get()

    def get_detection(self):
        for im_name_k in self.all_imgs:
            boxes = torch.from_numpy(np.array(self.all_boxes[im_name_k]))
            scores = torch.from_numpy(np.array(self.all_scores[im_name_k]))
            ids = torch.from_numpy(np.array(self.all_ids[im_name_k]))

            orig_img_k = cv2.cvtColor(cv2.imread(im_name_k), cv2.COLOR_BGR2RGB)  # scipy.misc.imread(im_name_k, mode='RGB') is deprecated

            inps = torch.zeros(boxes.size(0), 3, *self._input_size)
            cropped_boxes = torch.zeros(boxes.size(0), 4)
            for i, box in enumerate(boxes):
                inps[i], cropped_box = self.transformation.test_transform(orig_img_k, box)
                cropped_boxes[i] = torch.FloatTensor(cropped_box)

            self.wait_and_put(self.pose_queue,
                              (inps, orig_img_k, im_name_k, boxes, scores, ids, cropped_boxes))
        self.wait_and_put(self.pose_queue, (None, None, None, None, None, None, None))
        return

    def read(self):
        return self.wait_and_get(self.pose_queue)

    @property
    def stopped(self):
        if self.opt.sp:
            return self._stopped
        else:
            return self._stopped.value

    @property
    def length(self):
        return len(self.all_imgs)

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8],
                [9, 10], [11, 12], [13, 14], [15, 16]]
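# A minimal usage sketch for FileDetectionLoader, assuming a COCO-style
# detection JSON whose `image_id` fields are image paths readable by
# cv2.imread. The file name and the consumer loop are illustrative only.
det_loader = FileDetectionLoader('examples/detections.json', cfg, opt)
det_loader.start()

for _ in range(det_loader.length):
    inps, orig_img, im_name, boxes, scores, ids, cropped_boxes = det_loader.read()
    if inps is None:
        break  # end-of-list sentinel
    # feed `inps` to the pose model; `boxes`, `scores` and `ids` come straight from the JSON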
def __init__(self, train=True, dpg=False, skip_empty=True, lazy_import=False, **cfg):
    if os.path.exists('/home/group3/background.json'):
        self.bgim = json.load(open('/home/group3/background.json', 'r'))
    else:
        self.bgim = None

    self._cfg = cfg
    self._preset_cfg = cfg['PRESET']
    self._root = cfg['ROOT']
    self._img_prefix = cfg['IMG_PREFIX']
    self._ann_file = os.path.join(self._root, cfg['ANN'])

    self._lazy_import = lazy_import
    self._skip_empty = skip_empty
    self._train = train
    self._dpg = dpg

    if 'AUG' in cfg.keys():
        self._scale_factor = cfg['AUG']['SCALE_FACTOR']
        self._rot = cfg['AUG']['ROT_FACTOR']
        self.num_joints_half_body = cfg['AUG']['NUM_JOINTS_HALF_BODY']
        self.prob_half_body = cfg['AUG']['PROB_HALF_BODY']
    else:
        self._scale_factor = 0
        self._rot = 0
        self.num_joints_half_body = -1
        self.prob_half_body = -1

    self._input_size = self._preset_cfg['IMAGE_SIZE']
    self._output_size = self._preset_cfg['HEATMAP_SIZE']
    self._sigma = self._preset_cfg['SIGMA']
    self._check_centers = False

    self.num_class = len(self.CLASSES)
    self._loss_type = self._preset_cfg.get('LOSS_TYPE', 'MSELoss')

    self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    self.lower_body_ids = (11, 12, 13, 14, 15, 16)

    if self._preset_cfg['TYPE'] == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=self._scale_factor,
            input_size=self._input_size, output_size=self._output_size,
            rot=self._rot, sigma=self._sigma,
            train=self._train, add_dpg=self._dpg,
            loss_type=self._loss_type)
    else:
        raise NotImplementedError

    with open(os.path.join(self._root, cfg["RADIUS_ANN"])) as f:
        self._ann_id_to_radius = {
            int(k): np.array(v, dtype=np.float32)
            for k, v in json.load(f).items()
        }

    self._items, self._labels = self._lazy_load_json()
class DetectionLoader():
    def __init__(self, detector, cfg, opt):
        self.cfg = cfg
        self.opt = opt
        self.device = opt.device
        self.detector = detector

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        pose_dataset = builder.retrieve_dataset(self.cfg.DATASET.TRAIN)
        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                pose_dataset, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False,
                gpu_device=self.device)

        self.image = (None, None, None, None)
        self.det = (None, None, None, None, None, None, None)
        self.pose = (None, None, None, None, None, None, None)

    def process(self, im_name, image):
        # start to pre process images for object detection
        self.image_preprocess(im_name, image)
        # start to detect human in images
        self.image_detection()
        # start to post process cropped human image for pose estimation
        self.image_postprocess()
        return self

    def image_preprocess(self, im_name, image):
        # expected image shape like (1,3,h,w) or (3,h,w)
        img = self.detector.image_preprocess(image)
        if isinstance(img, np.ndarray):
            img = torch.from_numpy(img)
        # add one dimension at the front for batch if image shape (3,h,w)
        if img.dim() == 3:
            img = img.unsqueeze(0)
        orig_img = image  # scipy.misc.imread(im_name_k, mode='RGB') is deprecated
        im_dim = orig_img.shape[1], orig_img.shape[0]
        im_name = os.path.basename(im_name)

        with torch.no_grad():
            im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

        self.image = (img, orig_img, im_name, im_dim)

    def image_detection(self):
        imgs, orig_imgs, im_names, im_dim_list = self.image
        if imgs is None:
            self.det = (None, None, None, None, None, None, None)
            return

        with torch.no_grad():
            dets = self.detector.images_detection(imgs, im_dim_list)
            if isinstance(dets, int) or dets.shape[0] == 0:
                self.det = (orig_imgs, im_names, None, None, None, None, None)
                return
            if isinstance(dets, np.ndarray):
                dets = torch.from_numpy(dets)
            dets = dets.cpu()
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]
            ids = torch.zeros(scores.shape)

        boxes = boxes[dets[:, 0] == 0]
        if isinstance(boxes, int) or boxes.shape[0] == 0:
            self.det = (orig_imgs, im_names, None, None, None, None, None)
            return
        inps = torch.zeros(boxes.size(0), 3, *self._input_size)
        cropped_boxes = torch.zeros(boxes.size(0), 4)

        self.det = (orig_imgs, im_names, boxes, scores[dets[:, 0] == 0],
                    ids[dets[:, 0] == 0], inps, cropped_boxes)

    def image_postprocess(self):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, ids, inps, cropped_boxes) = self.det
            if orig_img is None:
                self.pose = (None, None, None, None, None, None, None)
                return
            if boxes is None or boxes.nelement() == 0:
                self.pose = (None, orig_img, im_name, boxes, scores, ids, None)
                return

            for i, box in enumerate(boxes):
                inps[i], cropped_box = self.transformation.test_transform(orig_img, box)
                cropped_boxes[i] = torch.FloatTensor(cropped_box)

            self.pose = (inps, orig_img, im_name, boxes, scores, ids, cropped_boxes)

    def read(self):
        return self.pose
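# A minimal sketch of the single-image flow this variant supports, e.g. inside
# a webcam loop. The capture setup and the dummy frame name are illustrative;
# `detector`, `cfg` and `opt` are assumed to be built as elsewhere in the project.
det_loader = DetectionLoader(detector, cfg, opt)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    inps, orig_img, im_name, boxes, scores, ids, cropped_boxes = \
        det_loader.process('frame.jpg', frame).read()
    if boxes is None:
        continue  # nothing detected in this frame
    # run the pose model on `inps` and map results back with `cropped_boxes`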
def __init__(self, opt, pose_opt, ReIDCfg, C_T_output_queue, Pose_output_queue,
             S_Pose_Estimate, S_Number_Predict, vis=False, save_results=False,
             queueSize=1024):
    self.opt = opt
    self.dir_name = opt.dir_name
    self.root_path = os.path.join(opt.data_root, '{}'.format(opt.dir_name))
    # logger.info('The target directory is {}'.format(self.root_path))
    self.file_name = opt.file_name

    self.Videoparameters, \
    self.setting_parameter, \
    self.action_datas, \
    self.channel_list, \
    self.parameter = read_data_from_json_file_v2(self.root_path, self.file_name, self.opt)

    # Does the video need to be read in again? Would that tie up resources?
    self.datalen = len(self.action_datas)

    # load the pose estimator
    self.device = torch.device('cuda')
    self.batchSize = 4
    self.ReID_BatchSize = 50
    self.gpus = opt.gpus
    self.pose_model = build_poser(pose_opt, self.gpus)

    # load the ReID model
    self.ReIDCfg = ReIDCfg
    self.ReID = ReID_Model(self.ReIDCfg)
    self.ReID.cuda()

    # ReID model parameters
    self.distance_threshold = 1
    self.height_threshold = 95
    self.width_threshold = 40

    self._input_size = pose_opt.DATA_PRESET.IMAGE_SIZE
    self._output_size = pose_opt.DATA_PRESET.HEATMAP_SIZE
    self._sigma = pose_opt.DATA_PRESET.SIGMA
    self.aspect_ratio = 0.45

    if pose_opt.DATA_PRESET.TYPE == 'simple':
        self.transformation = SimpleTransform(
            self, scale_factor=0,
            input_size=self._input_size, output_size=self._output_size,
            rot=0, sigma=self._sigma, train=False, add_dpg=False,
            gpu_device=self.device)

    self.input_Q = C_T_output_queue  # overall input: the tracking results
    # queues used to pre-process the coordinate-transformed crops before keypoint detection
    self.Posing_Q = Queue(maxsize=queueSize)
    self.PostProcess_Q = Queue(maxsize=queueSize)
    self.output_Q = Pose_output_queue

    self.vis = vis
    if self.vis == True:
        self.vis_path = os.path.join(self.root_path, 'vis')
        os.makedirs(self.vis_path, exist_ok=True)

    self.save_results = save_results
    self.S_Pose_Estimate = S_Pose_Estimate
    self.S_Number_Predict = S_Number_Predict
    if self.save_results == True:
        self.intermediate_results_dir = os.path.join(self.root_path,
                                                     'intermediate_results', 'Alphapose')
        os.makedirs(self.intermediate_results_dir, exist_ok=True)

    self.logger = Log(__name__, 'Alphapose').getlog()
class Model:
    # Constructor
    def __init__(self, args=None):
        if args is None:
            args = Namespace(
                # Pose config
                pose_cfg='configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml',
                # Pose checkpoint
                pose_checkpoint='pretrained_models/fast_res50_256x192.pth',
                # GPUS
                gpus='0',
                # Detection thresh
                det_thresh=0.5,
                # Detection config
                det_cfg='mmDetection/gfl_x101_611.py',
                # Detection checkpoint
                det_checkpoint='mmDetection/weights.pth',
                # Show clothe color
                clothe_color=True,
                # Show bboxes
                showbox=True
            )

        self.pose_cfg = update_config(args.pose_cfg)

        # Device configuration
        args.gpus = [int(i) for i in args.gpus.split(',')] if torch.cuda.device_count() >= 1 else [-1]
        args.device = torch.device("cuda:" + str(args.gpus[0]) if args.gpus[0] >= 0 else "cpu")
        args.tracking = False
        args.pose_track = False

        # Copy args
        self.args = args

        # Preprocess transformation
        pose_dataset = builder.retrieve_dataset(self.pose_cfg.DATASET.TRAIN)
        self.transformation = SimpleTransform(
            pose_dataset, scale_factor=0,
            input_size=self.pose_cfg.DATA_PRESET.IMAGE_SIZE,
            output_size=self.pose_cfg.DATA_PRESET.HEATMAP_SIZE,
            rot=0, sigma=self.pose_cfg.DATA_PRESET.SIGMA,
            train=False, add_dpg=False, gpu_device=args.device)
        self.norm_type = self.pose_cfg.LOSS.get('NORM_TYPE', None)

        # Post process
        self.heatmap_to_coord = get_func_heatmap_to_coord(self.pose_cfg)

        # Load detector model
        self.det_model = init_detector(args.det_cfg, checkpoint=args.det_checkpoint, device=args.device)

        # Load pose model
        self.pose_model = builder.build_sppe(self.pose_cfg.MODEL, preset_cfg=self.pose_cfg.DATA_PRESET)
        print(f'Loading pose model from {args.pose_checkpoint}...')
        self.pose_model.load_state_dict(torch.load(args.pose_checkpoint, map_location=args.device))
        self.pose_model.to(args.device)
        self.pose_model.eval()

    # region Process one sample
    def process(self, img):
        # Detector
        det_result = inference_detector(self.det_model, img)  # xmin, ymin, xmax, ymax, percent
        if isinstance(det_result, tuple):
            bbox_result, segm_result = det_result
        else:
            bbox_result, segm_result = det_result, None

        for i in range(len(bbox_result)):
            bbox_result  # no-op in the original snippet

        det = np.vstack(bbox_result)
        labels = [
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ]
        labels = np.concatenate(labels)[:len(det)]

        # sorting bboxes and labels
        arr_concat = np.empty((1, 6))
        for i in range(len(det)):
            label = np.array([labels[i]])
            arr = np.append([det[i]], label)
            arr = np.array([arr])
            arr_concat = np.append(arr_concat, arr, axis=0)
        arr_concat = np.delete(arr_concat, [0, 0, 0, 0, 0, 0], axis=0)
        arr_concat = arr_concat[np.lexsort(([arr_concat[:, i] for i in range(arr_concat.shape[1] - 1, -1, -1)]))]
        # arr_concat = arr_concat[np.argsort(arr_concat[:, 0])]
        arr_concat = np.split(arr_concat, (0, 5), axis=1)
        det = np.array(arr_concat[1], dtype=np.float32)
        labels = np.ravel(arr_concat[2], order='C')
        labels = np.array(labels, dtype=np.int32)

        # split bboxes and labels
        det2 = det
        labels2 = labels
        scores = det2[:, -1]
        inds = scores > 0.3
        det2 = det2[inds, :]
        labels2 = labels2[inds]
        det2 = det2.tolist()
        labels2 = labels2.tolist()  # labels and bboxes of every instance in the image

        # For human objects
        bboxes = []
        cropped_boxes = []
        inps = []
        # Other objects
        other_objects = []

        # Preprocess
        for bbox, label in zip(det, labels):
            acc = bbox[4]
            if acc >= self.args.det_thresh:
                bbox = bbox[:4].astype(int)
                # Person type & prepare for pose estimation
                if self.det_model.CLASSES[label] == 'person':
                    x1, y1, x2, y2 = bbox
                    inp, cropped_box = self.transformation.test_transform(
                        img[y1:y2, x1:x2], torch.Tensor([0, 0, x2 - x1, y2 - y1]))
                    inps.append(inp.unsqueeze(0))
                    bboxes.append(bbox)
                    cropped_boxes.append(cropped_box)
                # Other objects, just take label and bbox
                else:
                    other_objects.append((label, bbox))

        poses = []
        if len(inps) > 0:
            # Run pose model
            inps = torch.cat(inps).to(self.args.device)
            hm_datas = self.pose_model(inps).cpu()
            del inps

            # Convert heatmap to coord and score
            pose_coords = []
            pose_scores = []
            for (hm_data, cropped_box, bbox) in zip(hm_datas, cropped_boxes, bboxes):
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[EVAL_JOINTS], cropped_box,
                    hm_shape=self.pose_cfg.DATA_PRESET.HEATMAP_SIZE,
                    norm_type=self.norm_type)
                pose_coords.append(torch.from_numpy(pose_coord + bbox[:2]))
                pose_scores.append(torch.from_numpy(pose_score))

            # Draw bboxs and pose coordinates
            for bbox, pose_coord, pose_score in zip(bboxes, pose_coords, pose_scores):
                # # Bbox
                # left_top = (bbox[0], bbox[1])
                # right_bottom = (bbox[2], bbox[3])
                # img = cv2.rectangle(img, left_top, right_bottom, (0, 0, 255), 3)
                # Pose coords
                poses.append({"keypoints": pose_coord, "kp_score": pose_score,
                              "box": bbox,
                              "clothe_color": find_clothe_color(img, pose_coord)})

        return poses, labels2, det2, other_objects
        # return poses, other_objects
    # endregion

    # region Draw results
    def draw_results(self, img, poses, other_objects):
        # Draw human results
        img = vis_frame_fast(img, {"result": poses}, self.args)

        # Draw other objects with name
        for label, bbox in other_objects:
            label_text = self.det_model.CLASSES[label]
            bbox = bbox.astype(int)
            # Bbox
            left_top = (bbox[0], bbox[1])
            right_bottom = (bbox[2], bbox[3])
            cv2.rectangle(img, left_top, right_bottom, BBOX_COLOR, 3)
            # Label name
            cv2.putText(img, label_text, (bbox[0], bbox[1] - 2),
                        cv2.FONT_HERSHEY_COMPLEX, FONT_SCALE, TEXT_COLOR)
        return img
    # endregion
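# A short usage sketch for the wrapper above. The image paths are hypothetical;
# Model() falls back to the default config/checkpoint paths listed in its constructor.
model = Model()
img = cv2.imread('demo.jpg')
poses, labels, dets, other_objects = model.process(img)
vis = model.draw_results(img, poses, other_objects)
cv2.imwrite('demo_out.jpg', vis)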
class DetectionLoader():
    def __init__(self, input_source, image_names, image_list, detector, cfg, opt,
                 mode='image', batchSize=1, queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.mode = mode
        self.device = opt.device
        self.image_names = image_names

        if mode == 'image':
            self.imglist = image_list
            self.datalen = len(self.imglist)
        elif mode == 'video':
            stream = cv2.VideoCapture(input_source)
            assert stream.isOpened(), 'Cannot capture source'
            self.path = input_source
            self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
            self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
            self.fps = stream.get(cv2.CAP_PROP_FPS)
            self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                              int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.videoinfo = {
                'fourcc': self.fourcc,
                'fps': self.fps,
                'frameSize': self.frameSize
            }
            stream.release()

        self.detector = detector
        self.batchSize = batchSize
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size, output_size=self._output_size,
                rot=0, sigma=self._sigma, train=False, add_dpg=False,
                gpu_device=self.device)

        # initialize the queue used to store data
        """
        image_queue: the buffer storing pre-processed images for object detection
        det_queue: the buffer storing human detection results
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.image_queue = Queue(maxsize=queueSize)
            self.det_queue = Queue(maxsize=10 * queueSize)
            self.pose_queue = Queue(maxsize=10 * queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.image_queue = mp.Queue(maxsize=queueSize)
            self.det_queue = mp.Queue(maxsize=10 * queueSize)
            self.pose_queue = mp.Queue(maxsize=10 * queueSize)

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to pre process images for object detection
        if self.mode == 'image':
            self.image_preprocess_worker = self.start_worker(self.image_preprocess)
        elif self.mode == 'video':
            self.image_preprocess_worker = self.start_worker(self.frame_preprocess)
        # start a thread to detect human in images
        self.image_detection_worker = self.start_worker(self.image_detection)
        # start a thread to post process cropped human image for pose estimation
        self.image_postprocess_worker = self.start_worker(self.image_postprocess)
        return self

    def stop(self):
        # end threads
        self.image_preprocess_worker.join()
        self.image_detection_worker.join()
        self.image_postprocess_worker.join()
        # clear queues
        self.clear_queues()

    def terminate(self):
        if self.opt.sp:
            self._stopped = True
        else:
            self._stopped.value = True
        self.stop()

    def clear_queues(self):
        self.clear(self.image_queue)
        self.clear(self.det_queue)
        self.clear(self.pose_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def wait_and_put(self, queue, item):
        queue.put(item)

    def wait_and_get(self, queue):
        return queue.get()

    def image_preprocess(self):
        """
        Function changed, adapted to take preopened files instead of filenames
        """
        for i in range(self.num_batches):
            imgs = []
            orig_imgs = []
            im_names = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                if self.stopped:
                    self.wait_and_put(self.image_queue, (None, None, None, None))
                    return
                orig_img_k = self.imglist[k]
                img_name = self.image_names[k]

                # expected image shape like (1,3,h,w) or (3,h,w)
                img_k = self.detector.image_preprocess(orig_img_k)

                if isinstance(img_k, np.ndarray):
                    img_k = torch.from_numpy(img_k)
                # add one dimension at the front for batch if image shape (3,h,w)
                if img_k.dim() == 3:
                    img_k = img_k.unsqueeze(0)

                im_dim_list_k = orig_img_k.shape[1], orig_img_k.shape[0]

                imgs.append(img_k)
                orig_imgs.append(orig_img_k)
                im_names.append(img_name)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Human Detection
                imgs = torch.cat(imgs)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                # im_dim_list_ = im_dim_list

            self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))

    def frame_preprocess(self):
        stream = cv2.VideoCapture(self.path)
        assert stream.isOpened(), 'Cannot capture source'

        for i in range(self.num_batches):
            imgs = []
            orig_imgs = []
            im_names = []
            im_dim_list = []
            for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed or self.stopped:
                    # put the rest pre-processed data to the queue
                    if len(imgs) > 0:
                        with torch.no_grad():
                            # Record original image resolution
                            imgs = torch.cat(imgs)
                            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                        self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))
                    self.wait_and_put(self.image_queue, (None, None, None, None))
                    print('===========================> This video get ' + str(k) + ' frames in total.')
                    sys.stdout.flush()
                    stream.release()
                    return

                # expected frame shape like (1,3,h,w) or (3,h,w)
                img_k = self.detector.image_preprocess(frame)

                if isinstance(img_k, np.ndarray):
                    img_k = torch.from_numpy(img_k)
                # add one dimension at the front for batch if image shape (3,h,w)
                if img_k.dim() == 3:
                    img_k = img_k.unsqueeze(0)

                im_dim_list_k = frame.shape[1], frame.shape[0]

                imgs.append(img_k)
                orig_imgs.append(frame[:, :, ::-1])
                im_names.append(str(k) + '.jpg')
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Record original image resolution
                imgs = torch.cat(imgs)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                # im_dim_list_ = im_dim_list

            self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list))

        stream.release()

    def image_detection(self):
        for i in range(self.num_batches):
            imgs, orig_imgs, im_names, im_dim_list = self.wait_and_get(self.image_queue)
            if imgs is None or self.stopped:
                self.wait_and_put(self.det_queue, (None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # pad useless images to fill a batch, else there will be a bug
                for pad_i in range(self.batchSize - len(imgs)):
                    imgs = torch.cat((imgs, torch.unsqueeze(imgs[0], dim=0)), 0)
                    im_dim_list = torch.cat((im_dim_list, torch.unsqueeze(im_dim_list[0], dim=0)), 0)

                dets = self.detector.images_detection(imgs, im_dim_list)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_imgs)):
                        self.wait_and_put(self.det_queue,
                                          (orig_imgs[k], im_names[k], None, None, None, None, None))
                    continue
                if isinstance(dets, np.ndarray):
                    dets = torch.from_numpy(dets)
                dets = dets.cpu()
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]
                if self.opt.tracking:
                    ids = dets[:, 6:7]
                else:
                    ids = torch.zeros(scores.shape)

            for k in range(len(orig_imgs)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    self.wait_and_put(self.det_queue,
                                      (orig_imgs[k], im_names[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, *self._input_size)
                cropped_boxes = torch.zeros(boxes_k.size(0), 4)
                self.wait_and_put(self.det_queue,
                                  (orig_imgs[k], im_names[k], boxes_k,
                                   scores[dets[:, 0] == k], ids[dets[:, 0] == k],
                                   inps, cropped_boxes))

    def image_postprocess(self):
        for i in range(self.datalen):
            with torch.no_grad():
                (orig_img, im_name, boxes, scores, ids, inps, cropped_boxes) = self.wait_and_get(self.det_queue)
                if orig_img is None or self.stopped:
                    self.wait_and_put(self.pose_queue, (None, None, None, None, None, None, None))
                    return
                if boxes is None or boxes.nelement() == 0:
                    self.wait_and_put(self.pose_queue, (None, orig_img, im_name, boxes, scores, ids, None))
                    continue
                # imght = orig_img.shape[0]
                # imgwidth = orig_img.shape[1]
                for i, box in enumerate(boxes):
                    inps[i], cropped_box = self.transformation.test_transform(orig_img, box)
                    cropped_boxes[i] = torch.FloatTensor(cropped_box)

                # inps, cropped_boxes = self.transformation.align_transform(orig_img, boxes)
                self.wait_and_put(self.pose_queue,
                                  (inps, orig_img, im_name, boxes, scores, ids, cropped_boxes))

    def read(self):
        return self.wait_and_get(self.pose_queue)

    @property
    def stopped(self):
        if self.opt.sp:
            return self._stopped
        else:
            return self._stopped.value

    @property
    def length(self):
        return self.datalen

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8],
                [9, 10], [11, 12], [13, 14], [15, 16]]
class DetectionLoader(): def __init__(self, input_source, detector, cfg, opt, mode='image', batchSize=1, queueSize=128): self.cfg = cfg self.opt = opt self.mode = mode self.device = opt.device if mode == 'image': self.img_dir = opt.inputpath self.imglist = [ os.path.join(self.img_dir, im_name.rstrip('\n').rstrip('\r')) for im_name in input_source ] self.datalen = len(input_source) elif mode == 'video': stream = cv2.VideoCapture(input_source) assert stream.isOpened(), 'Cannot capture source' self.path = input_source self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT)) self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC)) self.fps = stream.get(cv2.CAP_PROP_FPS) self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)), int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) self.videoinfo = { 'fourcc': self.fourcc, 'fps': self.fps, 'frameSize': self.frameSize } stream.release() self.detector = detector self.batchSize = batchSize leftover = 0 if (self.datalen) % batchSize: leftover = 1 self.num_batches = self.datalen // batchSize + leftover self._input_size = cfg.DATA_PRESET.IMAGE_SIZE self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE self._sigma = cfg.DATA_PRESET.SIGMA pose_dataset = builder.retrieve_dataset(self.cfg.DATASET.TRAIN) if cfg.DATA_PRESET.TYPE == 'simple': self.transformation = SimpleTransform( pose_dataset, scale_factor=0, input_size=self._input_size, output_size=self._output_size, rot=0, sigma=self._sigma, train=False, add_dpg=False, gpu_device=self.device) # initialize the queue used to store data """ image_queue: the buffer storing pre-processed images for object detection det_queue: the buffer storing human detection results pose_queue: the buffer storing post-processed cropped human image for pose estimation """ if opt.sp: self._stopped = False self.image_queue = Queue(maxsize=queueSize) self.det_queue = Queue(maxsize=10 * queueSize) self.pose_queue = Queue(maxsize=10 * queueSize) else: self._stopped = mp.Value('b', False) self.image_queue = mp.Queue(maxsize=queueSize) self.det_queue = mp.Queue(maxsize=10 * queueSize) self.pose_queue = mp.Queue(maxsize=10 * queueSize) def start_worker(self, target): if self.opt.sp: p = Thread(target=target, args=()) else: p = mp.Process(target=target, args=()) # p.daemon = True p.start() return p def start(self): # 从这里看,整个程序的思路好像是这样的,先用一个image_preprocess或frame_preprocess来处理之后放进image_queue,然后用image_detection来检测哪里有人 # 然后放进det_queue,然后用image_postprocess来做姿态检测,然后放进pose_queue # start a thread to pre process images for object detection if self.mode == 'image': image_preprocess_worker = self.start_worker(self.image_preprocess) elif self.mode == 'video': image_preprocess_worker = self.start_worker(self.frame_preprocess) # start a thread to detect human in images image_detection_worker = self.start_worker(self.image_detection) # start a thread to post process cropped human image for pose estimation image_postprocess_worker = self.start_worker(self.image_postprocess) return [ image_preprocess_worker, image_detection_worker, image_postprocess_worker ] def stop(self): # clear queues self.clear_queues() def terminate(self): if self.opt.sp: self._stopped = True else: self._stopped.value = True self.stop() def clear_queues(self): self.clear(self.image_queue) self.clear(self.det_queue) self.clear(self.pose_queue) def clear(self, queue): while not queue.empty(): queue.get() def wait_and_put(self, queue, item): queue.put(item) def wait_and_get(self, queue): return queue.get() def image_preprocess(self): # 这个函数是给image的 for i in range(self.num_batches): imgs = [] 
orig_imgs = [] im_names = [] im_dim_list = [] for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)): if self.stopped: self.wait_and_put(self.image_queue, (None, None, None, None)) return im_name_k = self.imglist[k] # 经过调试,发现这里的frame就应该是读到的图片了,看看跟那边的有没有什么不同,没有的话,我就在这里做处理 # plt.imshow(im_name_k) # plt.show() # expected image shape like (1,3,h,w) or (3,h,w) img_k = self.detector.image_preprocess(im_name_k) if isinstance(img_k, np.ndarray): img_k = torch.from_numpy(img_k) # add one dimension at the front for batch if image shape (3,h,w) if img_k.dim() == 3: img_k = img_k.unsqueeze(0) orig_img_k = cv2.cvtColor( cv2.imread(im_name_k), cv2.COLOR_BGR2RGB ) # scipy.misc.imread(im_name_k, mode='RGB') is depreciated im_dim_list_k = orig_img_k.shape[1], orig_img_k.shape[0] imgs.append(img_k) orig_imgs.append(orig_img_k) im_names.append(os.path.basename(im_name_k)) im_dim_list.append(im_dim_list_k) with torch.no_grad(): # Human Detection imgs = torch.cat(imgs) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) # im_dim_list_ = im_dim_list # 传进去的4个参数,imgs是image_preprocess处理之后的所有图的返回,orig_imgs是bgr2rgb处理之后的所有图的返回,im_names是所有图的名字的lsit,im_dim_list是所有图的dim的list的集合 self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list)) def frame_preprocess(self): # 这个函数是给video的 stream = cv2.VideoCapture(self.path) assert stream.isOpened(), 'Cannot capture source' for i in range(self.num_batches): imgs = [] orig_imgs = [] im_names = [] im_dim_list = [] for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)): (grabbed, frame) = stream.read() # if the `grabbed` boolean is `False`, then we have # reached the end of the video file if not grabbed or self.stopped: # put the rest pre-processed data to the queue if len(imgs) > 0: with torch.no_grad(): # Record original image resolution imgs = torch.cat(imgs) im_dim_list = torch.FloatTensor( im_dim_list).repeat(1, 2) self.wait_and_put( self.image_queue, (imgs, orig_imgs, im_names, im_dim_list)) self.wait_and_put(self.image_queue, (None, None, None, None)) print('===========================> This video get ' + str(k) + ' frames in total.') sys.stdout.flush() stream.release() return # expected frame shape like (1,3,h,w) or (3,h,w) img_k = self.detector.image_preprocess(frame) if isinstance(img_k, np.ndarray): img_k = torch.from_numpy(img_k) # add one dimension at the front for batch if image shape (3,h,w) if img_k.dim() == 3: img_k = img_k.unsqueeze(0) im_dim_list_k = frame.shape[1], frame.shape[0] imgs.append(img_k) orig_imgs.append(frame[:, :, ::-1]) im_names.append(str(k) + '.jpg') im_dim_list.append(im_dim_list_k) with torch.no_grad(): # Record original image resolution imgs = torch.cat(imgs) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) # im_dim_list_ = im_dim_list self.wait_and_put(self.image_queue, (imgs, orig_imgs, im_names, im_dim_list)) stream.release() def image_detection(self): for i in range(self.num_batches): # imgs是image_preprocess处理之后的所有图的返回,orig_imgs是bgr2rgb处理之后的所有图的返回,im_names是所有图的名字的lsit,im_dim_list是所有图的dim的list的集合 imgs, orig_imgs, im_names, im_dim_list = self.wait_and_get( self.image_queue) # plt.imshow(imgs[0]) # plt.imshow(orig_imgs[0]) # plt.show() if imgs is None or self.stopped: self.wait_and_put(self.det_queue, (None, None, None, None, None, None, None)) return with torch.no_grad(): # pad useless images to fill a batch, else there will be a bug for pad_i in range(self.batchSize - len(imgs)): imgs = torch.cat((imgs, torch.unsqueeze(imgs[0], dim=0)), 0) im_dim_list = 
torch.cat( (im_dim_list, torch.unsqueeze(im_dim_list[0], dim=0)), 0) # 对于image list的任务来说,调用的是下面的这句,其实跟webcam的是一样的,关键都在images_detection # 这个返回了一个dets,是一个4x8的矩阵,我看看是什么东西 # im_dim_list是一个5x4的矩阵,5个元素都是640,360,640,360,应该跟图像尺寸有关 dets = self.detector.images_detection(imgs, im_dim_list) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(orig_imgs)): self.wait_and_put(self.det_queue, (orig_imgs[k], im_names[k], None, None, None, None, None)) continue if isinstance(dets, np.ndarray): dets = torch.from_numpy(dets) dets = dets.cpu() boxes = dets[:, 1:5] scores = dets[:, 5:6] if self.opt.tracking: ids = dets[:, 6:7] else: ids = torch.zeros(scores.shape) for k in range(len(orig_imgs)): boxes_k = boxes[dets[:, 0] == k] if isinstance(boxes_k, int) or boxes_k.shape[0] == 0: self.wait_and_put(self.det_queue, (orig_imgs[k], im_names[k], None, None, None, None, None)) continue inps = torch.zeros(boxes_k.size(0), 3, *self._input_size) cropped_boxes = torch.zeros(boxes_k.size(0), 4) # 这里放进det_queue的,orig_imgs[k]就是一张一张放的原图, im_names[k]是一个一个放的图名, boxes_k, scores[dets[:, 0] == k], ids[dets[:, 0] == k], inps, cropped_boxes self.wait_and_put(self.det_queue, (orig_imgs[k], im_names[k], boxes_k, scores[dets[:, 0] == k], ids[dets[:, 0] == k], inps, cropped_boxes)) def image_postprocess(self): for i in range(self.datalen): with torch.no_grad(): (orig_img, im_name, boxes, scores, ids, inps, cropped_boxes) = self.wait_and_get(self.det_queue) if orig_img is None or self.stopped: self.wait_and_put( self.pose_queue, (None, None, None, None, None, None, None)) return if boxes is None or boxes.nelement() == 0: self.wait_and_put( self.pose_queue, (None, orig_img, im_name, boxes, scores, ids, None)) continue # imght = orig_img.shape[0] # imgwidth = orig_img.shape[1] for i, box in enumerate(boxes): # nandao pose estimation jiushi zai zheli shixian de ? inps[i], cropped_box = self.transformation.test_transform( orig_img, box) cropped_boxes[i] = torch.FloatTensor(cropped_box) # inps, cropped_boxes = self.transformation.align_transform(orig_img, boxes) self.wait_and_put(self.pose_queue, (inps, orig_img, im_name, boxes, scores, ids, cropped_boxes)) def read(self): return self.wait_and_get(self.pose_queue) @property def stopped(self): if self.opt.sp: return self._stopped else: return self._stopped.value @property def length(self): return self.datalen
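# --- Usage sketch (illustrative; `im_names`, `detector`, `cfg` and `opt` are assumed
# to be prepared elsewhere, e.g. by the project's config loader and detector factory) ---
# The three workers form a queue pipeline:
#   image_preprocess / frame_preprocess -> image_queue
#   image_detection                     -> det_queue
#   image_postprocess                   -> pose_queue
# and the consumer calls read() once per input image or frame:
#
#     det_loader = DetectionLoader(im_names, detector, cfg, opt, mode='image', batchSize=1)
#     det_workers = det_loader.start()
#     for _ in range(det_loader.length):
#         inps, orig_img, im_name, boxes, scores, ids, cropped_boxes = det_loader.read()
#         if orig_img is None:      # sentinel tuple: the pipeline stopped early
#             break
#         if boxes is None:         # no person detected in this image/frame
#             continue
#         # `inps` holds the cropped person patches ready for the pose model
#     det_loader.stop()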
class Halpe_136_det(data.Dataset):
    """ Halpe Full-Body (136 keypoints) human detection box dataset. """
    EVAL_JOINTS = list(range(136))

    def __init__(self, det_file=None, opt=None, **cfg):
        self._cfg = cfg
        self._opt = opt
        self._preset_cfg = cfg['PRESET']
        self._root = cfg['ROOT']
        self._img_prefix = cfg['IMG_PREFIX']
        if not det_file:
            det_file = cfg['DET_FILE']
        self._ann_file = os.path.join(self._root, cfg['ANN'])

        if os.path.exists(det_file):
            print("Detection results exist, will use them")
        else:
            print("Will create detection results to {}".format(det_file))
            self.write_coco_json(det_file)
        assert os.path.exists(det_file), "Error: no detection results found"
        with open(det_file, 'r') as fid:
            self._det_json = json.load(fid)

        self._input_size = self._preset_cfg['IMAGE_SIZE']
        self._output_size = self._preset_cfg['HEATMAP_SIZE']
        self._sigma = self._preset_cfg['SIGMA']

        if self._preset_cfg['TYPE'] == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size,
                output_size=self._output_size,
                rot=0, sigma=self._sigma,
                train=False, add_dpg=False)

    def __getitem__(self, index):
        det_res = self._det_json[index]
        if not isinstance(det_res['image_id'], int):
            img_id, _ = os.path.splitext(os.path.basename(det_res['image_id']))
            img_id = int(img_id)
        else:
            img_id = det_res['image_id']
        img_path = os.path.join(self._root, self._img_prefix, '%012d.jpg' % img_id)

        # Load image
        image = cv2.cvtColor(
            cv2.imread(img_path), cv2.COLOR_BGR2RGB
        )  # scipy.misc.imread(img_path, mode='RGB') is deprecated
        # image is laid out as (height, width, channels)
        imght, imgwidth = image.shape[0], image.shape[1]
        x1, y1, w, h = det_res['bbox']
        bbox = [x1, y1, x1 + w, y1 + h]
        inp, bbox = self.transformation.test_transform(image, bbox)
        return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), \
            torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), \
            torch.Tensor([imght]), torch.Tensor([imgwidth])

    def __len__(self):
        return len(self._det_json)

    def write_coco_json(self, det_file):
        from pycocotools.coco import COCO
        import pathlib

        _coco = COCO(self._ann_file)
        image_ids = sorted(_coco.getImgIds())
        det_model = get_detector(self._opt)
        dets = []
        for entry in tqdm(_coco.loadImgs(image_ids)):
            abs_path = os.path.join(self._root, self._img_prefix, entry['file_name'])
            det = det_model.detect_one_img(abs_path)
            if det:
                dets += det
        pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True)
        json.dump(dets, open(det_file, 'w'))

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14],
                [15, 16], [20, 21], [22, 23], [24, 25], [26, 42], [27, 41],
                [28, 40], [29, 39], [30, 38], [31, 37], [32, 36], [33, 35],
                [43, 52], [44, 51], [45, 50], [46, 49], [47, 48], [62, 71],
                [63, 70], [64, 69], [65, 68], [66, 73], [67, 72], [57, 61],
                [58, 60], [74, 80], [75, 79], [76, 78], [87, 89], [93, 91],
                [86, 90], [85, 81], [84, 82], [94, 115], [95, 116], [96, 117],
                [97, 118], [98, 119], [99, 120], [100, 121], [101, 122],
                [102, 123], [103, 124], [104, 125], [105, 126], [106, 127],
                [107, 128], [108, 129], [109, 130], [110, 131], [111, 132],
                [112, 133], [113, 134], [114, 135]]
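# --- Usage sketch (illustrative) ---
# Halpe_136_det is a regular torch Dataset, so it can be batched with a plain
# DataLoader for evaluation. The det_file path and the PRESET/ROOT/IMG_PREFIX/ANN
# keyword values below are placeholders, not values taken from this repository.
#
#     from torch.utils.data import DataLoader
#     det_dataset = Halpe_136_det(det_file='exp/json/test_det.json', opt=opt,
#                                 PRESET=cfg.DATA_PRESET, ROOT='./data/halpe/',
#                                 IMG_PREFIX='images/test2017/',
#                                 ANN='annotations/halpe_test.json')
#     eval_loader = DataLoader(det_dataset, batch_size=32, shuffle=False, num_workers=4)
#     for inps, cropped_boxes, det_boxes, img_ids, scores, imghts, imgwidths in eval_loader:
#         heatmaps = pose_model(inps.to(opt.device))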
class WebCamDetectionLoader():
    def __init__(self, input_source, detector, cfg, opt, queueSize=1):
        self.cfg = cfg
        self.opt = opt

        stream = cv2.VideoCapture(input_source)
        assert stream.isOpened(), 'Cannot capture source'
        self.path = input_source
        self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
        self.fps = stream.get(cv2.CAP_PROP_FPS)
        self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.videoinfo = {'fourcc': self.fourcc, 'fps': self.fps, 'frameSize': self.frameSize}
        stream.release()

        self.detector = detector
        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE
        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self, scale_factor=0,
                input_size=self._input_size,
                output_size=self._output_size,
                rot=0, sigma=self._sigma,
                train=False, add_dpg=False)

        # initialize the queue used to store data
        """
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.pose_queue = Queue(maxsize=queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.pose_queue = mp.Queue(maxsize=queueSize)

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to pre process images for object detection
        image_preprocess_worker = self.start_worker(self.frame_preprocess)
        return [image_preprocess_worker]

    def stop(self):
        # clear queues
        self.clear_queues()

    def terminate(self):
        if self.opt.sp:
            self._stopped = True
        else:
            self._stopped.value = True
        self.stop()

    def clear_queues(self):
        self.clear(self.pose_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def wait_and_put(self, queue, item):
        if not self.stopped:
            queue.put(item)

    def wait_and_get(self, queue):
        if not self.stopped:
            return queue.get()

    def frame_preprocess(self):
        stream = cv2.VideoCapture(self.path)
        assert stream.isOpened(), 'Cannot capture source'
        # keep looping infinitely
        for i in count():
            if self.stopped:
                stream.release()
                return
            if not self.pose_queue.full():
                # otherwise, ensure the queue has room in it
                (grabbed, frame) = stream.read()
                if i % self.opt.gap != 0:
                    continue
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    # re-open the source instead of terminating, then skip the empty frame
                    stream = cv2.VideoCapture(self.path)
                    continue
                    # self.wait_and_put(self.pose_queue, (None, None, None, None, None, None, None))
                    # stream.release()
                    # return
                # expected frame shape like (1,3,h,w) or (3,h,w)
                img_k = self.detector.image_preprocess(frame)
                if isinstance(img_k, np.ndarray):
                    img_k = torch.from_numpy(img_k)
                # add one dimension at the front for batch if image shape (3,h,w)
                if img_k.dim() == 3:
                    img_k = img_k.unsqueeze(0)
                im_dim_list_k = frame.shape[1], frame.shape[0]
                orig_img = frame[:, :, ::-1]
                im_name = str(i) + '.jpg'
                # im_dim_list = im_dim_list_k
                with torch.no_grad():
                    # Record original image resolution
                    im_dim_list_k = torch.FloatTensor(im_dim_list_k).repeat(1, 2)
                img_det = self.image_detection((img_k, orig_img, im_name, im_dim_list_k))
                self.image_postprocess(img_det)

    def image_detection(self, inputs):
        img, orig_img, im_name, im_dim_list = inputs
        if img is None or self.stopped:
            return (None, None, None, None, None, None, None)
        with torch.no_grad():
            dets = self.detector.images_detection(img, im_dim_list)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return (orig_img, im_name, None, None, None, None, None)
            if isinstance(dets, np.ndarray):
                dets = torch.from_numpy(dets)
            dets = dets.cpu()
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]
            if self.opt.tracking:
                ids = dets[:, 6:7]
            else:
                ids = torch.zeros(scores.shape)
            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return (orig_img, im_name, None, None, None, None, None)
            inps = torch.zeros(boxes_k.size(0), 3, *self._input_size)
            cropped_boxes = torch.zeros(boxes_k.size(0), 4)
        return (orig_img, im_name, boxes_k, scores[dets[:, 0] == 0],
                ids[dets[:, 0] == 0], inps, cropped_boxes)

    def image_postprocess(self, inputs):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, ids, inps, cropped_boxes) = inputs
            if orig_img is None or self.stopped:
                self.wait_and_put(self.pose_queue, (None, None, None, None, None, None, None))
                return
            if boxes is None or boxes.nelement() == 0:
                self.wait_and_put(self.pose_queue, (None, orig_img, im_name, boxes, scores, ids, None))
                return
            # imght = orig_img.shape[0]
            # imgwidth = orig_img.shape[1]
            for i, box in enumerate(boxes):
                inps[i], cropped_box = self.transformation.test_transform(orig_img, box)
                cropped_boxes[i] = torch.FloatTensor(cropped_box)
            # inps, cropped_boxes = self.transformation.align_transform(orig_img, boxes)
            self.wait_and_put(self.pose_queue, (inps, orig_img, im_name, boxes, scores, ids, cropped_boxes))

    def read(self):
        return self.wait_and_get(self.pose_queue)

    @property
    def stopped(self):
        if self.opt.sp:
            return self._stopped
        else:
            return self._stopped.value

    @property
    def joint_pairs(self):
        """Joint pairs which defines the pairs of joint to be swapped
        when the image is flipped horizontally."""
        return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
                [11, 12], [13, 14], [15, 16]]
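# --- Usage sketch (illustrative; `detector`, `cfg` and `opt` are assumed) ---
# Unlike DetectionLoader, the webcam loader runs detection and post-processing
# inline inside frame_preprocess(), so only pose_queue is exposed and every
# opt.gap-th frame is processed:
#
#     webcam_loader = WebCamDetectionLoader(0, detector, cfg, opt)  # 0 = default camera
#     workers = webcam_loader.start()
#     while True:
#         item = webcam_loader.read()
#         if item is None:                      # wait_and_get() returns None once stopped
#             break
#         inps, orig_img, im_name, boxes, scores, ids, cropped_boxes = item
#         if orig_img is None:                  # explicit stop sentinel from the loader
#             break
#         if boxes is None:                     # nobody detected in this frame
#             continue
#         ...
#     webcam_loader.terminate()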
class Alphapose(): def __init__(self,opt, pose_opt, ReIDCfg, C_T_output_queue,Pose_output_queue, S_Pose_Estimate, S_Number_Predict, vis=False,save_results=False, queueSize=1024): self.opt = opt self.dir_name = opt.dir_name self.root_path = os.path.join(opt.data_root, '{}'.format(opt.dir_name)) # logger.info('目标文件夹是{}'.format(self.root_path)) self.file_name = opt.file_name self.Videoparameters, \ self.setting_parameter, \ self.action_datas, \ self.channel_list, \ self.parameter = read_data_from_json_file_v2(self.root_path, self.file_name, self.opt) # 视频是否需要再次读入呢? 是否暂用资源 self.datalen = len(self.action_datas) # 加载 poser self.device = torch.device('cuda') self.batchSize = 4 self.ReID_BatchSize = 50 self.gpus = opt.gpus self.pose_model = build_poser(pose_opt,self.gpus) # 加载 ReID 模型 self.ReIDCfg = ReIDCfg self.ReID = ReID_Model(self.ReIDCfg) self.ReID.cuda() # ReID 模型参数 self.distance_threshold = 1 self.height_threshold = 95 self.width_threshold = 40 self._input_size = pose_opt.DATA_PRESET.IMAGE_SIZE self._output_size = pose_opt.DATA_PRESET.HEATMAP_SIZE self._sigma = pose_opt.DATA_PRESET.SIGMA self.aspect_ratio = 0.45 if pose_opt.DATA_PRESET.TYPE == 'simple': self.transformation = SimpleTransform( self, scale_factor=0, input_size=self._input_size, output_size=self._output_size, rot=0, sigma=self._sigma, train=False, add_dpg=False, gpu_device=self.device) self.input_Q = C_T_output_queue # 追踪数据的整体输入 self.Posing_Q = Queue(maxsize=queueSize) #在骨骼关键点检测前,对左边转换后的截图进行预处理 self.PostProcess_Q = Queue(maxsize=queueSize) # 在骨骼关键点检测前,对左边转换后的截图进行预处理 self.output_Q = Pose_output_queue self.vis = vis if self.vis == True: self.vis_path = os.path.join(self.root_path, 'vis') os.makedirs(self.vis_path, exist_ok=True) self.save_results = save_results self.S_Pose_Estimate = S_Pose_Estimate self.S_Number_Predict = S_Number_Predict if self.save_results == True: self.intermediate_results_dir = os.path.join(self.root_path, 'intermediate_results','Alphapose') os.makedirs(self.intermediate_results_dir, exist_ok=True) self.logger = Log(__name__, 'Alphapose' ).getlog() def Read_From_Cache(self): ''' 从文件把之前计算过的结果提取出来 ''' from utils.index_operation import get_index self.logger.debug('The pid of Alphapose.Read_From_Cache() : {}'.format(os.getpid())) self.logger.debug('The thread of Alphapose.Read_From_Cache() : {}'.format(currentThread())) cache_index = get_index(self.intermediate_results_dir) # 只需读取有用的部分即可。 action_index = self.S_Number_Predict for action_index in range(self.S_Number_Predict,self.S_Pose_Estimate): if action_index not in cache_index: # cache 中没有保存说明 此动作本身是False self.output_Q.put((False, [action_index])) else: # 从文件夹中读取出该动作对应的计算结果。 _, sub_imgs_out, target_regions = self.load_intermediate_resutls(action_index) self.output_Q.put((True, [action_index,sub_imgs_out,target_regions])) self.logger.log(23, ' Alphapose loads action {} from Cache file '.format(action_index)) def posing_preprocess_(self): self.t_posing_preprocess = Thread(target=self.posing_preprocess, args=()) self.t_posing_preprocess.daemon = True self.t_posing_preprocess.start() def posing_preprocess(self): # 预处理 self.logger.debug('The pid of Alphapose.posing_preprocess() : {}'.format(os.getpid())) self.logger.debug('The thread of Alphapose.posing_preprocess() : {}'.format(currentThread())) posing_preprocess_timer = Timer() for action_index in range(self.S_Pose_Estimate, self.datalen): self.logger.debug('alphapose.posing_preprocess() ======================================== action {}'.format(action_index)) Flag_PreProcess, (input_index, sub_imgs_tracking, 
ReID_features_tracking,sub_imgs_detection,ReID_features_detection) = self.input_Q.get() if input_index != action_index: self.logger.log(31, '---——————————————————————————————————index does match') raise Exception('Alphapose.update action_index_update {} != input_index {} '.format(action_index, input_index)) if Flag_PreProcess == False: self.Posing_Q.put((False,[])) continue else: # 开始计时 posing_preprocess_timer.tic() inps = [] cropped_boxes = [] # 通过 ReID 特征剔除一部分。 sub_imgs = self.imgs_sorted_by_ReID(sub_imgs_tracking,sub_imgs_detection,action_index) if len(sub_imgs) == 0 : # 被筛选后的图片序列为0,则跳过。 self.Posing_Q.put((False, [])) continue for imgs_index in range(len(sub_imgs)): orig_img = sub_imgs[imgs_index] height, width, _ = orig_img.shape box = [0, 0, width - 1, height - 1] inp, cropped_box = self.transformation.test_transform(orig_img, box) inps.append(inp) cropped_boxes.append(cropped_box) inps_ = torch.stack(inps,dim=0) self.Posing_Q.put((True,(sub_imgs,inps_,cropped_boxes))) self.logger.log(23, 'alphapose.posing_preprocess() action {} consums {}s'.format(action_index, posing_preprocess_timer.toc())) def posing_detect_(self): self.t_posing_detect = Thread(target=self.posing_detect, args=()) self.t_posing_detect.daemon = True self.t_posing_detect.start() def posing_detect(self): posing_detect_timer = Timer() for action_index in range(self.S_Pose_Estimate, self.datalen): self.logger.debug('posing_detect ------------action {} has been read '.format(action_index)) Flag_Posing_detect, preprocess_results = self.Posing_Q.get() if Flag_Posing_detect == False: self.PostProcess_Q.put((False,[])) continue else: posing_detect_timer.tic() sub_imgs, inps_ , cropped_boxes = preprocess_results inps = inps_.to(self.device) inps_len = inps_.size(0) leftover = 0 if (inps_len) % self.batchSize: leftover = 1 num_batches = inps_len // self.batchSize + leftover keypoints_all = [] for j in range(num_batches): inps_j = inps[j * self.batchSize : min((j + 1) * self.batchSize, inps_len)] sub_cropped_boxes = cropped_boxes[j * self.batchSize : min((j + 1) * self.batchSize, inps_len)] # self.logger.log(23, ' j : {}, inps_j.size() '.format(j, inps_j.size())) hm_j = self.pose_model(inps_j) keypoints_several = self.heats_to_maps(hm_j, sub_cropped_boxes) keypoints_all.extend(keypoints_several) self.PostProcess_Q.put((True,(keypoints_all,sub_imgs))) self.logger.log(23, 'alphapose.posing_detect() action {} consums {}s'.format(action_index, posing_detect_timer.toc())) def posing_postprocess_(self): self.t_posing_postprocess = Thread(target=self.posing_postprocess, args=()) self.t_posing_postprocess.daemon = True self.t_posing_postprocess.start() def posing_postprocess(self): '''对骨骼关键节点的检测结果坐后处理,并通过 简单规则对结果进行以此初步筛选。''' pposing_postprocess_timer = Timer() for action_index in range(self.S_Pose_Estimate, self.datalen): self.logger.debug('posing_postprocess ------------action {} has been read '.format(action_index)) Flag_posing_postprocess, posing_detect_resutls = self.PostProcess_Q.get() if Flag_posing_postprocess == False: self.output_Q.put((False,[action_index])) continue else: pposing_postprocess_timer.tic() keypoints_all,sub_imgs = posing_detect_resutls target_regions = [] sub_imgs_out = [] if self.vis == True: vis_dir_positive = os.path.join(self.vis_path, '{}'.format(action_index), 'Alphapose_positive') makedir_v1(vis_dir_positive) vis_dir_negative = os.path.join(self.vis_path, '{}'.format(action_index), 'Alphapose_negative') makedir_v1(vis_dir_negative) Negative_num = 0 vis_dir_small_size = os.path.join(self.vis_path, 
'{}'.format(action_index), 'Alphapose_small_size') makedir_v1(vis_dir_small_size) small_size_num = 0 vis_dir_small_target = os.path.join(self.vis_path, '{}'.format(action_index), 'Alphapose_small_target') makedir_v1(vis_dir_small_target) small_target_num = 0 Positive_num = 0 for k_index in range(len(keypoints_all)): # 对每一张关节点图做逐一处理 origin_img = sub_imgs[k_index] height, width, _ = origin_img.shape if height < self.height_threshold or width < self.width_threshold: small_size_num += 1 if self.vis == True: img_name = '{}.jpg'.format(k_index) cv2.imwrite(os.path.join(vis_dir_small_size, img_name), origin_img) continue keypoints = keypoints_all[k_index] # 这个判断标准和get_box的标准不一样。 # 用来判断是否背向的 l_x_max = max(keypoints[5 * 3], keypoints[11 * 3]) r_x_min = min(keypoints[6 * 3], keypoints[12 * 3]) t_y_max = max(keypoints[5 * 3 + 1], keypoints[6 * 3 + 1]) b_y_min = min(keypoints[11 * 3 + 1], keypoints[12 * 3 + 1]) if l_x_max < r_x_min and t_y_max < b_y_min: '初步判断球员是否背向' [xmin_old, xmax_old], [xmin, xmax, ymin, ymax] = self.get_box(keypoints, height, width, ratio=0.1, expand_w_min=10) # 计算上半身体长度 body_length = ymax - ymin if body_length < 20: # 130 和 60 应该来自 opt small_target_num += 1 if self.vis == True: img_name = '{}.jpg'.format(k_index) cv2.imwrite(os.path.join(vis_dir_small_target, img_name), origin_img) continue # 计算肩宽、胯宽 Shoulder_width = keypoints[6 * 3] - keypoints[5 * 3] Crotch_width = keypoints[12 * 3] - keypoints[11 * 3] aspect_ratio = (max(Shoulder_width, Crotch_width)) / (body_length) # 计算比例 if aspect_ratio >= self.aspect_ratio: # 如果这个比例合适,则送入号码检测 sub_imgs_out.append(origin_img) target_regions.append([xmin, xmax, ymin, ymax]) Positive_num += 1 # 复合条件的 +1 if self.vis == True: img_name = '{}.jpg'.format(k_index) vis_img = np.copy(origin_img) cv2.rectangle(vis_img, (xmin_old, ymin), (xmax_old, ymax), color=(255, 0, 0), thickness=1) cv2.rectangle(vis_img, (xmin, ymin), (xmax, ymax), color=(0, 255, 0), thickness=1) cv2.imwrite(os.path.join(vis_dir_positive, img_name), vis_img) else: Negative_num += 1 if self.vis == True: img_name = '{}.jpg'.format(k_index) cv2.imwrite(os.path.join(vis_dir_negative, img_name), origin_img) self.output_Q.put((True, [action_index, sub_imgs_out,target_regions ])) # 保存中间结果 if self.save_results == True: self.save_intermediate_resutls(action_index,sub_imgs_out,target_regions) # # 输出 日志 # self.logger.log(23,'Positive_num {}, Negative_num {}, small_target_num {}, small_size_num {}, all {}'.format( # Positive_num, # Negative_num, # small_target_num, # small_size_num, # len(keypoints_all))) self.logger.log(23, 'alphapose.posing_postprocess() action {} consums {}s Positive_num / All = {}/{}'.format( action_index, pposing_postprocess_timer.toc(), Positive_num, len(keypoints_all))) def save_intermediate_resutls(self,action_index,sub_imgs_out,target_regions): '''将每一次计算的结果保存下来。''' intermediate_resutls_path = os.path.join(self.intermediate_results_dir,'{}'.format(action_index)) os.makedirs(intermediate_resutls_path,exist_ok=True) # 保存图片 for img_index in range(len(sub_imgs_out)): cv2.imwrite(os.path.join(intermediate_resutls_path,'{}.jpg'.format(img_index)),sub_imgs_out[img_index]) # 保存 target_regions with open(os.path.join(intermediate_resutls_path,'{}_target_regions.json'.format(action_index)),'w') as f: results = {'target_regions' : target_regions} json.dump(results,f) def load_intermediate_resutls(self,action_index): '''将中间结果读取出来。''' intermediate_resutls_path = os.path.join(self.intermediate_results_dir,'{}'.format(action_index)) # 把这个文件夹下的图片名称读出来。 sub_imgs_names = [ img_name for 
img_name in os.listdir(intermediate_resutls_path) if img_name.split('.')[-1] == 'jpg' ] # 把图片名字按升序排列 sub_imgs_names = sorted(sub_imgs_names, key=lambda img_index : int(img_index.split('.')[0])) sub_imgs_out = [] for img_name in sub_imgs_names: sub_img = cv2.imread(os.path.join(intermediate_resutls_path,img_name)) sub_imgs_out.append(sub_img) # 保存 target_regions with open(os.path.join(intermediate_resutls_path, '{}_target_regions.json'.format(action_index)), 'r') as f: results = json.load(f) target_regions = results['target_regions'] return action_index,sub_imgs_out,target_regions def heats_to_maps(self,hm_data,cropped_boxes): # 将 heatmap 转化成 keypoints 数组 pred = hm_data.cpu().data.numpy() assert pred.ndim == 4 keypoints_all = [] for hms_index in range(hm_data.size(0)): pose_coord, pose_score = heatmap_to_coord_simple(pred[hms_index], cropped_boxes[hms_index]) keypoints_single = [] for n in range(pose_score.shape[0]): keypoints_single.append(float(pose_coord[n, 0])) keypoints_single.append(float(pose_coord[n, 1])) keypoints_single.append(float(pose_score[n])) keypoints_all.append(keypoints_single) return keypoints_all def get_box(self, keypoints, img_height, img_width, ratio=0.1, expand_w_min=10): '''这个get box 是用来获取球员的背部区域的''' xmin = min(keypoints[5 * 3], keypoints[11 * 3]) xmax = max(keypoints[6 * 3], keypoints[12 * 3]) ymin = min(keypoints[5 * 3 + 1], keypoints[6 * 3 + 1]) ymax = max(keypoints[11 * 3 + 1], keypoints[12 * 3 + 1]) return [int(round(xmin)), int(round(xmax))], self.expand_bbox(xmin, xmax, ymin, ymax, img_width, img_height, ratio, expand_w_min) def expand_bbox(self, left, right, top, bottom, img_width, img_height,ratio = 0.1, expand_w_min = 10): ''' 以一定的ratio向左右外扩。 不向上向下扩展了。 ''' width = right - left height = bottom - top # expand ratio expand_w_min = max(ratio * width , expand_w_min) # 最小外扩 expand_w_min new_left = np.clip(left - expand_w_min, 0, img_width) new_right = np.clip(right + expand_w_min, 0, img_width) return [int(new_left), int(new_right), int(top), int(bottom)] def imgs_sorted_by_ReID(self,imgs_tracking,imgs_detection,action_index): '''通过ReID模型来筛选与目标特征相符的图片''' sub_imgs = [] # 把追踪序列和目标人物进行对比,剔除后得到追踪序列的平均ReID特征值 if len(imgs_tracking) == 0: # 如果追踪序列长度为0的话,那就没什么好处理的了,直接返回 空 就行。 return sub_imgs else: imgs_tracking_index, distmat_tracking, output_feature = imgs_sorted_by_ReID(self.ReID, self.ReIDCfg, imgs_tracking, distance_threshold=self.distance_threshold, feat_norm='yes', version=0, batch_size=self.ReID_BatchSize) for P_index in imgs_tracking_index: sub_imgs.append(imgs_tracking[P_index]) if len(imgs_detection) > 0: # 把追踪序列的平均ReID特征值和坐标转换序列对比,进行第二次筛选 imgs_detection_index, distmat_detection, _ = imgs_sorted_by_ReID(self.ReID, self.ReIDCfg, imgs_detection, distance_threshold=self.distance_threshold, feat_norm='yes', version=2, input_features=output_feature, batch_size=self.ReID_BatchSize) for P_index_detection in imgs_detection_index: sub_imgs.append(imgs_detection[P_index_detection]) if self.vis ==True: # 将追踪序列的sub_imgs 按ReID的分类结果保存 Positive_dir = os.path.join(self.vis_path, '{}/ReID'.format(action_index)) makedir_v1(Positive_dir) Negative_dir = os.path.join(self.vis_path, '{}/ReID/Negative'.format(action_index)) for P_index, _ in enumerate(imgs_tracking): distance = distmat_tracking[0, P_index] if P_index in imgs_tracking_index: cv2.imwrite(os.path.join(Positive_dir, '{}_{:3f}.jpg'.format(P_index, distance)), imgs_tracking[P_index]) else: cv2.imwrite(os.path.join(Negative_dir, '{}_{:3f}.jpg'.format(P_index, distance)), imgs_tracking[P_index]) # 将坐标转换后序列的sub_imgs 
按ReID的分类结果保存 Positive_dir_detection = os.path.join(self.vis_path, '{}/ReID/detection'.format(action_index)) makedir_v1(Positive_dir_detection) Negative_dir_detection = os.path.join(self.vis_path, '{}/ReID/detection/Negative'.format(action_index)) makedir_v1(Negative_dir_detection) for P_index_detection, _ in enumerate(imgs_detection): distance = distmat_detection[0, P_index_detection] if P_index_detection in imgs_detection_index: cv2.imwrite(os.path.join(Positive_dir_detection, '{}_{:3f}.jpg'.format(P_index_detection, distance)), imgs_detection[P_index_detection]) else: cv2.imwrite(os.path.join(Negative_dir_detection, '{}_{:3f}.jpg'.format(P_index_detection, distance)), imgs_detection[P_index_detection]) return sub_imgs @property def joint_pairs(self): """Joint pairs which defines the pairs of joint to be swapped when the image is flipped horizontally.""" return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
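# --- Usage sketch (illustrative; argument names such as pose_cfg and the queue
# objects are assumed to be created by the surrounding pipeline) ---
# The three Alphapose stages run as daemon threads and communicate through
# Posing_Q / PostProcess_Q internally; filtered crops and their back regions
# arrive on the output queue in the formats written by posing_postprocess():
#
#     pose_worker = Alphapose(opt, pose_cfg, ReIDCfg, C_T_output_queue, Pose_output_queue,
#                             S_Pose_Estimate, S_Number_Predict, save_results=True)
#     pose_worker.posing_preprocess_()
#     pose_worker.posing_detect_()
#     pose_worker.posing_postprocess_()
#     ok, payload = Pose_output_queue.get()
#     if ok:
#         action_index, sub_imgs_out, target_regions = payload
#     else:
#         action_index = payload[0]   # this action was rejected upstream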
class Alphapose_LoadImgs(): def __init__(self, opt, root_dir, save_dir, pose_opt, queueSize=1024): self.opt = opt self.root_dir = root_dir self.dir_list = [ d for d in os.listdir(self.root_dir) if os.path.isdir(os.path.join(self.root_dir, d)) ] self.dir_list = sorted(self.dir_list, key=lambda x: int(x)) # logger.info('目标文件夹是{}'.format(self.root_path)) self.datalen = len(self.dir_list) self.start = 0 # 加载 poser self.device = torch.device('cuda') self.batchSize = 8 self.ReID_BatchSize = 50 self.gpus = opt.gpus self.pose_model = build_poser(pose_opt, self.gpus) # # 加载 ReID 模型 # self.ReIDCfg = ReIDCfg # self.ReID = ReID_Model(self.ReIDCfg) # self.ReID.cuda() # ReID 模型参数 self.distance_threshold = 1 self.height_threshold = 40 self.width_threshold = 20 self._input_size = pose_opt.DATA_PRESET.IMAGE_SIZE self._output_size = pose_opt.DATA_PRESET.HEATMAP_SIZE self._sigma = pose_opt.DATA_PRESET.SIGMA self.aspect_ratio = 0.45 if pose_opt.DATA_PRESET.TYPE == 'simple': self.transformation = SimpleTransform( self, scale_factor=0, input_size=self._input_size, output_size=self._output_size, rot=0, sigma=self._sigma, train=False, add_dpg=False, gpu_device=self.device) self.Posing_Q = Queue(maxsize=queueSize) #在骨骼关键点检测前,对左边转换后的截图进行预处理 self.PostProcess_Q = Queue( maxsize=queueSize) # 在骨骼关键点检测前,对左边转换后的截图进行预处理 self.save_dir = save_dir os.makedirs(self.save_dir, exist_ok=True) def posing_preprocess_(self): self.t_posing_preprocess = Thread(target=self.posing_preprocess, args=()) self.t_posing_preprocess.daemon = True self.t_posing_preprocess.start() def posing_preprocess(self): # 预处理 posing_preprocess_timer = Timer() for dir_index in range(self.start, self.datalen): # print('1 : posing_preprocess : {} '.format(dir_index)) # 加载当前文件夹下的图片 this_dir = os.path.join(self.root_dir, self.dir_list[dir_index]) imgs_list = os.listdir(this_dir) if len(imgs_list) <= 0: print('{} is empty'.format(this_dir)) self.Posing_Q.put((False, [])) continue else: # 开始计时 posing_preprocess_timer.tic() imgs = [] inps = [] cropped_boxes = [] # # 通过 ReID 特征剔除一部分。 # sub_imgs = self.imgs_sorted_by_ReID(sub_imgs_tracking,sub_imgs_detection,dir_index) for img_index in range(len(imgs_list)): img_path = os.path.join(this_dir, imgs_list[img_index]) orig_img = cv2.imread(img_path) height, width, _ = orig_img.shape if height < self.height_threshold or width < self.width_threshold: # 图片太小了,就不放入骨骼点检测的序列中。 continue box = [0, 0, width - 1, height - 1] inp, cropped_box = self.transformation.test_transform( orig_img, box) imgs.append(orig_img) inps.append(inp) cropped_boxes.append(cropped_box) inps_ = torch.stack(inps, dim=0) self.Posing_Q.put((True, (imgs, inps_, cropped_boxes))) def posing_detect_(self): self.t_posing_detect = Thread(target=self.posing_detect, args=()) self.t_posing_detect.daemon = True self.t_posing_detect.start() def posing_detect(self): posing_detect_timer = Timer() for dir_index in range(self.start, self.datalen): # print('2 : posing_detect : {} '.format(dir_index)) Flag_Posing_detect, preprocess_results = self.Posing_Q.get() if Flag_Posing_detect == False: self.PostProcess_Q.put((False, [])) continue else: posing_detect_timer.tic() sub_imgs, inps_, cropped_boxes = preprocess_results inps = inps_.to(self.device) inps_len = inps_.size(0) leftover = 0 if (inps_len) % self.batchSize: leftover = 1 num_batches = inps_len // self.batchSize + leftover keypoints_all = [] for j in range(num_batches): inps_j = inps[j * self.batchSize:min((j + 1) * self.batchSize, inps_len)] sub_cropped_boxes = cropped_boxes[j * self.batchSize:min( (j + 1) * 
self.batchSize, inps_len)] # self.logger.log(23, ' j : {}, inps_j.size() '.format(j, inps_j.size())) hm_j = self.pose_model(inps_j) keypoints_several = self.heats_to_maps( hm_j, sub_cropped_boxes) keypoints_all.extend(keypoints_several) self.PostProcess_Q.put((True, (keypoints_all, sub_imgs))) def posing_postprocess_(self): self.t_posing_postprocess = Thread(target=self.posing_postprocess, args=()) self.t_posing_postprocess.daemon = True self.t_posing_postprocess.start() def posing_postprocess(self): '''对骨骼关键节点的检测结果坐后处理,并通过 简单规则对结果进行以此初步筛选。''' pposing_postprocess_timer = Timer() for dir_index in range(self.start, self.datalen): Flag_posing_postprocess, posing_detect_resutls = self.PostProcess_Q.get( ) if Flag_posing_postprocess == False: continue else: pposing_postprocess_timer.tic() keypoints_all, sub_imgs = posing_detect_resutls target_regions = [] sub_imgs_out = [] Negative_num = 0 small_target_num = 0 Positive_num = 0 if self.save_dir: vis_dir_positive = os.path.join( self.save_dir, '{:0>6d}'.format(int(self.dir_list[dir_index])), 'Alphapose_positive') makedir_v1(vis_dir_positive) vis_dir_negative = os.path.join( self.save_dir, '{:0>6d}'.format(int(self.dir_list[dir_index])), 'Alphapose_negative') makedir_v1(vis_dir_negative) vis_dir_small_target = os.path.join( self.save_dir, '{:0>6d}'.format(int(self.dir_list[dir_index])), 'Alphapose_small_target') makedir_v1(vis_dir_small_target) target_dir = os.path.join( self.save_dir, '{:0>6d}'.format(int(self.dir_list[dir_index])), 'Target') makedir_v1(target_dir) for k_index in range(len(keypoints_all)): # 对每一张关节点图做逐一处理 origin_img = sub_imgs[k_index] height, width, _ = origin_img.shape keypoints = keypoints_all[k_index] img_name = '{}.jpg'.format(k_index) # 这个判断标准和get_box的标准不一样。 # 用来判断是否背向的 l_x_max = max(keypoints[5 * 3], keypoints[11 * 3]) r_x_min = min(keypoints[6 * 3], keypoints[12 * 3]) t_y_max = max(keypoints[5 * 3 + 1], keypoints[6 * 3 + 1]) b_y_min = min(keypoints[11 * 3 + 1], keypoints[12 * 3 + 1]) if l_x_max < r_x_min and t_y_max < b_y_min: '初步判断球员是否背向' [xmin_old, xmax_old], [xmin, xmax, ymin, ymax] = self.get_box(keypoints, height, width, ratio=0.1, expand_w_min=10) # 计算上半身体长度 body_length = ymax - ymin if body_length < 20: # 130 和 60 应该来自 opt small_target_num += 1 if self.save_dir: cv2.imwrite( os.path.join(vis_dir_small_target, img_name), origin_img) continue # 计算肩宽、胯宽 Shoulder_width = keypoints[6 * 3] - keypoints[5 * 3] Crotch_width = keypoints[12 * 3] - keypoints[11 * 3] aspect_ratio = (max(Shoulder_width, Crotch_width)) / ( body_length) # 计算比例 if aspect_ratio >= self.aspect_ratio: # 如果这个比例合适,则送入号码检测 # 各个条件都满足需求了,则可以保存起来,放入号码检测的列表中 this_sub_img = origin_img[ymin:ymax, xmin:xmax] if this_sub_img.size == 0: continue cv2.imwrite(os.path.join(target_dir, img_name), this_sub_img, [cv2.IMWRITE_JPEG_QUALITY, 100]) # sub_imgs_out.append(origin_img) # target_regions.append([xmin, xmax, ymin, ymax]) Positive_num += 1 # 复合条件的 +1 if self.save_dir: vis_img = np.copy(origin_img) cv2.rectangle(vis_img, (xmin_old, ymin), (xmax_old, ymax), color=(255, 0, 0), thickness=1) cv2.rectangle(vis_img, (xmin, ymin), (xmax, ymax), color=(0, 255, 0), thickness=1) cv2.imwrite( os.path.join(vis_dir_positive, img_name), vis_img) else: Negative_num += 1 if self.save_dir: cv2.imwrite( os.path.join(vis_dir_negative, img_name), origin_img) print('3 :posing_postprocess : {} '.format(dir_index), 'Positive_num, Negative_num,small_target_num', Positive_num, Negative_num, small_target_num) def heats_to_maps(self, hm_data, cropped_boxes): # 将 heatmap 转化成 keypoints 数组 pred 
= hm_data.cpu().data.numpy() assert pred.ndim == 4 keypoints_all = [] for hms_index in range(hm_data.size(0)): pose_coord, pose_score = heatmap_to_coord_simple( pred[hms_index], cropped_boxes[hms_index]) keypoints_single = [] for n in range(pose_score.shape[0]): keypoints_single.append(float(pose_coord[n, 0])) keypoints_single.append(float(pose_coord[n, 1])) keypoints_single.append(float(pose_score[n])) keypoints_all.append(keypoints_single) return keypoints_all def get_box(self, keypoints, img_height, img_width, ratio=0.1, expand_w_min=10): '''这个get box 是用来获取球员的背部区域的''' xmin = min(keypoints[5 * 3], keypoints[11 * 3]) xmax = max(keypoints[6 * 3], keypoints[12 * 3]) ymin = min(keypoints[5 * 3 + 1], keypoints[6 * 3 + 1]) ymax = max(keypoints[11 * 3 + 1], keypoints[12 * 3 + 1]) return [int(round(xmin)), int(round(xmax))], self.expand_bbox(xmin, xmax, ymin, ymax, img_width, img_height, ratio, expand_w_min) def expand_bbox(self, left, right, top, bottom, img_width, img_height, ratio=0.1, expand_w_min=10): ''' 以一定的ratio向左右外扩。 不向上向下扩展了。 ''' width = right - left height = bottom - top # expand ratio expand_w_min = max(ratio * width, expand_w_min) # 最小外扩 expand_w_min new_left = np.clip(left - expand_w_min, 0, img_width) new_right = np.clip(right + expand_w_min, 0, img_width) return [int(new_left), int(new_right), int(top), int(bottom)] def imgs_sorted_by_ReID(self, imgs_tracking, imgs_detection, action_index): '''通过ReID模型来筛选与目标特征相符的图片''' sub_imgs = [] # 把追踪序列和目标人物进行对比,剔除后得到追踪序列的平均ReID特征值 if len(imgs_tracking) == 0: # 如果追踪序列长度为0的话,那就没什么好处理的了,直接返回 空 就行。 return sub_imgs else: imgs_tracking_index, distmat_tracking, output_feature = imgs_sorted_by_ReID( self.ReID, self.ReIDCfg, imgs_tracking, distance_threshold=self.distance_threshold, feat_norm='yes', version=0, batch_size=self.ReID_BatchSize) for P_index in imgs_tracking_index: sub_imgs.append(imgs_tracking[P_index]) if len(imgs_detection) > 0: # 把追踪序列的平均ReID特征值和坐标转换序列对比,进行第二次筛选 imgs_detection_index, distmat_detection, _ = imgs_sorted_by_ReID( self.ReID, self.ReIDCfg, imgs_detection, distance_threshold=self.distance_threshold, feat_norm='yes', version=2, input_features=output_feature, batch_size=self.ReID_BatchSize) for P_index_detection in imgs_detection_index: sub_imgs.append(imgs_detection[P_index_detection]) if self.vis == True: # 将追踪序列的sub_imgs 按ReID的分类结果保存 Positive_dir = os.path.join(self.vis_path, '{}/ReID'.format(action_index)) makedir_v1(Positive_dir) Negative_dir = os.path.join( self.vis_path, '{}/ReID/Negative'.format(action_index)) for P_index, _ in enumerate(imgs_tracking): distance = distmat_tracking[0, P_index] if P_index in imgs_tracking_index: cv2.imwrite( os.path.join( Positive_dir, '{}_{:3f}.jpg'.format(P_index, distance)), imgs_tracking[P_index]) else: cv2.imwrite( os.path.join( Negative_dir, '{}_{:3f}.jpg'.format(P_index, distance)), imgs_tracking[P_index]) # 将坐标转换后序列的sub_imgs 按ReID的分类结果保存 Positive_dir_detection = os.path.join( self.vis_path, '{}/ReID/detection'.format(action_index)) makedir_v1(Positive_dir_detection) Negative_dir_detection = os.path.join( self.vis_path, '{}/ReID/detection/Negative'.format(action_index)) makedir_v1(Negative_dir_detection) for P_index_detection, _ in enumerate(imgs_detection): distance = distmat_detection[0, P_index_detection] if P_index_detection in imgs_detection_index: cv2.imwrite( os.path.join( Positive_dir_detection, '{}_{:3f}.jpg'.format(P_index_detection, distance)), imgs_detection[P_index_detection]) else: cv2.imwrite( os.path.join( Negative_dir_detection, 
'{}_{:3f}.jpg'.format(P_index_detection, distance)), imgs_detection[P_index_detection]) return sub_imgs @property def joint_pairs(self): """Joint pairs which defines the pairs of joint to be swapped when the image is flipped horizontally.""" return [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
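# --- Worked example (illustrative, for any instance `loader` of the classes above) ---
# expand_bbox() only widens the box horizontally: each side is pushed out by
# max(ratio * width, expand_w_min) and then clipped to the image border.
# With left=50, right=90 (width=40), ratio=0.1 and expand_w_min=10, the
# per-side expansion is max(4, 10) = 10, so the box becomes [40, 100, top, bottom]:
#
#     loader.expand_bbox(50, 90, 20, 120, img_width=640, img_height=360)
#     # -> [40, 100, 20, 120]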