Example #1
class CapsuleNet(nn.Module):
    def __init__(self):
        super(CapsuleNet, self).__init__()
        self.pose_capsules = CapsuleLayer(
            num_capsules=Config.get('num_keypoints'),
            num_route_node=Config.get('num_keypoints'),
            in_channels=Config.get('capsule', 'l_vec'),
            out_channels=1)
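
The fragment above assumes a module-level Config with a variadic get(). A minimal stand-in for running the fragment locally might look like this sketch (the key names match the snippet, but the values are placeholders, not torchcv defaults):

class Config:
    # Hypothetical placeholder values, for local experimentation only.
    _store = {
        'num_keypoints': 18,
        ('capsule', 'l_vec'): 16,
    }

    @staticmethod
    def get(*keys):
        # A single key is looked up directly; multiple keys form a nested entry.
        key = keys[0] if len(keys) == 1 else keys
        return Config._store[key]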
Example #2
    parser.add_argument('--stdout_level', default=None, type=str,
                        dest='logging:stdout_level', help='The logging level to print to the screen.')
    parser.add_argument('--log_file', default=None, type=str,
                        dest='logging:log_file', help='The path of the log file.')

    # ***********  Params for test or submission.  **********
    parser.add_argument('--test_img', default=None, type=str,
                        dest='test_img', help='The path of the test image.')
    parser.add_argument('--test_dir', default=None, type=str,
                        dest='test_dir', help='The directory of test images.')

    args_parser = parser.parse_args()

    configer = Configer(args_parser=args_parser)

    if configer.get('gpu') is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu_id) for gpu_id in configer.get('gpu'))

    project_dir = os.path.dirname(os.path.realpath(__file__))
    configer.add_value(['project_dir'], project_dir)

    Log.init(logfile_level=configer.get('logging', 'logfile_level'),
             stdout_level=configer.get('logging', 'stdout_level'),
             log_file=configer.get('logging', 'log_file'),
             log_format=configer.get('logging', 'log_format'),
             rewrite=configer.get('logging', 'rewrite'))

    method_selector = MethodSelector(configer)
    model = None
    if configer.get('task') == 'pose':
        model = method_selector.select_pose_method()
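
The dest values such as 'logging:stdout_level' suggest that Configer maps colon-separated dest names onto nested config entries. A sketch of that assumed convention (a guess at the mechanism, not the actual Configer implementation):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--stdout_level', default='info', type=str,
                    dest='logging:stdout_level')
args = parser.parse_args(['--stdout_level', 'debug'])

# Split each dest on ':' to build the nested section/key structure
# that configer.get('logging', 'stdout_level') would then address.
nested = {}
for dest, value in vars(args).items():
    section, key = dest.split(':')
    nested.setdefault(section, {})[key] = value

assert nested['logging']['stdout_level'] == 'debug'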
Example #3
File: main.py Project: vikasmech/torchcv
    parser.add_argument('--cudnn',
                        type=str2bool,
                        nargs='?',
                        default=True,
                        help='Use CUDNN.')

    args_parser = parser.parse_args()

    if args_parser.seed is not None:
        random.seed(args_parser.seed)
        torch.manual_seed(args_parser.seed)

    cudnn.enabled = True
    cudnn.benchmark = args_parser.cudnn

    configer = Configer(args_parser=args_parser)
    abs_data_dir = os.path.expanduser(configer.get('data', 'data_dir'))
    configer.update(['data', 'data_dir'], abs_data_dir)

    if configer.get('gpu') is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(gpu_id) for gpu_id in configer.get('gpu'))

    if configer.get('network', 'norm_type') is None:
        configer.update(['network', 'norm_type'], 'batchnorm')

    if configer.get('phase') == 'train':
        assert len(configer.get('gpu')) > 1 or 'sync' not in configer.get(
            'network', 'norm_type')

    project_dir = os.path.dirname(os.path.realpath(__file__))
    configer.add(['project_dir'], project_dir)
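
The seeding above covers only random and torch. A fuller reproducibility helper, as a sketch (the numpy and CUDA seeding are additions, not part of the snippet):

import random

import numpy as np
import torch

def seed_everything(seed):
    # Seed the common RNGs so runs are repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)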
Example #4
class OpenPoseDeploy(object):
    def __init__(self, model_path=None, gpu_list=list()):
        self.pose_net = None
        self.configer = None
        self._init_model(model_path=model_path, gpu_list=gpu_list)

    def _init_model(self, model_path, gpu_list):
        self.device = torch.device('cpu' if len(gpu_list) == 0 else 'cuda')
        model_dict = None
        if model_path is not None:
            model_dict = torch.load(model_path)
        else:
            Log.error('Model path does not exist.')
            exit(1)

        self.configer = Configer(config_dict=model_dict['config_dict'])

        if len(gpu_list) > 0:
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu_id) for gpu_id in gpu_list)

        self.pose_model_manager = PoseModelManager(self.configer)
        self.pose_net = self.pose_model_manager.multi_pose_detector()
        self.pose_net = nn.DataParallel(self.pose_net).to(self.device)
        self.pose_net.load_state_dict(model_dict['state_dict'])
        self.pose_net.eval()

    def inference(self, image_rgb):
        image_bgr = ImageHelper.rgb2bgr(image_rgb)
        paf_avg, heatmap_avg = self.__get_paf_and_heatmap(image_rgb)
        all_peaks = self.__extract_heatmap_info(heatmap_avg)
        special_k, connection_all = self.__extract_paf_info(image_rgb, paf_avg, all_peaks)
        subset, candidate = self.__get_subsets(connection_all, special_k, all_peaks)
        json_dict = self.__get_info_tree(image_bgr, subset, candidate)

        return json_dict

    def __get_info_tree(self, image_raw, subset, candidate):
        json_dict = dict()
        height, width, _ = image_raw.shape
        json_dict['image_height'] = height
        json_dict['image_width'] = width
        object_list = list()
        for n in range(len(subset)):
            if subset[n][-1] <= 1:
                continue

            object_dict = dict()
            object_dict['keypoints'] = np.zeros((self.configer.get('data', 'num_keypoints'), 3)).tolist()
            for j in range(self.configer.get('data', 'num_keypoints')):
                index = subset[n][j]
                if index == -1:
                    object_dict['keypoints'][j][0] = -1
                    object_dict['keypoints'][j][1] = -1
                    object_dict['keypoints'][j][2] = -1

                else:
                    object_dict['keypoints'][j][0] = candidate[index.astype(int)][0]
                    object_dict['keypoints'][j][1] = candidate[index.astype(int)][1]
                    object_dict['keypoints'][j][2] = 1

            object_dict['score'] = subset[n][-2]
            object_list.append(object_dict)

        json_dict['objects'] = object_list
        return json_dict

    def __get_paf_and_heatmap(self, img_raw):
        multiplier = [scale * self.configer.get('data', 'input_size')[0] / img_raw.shape[1]
                      for scale in self.configer.get('data', 'scale_search')]

        heatmap_avg = np.zeros((img_raw.shape[0], img_raw.shape[1], self.configer.get('network', 'heatmap_out')))
        paf_avg = np.zeros((img_raw.shape[0], img_raw.shape[1], self.configer.get('network', 'paf_out')))

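        # Run the network at several scales and average the resized outputs
        # to make the heatmaps and PAFs robust to person size.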
        for i, scale in enumerate(multiplier):
            img_test = cv2.resize(img_raw, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            img_test_pad, pad = PadImage(self.configer.get('network', 'stride'))(img_test)
            pad_right = pad[2]
            pad_down = pad[3]
            img_test_pad = ToTensor()(img_test_pad)
            img_test_pad = Normalize(mean=self.configer.get('trans_params', 'mean'),
                                     std=self.configer.get('trans_params', 'std'))(img_test_pad)
            with torch.no_grad():
                img_test_pad = img_test_pad.unsqueeze(0).to(self.device)
                paf_out_list, heatmap_out_list = self.pose_net(img_test_pad)

            paf_out = paf_out_list[-1]
            heatmap_out = heatmap_out_list[-1]

            # extract outputs, resize, and remove padding
            heatmap = heatmap_out.data.squeeze().cpu().numpy().transpose(1, 2, 0)
            heatmap = cv2.resize(heatmap, (0, 0), fx=self.configer.get('network', 'stride'),
                                 fy=self.configer.get('network', 'stride'), interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:img_test_pad.size(2) - pad_down, :img_test_pad.size(3) - pad_right, :]
            heatmap = cv2.resize(heatmap, (img_raw.shape[1], img_raw.shape[0]), interpolation=cv2.INTER_CUBIC)

            paf = paf_out.data.squeeze().cpu().numpy().transpose(1, 2, 0)
            paf = cv2.resize(paf, (0, 0), fx=self.configer.get('network', 'stride'),
                             fy=self.configer.get('network', 'stride'), interpolation=cv2.INTER_CUBIC)
            paf = paf[:img_test_pad.size(2) - pad_down, :img_test_pad.size(3) - pad_right, :]
            paf = cv2.resize(paf, (img_raw.shape[1], img_raw.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg = heatmap_avg + heatmap / len(multiplier)
            paf_avg = paf_avg + paf / len(multiplier)

        return paf_avg, heatmap_avg

    def __extract_heatmap_info(self, heatmap_avg):
        all_peaks = []
        peak_counter = 0

        for part in range(self.configer.get('data', 'num_keypoints')):
            map_ori = heatmap_avg[:, :, part]
            map_gau = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(map_gau.shape)
            map_left[1:, :] = map_gau[:-1, :]
            map_right = np.zeros(map_gau.shape)
            map_right[:-1, :] = map_gau[1:, :]
            map_up = np.zeros(map_gau.shape)
            map_up[:, 1:] = map_gau[:, :-1]
            map_down = np.zeros(map_gau.shape)
            map_down[:, :-1] = map_gau[:, 1:]

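            # A pixel counts as a peak if it is at least as large as its four
            # shifted neighbours and exceeds the part threshold.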
            peaks_binary = np.logical_and.reduce(
                (map_gau >= map_left, map_gau >= map_right, map_gau >= map_up,
                 map_gau >= map_down, map_gau > self.configer.get('vis', 'part_threshold')))

            peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])  # note reverse
            peaks = list(peaks)
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            ids = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (ids[i],) for i in range(len(ids))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        return all_peaks

    def __extract_paf_info(self, img_raw, paf_avg, all_peaks):
        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(self.configer.get('details', 'limb_seq'))):
            score_mid = paf_avg[:, :, [k*2, k*2+1]]
            # self.pose_visualizer.vis_paf(score_mid, img_raw, name='pa{}'.format(k))
            candA = all_peaks[self.configer.get('details', 'limb_seq')[k][0] - 1]
            candB = all_peaks[self.configer.get('details', 'limb_seq')[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            if nA != 0 and nB != 0:
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) + 1e-9
                        vec = np.divide(vec, norm)

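                        # Sample mid_num points along the segment from candA[i] to
                        # candB[j] and integrate the PAF along the limb direction.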
                        startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                       np.linspace(candA[i][1], candB[j][1], num=mid_num))
                        startend = list(startend)

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts)
                        score_with_dist_prior += min(0.5 * img_raw.shape[0] / norm - 1, 0)

                        num_positive = len(np.nonzero(score_midpts > self.configer.get('vis', 'limb_threshold'))[0])
                        criterion1 = num_positive > int(0.8 * len(score_midpts))
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if i not in connection[:, 3] and j not in connection[:, 4]:
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if len(connection) >= min(nA, nB):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        return special_k, connection_all

    def __get_subsets(self, connection_all, special_k, all_peaks):
        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, self.configer.get('data', 'num_keypoints') + 2))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

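        # Greedily grow person skeletons limb by limb along the kinematic tree,
        # merging two partial skeletons when a new limb connects them disjointly.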
        for k in self.configer.get('details', 'mini_tree'):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(self.configer.get('details', 'limb_seq')[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found:
                        row = -1 * np.ones(self.configer.get('data', 'num_keypoints') + 2)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])

        return subset, candidate
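
A hypothetical usage sketch for the class above (the checkpoint path and image file are assumptions, not from the snippet):

import cv2

deploy = OpenPoseDeploy(model_path='checkpoints/openpose.pth', gpu_list=[0])
image_bgr = cv2.imread('test.jpg')
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)  # inference() expects RGB
json_dict = deploy.inference(image_rgb)
print(json_dict['image_width'], json_dict['image_height'], len(json_dict['objects']))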
Example #5
    # ***********  Params for test or submission.  **********
    parser.add_argument('--test_img',
                        default=None,
                        type=str,
                        dest='test_img',
                        help='The path of the test image.')
    parser.add_argument('--test_dir',
                        default=None,
                        type=str,
                        dest='test_dir',
                        help='The directory of test images.')

    args_parser = parser.parse_args()

    configer = Configer(args_parser=args_parser)
    abs_data_dir = os.path.expanduser(configer.get('data', 'data_dir'))
    configer.update_value(['data', 'data_dir'], abs_data_dir)

    if configer.get('gpu') is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(gpu_id) for gpu_id in configer.get('gpu'))

    project_dir = os.path.dirname(os.path.realpath(__file__))
    configer.add_key_value(['project_dir'], project_dir)

    log_file = configer.get('logging', 'log_file')
    new_log_file = '{}_{}'.format(
        log_file, time.strftime("%Y-%m-%d_%X", time.localtime()))
    configer.update_value(['logging', 'log_file'], new_log_file)

    Log.init(logfile_level=configer.get('logging', 'logfile_level'),
             stdout_level=configer.get('logging', 'stdout_level'),
             log_file=configer.get('logging', 'log_file'),
             log_format=configer.get('logging', 'log_format'),
             rewrite=configer.get('logging', 'rewrite'))
Example #6
    parser.add_argument('--cudnn',
                        type=str2bool,
                        nargs='?',
                        default=True,
                        help='Use CUDNN.')

    args_parser = parser.parse_args()
    if args_parser.seed is not None:
        random.seed(args_parser.seed)
        torch.manual_seed(args_parser.seed)

    cudnn.enabled = True
    cudnn.benchmark = args_parser.cudnn

    configer = Configer(args_parser=args_parser)
    abs_data_dir = os.path.expanduser(configer.get('data', 'data_dir'))
    configer.update(['data', 'data_dir'], abs_data_dir)

    if configer.get('gpu') is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(gpu_id) for gpu_id in configer.get('gpu'))

    project_dir = os.path.dirname(os.path.realpath(__file__))
    configer.add(['project_dir'], project_dir)

    if configer.get('logging', 'log_to_file'):
        log_file = configer.get('logging', 'log_file')
        new_log_file = '{}_{}'.format(
            log_file, time.strftime("%Y-%m-%d_%X", time.localtime()))
        configer.update(['logging', 'log_file'], new_log_file)
    else:
        # Assumed continuation (the listing truncates here): fall back to
        # console-only logging by clearing the logfile level.
        configer.update(['logging', 'logfile_level'], None)
Example #7
class SingleShotDetectorDeploy(object):
    def __init__(self, model_path=None, gpu_id=0):
        self.model_path = model_path
        self.configer = None
        self.det_net = None
        self._init_model(model_path=model_path, gpu_id=gpu_id)

    def _init_model(self, model_path, gpu_id):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
        if model_path is not None:
            model_dict = torch.load(model_path)
            self.configer = Configer(config_dict=model_dict['config_dict'])
        else:
            Log.error('Model path does not exist.')
            exit(1)

        # The helpers below all need the configer, so they are built after loading.
        self.det_visualizer = DetVisualizer(self.configer)
        self.det_model_manager = DetModelManager(self.configer)
        self.default_boxes = PriorBoxLayer(self.configer)()
        self.det_net = self.det_model_manager.object_detector()
        self.det_net = nn.DataParallel(self.det_net).cuda()
        self.det_net.load_state_dict(model_dict['state_dict'])
        self.det_net.eval()

    def inference(self, image_rgb):
        image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
        inputs = cv2.resize(image_rgb,
                            tuple(self.configer.get('data', 'input_size')))
        inputs = ToTensor()(inputs)
        inputs = Normalize(mean=self.configer.get('trans_params', 'mean'),
                           std=self.configer.get('trans_params',
                                                 'std'))(inputs)

        with torch.no_grad():  # inference only; no gradients needed
            inputs = inputs.unsqueeze(0).cuda()
            bbox, cls = self.det_net(inputs)
        bbox = bbox.cpu().data.squeeze(0)
        cls = F.softmax(cls.cpu().squeeze(0), dim=-1).data
        boxes, lbls, scores, has_obj = self.__decode(bbox, cls)
        if has_obj:
            boxes = boxes.cpu().numpy()
            boxes = np.clip(boxes, 0, 1)
            lbls = lbls.cpu().numpy()
            scores = scores.cpu().numpy()
            img_shape = image_bgr.shape
            for i in range(len(boxes)):
                boxes[i][0] = int(boxes[i][0] * img_shape[1])
                boxes[i][2] = int(boxes[i][2] * img_shape[1])
                boxes[i][1] = int(boxes[i][1] * img_shape[0])
                boxes[i][3] = int(boxes[i][3] * img_shape[0])

            img_canvas = self.__draw_box(image_bgr, boxes, lbls, scores)

            # if is_save_txt:
            #    self.__save_txt(save_path, boxes, lbls, scores, img_size)
        else:
            # print('None obj detected!')
            img_canvas = image_bgr

        # Boxes have been scaled to pixel coordinates above.
        return img_canvas, lbls, scores, boxes, has_obj

    def __draw_box(self, img_raw, box_list, label_list, conf):
        img_canvas = img_raw.copy()

        for bbox, label, cf in zip(box_list, label_list, conf):
            if cf < self.configer.get('vis', 'conf_threshold'):
                continue

            class_name = self.configer.get('details',
                                           'name_seq')[label - 1] + str(cf)
            c = self.configer.get('details', 'color_list')[label - 1]
            cv2.rectangle(
                img_canvas,
                (max(0, int(bbox[0] - 10)), max(0, int(bbox[1] - 10))),
                (min(img_canvas.shape[1], int(bbox[2] + 10)),
                 min(img_canvas.shape[0], int(bbox[3] + 10))),
                color=c,
                thickness=3)

            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(img_canvas,
                        class_name, (int(bbox[0] + 5), int(bbox[3] - 5)),
                        font,
                        fontScale=0.5,
                        color=c,
                        thickness=2)

        return img_canvas

    def __nms(self, bboxes, scores, mode='union'):
        """Non maximum suppression.

        Args:
          bboxes(tensor): bounding boxes, sized [N,4].
          scores(tensor): bbox scores, sized [N,].
          mode(str): 'union' or 'min' (note: this snippet actually reads the
            nms mode and overlap threshold from the configer's 'nms' section).

        Returns:
          keep(tensor): selected indices.

        Ref:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/py_cpu_nms.py
        """

        x1 = bboxes[:, 0]
        y1 = bboxes[:, 1]
        x2 = bboxes[:, 2]
        y2 = bboxes[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        _, order = scores.sort(0, descending=True)

        keep = []
        while order.numel() > 0:
            i = order[0]
            keep.append(i)

            if order.numel() == 1:
                break

            xx1 = x1[order[1:]].clamp(min=x1[i])
            yy1 = y1[order[1:]].clamp(min=y1[i])
            xx2 = x2[order[1:]].clamp(max=x2[i])
            yy2 = y2[order[1:]].clamp(max=y2[i])

            w = (xx2 - xx1).clamp(min=0)
            h = (yy2 - yy1).clamp(min=0)
            inter = w * h

            if self.configer.get('nms', 'mode') == 'union':
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
            elif self.configer.get('nms', 'mode') == 'min':
                ovr = inter / areas[order[1:]].clamp(max=areas[i])
            else:
                raise TypeError('Unknown nms mode: %s.' % self.configer.get('nms', 'mode'))

            ids = (ovr <= self.configer.get(
                'nms', 'overlap_threshold')).nonzero().squeeze()
            if ids.numel() == 0:
                break

            order = order[ids + 1]

        return torch.LongTensor(keep)

    def __decode(self, loc, conf):
        """Transform predicted loc/conf back to real bbox locations and class labels.

        Args:
          loc: (tensor) predicted loc, sized [8732, 4].
          conf: (tensor) predicted conf, sized [8732, 21].

        Returns:
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,1].

        """
        has_obj = False
        variances = [0.1, 0.2]
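        # SSD-style decoding: the regression offsets are scaled by the prior-box
        # variances, then applied to the default boxes to recover center/size.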
        wh = torch.exp(loc[:, 2:] * variances[1]) * self.default_boxes[:, 2:]
        cxcy = (loc[:, :2] * variances[0] * self.default_boxes[:, 2:]
                + self.default_boxes[:, :2])
        boxes = torch.cat([cxcy - wh / 2, cxcy + wh / 2], 1)  # [8732,4]

        max_conf, labels = conf.max(1)  # [8732,1]
        ids = labels.nonzero()
        tmp = ids.cpu().numpy()

        if len(tmp) > 0:
            # print('detected %d objs' % tmp.__len__())
            ids = ids.squeeze(1)  # [#boxes,]
            has_obj = True
        else:
            print('None obj detected!')
            return 0, 0, 0, has_obj

        keep = self.__nms(boxes[ids], max_conf[ids])
        return (boxes[ids][keep], labels[ids][keep],
                max_conf[ids][keep], has_obj)
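
A hypothetical usage sketch for the class above (the checkpoint path and image file are assumptions, not from the snippet):

import cv2

detector = SingleShotDetectorDeploy(model_path='checkpoints/ssd.pth', gpu_id=0)
image_bgr = cv2.imread('test.jpg')
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)  # inference() expects RGB
canvas, labels, scores, boxes, has_obj = detector.inference(image_rgb)
if has_obj:
    cv2.imwrite('result.jpg', canvas)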