Example #1
def create_panoptic_segmentation(img,
                                 cls_boxes,
                                 cls_segms,
                                 cls_keyps,
                                 thres=0.7):
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    dataset = dummy_datasets.get_coco_dataset()

    ade_out = np.zeros(img.shape[:2], dtype="uint8")
    coco_out = np.zeros(img.shape[:2], dtype="uint8")
    inst_out = np.zeros(img.shape[:2], dtype="uint8")

    if segms is not None:
        masks = mask_util.decode(segms)
        cnt = 1
        # Paint largest to smallest so smaller instances overwrite larger ones
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
        for i in sorted_inds:
            if boxes[i, -1] < thres:  # Score too low
                continue

            mask = masks[..., i]
            mask = np.nonzero(mask)
            class_name = dataset.classes[classes[i]]
            ade_idx = ade20k_utils.category_to_idx(class_name)
            if ade_idx is not None:
                ade_out[mask] = ade_idx
            coco_out[mask] = i  # detection index in the sorted order, not the COCO class id
            inst_out[mask] = cnt  # running instance id starting at 1 (0 = background)
            cnt += 1
    out = np.stack([ade_out, coco_out, inst_out], axis=-1)
    return out
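# Every example in this collection funnels raw detector output through
# vis_utils.convert_from_cls_format, which flattens Detectron's per-class
# result lists into flat, parallel arrays. A sketch of its behavior, modeled
# on the Detectron reference implementation (detectron/utils/vis.py):
import numpy as np

def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
    """Flatten per-class detections into (boxes, segms, keyps, classes).

    cls_boxes[j] is an (N_j, 5) array of [x1, y1, x2, y2, score] for class j;
    cls_segms and cls_keyps are parallel per-class lists (or None).
    """
    box_list = [b for b in cls_boxes if len(b) > 0]
    boxes = np.concatenate(box_list) if box_list else None
    segms = [s for slist in cls_segms for s in slist] if cls_segms is not None else None
    keyps = [k for klist in cls_keyps for k in klist] if cls_keyps is not None else None
    classes = []
    for j in range(len(cls_boxes)):
        classes += [j] * len(cls_boxes[j])  # class id repeated once per detection
    return boxes, segms, keyps, classes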
Example #2
    def detect(self, im, id):
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(id):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                self.model, im, None, timers=timers)

        self.logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            self.logger.info(' | {}: {:.3f}s'.format(k, v.average_time))

        imsw = vis_utils.vis_one_image_opencv(im,
                                              cls_boxes,
                                              cls_segms,
                                              cls_keyps,
                                              self.confidence,
                                              2,
                                              show_box=True,
                                              dataset=self.dummy_coco_dataset,
                                              show_class=True)
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        bboxes = BBoxDetArray()
        bboxes.header = std_msgs.msg.Header()
        if boxes is not None:
            for i in range(len(boxes)):
                box = boxes[i][0:4]
                score = boxes[i][4]
                cls = self.dummy_coco_dataset.classes[classes[i]]
                if score >= self.confidence:
                    bbox = BBox(box[0], box[1], box[2], box[3])
                    bbox_det = BBoxDet(bbox, score, cls)
                    bboxes.bboxes.append(bbox_det)
        # Return at function level so an empty detection still yields a result
        return imsw, bboxes
Example #3
def run_model_cfg(args, im, check_blobs):
    workspace.ResetWorkspace()
    model, _ = load_model(args)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(
            model, im, None, None,
        )

    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)

    # sort the results based on score for comparison
    boxes, segms, keypoints, classes = _sort_results(
        boxes, segms, keypoints, classes)

    # write final results back to workspace
    def _ornone(res):
        return np.array(res) if res is not None else np.array([], dtype=np.float32)
    with c2_utils.NamedCudaScope(0):
        workspace.FeedBlob(core.ScopedName('result_boxes'), _ornone(boxes))
        workspace.FeedBlob(core.ScopedName('result_segms'), _ornone(segms))
        workspace.FeedBlob(core.ScopedName('result_keypoints'), _ornone(keypoints))
        workspace.FeedBlob(core.ScopedName('result_classids'), _ornone(classes))

    # get result blobs
    with c2_utils.NamedCudaScope(0):
        ret = _get_result_blobs(check_blobs)

    return ret
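# _sort_results is defined outside this snippet. A plausible sketch consistent
# with the "sort by score for comparison" comment above (the actual helper may
# differ): reorder all four parallel results by descending detection score.
import numpy as np

def _sort_results(boxes, segms, keypoints, classes):
    if boxes is None:
        return boxes, segms, keypoints, classes
    order = np.argsort(-boxes[:, 4])  # column 4 holds the score
    boxes = boxes[order]
    segms = [segms[i] for i in order] if segms is not None else None
    keypoints = [keypoints[i] for i in order] if keypoints is not None else None
    classes = [classes[i] for i in order]
    return boxes, segms, keypoints, classes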
Example #4
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    for root_dir_path_1, sub_dir_path_list_1, sub_file_path_list_1 in os.walk(
            args.im_or_folder):
        sub_dir_path_list_1 = sorted(sub_dir_path_list_1)
        for i, sub_dir_path_1 in enumerate(sub_dir_path_list_1):
            for root_dir_path_2, sub_dir_path_list_2, sub_file_path_list_2 in os.walk(
                    os.path.join(root_dir_path_1, sub_dir_path_1)):
                sub_file_path_list_2 = sorted(sub_file_path_list_2)
                out_file = open(
                    os.path.join(args.output_dir,
                                 sub_dir_path_1 + "_Det_ffasta.txt"), "w")
                for img_idx, sub_file_path_2 in enumerate(
                        sub_file_path_list_2):
                    im = cv2.imread(
                        os.path.join(root_dir_path_2, sub_file_path_2))
                    timers = defaultdict(Timer)
                    t = time.time()
                    if (img_idx + 1) % 1000 == 0:
                        sys.stdout.write(
                            "\rFinish {} images\n".format(img_idx + 1))
                        sys.stdout.flush()
                    with c2_utils.NamedCudaScope(0):
                        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                            model, im, None, timers=timers)
                        if isinstance(cls_boxes, list):
                            cls_boxes, cls_segms, cls_keyps, classes = vis_utils.convert_from_cls_format(
                                cls_boxes, cls_segms, cls_keyps)
                        if cls_boxes is None or cls_boxes.shape[0] == 0:
                            continue
                        obj_idx = 0
                        for cls_box, cls in zip(cls_boxes, classes):
                            if int(cls) != 3 and int(cls) != 6:  # keep only 'car' (3) and 'bus' (6)
                                continue
                            out_file.write("{},{},{},{},{},{},{}\n".format(
                                img_idx + 1, obj_idx + 1, cls_box[0],
                                cls_box[1], cls_box[2] - cls_box[0],
                                cls_box[3] - cls_box[1], cls_box[4]))
                            obj_idx += 1
                out_file.close()
            print("Finish {} / {} of video sequences".format(
                i + 1, len(sub_dir_path_list_1)))
        break
Example #5
def mask_non_bbox(config, frame: FrameType, cls_segms: bytes) -> FrameType:
    cls_segms = pickle.loads(cls_segms)
    _, segms, _, _ = vis_utils.convert_from_cls_format([], cls_segms, None)

    if segms is None or len(segms) == 0:
        # No instances detected: the union mask is empty, so everything is blacked out.
        return np.zeros_like(frame)

    masks = mask_util.decode(segms)
    sum_mask = np.zeros_like(frame)[..., 0]
    for mi in range(masks.shape[2]):
        sum_mask = np.logical_or(sum_mask, masks[:, :, mi])

    idx = np.nonzero(np.invert(sum_mask))
    img = frame.copy()

    img[idx[0], idx[1], :] = 0

    return img
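# With many instances, the per-mask loop above can be replaced by one
# reduction over the instance axis. A minimal sketch, assuming the same inputs
# as above: mask_util.decode(segms) returns an (H, W, N) array with one binary
# mask per instance.
sum_mask = masks.any(axis=2)  # union of all instance masks
img = frame.copy()
img[~sum_mask] = 0            # zero out every pixel outside all masks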
Example #6
def get_result_json(boxes, segms, keypoints, thresh=0.7, dataset=None):

    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return

    dataset_keypoints, _ = keypoint_utils.get_keypoints()

    if segms is not None:
        masks = mask_util.decode(segms)

    # Sort detections by descending score
    sorted_inds = np.argsort(-boxes[:, 4])

    results = {'mask_rle': segms, 'objects': []}
    for i in sorted_inds:
        score = boxes[i, -1]

        if score < thresh:
            continue

        bbox = boxes[i, :4]
        class_idx = classes[i]
        class_text = dataset.classes[class_idx]
        mask_idx = i
        mask = masks[:, :, mask_idx]
        #kps = keypoints[i]
        # OpenCV 3.x signature: returns (image, contours, hierarchy);
        # on OpenCV 4.x, findContours returns only (contours, hierarchy).
        _, contour, hier = cv2.findContours(mask.copy(), cv2.RETR_CCOMP,
                                            cv2.CHAIN_APPROX_NONE)
        contours = [c.reshape((-1, 2)).tolist() for c in contour]
        obj = {
            'box': bbox.tolist(),
            'class': class_text,
            'mask_idx': mask_idx,
            'contours': contours,
            'score': float(score)
        }
        results['objects'].append(obj)

    return results
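# results['mask_rle'] carries the COCO run-length-encoded masks exactly as
# returned by convert_from_cls_format, so a consumer can recover binary masks
# via mask_idx. A minimal sketch, assuming `res` came from the function above:
import pycocotools.mask as mask_util

masks = mask_util.decode(res['mask_rle'])      # (H, W, N) binary masks
for obj in res['objects']:
    instance_mask = masks[:, :, obj['mask_idx']]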
Example #7
def _main(video_folder, download_path, output, redownload, model_config, write_videos, outputdir):
    if os.path.exists(output):
        with open(output, 'r') as fi:
            videos_information = json.load(fi)
    else:
        videos_information = {}
    if os.path.isdir(video_folder):
        detectron = Detectron(model_config)

        for file in os.listdir(video_folder):
            print('Processing video:', file)
            if os.path.isdir(os.path.join(video_folder,file)) and (file!='a5arrD39XjY.mp4') and (file not in videos_information):
                video_dimensions=None
                video_boxes=[]
                video_segments=[]
                video_information=[]
                frame_count=0
                diff = 0
                for filename in sorted(os.listdir(os.path.join(video_folder,file))):
                    if filename.endswith('jpg'):
                        found=False
                        class_names = []
                        #print(os.path.join(video_folder, file,filename))
                        frame=cv2.imread(os.path.join(video_folder, file,filename))
                        if video_dimensions is None:
                            video_dimensions = frame.shape[1], frame.shape[0]

                        frame_boxes, frame_segments = detectron.infer_image(frame)
                        frame_information = []
                        print('After infer:', type(frame_boxes), type(frame_segments))
                        if isinstance(frame_boxes, list):
                            frame_boxes, frame_segments, _, classes = vis_utils.convert_from_cls_format(frame_boxes,frame_segments, None)
                        print('After convert:', type(frame_boxes), type(frame_segments), classes)


                        if frame_boxes is not None and frame_boxes.shape[0] != 0:
                            video_area = video_dimensions[0] * video_dimensions[1]
                            box_areas = (frame_boxes[:, 2] - frame_boxes[:, 0]) * (frame_boxes[:, 3] - frame_boxes[:, 1])

                            sorted_inds = np.argsort(-box_areas)
                            print(box_areas, sorted_inds)

                            for i in sorted_inds:
                                try:
                                    class_name = detectron.get_class_name(classes[i])
                                    if class_name != '__background__':
                                        class_names.append(class_name)
                                except IndexError as e:
                                    log.error("Cannot get_class_name: %s", e)
                                    log.debug("sorted_inds: %s", sorted_inds)
                                    log.debug("box_areas: %s", box_areas)
                                    log.debug("frame_boxes: %s", frame_boxes)
                                    log.debug("frame_segments: %s", frame_segments)
                                    continue  # class_name is invalid for this index; skip the detection

                                score = float(frame_boxes[i, -1])
                                if score >= THRESHOLD and class_name != '__background__':
                                    found=True
                                    log.debug("Frame %s: found class '%s' with score '%s'", frame_count, class_name, score)

                                    segment_area = int(mask_utils.area(frame_segments[i]))
                                    frame_information.append({
                                        'label': class_name,
                                        'total_area': segment_area,
                                        'percentage': float(segment_area) / float(video_area),
                                        'score': score,
                                        'bbox': frame_boxes[i, :4].astype(np.int).tolist()
                                    })

                                    frame = detectron.vis_one_image_opencv(im=frame, boxes=frame_boxes[i], segms=frame_segments[i], class_str=class_name)

                        else:
                            log.debug("Found nothing in frame %s", frame_count)
                        if found:
                            img = cv2.resize(frame, video_dimensions, cv2.INTER_NEAREST)
                            if not os.path.exists(os.path.join(outputdir, file)):
                                os.makedirs(os.path.join(outputdir, file), 0o755)
                            cv2.imwrite(os.path.join(outputdir, file, filename), img)
                            with open(os.path.join(outputdir, file, (filename.split('.')[0]+'.json')), 'w') as fo:
                                json.dump(frame_information, fo, indent=2)

                        video_information.append(frame_information)
                        frame_count+=1

                print('Video', file, 'processed. Results:', video_dimensions, video_information)
                log.info("Write intermediate file")
                videos_information[file] = video_information
                with open(output, 'w') as fo:
                    json.dump(videos_information, fo, indent=2)
    #videos = _download_videos(video_text_file=video_text_file, download_path=download_path, redownload=redownload)
    # NOTE: `videos` is only defined if the _download_videos call above is re-enabled.
    for idx, (video_id, video_file) in enumerate(videos, start=1):
        log.info("Video %s/%s: Start inference for video_id '%s' on file '%s'", idx, len(videos), video_id, video_file)
Example #8
def _main(video_text_file, download_path, output, redownload, model_config,
          write_videos, save_res):
    outputname = video_text_file.split('/')[-1] + '_' + model_config.split(
        '/')[-1].split('.')[0]
    print(model_config)
    detectron = Detectron(model_config)
    if not os.path.exists(outputname + '_out/'):
        os.makedirs(outputname + '_out/', 0o755)
    if not os.path.exists(outputname + '_out/cropped/'):
        os.makedirs(outputname + '_out/cropped/', 0o755)
    imgs_information = {}

    #if os.path.exists(output):
    #    with open(output, 'r') as fi:
    #        videos_information = json.load(fi)
    imgs_kp = {}
    dfkps = pd.DataFrame(columns=[
        'Bild', 'Achse', 'Person', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
    ])
    for img in os.listdir(video_text_file):
        dataset_keypoints, _ = get_keypoints()
        kp_lines = kp_connections(dataset_keypoints)
        cmap = plt.get_cmap('rainbow')
        colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
        log.info(img)
        frame = cv2.imread(video_text_file + "/" + img, 1)
        print('Image:', img)
        frame_boxes, frame_segments, frame_keypoints, frame_dimensions = detectron.infer_img(
            frame)
        frame_area = frame_dimensions[0] * frame_dimensions[1]
        frame_information = []
        log.debug("frame_boxes: %s", frame_boxes)
        log.debug("frame_segments: %s", frame_segments)
        log.debug("frame_keypoints: %s", frame_keypoints)

        if isinstance(frame_boxes, list):
            frame_boxes, frame_segments, frame_keypoints, _ = vis_utils.convert_from_cls_format(
                frame_boxes, frame_segments, frame_keypoints)

        if frame_boxes is not None and frame_boxes.shape[0] != 0:
            print('lenframeboxes', len(frame_boxes))
            sorted_inds = range(len(frame_boxes))

            # for i in sorted_inds:
            #     try:
            #         class_name = detectron.get_class_name(i)
            #     except IndexError as e:
            #         log.error("Cannot get_class_name: %s", e)
            #         log.debug("sorted_inds: %s", sorted_inds)
            #         log.debug("frame_boxes: %s", frame_boxes)
            #         log.debug("frame_segments: %s", frame_segments)
            #         log.debug("score: %s", score)
            #
            #     score = float(frame_boxes[i, -1])
            #
            #     if score < THRESHOLD or class_name == '__background__':
            #         continue
            #     log.debug("Frame %s: found class '%s' with score '%s'", img, class_name, score)
            #
            #     frame_information.append({
            #         'label': class_name,
            #         'total_area': str(frame_keypoints),
            #         'percentage': 0,
            #         'score': score,
            #         'bbox': frame_boxes[i, :4].astype(np.int).tolist()
            #     })
            #     #print('writing image to pickle:', img, frame_segments)
            #imgs_kp[str(img)]= {'kp':frame_keypoints, 'score':score,'bbox':frame_boxes,'segm':frame_segments}
            keypoints = frame_keypoints
            for i in range(len(frame_keypoints)):

                if (keypoints is not None and
                        len(keypoints) > i) and (frame_boxes[i, -1] > thresh):
                    print('Boundingbox', str(img), frame_boxes[i, 1], ':',
                          frame_boxes[i, 3], ',', frame_boxes[i, 0], ':',
                          frame_boxes[i, 2], frame.shape[0], frame.shape[1])
                    framecropped = frame[
                        int(frame_boxes[i, 1]):int(frame_boxes[i, 3]),
                        int(frame_boxes[i, 0]):int(frame_boxes[i, 2])]
                    #cv2.imwrite('messigray.png', framecropped)
                    framex = int(frame_boxes[i, 0])
                    framey = int(frame_boxes[i, 1])
                    fig = plt.figure(frameon=False)
                    fig.set_size_inches(
                        float(framecropped.shape[1]) / dpi,
                        float(framecropped.shape[0]) / dpi)
                    print('framecropped:', framecropped.shape,
                          float(framecropped.shape[1]) / dpi,
                          float(framecropped.shape[0]) / dpi)
                    ax = plt.Axes(fig, [0., 0., 1., 1.])
                    ax.axis('off')
                    fig.add_axes(ax)
                    #fig.savefig('test_' + str(img) + '.jpg', dpi=dpi)
                    im2 = cv2.cvtColor(framecropped, cv2.COLOR_BGR2RGB)
                    ax.imshow(im2)
                    kps = keypoints[i]
                    #print('Kps x', kps[0])
                    #print('Kps y', kps[1])
                    # print('kps', kps)
                    ind = len(dfkps)
                    dfkps.set_value(ind, 'Achse', 'x')
                    dfkps.set_value(ind, 'Bild', img.split('/')[0])
                    dfkps.set_value(ind, 'Person', i)
                    dfkps.set_value(ind + 1, 'Achse', 'y')
                    dfkps.set_value(ind + 1, 'Bild', img.split('/')[0])
                    dfkps.set_value(ind + 1, 'Person', i)
                    #fig.savefig('test_' + str(img) + '.jpg', dpi=dpi)
                    for z in range(len(kps[1])):
                        if kps[2][z] > 2:  # keypoint confidence threshold
                            dfkps.set_value(ind, z, kps[0][z])
                            dfkps.set_value(ind + 1, z, kps[1][z])
                    if save_res == 'True':
                        #print(dfkps)
                        plt.autoscale(False)
                        for l in range(len(kp_lines)):
                            i1 = kp_lines[l][0]
                            i2 = kp_lines[l][1]
                            if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                                x = [kps[0, i1] - framex, kps[0, i2] - framex]
                                y = [kps[1, i1] - framey, kps[1, i2] - framey]
                                print('keypoint', l, ':', x, y)
                                line = plt.plot(x, y)
                                plt.setp(line,
                                         color=colors[l],
                                         linewidth=3.0,
                                         alpha=0.7)
                            if kps[2, i1] > kp_thresh:
                                plt.plot(kps[0, i1] - framex,
                                         kps[1, i1] - framey,
                                         '.',
                                         color=colors[l],
                                         markersize=3.0,
                                         alpha=0.7)

                            if kps[2, i2] > kp_thresh:
                                plt.plot(kps[0, i2] - framex,
                                         kps[1, i2] - framey,
                                         '.',
                                         color=colors[l],
                                         markersize=3.0,
                                         alpha=0.7)
                            #fig.savefig('test_'+str(img)+'_'+str(l)+'.jpg', dpi=dpi)

                            # add mid shoulder / mid hip for better visualization
                        mid_shoulder = (
                            kps[:2,
                                dataset_keypoints.index('right_shoulder')] +
                            kps[:2,
                                dataset_keypoints.index('left_shoulder')]
                        ) / 2.0

                        sc_mid_shoulder = np.minimum(
                            kps[2,
                                dataset_keypoints.index('right_shoulder')],
                            kps[2, dataset_keypoints.index('left_shoulder')])
                        mid_hip = (
                            kps[:2, dataset_keypoints.index('right_hip')] +
                            kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
                        sc_mid_hip = np.minimum(
                            kps[2, dataset_keypoints.index('right_hip')],
                            kps[2, dataset_keypoints.index('left_hip')])
                        if (sc_mid_shoulder > kp_thresh
                                and kps[2, dataset_keypoints.index('nose')] >
                                kp_thresh):
                            x = [
                                mid_shoulder[0] - framex,
                                kps[0, dataset_keypoints.index('nose')] -
                                framex
                            ]
                            y = [
                                mid_shoulder[1] - framey,
                                kps[1, dataset_keypoints.index('nose')] -
                                framey
                            ]
                            line = plt.plot(x, y)
                            print(x, y)
                            plt.setp(line,
                                     color=colors[len(kp_lines)],
                                     linewidth=3.0,
                                     alpha=0.7)
                        if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                            x = [mid_shoulder[0] - framex, mid_hip[0] - framex]
                            y = [mid_shoulder[1] - framey, mid_hip[1] - framey]
                            print(x, y)
                            line = plt.plot(x, y)
                            plt.setp(line,
                                     color=colors[len(kp_lines) + 1],
                                     linewidth=3.0,
                                     alpha=0.7)

                        output_name = os.path.basename(img) + str(
                            i) + '_kp.jpg'
                        size = fig.get_size_inches() * fig.dpi
                        print(size)
                        fig.savefig(os.path.join(outputname + '_out',
                                                 '{}'.format(output_name)),
                                    dpi=dpi)
                        plt.close('all')
                    #dfkps.to_csv(outputname + '_dfkps.csv', sep='\t')
                    dfkps.to_pickle(outputname + '_dfkps.p')

        else:
            log.debug("Found nothing in picture %s", img)

        imgs_information[img] = frame_information

        log.info("Write intermediate file")
        with open(output, 'w') as fo:
            json.dump(imgs_information, fo, indent=2)
        pickle.dump(imgs_kp, open(outputname + "_kps.p", "wb"))
Example #9
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--detectron-dir', required=True)
    parser.add_argument('--output-dir', required=True)
    parser.add_argument(
        '--recursive',
        action='store_true',
        help="""Search recursively in detectron-dir for pickle files. Any
                subdirectory containing a pickle file is considered to be
                a sequence.""")
    parser.add_argument(
        '--detectron-dataset', default='coco', choices=['coco'])

    args = parser.parse_args()
    detectron_dir = Path(args.detectron_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    setup_logging(str(output_dir / (Path(__file__).stem + '.log')))

    input_sequences = set(x.parent for x in detectron_dir.rglob('*.pickle'))
    logging.info('Input sequences: %s', pformat([str(x) for x in input_sequences]))

    label_list = get_classes(args.detectron_dataset)
    for sequence_path in tqdm(input_sequences):
        output_path = output_dir / (
            sequence_path.relative_to(detectron_dir)).with_suffix('.txt')

        detections = {}
        for pickle_path in sequence_path.glob('*.pickle'):
            timestamp = int(pickle_path.stem)
            with open(pickle_path, 'rb') as f:
                data = pickle.load(f)
            boxes, _, _, labels = convert_from_cls_format(
                data['boxes'], data['segmentations'], data['keypoints'])
            detections[timestamp] = [
                Detection(box[:4], box[4], label, timestamp)
                for box, label in zip(boxes, labels)
                if label_list[label] == 'person'
            ]

        output_str = ''
        for frame, frame_detections in sorted(
                detections.items(), key=lambda x: x[0]):
            for detection in frame_detections:
                x0, y0, x1, y1 = detection.box
                width = x1 - x0
                height = y1 - y0
                output_str += DETECTION_FORMAT.format(
                    frame=frame,
                    track_id=-1,
                    left=x0,
                    top=y0,
                    width=width,
                    height=height,
                    conf=detection.score,
                    x=-1,
                    y=-1,
                    z=-1)
        with open(output_path, 'w') as f:
            f.write(output_str)
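# DETECTION_FORMAT is defined outside this snippet. Given the fields passed
# above, it is presumably a MOT-challenge-style CSV row; an assumed template:
DETECTION_FORMAT = ('{frame},{track_id},{left},{top},{width},{height},'
                    '{conf},{x},{y},{z}\n')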
Example #10
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    with open(args.cfg, 'r') as f:
        num_classes = yaml.load(f, Loader=yaml.SafeLoader)['MODEL']['NUM_CLASSES']

    with open('/detectron/tools/infer_list.yaml', 'r') as f:
        infer_list = yaml.load(f, Loader=yaml.SafeLoader)['thresholds']
    if num_classes == len(dummy_coco_dataset.classes) == len(infer_list) + 1:
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(
                os.path.join(args.output_dir,
                             "output_%s.csv" % dummy_coco_dataset.classes[1]),
                "w") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(
                ["pic_name", "xmin", "ymin", "xmax", "ymax", "class", "score"])

            for i, im_name in enumerate(im_list):
                logger.info('No.{} pic ({}) starts prediction'.format(i, im_name))
                im = cv2.imread(im_name)
                timers = defaultdict(Timer)
                t = time.time()
                with c2_utils.NamedCudaScope(0):
                    cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                        model, im, None, timers=timers)
                logger.info('Inference time: {:.3f}s'.format(time.time() - t))
                for k, v in timers.items():
                    logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
                if i == 0:
                    logger.info(
                        ' \ Note: inference on the first image will be slower than the '
                        'rest (caches and auto-tuning need to warm up)')

                if isinstance(cls_boxes, list):
                    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
                        cls_boxes, cls_segms, cls_keyps)

                if boxes is None or boxes.shape[0] == 0 or max(
                        boxes[:, 4]) < min(infer_list.values()):
                    continue

                areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] -
                                                       boxes[:, 1])
                sorted_inds = np.argsort(-areas)
                for j in sorted_inds:
                    bbox = boxes[j, :4]
                    score = boxes[j, -1]
                    class_text = dummy_coco_dataset.classes[classes[
                        j]] if dummy_coco_dataset is not None else 'id{:d}'.format(
                            classes[j])
                    current_thresh = infer_list[class_text]
                    if score < current_thresh:
                        continue
                    writer.writerow([
                        os.path.basename(im_name).split('.')[0], bbox[0],
                        bbox[1], bbox[2], bbox[3], class_text, score
                    ])
    else:
        logger.error('Wrong number of categories: NUM_CLASSES, dataset classes, and infer_list thresholds do not match')
Example #11
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    # Sort frames by filename (lexicographic, so zero-padded frame names sort in order)
    im_list = sorted(im_list)
    json_output = []

    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')

        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)

        if boxes is None:
            boxes = []
        else:
            boxes = boxes.tolist()

        json_output.append({'frame': i, 'boxes': boxes})

        # Skip writing PDF output
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.7,
        #     kp_thresh=2
        # )

    with open(args.output_dir + '/boxes.json', 'w') as outfile:
        json.dump(json_output, outfile, indent=4)
Example #12
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))

        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        h, w = im.shape[:2]

        # 3x3 grid of half-size, 50%-overlapping crops; x indexes rows, y indexes columns
        subimages = []
        for x in range(3):
            for y in range(3):
                x1, y1 = x * h // 4, y * w // 4
                x2, y2 = (x + 2) * h // 4, (y + 2) * w // 4
                subimages.append([x1, y1, x2, y2])

        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes = []
            cls_segms = []
            cls_keyps = []
            for index in range(len(subimages)):
                x1, y1, x2, y2 = subimages[index]
                _cls_boxes, _cls_segms, _cls_keyps = infer_engine.im_detect_all(
                    model, im[x1:x2, y1:y2, :], None, timers=timers)
                cls_boxes.append(_cls_boxes)
                cls_segms.append(_cls_segms)
                cls_keyps.append(_cls_keyps)

        logger.info('Inference time: {:.3f}s'.format(time.time() - t))

        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')

        t = time.time()

        out_name_yml = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.yml'))

        _mask = np.zeros((h, w), dtype=np.uint8)
        all_boxes = np.zeros((0, 5))
        all_classes = []
        all_segs = []
        for index in range(len(subimages)):
            x1, y1, x2, y2 = subimages[index]

            boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
                cls_boxes[index], cls_segms[index], cls_keyps[index])
            if boxes is None:
                continue

            for j in range(boxes.shape[0]):  # j: avoid shadowing the image index i
                _tmp = np.zeros((h, w), dtype=np.uint8, order='F')
                __segm = mask_util.decode(segms[j])
                _tmp[x1:x2, y1:y2] = __segm
                __tmp = mask_util.encode(_tmp)
                all_segs.append(__tmp)

                _mask[x1:x2, y1:y2] += __segm
                all_classes.append(classes[j])

            boxes[:, 0] += y1
            boxes[:, 2] += y1
            boxes[:, 1] += x1
            boxes[:, 3] += x1

            all_boxes = np.vstack((all_boxes, boxes))

        _mask = _mask.astype(bool).astype(int)
        out_name_mask = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.png'))
        cv2.imwrite(out_name_mask, _mask * 255)

        with open(out_name_yml, 'w') as outfile:
            yaml.dump(
                {
                    'boxes': all_boxes,
                    'segms': all_segs,
                    'classes': all_classes
                },
                outfile,
                default_flow_style=False)

        logger.info('Saving time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
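# pycocotools' encode expects a Fortran-ordered uint8 array, which is why
# _tmp above is allocated with order='F'. A minimal round-trip sketch:
import numpy as np
import pycocotools.mask as mask_util

m = np.zeros((480, 640), dtype=np.uint8)
m[100:200, 150:300] = 1
rle = mask_util.encode(np.asfortranarray(m))  # encode requires Fortran order
assert (mask_util.decode(rle) == m).all()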
Example #13
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset.startswith("gangjin"):
        dataset = datasets.get_gangjin_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    img_ids = []
    rects = []

    for i in range(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is not None:
            for j in range(len(boxes)):
                # print(boxes[j][-1])
                if float(boxes[j][-1]) < 0.99:  # score threshold
                    continue
                xmin = float(boxes[j, 0])
                xmax = float(boxes[j, 2])
                ymin = float(boxes[j, 1])
                ymax = float(boxes[j, 3])
                img_ids.append(os.path.basename(imglist[i]))
                rects.append(
                    str(xmin) + " " + str(ymin) + " " + str(xmax) + " " +
                    str(ymax))

        # im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dataset,
        #     box_alpha=0.3,
        #     show_class=False,
        #     thresh=0.99,
        #     kp_thresh=2,
        #     ext="jpg"
        # )

    result_dict = {"ID": img_ids, "rects": rects}
    import pandas as pd
    result = pd.DataFrame.from_dict(result_dict)

    os.makedirs('submit', exist_ok=True)  # ensure the output directory exists
    result.to_csv('submit/submit1.csv', header=None, index=False)
Example #14
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    prefix_path = args.output_dir

    os.makedirs(prefix_path, exist_ok=True)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)

    writen_results = []

    # validate
    demo_im = cv2.imread(imglist[0])
    print(np.shape(demo_im))
    h, w, _ = np.shape(demo_im)
    #print(h)
    #print(args.height)
    assert h == args.height
    assert w == args.width
    h_scale = 720 / args.height
    w_scale = 1280 / args.width

    for i in tqdm(range(num_images)):
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))

        # boxs = [[x1, y1, x2, y2, cls], ...]
        boxes, _, _, classes = convert_from_cls_format(cls_boxes, cls_segms,
                                                       cls_keyps)

        if boxes is None:
            continue
        # scale
        boxes[:, 0] = boxes[:, 0] * w_scale
        boxes[:, 2] = boxes[:, 2] * w_scale
        boxes[:, 1] = boxes[:, 1] * h_scale
        boxes[:, 3] = boxes[:, 3] * h_scale

        if classes == []:
            continue

        for instance_idx, cls_idx in enumerate(classes):
            cls_name = dataset.classes[cls_idx]
            if cls_name == 'motorcycle':
                cls_name = 'motor'
            elif cls_name == 'stop sign':
                cls_name = 'traffic sign'
            elif cls_name == 'bicycle':
                cls_name = 'bike'
            if cls_name not in bdd_category:
                continue

            writen_results.append({
                "name": imglist[i].split('/')[-1],
                "timestamp": 1000,
                "category": cls_name,
                "bbox": boxes[instance_idx, :4],
                "score": boxes[instance_idx, -1]
            })

    with open(os.path.join(prefix_path, args.name + '.json'),
              'w') as outputfile:
        json.dump(writen_results, outputfile, cls=MyEncoder)
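# MyEncoder is defined outside this snippet. Since the records above store
# numpy slices and scalars ("bbox", "score"), it is presumably a JSONEncoder
# that makes numpy types serializable; a plausible sketch:
import json
import numpy as np

class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(MyEncoder, self).default(obj)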
Example #15
    def infere(self,
               image,
               imageId=None,
               thresh=0.5,
               debug=False,
               pixel_size=(0.3, 0.3)):
        assert image is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(self.maskRCNN,
                                                        image,
                                                        timers=timers)

        if isinstance(cls_boxes, list):
            boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
                cls_boxes, cls_segms, cls_keyps
            )  #self.convert_from_cls_format(cls_boxes, cls_segms, cls_keyps)

        if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
            return []

        if segms is not None:
            masks = mask_util.decode(segms)

        result = []

        #print("Number of boxes=",len(boxes))
        #print("Boxes",boxes)
        #print("Boxes Shape=",boxes.shape)

        #masks = masks.T
        #print("Mask Shape=",masks.shape)

        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
        #sorted_inds = areas

        #for i in range(len(sorted_inds))
        for i in sorted_inds:
            bbox = boxes[i, :4]
            score = boxes[i, -1]
            if score < thresh:
                continue

            class_id = i
            label = self.dataset.classes[classes[i]]
            area, area_m2, perimeter, cv2Poly = self.getMaskInfo(
                masks[:, :, i], image.shape,
                pixel_size=pixel_size)  #masks[i].T, kernel=(10, 10)

            if cv2Poly is None:
                #print("Warning: Object is recognized, but contour is empty!")
                continue

            verts = cv2Poly[:, 0, :]
            r = {
                'classId': class_id,
                'score': score,
                'label': label,
                'area': area,
                'area_m2': area_m2,
                'perimetr': perimeter,
                'verts': verts
            }

            if imageId is not None:
                r['objId'] = "{}_obj-{}".format(imageId, i)

            result.append(r)

        return result
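# getMaskInfo is defined elsewhere on this class. A hypothetical sketch under
# the signature used above (kernel size and polygon epsilon are assumptions):
# close small holes, take the largest contour, and report pixel/metric stats.
import cv2
import numpy as np

def getMaskInfo(self, mask, img_shape, pixel_size=(0.3, 0.3), kernel=(10, 10)):
    k = np.ones(kernel, dtype=np.uint8)
    closed = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, k)
    # OpenCV 4.x signature; OpenCV 3.x returns a leading image value as well.
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return 0, 0.0, 0, None
    contour = max(contours, key=cv2.contourArea)     # largest component only
    area = cv2.contourArea(contour)                  # area in pixels
    perimeter = cv2.arcLength(contour, True)         # perimeter in pixels
    area_m2 = area * pixel_size[0] * pixel_size[1]   # pixels -> square meters
    poly = cv2.approxPolyDP(contour, 0.01 * perimeter, True)  # (N, 1, 2) verts
    return area, area_m2, perimeter, poly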
Example #16
def track(frame_paths,
          frame_detections,
          tracking_params,
          progress=True,
          filter_label=None):
    """
    Args:
        frame_paths (list): List of paths to frames.
        frame_detections (list): List of detection results for each frame. Each
            element is a dictionary containing keys 'boxes', 'masks', and
            'keypoints'.
        tracking_params (dict): See add_tracking_arguments() for details.
        label_list (list): List of label names.
        filter_label (str):
    """
    all_tracks = []

    # detections[i] contains list of Detections for frame_paths[i]
    detections = []
    dummy_image = None
    for t, (frame_path,
            image_results) in enumerate(zip(frame_paths, frame_detections)):
        if tracking_params['appearance_feature'] == 'none':
            if dummy_image is None:
                dummy_image = np.zeros_like(cv2.imread(str(frame_path)))
            image = dummy_image
        else:
            image = cv2.imread(str(frame_path))[:, :, ::-1]  # BGR -> RGB
        boxes, masks, _, labels = vis.convert_from_cls_format(
            image_results['boxes'], image_results['segmentations'],
            image_results['keypoints'])

        if boxes is None:
            logging.info('No predictions for image %s', frame_path.name)
            boxes, masks = [], []

        if ('features' in image_results
                and tracking_params['appearance_feature'] == 'mask'):
            # features are of shape (num_segments, d)
            features = list(image_results['features'])
        else:
            features = [None for _ in masks]

        current_detections = []
        for box, mask, label, feature in zip(boxes, masks, labels, features):
            low_scoring = box[4] <= tracking_params['score_continue_min']
            label_mismatch = filter_label is not None and label != filter_label
            if low_scoring or label_mismatch:
                continue
            current_detections.append(
                Detection(box[:4], box[4], label, t, image, mask, feature))
        detections.append(current_detections)

    if tracking_params['bidirectional']:
        directions = ['forward', 'backward']
    else:
        directions = ['forward']
    for direction in directions:
        forward = direction == 'forward'
        timestamps = range(len(frame_paths))
        if not forward:
            timestamps = reversed(timestamps)

        for t in tqdm(timestamps,
                      disable=not progress,
                      total=len(frame_paths),
                      desc='track ' + direction):
            frame_path = frame_paths[t]
            frame_detections = [d for d in detections[t] if not d.tracked()]
            if not frame_detections:
                continue

            for track in all_tracks:
                for detection in track.detections:
                    detection.clear_cache()

            if forward:
                active_tracks = [
                    track for track in all_tracks
                    if ((t - track.detections[-1].timestamp
                         ) <= tracking_params['frames_skip_max'])
                ]
            else:
                active_tracks = []
                for track in all_tracks:
                    # Keep tracks that
                    # (1) end at or after t,
                    # (2) start before t + frames_skip_max
                    # (3) don't have a detection at time t
                    ends_after = track.detections[-1].timestamp > t
                    starts_before_skip = (track.detections[0].timestamp < t +
                                          tracking_params['frames_skip_max'])
                    needs_detection = t not in track.detections_by_time
                    if ends_after and starts_before_skip and needs_detection:
                        active_tracks.append(track)
                if not active_tracks:
                    continue

            matched_tracks = match_detections(active_tracks,
                                              frame_detections,
                                              tracking_params,
                                              backward=not forward)

            # Tracks that were assigned a detection in this frame.
            for detection in frame_detections:
                track = matched_tracks[detection.id]
                if track is None:
                    if detection.score > tracking_params['score_init_min']:
                        track = Track()
                        all_tracks.append(track)
                    else:
                        continue
                track.add_detection(detection)

    for index, t in enumerate(all_tracks):
        t.friendly_id = index
    return all_tracks
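# A hypothetical driver for track(), assuming per-frame pickles shaped like
# the other examples in this collection (paths and parameter values are
# assumptions; see add_tracking_arguments() for the real parameter set):
import pickle
from pathlib import Path

frame_paths = sorted(Path('frames').glob('*.jpg'))
frame_detections = []
for p in sorted(Path('detections').glob('*.pickle')):
    with open(p, 'rb') as f:
        frame_detections.append(pickle.load(f))  # 'boxes'/'segmentations'/'keypoints'

tracking_params = {
    'appearance_feature': 'none',
    'score_continue_min': 0.5,
    'score_init_min': 0.9,
    'frames_skip_max': 10,
    'bidirectional': False,
}
tracks = track(frame_paths, frame_detections, tracking_params,
               filter_label='person')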
Example #17
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--davis-data-root',
        required=True,
        help='Directory containing a subdirectory for each sequence')
    parser.add_argument(
        '--davis-eval-root',
        required=True,
        help='DAVIS evaluation code root directory.')
    parser.add_argument(
        '--detectron-root',
        required=True,
        help=('Contains subdirectory for each sequence, containing pickle '
              'files of detectron outputs for each frame.'))
    parser.add_argument(
        '--set', choices=['train', 'val'], default='val')
    parser.add_argument('--output-dir', required=True)

    args = parser.parse_args()

    davis_eval_root = pathlib.Path(args.davis_eval_root)
    davis_root = pathlib.Path(args.davis_data_root)
    detectron_root = pathlib.Path(args.detectron_root)
    output_root = pathlib.Path(args.output_dir)
    output_root.mkdir(exist_ok=True)

    db_info_path = davis_eval_root / 'data' / 'db_info.yaml'
    assert db_info_path.exists(), (
        'DB info file (%s) does not exist' % db_info_path)
    with open(db_info_path, 'r') as f:
        davis_info = yaml.load(f, Loader=yaml.SafeLoader)

    palette_path = davis_eval_root / 'data' / 'palette.txt'
    assert palette_path.exists(), (
        'DAVIS palette file (%s) does not exist' % palette_path)
    palette = np.loadtxt(palette_path, dtype=np.uint8).reshape(-1, 3)

    for sequence_info in davis_info['sequences']:
        if sequence_info['set'] != args.set:
            continue
        if sequence_info['year'] != 2016:
            continue
        sequence = sequence_info['name']
        output_sequence = output_root / sequence
        output_sequence.mkdir(exist_ok=True)
        detectron_sequence = detectron_root / sequence
        davis_sequence = davis_root / sequence
        assert detectron_sequence.exists(), (
            'Detectron path %s does not exist' % detectron_sequence)
        assert davis_sequence.exists(), (
            'DAVIS path %s does not exist' % davis_sequence)
        detectron_frames = sorted(
            detectron_sequence.glob('*.pickle'), key=lambda x: int(x.stem))
        davis_frames = sorted(
            davis_sequence.glob('*.png'), key=lambda x: int(x.stem))
        num_frames = sequence_info['num_frames']

        assert len(detectron_frames) == len(davis_frames) == num_frames
        for frame, detectron_path, davis_path in zip(
                range(num_frames), detectron_frames, davis_frames):
            output_frame = output_sequence / ('%05d.png' % frame)
            groundtruth = np.array(Image.open(davis_path))

            # 255 is used as an 'unknown' object in 2017, but it is used as
            # the single object in 2016. Re-map it to '1', so that the rest
            # of the code works as with 2017, pretending we have a single
            # known object.
            groundtruth[groundtruth == 255] = 1
            object_ids = get_unique_objects(groundtruth)
            groundtruth_masks = [groundtruth == i for i in object_ids]
            with open(detectron_path, 'rb') as f:
                data = pickle.load(f)
            predicted_boxes, predicted_masks, _, _ = (
                vis.convert_from_cls_format(
                    data['boxes'], data['segmentations'], data['keypoints']))
            if not predicted_masks:
                final_mask = np.zeros(
                    groundtruth_masks[0].shape, dtype=np.uint8)
                output = Image.fromarray(final_mask)
                output.putpalette(palette.ravel())
                output.save(output_frame, format='png')
                continue
            # Can threshold scores if necessary
            # scores = predicted_boxes[:, -1]
            predicted_masks = mask_util.decode(predicted_masks)
            predicted_masks = [
                predicted_masks[:, :, i]
                for i in range(predicted_masks.shape[2])
            ]
            mask_iou = mask_util.iou(
                [mask_util.encode(p) for p in predicted_masks],
                [mask_util.encode(np.asfortranarray(g.astype('uint8')))
                 for g in groundtruth_masks],
                pyiscrowd=np.zeros(len(groundtruth_masks)))

            mask_distance = 1 - mask_iou

            # Array of length num_matches, containing tuples of
            # (predicted_mask_index, groundtruth_mask_index)
            assignments = list(zip(*linear_sum_assignment(mask_distance)))
            final_mask = np.zeros(groundtruth_masks[0].shape, dtype=np.uint8)

            for predicted_mask_index, groundtruth_id in assignments:
                predicted_mask = predicted_masks[predicted_mask_index]
                final_mask[predicted_mask != 0] = object_ids[groundtruth_id]

            output = Image.fromarray(final_mask)
            output.putpalette(palette.ravel())
            output.save(output_frame, format='png')
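
The matching step above is the core of the conversion: predicted masks are assigned to groundtruth objects by minimizing the total 1 - IoU cost with the Hungarian algorithm. Below is a minimal, self-contained sketch of just that step; the helper name match_masks and the assumption of non-empty mask lists are illustrative, not part of the script.

import numpy as np
import pycocotools.mask as mask_util
from scipy.optimize import linear_sum_assignment

def match_masks(predicted_masks, groundtruth_masks):
    """Return (predicted_index, groundtruth_index) pairs minimizing 1 - IoU.

    Assumes both inputs are non-empty lists of binary H x W masks.
    """
    predicted_rles = [
        mask_util.encode(np.asfortranarray(m.astype('uint8')))
        for m in predicted_masks]
    groundtruth_rles = [
        mask_util.encode(np.asfortranarray(m.astype('uint8')))
        for m in groundtruth_masks]
    # iou has shape (num_predicted, num_groundtruth).
    iou = mask_util.iou(predicted_rles, groundtruth_rles,
                        pyiscrowd=np.zeros(len(groundtruth_rles)))
    return list(zip(*linear_sum_assignment(1 - iou)))

Each returned pair can then be painted into the output label image exactly as in the loop above.
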
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert bool(args.image_dir) ^ bool(args.images), \
        'Exactly one of --image_dir and --images should be specified.'

    prefix_path = args.output_dir + '_results'

    if os.path.exists(prefix_path):
        shutil.rmtree(prefix_path)
    os.mkdir(prefix_path)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    # No need to load ImageNet pretrained weights for inference.
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)

    for i in tqdm(range(num_images)):
        im = cv2.imread(imglist[i])
        assert im is not None, 'Failed to read image: %s' % imglist[i]

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))

        boxes, _, _, classes = convert_from_cls_format(cls_boxes, cls_segms,
                                                       cls_keyps)
        if not classes:
            continue
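        # PASCAL VOC results use [score, x1, y1, x2, y2] with 1-indexed
        # coordinates, while Detectron boxes are [x1, y1, x2, y2, score] with
        # 0-indexed coordinates, hence the column shuffle and +1 below.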
        voc_boxes = np.zeros_like(boxes)
        voc_boxes[:, 0:1] = boxes[:, 4:5]
        voc_boxes[:, 1:3] = boxes[:, 0:2] + 1
        voc_boxes[:, 3:5] = boxes[:, 2:4] + 1

        for instance_idx, cls_idx in enumerate(classes):
            cls_name = dataset.classes[cls_idx]
            if cls_name == 'motorcycle':
                cls_name = 'motorbike'
            with open(os.path.join(prefix_path, cls_name + ".txt"), "a") as f:
                f.write("%s " % im_name)
                for item in voc_boxes[instance_idx]:
                    f.write("%f " % item)
                f.write("\n")
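
Each <class>.txt written above thus holds one detection per line: the image name followed by score, x1, y1, x2, y2 (1-indexed). A small sketch of a reader for such files; read_voc_detections is a hypothetical helper, not part of the script.

import numpy as np

def read_voc_detections(path):
    """Parse lines of 'im_name score x1 y1 x2 y2' into names and an array."""
    names, rows = [], []
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            names.append(parts[0])
            rows.append([float(v) for v in parts[1:6]])
    return names, np.array(rows)
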
def process_sequences(fbms_dir, detectron_dir, output_dir, save_images,
                      detectron_threshold, iou_threshold):
    assert fbms_dir.exists()
    assert detectron_dir.exists()

    output_dir.mkdir(exist_ok=True)

    sequence_paths = list(fbms_dir.iterdir())
    sequence_names = [x.name for x in sequence_paths]

    output_paths = []
    for sequence, sequence_path in zip(tqdm(sequence_names), sequence_paths):
        groundtruth_path = sequence_path / 'GroundTruth'
        assert groundtruth_path.exists(), ('Path %s does not exist' %
                                           groundtruth_path)
        groundtruth = FbmsGroundtruth(groundtruth_path)
        frame_number_to_labels = groundtruth.frame_labels()
        detectron_paths = (detectron_dir / sequence).glob('*.pickle')
        detectron_paths = sorted(detectron_paths,
                                 key=lambda x: get_framenumber(x.stem))

        final_masks = {}
        for frame_number, frame_labels in frame_number_to_labels.items():
            groundtruth_masks = []
            for color, region_id in groundtruth.color_to_region.items():
                if region_id == 0:
                    # ppms have full white (255 * 256**2 + 255 * 256 + 255)
                    # as background, pgms have 0 as background.
                    assert color == 16777215 or color == 0
                    continue  # Ignore background
                groundtruth_masks.append(frame_labels == region_id)

            # The last frame may not have predictions; fall back to the
            # previous frame.
            if frame_number == len(detectron_paths):
                logging.info(
                    ("No predictions found for frame %s in sequence %s, "
                     "using previous frame (%s) instead.") %
                    (frame_number, sequence, frame_number - 1))
                frame_number -= 1
            detectron_path = detectron_paths[frame_number]
            assert detectron_path.exists(), ('%s does not exist.' %
                                             detectron_path)

            with open(detectron_path, 'rb') as f:
                data = pickle.load(f)

            predicted_boxes, predicted_masks, _, _ = (
                vis.convert_from_cls_format(data['boxes'],
                                            data['segmentations'],
                                            data['keypoints']))
            if predicted_boxes is None:
                final_masks[frame_number] = np.zeros(
                    groundtruth_masks[0].shape, dtype=np.uint8)
                continue

            scores = predicted_boxes[:, -1]
            if np.all(scores <= detectron_threshold):
                logging.info('No masks above threshold (%s); using most '
                             'confident mask only.', detectron_threshold)
                predicted_masks = [predicted_masks[np.argmax(scores)]]
            else:
                predicted_masks = [
                    m for i, m in enumerate(predicted_masks)
                    if scores[i] > detectron_threshold
                ]
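            # mask_util.decode stacks the RLE masks into an (H, W, N) array;
            # split it back into a list of 2-D binary masks.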
            predicted_masks = mask_util.decode(predicted_masks)
            predicted_masks = [
                predicted_masks[:, :, i]
                for i in range(predicted_masks.shape[2])
            ]

            mask_iou = mask_util.iou(
                [mask_util.encode(p) for p in predicted_masks], [
                    mask_util.encode(np.asfortranarray(g.astype('uint8')))
                    for g in groundtruth_masks
                ],
                pyiscrowd=np.zeros(len(groundtruth_masks)))

            assert isinstance(mask_iou, np.ndarray), (
                'Unknown type of mask_iou (%s) for sequence %s, frame %s' %
                (type(mask_iou), sequence, frame_number))

            filtered_prediction_indices = np.where(
                np.any(mask_iou >= iou_threshold, axis=1))[0]
            mask_iou = mask_iou[filtered_prediction_indices]
            filtered_predictions = [
                predicted_masks[x] for x in filtered_prediction_indices
            ]
            mask_distance = 1 - mask_iou

            # Array of length num_matches, containing tuples of
            # (predicted_mask_index, groundtruth_mask_index)
            assignments = list(zip(*linear_sum_assignment(mask_distance)))
            final_mask = np.zeros(groundtruth_masks[0].shape, dtype=np.uint8)

            for predicted_mask_index, groundtruth_id in assignments:
                predicted_mask = filtered_predictions[predicted_mask_index]
                final_mask[predicted_mask != 0] = groundtruth_id + 1
            final_masks[frame_number] = final_mask

        tracks = masks_to_tracks(final_masks)
        tracks_str = get_tracks_text(tracks, groundtruth.num_frames)
        output_file = output_dir / (sequence + '.dat')
        output_paths.append(output_file)
        with open(output_file, 'w') as f:
            f.write(tracks_str)

        if save_images:
            output_images = output_dir / (sequence + '-images')
            output_images.mkdir(exist_ok=True)
            colors = colormap()  # list(range(0, 251, 25))
            full_output = None
            for frame_number, frame_labels in frame_number_to_labels.items():
                groundtruth_output = np.zeros(
                    (frame_labels.shape[0], frame_labels.shape[1], 3))
                predictions_output = np.zeros(
                    (frame_labels.shape[0], frame_labels.shape[1], 3))
                for color, region_id in groundtruth.color_to_region.items():
                    if region_id == 0:
                        color = (255, 255, 255)
                    else:
                        color = colors[region_id - 1]
                    groundtruth_output[frame_labels == region_id] = color
                    predictions_output[final_masks[frame_number] ==
                                       region_id] = (color)
                concatenated = np.hstack(
                    (groundtruth_output, predictions_output))
                if full_output is None:
                    full_output = concatenated
                else:
                    full_output = np.vstack((full_output, concatenated))
                # imsave(output_images / ('groundtruth-%s.jpg' % frame_number),
                #        groundtruth_output)
                # imsave(output_images / ('predictions-%s.jpg' % frame_number),
                #        predictions_output)
            imsave(output_images / 'final.jpg', full_output)

    with open(output_dir / 'all_tracks.txt', 'w') as f:
        for output_path in output_paths:
            f.write(str(output_path.resolve()) + '\n')

    with open(output_dir / 'all_shots.txt', 'w') as f:
        f.write(str(len(sequence_paths)) + '\n')
        for sequence, sequence_path in zip(sequence_names, sequence_paths):
            groundtruth_path = sequence_path / 'GroundTruth' / (sequence +
                                                                'Def.dat')
            f.write(str(groundtruth_path.resolve()) + '\n')
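
A hypothetical invocation of process_sequences; every path and threshold below is a placeholder, not a value from the script.

from pathlib import Path

process_sequences(
    fbms_dir=Path('FBMS/TestSet'),
    detectron_dir=Path('detectron/fbms'),
    output_dir=Path('fbms_tracks'),
    save_images=False,
    detectron_threshold=0.7,
    iou_threshold=0.1)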