Example #1
    def run_network(self, img: np.ndarray):
        """ Runs an image through the network + postprocessing and returns the masks and bboxes

        Args:
            img (np.ndarray): The image to process.

        Returns:
            tuple: (masks, boxes) for the top-k detections above the score threshold.
        """
        # Run image through the network
        img_gpu = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(img_gpu.unsqueeze(0))
        preds = self.net(batch)
        h, w, _ = img.shape

        # Post process
        t = postprocess(preds,
                        w,
                        h,
                        visualize_lincomb=True,
                        crop_masks=True,
                        score_threshold=0.15)
        top_k = 15  # Further restrict the number of predictions to parse
        idx = t[1].argsort(0, descending=True)[:top_k]
        masks = t[3][idx].cpu().numpy()
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        return masks, boxes
Example #2
    def segmentation(self, img):
        """Segment `img` with the network and return a binary mask with the
        largest detected contour filled (all zeros if nothing is detected)."""
        with torch.no_grad():
            h, w, _ = img.shape
            frame = torch.from_numpy(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
            classes, scores, boxes, masks = yolact_module.prep_display(
                5,
                preds,
                frame,
                0.5,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                fps_str='')

            if not len(masks):
                return np.zeros((img.shape[0], img.shape[1]))
            mask = masks[0]
            mask = mask.cpu().numpy()

            h, w = mask.shape
            filled_mask = np.zeros([h, w])

            contours = yolact_module.cv_contours(np.uint8(mask))
            C = len(contours)
            contours = sorted(contours, key=lambda x: cv2.contourArea(x))
            cv2.drawContours(filled_mask, contours, C - 1, 255,
                             thickness=-1)  #Fills the biggest contour

            return filled_mask
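
A minimal usage sketch for the two methods above, assuming the enclosing class (called `Segmenter` here purely for illustration) builds `self.net` as an eval-mode YOLACT model in its constructor; the class name and constructor arguments are hypothetical.

import cv2
import numpy as np

segmenter = Segmenter(weights='yolact_base.pth')   # hypothetical wrapper class
img = cv2.imread('input.png')                      # HxWxC BGR uint8 image

masks, boxes = segmenter.run_network(img)          # top-k masks and boxes
filled = segmenter.segmentation(img)               # largest detected contour, filled
cv2.imwrite('largest_object.png', filled.astype(np.uint8))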
Example #3
    def evalimage(self):
        cv_img = self.get_data()
        #frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        frame = torch.from_numpy(cv_img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)

        self.prep_display(preds, frame, None, None, undo_transform=False)
Example #4
def evalimage(net: Yolact, path: str, save_path: str = None):
    frame = torch.from_numpy(cv2.imread(path)).cuda().float()
    batch = FastBaseTransform()(frame.unsqueeze(0))
    preds = net(batch)

    img_numpy = prep_display(preds, frame, None, None, net.cfg, undo_transform=False)

    if save_path is None:
        # No save path given: convert BGR -> RGB and display instead of writing
        img_numpy = img_numpy[:, :, (2, 1, 0)]
        plt.imshow(img_numpy)
        plt.title(path)
        plt.show()
    else:
        cv2.imwrite(save_path, img_numpy)
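
A short sketch of how this function is typically driven, following the model setup shown in the later examples (the weight path is a placeholder):

import torch
from yolact import Yolact

net = Yolact()
net.load_weights('weights/yolact_base_54_800000.pth')   # placeholder weight path
net.eval()
net = net.cuda()

with torch.no_grad():
    evalimage(net, 'input.jpg', save_path='output.png')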
Example #5
    def prediction(self, img):
        self.net.detect.use_cross_class_nms = True
        cfg.mask_proto_debug = False

        with torch.no_grad():
            frame = torch.Tensor(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            time_start = time.perf_counter()
            preds = self.net(batch)
            h, w, _ = img.shape
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=self.threshold)
            torch.cuda.synchronize()
            masks = t[3][:self.top_k]
            classes, scores, bboxes = [
                x[:self.top_k].cpu().numpy() for x in t[:3]
            ]
            time_elapsed = (time.perf_counter() - time_start)
            num_dets_to_consider = min(self.top_k, classes.shape[0])

            for i in range(num_dets_to_consider):
                if scores[i] < self.threshold:
                    num_dets_to_consider = i
                    break

            if num_dets_to_consider >= 1:
                masks = masks[:num_dets_to_consider, :, :, None]

            masks_msg = masks.cpu().detach().numpy()
            masks_msg = masks_msg.astype(np.uint8)
            scores_msg = np.zeros(num_dets_to_consider)
            class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
            bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)
            for i in reversed(range(num_dets_to_consider)):
                scores_msg[i] = scores[i]
                class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                bboxes_msg[i] = bboxes[i]
                print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

            os.system('cls' if os.name == 'nt' else 'clear')
            print("%.2f" % (1 / time_elapsed), "hz")

            if self.display_cv:
                self.display(frame, masks, classes, scores, bboxes,
                             num_dets_to_consider)

            return masks_msg, class_label_msg, scores_msg, bboxes_msg
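
A minimal usage sketch; the wrapper class (called `Predictor` here) and its constructor are assumptions, since the method itself only relies on `self.net`, `self.threshold`, `self.top_k` and `self.display_cv` being set up elsewhere.

import cv2

predictor = Predictor(threshold=0.3, top_k=10, display_cv=False)   # hypothetical wrapper
masks, labels, scores, bboxes = predictor.prediction(cv2.imread('input.png'))
for label, score in zip(labels, scores):
    print(label.decode(), '%.2f' % score)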
Example #6
File: eval.py | Project: unleashlive/yolact
def evalimage(net: Yolact, path: str, save_path: str = None):
    frame = torch.from_numpy(cv2.imread(path)).cuda().float()
    batch = FastBaseTransform(with_cuda=net.with_cuda)(frame.unsqueeze(0))
    preds = net(batch)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

    if save_path is None:
        img_numpy = img_numpy[:, :, (2, 1, 0)]

    if save_path is None:
        plt.imshow(img_numpy)
        plt.title(path)
        plt.show()
    else:
        cv2.imwrite(save_path, img_numpy)
Example #7
    def predict(self, image_array: np.ndarray):
        """
        :param image_array: image as a numpy array
        Format of returned boxes is [x1, y1, x2, y2]; individual centers are tuples.
        :return: entire mask, individual masks, boxes, centers
        """
        with torch.no_grad():
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            frame = torch.from_numpy(image_array).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            net = Yolact()
            net.detect.use_fast_nms = True
            net.detect.use_cross_class_nms = True
            net.load_weights(self.weights)
            net.eval()
            preds = net(batch)
            mask_entire, boxes = prep_display(preds,
                                              frame,
                                              None,
                                              None,
                                              undo_transform=False)
            if len(boxes) < 1:
                return mask_entire, None, None, None
            mask_dict = {}
            centers_dict = {}
            boxes_dict = {}
            for index in range(len(boxes)):
                current_box = boxes[index]
                mask_dict[index] = mask_entire[current_box[1]:current_box[3],
                                               current_box[0]:current_box[2]]
                center = Segment.find_center(mask_dict[index])
                if not center:
                    adjusted_center = None
                else:
                    adjusted_center = Segment.adjust_centers(
                        center, current_box)
                centers_dict[index] = adjusted_center
                boxes_dict[index] = current_box

            return mask_entire, mask_dict, centers_dict, boxes_dict
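
A minimal usage sketch; the `Segment` constructor shown here is an assumption (the method only needs `self.weights` to point at a trained YOLACT weight file).

import cv2

segment = Segment(weights='yolact_base.pth')   # hypothetical constructor signature
image = cv2.imread('scene.png')
mask_entire, masks, centers, boxes = segment.predict(image)
if masks is not None:
    for idx, box in boxes.items():
        print(idx, box, centers[idx])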
Example #8
    def evalimage(self, path: str = None, save_path: str = None):
        cv_img = self.get_data()
        #frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        frame = torch.from_numpy(cv_img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)

        img_numpy = self.prep_display(preds, frame, None, None, undo_transform=False)
        
        if save_path is None:
            img_numpy = img_numpy[:, :, (2, 1, 0)]

        if save_path is None:
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(img_numpy, "bgr8"))
        except CvBridgeError as e:
            print(e)
Example #9
    def evalvideo(self, net: Yolact, path: str):
        # If the path is a digit, parse it as a webcam index
        is_webcam = path.isdigit()

        if is_webcam:
            vid = cv2.VideoCapture(int(path))
        else:
            vid = cv2.VideoCapture(path)

        if not vid.isOpened():
            print('Could not open video "%s"' % path)
            exit(-1)

        net = CustomDataParallel(net).cuda()
        transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
        frame_times = MovingAverage(100)
        fps = 0
        # Target seconds per displayed frame, derived from the source video's FPS
        frame_time_target = 1 / vid.get(cv2.CAP_PROP_FPS)
        running = True

        def cleanup_and_exit():
            print()
            pool.terminate()
            vid.release()
            cv2.destroyAllWindows()
            exit()

        def get_next_frame(vid):
            return [vid.read()[1] for _ in range(args.video_multiframe)]

        def transform_frame(frames):
            with torch.no_grad():
                frames = [
                    torch.from_numpy(frame).cuda().float() for frame in frames
                ]
                return frames, transform(torch.stack(frames, 0))

        def eval_network(inp):
            with torch.no_grad():
                frames, imgs = inp
                return frames, net(imgs)

        def prep_frame(inp):
            with torch.no_grad():
                frame, preds = inp
                return self.prep_display(preds,
                                         frame,
                                         None,
                                         None,
                                         undo_transform=False,
                                         class_color=True)

        frame_buffer = Queue()
        video_fps = 0

        # All this timing code is to keep playback as close to the target framerate as possible
        def play_video():
            nonlocal frame_buffer, running, video_fps, is_webcam

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    cv2.imshow(path, frame_buffer.get())
                    last_time = next_time

                #self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame_buffer.get(), "bgr8"))

                if cv2.waitKey(1) == 27:  # Press Escape to close
                    running = False

                buffer_size = frame_buffer.qsize()
                if buffer_size < args.video_multiframe:
                    frame_time_stabilizer += stabilizer_step
                elif buffer_size > args.video_multiframe:
                    frame_time_stabilizer -= stabilizer_step
                    if frame_time_stabilizer < 0:
                        frame_time_stabilizer = 0

                new_target = frame_time_stabilizer if is_webcam else max(
                    frame_time_stabilizer, frame_time_target)

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                # This gives more accurate timing than if sleeping the whole amount at once
                while time.time() < target_time:
                    time.sleep(0.001)

        extract_frame = lambda x, i: (
            x[0][i] if x[1][i] is None else x[0][i].to(x[1][i]['box'].device),
            [x[1][i]])

        # Prime the network on the first frame because I do some thread unsafe things otherwise
        print('Initializing model... ', end='')
        eval_network(transform_frame(get_next_frame(vid)))
        print('Done.')

        # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
        sequence = [prep_frame, eval_network, transform_frame]
        pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
        pool.apply_async(play_video)

        active_frames = []

        print()

        while vid.isOpened() and running:
            start_time = time.time()

            # Start loading the next frames from the disk
            next_frames = pool.apply_async(get_next_frame, args=(vid, ))

            # For each frame in our active processing queue, dispatch a job
            # for that frame using the current function in the sequence
            for frame in active_frames:
                frame['value'] = pool.apply_async(sequence[frame['idx']],
                                                  args=(frame['value'], ))

            # For each frame whose job was the last in the sequence (i.e. for all final outputs)
            for frame in active_frames:
                if frame['idx'] == 0:
                    frame_buffer.put(frame['value'].get())

            # Remove the finished frames from the processing queue
            active_frames = [x for x in active_frames if x['idx'] > 0]

            # Finish evaluating every frame in the processing queue and advance their position in the sequence
            for frame in list(reversed(active_frames)):
                frame['value'] = frame['value'].get()
                frame['idx'] -= 1

                if frame['idx'] == 0:
                    # Split this up into individual threads for prep_frame since it doesn't support batch size
                    active_frames += [{
                        'value': extract_frame(frame['value'], i),
                        'idx': 0
                    } for i in range(1, args.video_multiframe)]
                    frame['value'] = extract_frame(frame['value'], 0)

            # Finish loading in the next frames and add them to the processing queue
            active_frames.append({
                'value': next_frames.get(),
                'idx': len(sequence) - 1
            })

            # Compute FPS
            frame_times.add(time.time() - start_time)
            fps = args.video_multiframe / frame_times.get_avg()

            print(
                '\rProcessing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d    '
                % (fps, video_fps, frame_buffer.qsize()),
                end='')

        cleanup_and_exit()
Example #10
File: eval.py | Project: unleashlive/yolact
def evalvideo(net: Yolact, path: str, out_path: str = None):
    # If the path is a digit, parse it as a webcam index
    is_webcam = path.isdigit()

    # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
    cudnn.benchmark = True

    if is_webcam:
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)

    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

    if is_webcam:
        num_frames = float('inf')
    else:
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    net = CustomDataParallel(net).cuda()
    transform = torch.nn.DataParallel(
        FastBaseTransform(with_cuda=net.with_cuda))

    if net.with_cuda:
        transform = transform.cuda()

    frame_times = MovingAverage(100)
    fps = 0
    frame_time_target = 1 / target_fps
    running = True
    fps_str = ''
    vid_done = False
    frames_displayed = 0

    if out_path is not None:
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                              target_fps, (frame_width, frame_height))

    def cleanup_and_exit():
        print()
        pool.terminate()
        vid.release()
        if out_path is not None:
            out.release()
        cv2.destroyAllWindows()
        exit()

    def get_next_frame(vid):
        frames = []
        for idx in range(args.video_multiframe):
            frame = vid.read()[1]
            if frame is None:
                return frames
            frames.append(frame)
        return frames

    def transform_frame(frames):
        with torch.no_grad():
            frames = [
                torch.from_numpy(frame).cuda().float() for frame in frames
            ]
            return frames, transform(torch.stack(frames, 0))

    def eval_network(inp):
        with torch.no_grad():
            frames, imgs = inp
            num_extra = 0
            while imgs.size(0) < args.video_multiframe:
                imgs = torch.cat([imgs, imgs[0].unsqueeze(0)], dim=0)
                num_extra += 1
            out = net(imgs)
            if num_extra > 0:
                out = out[:-num_extra]
            return frames, out

    def prep_frame(inp, fps_str):
        with torch.no_grad():
            frame, preds = inp
            return prep_display(preds,
                                frame,
                                None,
                                None,
                                undo_transform=False,
                                class_color=True,
                                fps_str=fps_str)

    frame_buffer = Queue()
    video_fps = 0

    # All this timing code is to keep playback/writing as close to the target framerate as possible
    def play_video():
        try:
            nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005
            progress_bar = ProgressBar(30, num_frames)

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    if out_path is None:
                        cv2.imshow(path, frame_buffer.get())
                    else:
                        out.write(frame_buffer.get())
                    frames_displayed += 1
                    last_time = next_time

                    if out_path is not None:
                        if video_frame_times.get_avg() == 0:
                            fps = 0
                        else:
                            fps = 1 / video_frame_times.get_avg()
                        progress = frames_displayed / num_frames * 100
                        progress_bar.set_val(frames_displayed)

                        print(
                            '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                            % (repr(progress_bar), frames_displayed,
                               num_frames, progress, fps),
                            end='')

                # This is split because you don't want savevideo to require cv2 display functionality (see #197)
                if out_path is None and cv2.waitKey(1) == 27:
                    # Press Escape to close
                    running = False
                if not (frames_displayed < num_frames):
                    running = False

                if not vid_done:
                    buffer_size = frame_buffer.qsize()
                    if buffer_size < args.video_multiframe:
                        frame_time_stabilizer += stabilizer_step
                    elif buffer_size > args.video_multiframe:
                        frame_time_stabilizer -= stabilizer_step
                        if frame_time_stabilizer < 0:
                            frame_time_stabilizer = 0

                    new_target = frame_time_stabilizer if is_webcam else max(
                        frame_time_stabilizer, frame_time_target)
                else:
                    new_target = frame_time_target

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                if out_path is None or args.emulate_playback:
                    # This gives more accurate timing than if sleeping the whole amount at once
                    while time.time() < target_time:
                        time.sleep(0.001)
                else:
                    # Let's not starve the main thread, now
                    time.sleep(0.001)
        except:
            # See issue #197 for why this is necessary
            import traceback
            traceback.print_exc()

    extract_frame = lambda x, i: (
        x[0][i] if x[1][i]['detection'] is None
        else x[0][i].to(x[1][i]['detection']['box'].device),
        [x[1][i]])

    # Prime the network on the first frame because I do some thread unsafe things otherwise
    print('Initializing model... ', end='')
    first_batch = eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

    # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
    pool.apply_async(play_video)
    active_frames = [{
        'value': extract_frame(first_batch, i),
        'idx': 0
    } for i in range(len(first_batch[0]))]

    print()
    if out_path is None: print('Press Escape to close.')
    try:
        while vid.isOpened() and running:
            # Hard limit on frames in buffer so we don't run out of memory >.>
            while frame_buffer.qsize() > 100:
                time.sleep(0.001)

            start_time = time.time()

            # Start loading the next frames from the disk
            if not vid_done:
                next_frames = pool.apply_async(get_next_frame, args=(vid, ))
            else:
                next_frames = None

            if not (vid_done and len(active_frames) == 0):
                # For each frame in our active processing queue, dispatch a job
                # for that frame using the current function in the sequence
                for frame in active_frames:
                    _args = [frame['value']]
                    if frame['idx'] == 0:
                        _args.append(fps_str)
                    frame['value'] = pool.apply_async(sequence[frame['idx']],
                                                      args=_args)

                # For each frame whose job was the last in the sequence (i.e. for all final outputs)
                for frame in active_frames:
                    if frame['idx'] == 0:
                        frame_buffer.put(frame['value'].get())

                # Remove the finished frames from the processing queue
                active_frames = [x for x in active_frames if x['idx'] > 0]

                # Finish evaluating every frame in the processing queue and advance their position in the sequence
                for frame in list(reversed(active_frames)):
                    frame['value'] = frame['value'].get()
                    frame['idx'] -= 1

                    if frame['idx'] == 0:
                        # Split this up into individual threads for prep_frame since it doesn't support batch size
                        active_frames += [{
                            'value':
                            extract_frame(frame['value'], i),
                            'idx':
                            0
                        } for i in range(1, len(frame['value'][0]))]
                        frame['value'] = extract_frame(frame['value'], 0)

                # Finish loading in the next frames and add them to the processing queue
                if next_frames is not None:
                    frames = next_frames.get()
                    if len(frames) == 0:
                        vid_done = True
                    else:
                        active_frames.append({
                            'value': frames,
                            'idx': len(sequence) - 1
                        })

                # Compute FPS
                frame_times.add(time.time() - start_time)
                fps = args.video_multiframe / frame_times.get_avg()
            else:
                fps = 0

            fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (
                fps, video_fps, frame_buffer.qsize())
            if not args.display_fps:
                print('\r' + fps_str + '    ', end='')

    except KeyboardInterrupt:
        print('\nStopping...')

    cleanup_and_exit()
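
A sketch of how this function might be driven, assuming the rest of eval.py is in place (in particular the `args` namespace with `video_multiframe`, `emulate_playback` and `display_fps`); the weight path is a placeholder.

from yolact import Yolact

net = Yolact()
net.load_weights('weights/yolact_base_54_800000.pth')   # placeholder weight path
net.eval()
net = net.cuda()

# Write an annotated copy of the input video; pass a digit string such as '0'
# instead of a file path to read from a webcam and display the result live.
evalvideo(net, 'input.mp4', out_path='output.mp4')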
Example #11
        color_image = np.asanyarray(color_frame.get_data())
        # r, g, b = cv2.split(color_image)
        # color_image = cv2.merge((b, g, r))

        cv2.imshow('color_image', color_image)

        aligned_depth_frame = aligned_frames.get_depth_frame()
        aligned_depth_image = np.asanyarray(aligned_depth_frame.get_data())

        # img = Image.open(folder_path + '/color_image' + str(now) + '.png')
        # depth = np.array(Image.open(folder_path + '/depth_image' + str(now) + '.png'))
        img = color_image
        depth = aligned_depth_image

        frame = torch.from_numpy(np.array(img)).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = yolact(batch)

        try:
            masks, classes, boxes, img_numpy = prep_display(
                preds, frame, None, None, undo_transform=False)
        except Exception:
            print("Yolact exception occurred!")
            continue

        # print(classes)
        # print(boxes)
        #
        # plt.imshow(img_numpy)
        # plt.title("pred")
        # plt.show()
Example #12
    def process(self, image: np.ndarray, pos: int):
        """:returns (classes, scores, boxes)

        where `boxes` is an array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.

        `classes` is the class ids of the corresponding objects.

        `scores` are the computed class scores corresponding to the detected objects.
        Roughly high score indicates strong belief that the object belongs to
        the identified class.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        _ = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            image_gpu = image / 255.0
            h, w, _ = image.shape
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            classes, scores, boxes, masks = oututils.postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=self.score_threshold)
            idx = scores.argsort(0, descending=True)[:self.top_k]
            # if self.config.eval_mask_branch:
            #     masks = masks[idx]
            classes, scores, boxes = [
                x[idx].cpu().numpy() for x in (classes, scores, boxes)
            ]
            # This is probably not required, `postprocess` uses
            # `score_thresh` already
            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < self.score_threshold:
                    num_dets_to_consider = j
                    break
            # logging.debug('Bounding boxes: %r', boxes)
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            if len(boxes) == 0:
                self.sigProcessed.emit(boxes, pos)
                return
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1) \
                             for jj in range(ii+1, dist_matrix.shape[1]) \
                              if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
                good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
                boxes = boxes[good_idx].copy()

            toc = time.perf_counter_ns()
            logging.debug('Time to process single image: %f s',
                          1e-9 * (toc - tic))
            self.sigProcessed.emit(boxes, pos)
            logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')
Example #13
def segment_yolact(frame, score_threshold, top_k, overlap_thresh, cfgfile,
                   netfile, cuda):
    """Segment objects in frame using YOLACT.

    Parameters
    ----------
    frame: numpy.ndarray
        (HxWxC) integer array with the image content.
    score_threshold: float
        Minimum score to include object, should be in `(0, 1)`.
    top_k: int
        The number of segmented objects to keep.
    overlap_thresh: float
        Treat objects whose bounding boxes overlap (intersection over union)
        more than this amount as duplicates and keep only one of them.
    cfgfile: str
        Path to YOLACT configuration file.
    netfile: str
        Path to YOLACT network weights file.
    cuda: bool
        Whether to use CUDA.
    Returns
    -------
    numpy.ndarray
        An array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.
    """
    global ynet
    global config

    if ynet is None:
        init_yolact(cfgfile, netfile, cuda)
    # Partly follows yolact eval.py
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            frame = torch.from_numpy(frame).cuda().float()
        else:
            frame = torch.from_numpy(frame).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = ynet(batch)
        h, w, _ = frame.shape
        config.rescore_bbox = True
        classes, scores, boxes, masks = oututils.postprocess(
            preds, w, h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=score_threshold)
        idx = scores.argsort(0, descending=True)[:top_k]
        # if self.config.eval_mask_branch:
        #     masks = masks[idx]
        classes, scores, boxes = [x[idx].cpu().numpy()
                                  for x in (classes, scores, boxes)]
        # This is probably not required, `postprocess` uses
        # `score_thresh` already
        # num_dets_to_consider = min(self.top_k, classes.shape[0])
        # for j in range(num_dets_to_consider):
        #     if scores[j] < self.score_threshold:
        #         num_dets_to_consider = j
        #         break
        # logging.debug('Bounding boxes: %r', boxes)
        # Convert from top-left bottom-right format to
        # top-left, width, height format
        if len(boxes) == 0:
            return np.empty(0)

        boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
        boxes = np.asanyarray(np.rint(boxes), dtype=np.int_)
        if overlap_thresh < 1:
            dist_matrix = ut.pairwise_distance(new_bboxes=boxes, bboxes=boxes,
                                               boxtype=OutlineStyle.bbox,
                                               metric=DistanceMetric.iou)
            bad_boxes = []
            for ii in range(dist_matrix.shape[0] - 1):
                for jj in range(ii + 1, dist_matrix.shape[1]):
                    if dist_matrix[ii, jj] < 1 - overlap_thresh:
                        bad_boxes.append(jj)
            boxes = np.array([boxes[ii] for ii in range(boxes.shape[0]) if
                              ii not in bad_boxes], dtype=np.int_)
        toc = time.perf_counter_ns()
        logging.debug('Time to process single image: %f s',
                      1e-9 * (toc - tic))
        return boxes
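
A minimal usage sketch; the configuration and weight file paths are placeholders whose exact format depends on `init_yolact`.

import cv2

frame = cv2.imread('frame_0001.png')
boxes = segment_yolact(frame,
                       score_threshold=0.15,
                       top_k=10,
                       overlap_thresh=0.9,
                       cfgfile='yolact_config.yaml',   # placeholder path
                       netfile='yolact_weights.pth',   # placeholder path
                       cuda=True)
# Each row is (xleft, ytop, width, height)
for x, y, w, h in boxes:
    cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
cv2.imwrite('frame_0001_boxes.png', frame)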