示例#1
0
def ocr_image_region(image: np.ndarray,
                     region: TextRegion) -> Tuple[str, float]:
    """Read the text inside one detected region of an image.

    The region's quad is deprojected into an upright patch, converted to
    grayscale and passed to the OCR helper twice: once as-is and once with
    inverted intensities. The pass whose first confidence value is strictly
    higher wins; on a tie the non-inverted pass is kept.

    Args:
        image: Source image. It is converted with ``cv.COLOR_RGB2GRAY``, so
            it is presumably RGB ordered -- TODO confirm against callers.
        region: Indexable object whose first item exposes ``vertices``,
            the corners handed to ``perspective.four_point_transform``.

    Returns:
        ``(text, confidence)`` taken from the better of the two passes.
    """
    # Deproject the quad into an axis-aligned patch.
    quad = region[0].vertices
    patch = perspective.four_point_transform(image, quad)

    # Prepare the single-channel buffer description for the OCR helper.
    gray = cv.cvtColor(patch, cv.COLOR_RGB2GRAY)
    height, width = gray.shape[0], gray.shape[1]
    full_frame = Rect(0, 0, width, height)
    bytes_per_pixel = 1
    bytes_per_line = int(bytes_per_pixel * width)

    def _recognise(pixels):
        # Pack the raw pixels the way get_blob_ocr_result is called here:
        # (bytes, width, height, bytes per pixel, bytes per line).
        blob = (pixels.tobytes(), int(width), int(height), bytes_per_pixel,
                bytes_per_line)
        return get_blob_ocr_result(blob, full_frame)

    best_text, best_conf = _recognise(gray)

    # Second pass on the inverted patch; keep whichever pass scored higher.
    inv_text, inv_conf = _recognise(cv.bitwise_not(gray))
    if inv_conf[0] > best_conf[0]:
        best_text, best_conf = inv_text, inv_conf

    return (best_text, best_conf[0])
示例#2
0
def find_rects(image: np.ndarray) -> List[np.ndarray]:
    """Detect contours in an image and merge their bounding rectangles.

    The image is converted to grayscale, blurred, edge-detected with
    ``auto_canny`` and traced with ``cv.findContours``. Contours whose area
    is 15 px or less are discarded; the rest are turned into ``Rect`` boxes
    (via ``cv.minAreaRect``) and merged with ``Rect.nms_merge``.

    Args:
        image: Input image. It is converted with ``cv.COLOR_RGB2GRAY``, so
            it is presumably RGB ordered -- TODO confirm against callers.

    Returns:
        The value produced by ``Rect.nms_merge``, or an empty list when
        that value is falsy.
    """
    gray = cv.cvtColor(image, cv.COLOR_RGB2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    edged = auto_canny(gray)

    found = cv.findContours(edged.copy(), cv.RETR_LIST,
                            cv.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values or 3 depending on the OpenCV version;
    # the contours are first in the 2-value form and second otherwise.
    contours = found[0] if len(found) == 2 else found[1]

    # sorted() rather than .sort(): the contour container is a tuple in
    # newer OpenCV Python bindings, and tuples have no in-place sort.
    contours = sorted(contours, key=cv.contourArea, reverse=True)
    # 15px contour area, basically cnt>=4x4
    contours = [cnt for cnt in contours if cv.contourArea(cnt) > 15]

    # minAreaRect yields (center, size, angle); only center and size are
    # forwarded to Rect.from_cvrect.
    rects = [cv.minAreaRect(cnt) for cnt in contours]
    boxes = [Rect.from_cvrect(*center, *size) for center, size, _ in rects]

    boxes = Rect.nms_merge(boxes)

    return boxes or []
示例#3
0
def detect_objects(image: np.ndarray,
                   nnconfig: ObjDetectNetConfig,
                   threshold: float = 0.2) -> List[DetectedObject]:
    """Run an object-detection network over an image.

    The network is loaded from ``nnconfig.model`` / ``nnconfig.config`` on
    every call, fed a 300x300 blob of the image, and its output rows are
    converted to ``DetectedObject`` instances.

    Args:
        image: OpenCV-compatible image with three dimensions
            (height, width, channels).
        nnconfig: Holds the paths to the network weights (``model``) and
            description (``config``) files.
        threshold: Detections whose confidence is below this value are
            dropped.

    Returns:
        One ``DetectedObject(class_id, confidence, Rect)`` per kept
        detection, with the rect scaled to the image's width and height.
    """
    image_height, image_width, _ = image.shape

    net = cv.dnn.readNetFromTensorflow(nnconfig.model, nnconfig.config)
    blob = cv.dnn.blobFromImage(image,
                                1, (300, 300), (127.5, 127.5, 127.5),
                                swapRB=True,
                                crop=False)
    net.setInput(blob)
    output = net.forward()

    found = []
    for row in output[0, 0, :, :]:
        # Row layout as consumed here: [1] class id, [2] confidence,
        # [3..6] left/top/right/bottom as fractions of the image size.
        score = float(row[2])
        if not score < threshold:
            label = int(row[1])
            x0 = row[3] * image_width
            y0 = row[4] * image_height
            x1 = row[5] * image_width
            y1 = row[6] * image_height
            found.append(DetectedObject(label, score, Rect(x0, y0, x1, y1)))

    return found
示例#4
0
    def detect(
        self,
        image: np.ndarray,
        config: TextDetectorConfig = TextDetectorConfig()
    ) -> List[Tuple[TextRegion, float]]:
        """Detect text regions in an image.

        Runs the network from ``self._makenet()`` on the image, decodes the
        score/geometry outputs into rotated boxes, applies rotated
        non-maximum suppression and returns the surviving boxes.

        Args:
            image: Image to analyse; ``shape[0]`` / ``shape[1]`` are read
                as height / width.
            config: Supplies ``confThreshold`` and ``nmsThreshold``.

        Returns:
            One ``(TextRegion, confidence)`` tuple per box kept by NMS. Each
            region is built from the box's four corner points and their
            integer bounding rectangle.
        """
        # Network input size derived from the frame size.
        inpWidth = adjust_dimension(image.shape[1])
        inpHeight = adjust_dimension(image.shape[0])
        # keep for now, can be used to remap region in the future
        # (both ratios currently evaluate to 1.0, so nothing is rescaled)
        # NOTE(review): vertices are presumably in the resized network-input
        # space, not the original image space -- confirm against callers.
        rW = inpWidth / float(inpWidth)
        rH = inpHeight / float(inpHeight)

        net = self._makenet()
        confThreshold = config.confThreshold
        nmsThreshold = config.nmsThreshold

        # Create a 4D blob from frame and run the model.
        blob = cv.dnn.blobFromImage(image, 1.0, (inpWidth, inpHeight),
                                    (123.68, 116.78, 103.94), True, False)
        net.setInput(blob)
        outs = net.forward(self._outputs)

        # Get scores and geometry
        scores = outs[0]
        geometry = outs[1]
        [boxes, confidences] = decode(scores, geometry, confThreshold)

        # Apply NMS
        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,
                                         nmsThreshold)

        results = []
        # Depending on the OpenCV version the indices come back as an Nx1
        # array, a flat array, or an empty tuple; flatten so all are handled.
        for raw_index in np.asarray(indices).reshape(-1):
            index = int(raw_index)

            # get 4 corners of the rotated rect
            vertices = cv.boxPoints(boxes[index])

            # scale the corner coordinates based on the respective ratios
            vertices[:, 0] *= rW
            vertices[:, 1] *= rH

            # Axis-aligned integer bounds taken from the real min/max of
            # the corners (the max is no longer seeded with 0, which gave a
            # wrong edge for boxes lying entirely at negative coordinates).
            xs = vertices[:, 0]
            ys = vertices[:, 1]
            bounds = Rect(int(xs.min()), int(ys.min()), int(xs.max()),
                          int(ys.max()))

            results.append((TextRegion(vertices, bounds),
                            confidences[index]))

        return results
示例#5
0
def is_rects_overlap(a: Rect, b: Rect) -> bool:
    """Tell whether rectangle *a* overlaps rectangle *b*.

    Free-function form of ``a.overlaps(b)``; the decision is delegated
    entirely to that method.
    """
    overlapping = a.overlaps(b)
    return overlapping
示例#6
0
def detect_text_areas(image, model, *args,
                      **kwargs) -> List[Tuple[TextRegion, float]]:
    """Detect text regions in an image with a preloaded network.

    Args:
        image: Image fed to the network at its own size
            (``shape[1]`` x ``shape[0]``).
        model: Network object providing ``setInput`` and ``forward``.
        *args: Unused.
        **kwargs: Required keys are ``sourceSize`` (``(width, height)`` that
            results are scaled to), ``confThreshold``, ``nmsThreshold`` and
            ``outNames`` (output layer names passed to ``forward``).
            Optional key ``scale`` (default ``True``): when falsy, the
            corner coordinates are left unscaled.

    Returns:
        One ``(TextRegion, confidence)`` tuple per box kept by NMS. Each
        region is built from the box's four corner points and their integer
        bounding rectangle.

    Raises:
        KeyError: If a required keyword argument is missing.
    """
    # Ratios that map network-input coordinates onto the source size.
    sourceSize = kwargs['sourceSize']
    inpWidth = image.shape[1]
    inpHeight = image.shape[0]
    rW = sourceSize[0] / float(inpWidth)
    rH = sourceSize[1] / float(inpHeight)

    if not kwargs.get('scale', True):
        rW = 1.0
        rH = 1.0

    net = model
    confThreshold = kwargs['confThreshold']
    nmsThreshold = kwargs['nmsThreshold']
    outNames = kwargs['outNames']

    # Create a 4D blob from frame and run the model.
    blob = cv.dnn.blobFromImage(image, 1.0, (inpWidth, inpHeight),
                                (123.68, 116.78, 103.94), True, False)
    net.setInput(blob)
    outs = net.forward(outNames)

    # Get scores and geometry
    scores = outs[0]
    geometry = outs[1]
    [boxes, confidences] = decode(scores, geometry, confThreshold)

    # Apply NMS
    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,
                                     nmsThreshold)

    results = []
    # Depending on the OpenCV version the indices come back as an Nx1
    # array, a flat array, or an empty tuple; flatten so all are handled.
    for raw_index in np.asarray(indices).reshape(-1):
        index = int(raw_index)

        # get 4 corners of the rotated rect
        vertices = cv.boxPoints(boxes[index])

        # scale the corner coordinates based on the respective ratios
        vertices[:, 0] *= rW
        vertices[:, 1] *= rH

        # Axis-aligned integer bounds taken from the real min/max of the
        # corners (the max is no longer seeded with 0, which gave a wrong
        # edge for boxes lying entirely at negative coordinates).
        xs = vertices[:, 0]
        ys = vertices[:, 1]
        bounds = Rect(int(xs.min()), int(ys.min()), int(xs.max()),
                      int(ys.max()))

        results.append((TextRegion(vertices, bounds), confidences[index]))

    return results
示例#7
0
def find_rects_white(image: np.ndarray) -> List[np.ndarray]:
    """Detects rectangular contours that overlap a hue-filtered area

    NOTE: this function is currently disabled. It raises
    NotImplementedError as its first statement, so everything after the
    raise is unreachable work-in-progress code kept for reference.

    @returns (unreachable) the result of Rect.nms_merge, or an empty list
             when that result is falsy
    @raises NotImplementedError always
    """

    raise NotImplementedError()

    # ---- unreachable below this line ----

    #gray = norm_color_test(image.copy())
    gray = cv.cvtColor(image.copy(), cv.COLOR_RGB2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    #edged = cv.Canny(gray, 70, 180) # these numbers are a hand-picked guess from a few photos
    edged = auto_canny(
        gray)  # edge map of the blurred grayscale image

    # split to HSV, then keep only hue values in [145, 165], zeroing out the
    # rest (meant to roughly pick up white areas according to the original
    # note -- NOTE(review): confirm this hue band is the intended one)
    hsv = cv.cvtColor(image.copy(), cv.COLOR_RGB2HSV)
    h, s, v = cv.split(hsv)
    h[h < 145] = 0
    h[h > 165] = 0
    #h = cv.GaussianBlur(h, (5, 5), 0)
    # stretch the surviving hue values to 0..255, then apply a morphological
    # open with a 5x5 elliptical kernel
    normed = cv.normalize(h, None, 0, 255, cv.NORM_MINMAX, cv.CV_8UC1)
    kernel = cv.getStructuringElement(shape=cv.MORPH_ELLIPSE, ksize=(5, 5))
    opened = cv.morphologyEx(normed, cv.MORPH_OPEN, kernel)

    # now find contours of the filtered regions
    # NOTE(review): unlike the edge contours further down, this takes index 0
    # unconditionally and sorts in place -- verify against the OpenCV
    # version in use before re-enabling
    whites = cv.findContours(opened, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)[0]
    whites.sort(key=cv.contourArea, reverse=True)
    whites = [cnt for cnt in whites if cv.contourArea(cnt) > 15
              ]  # 15px contour area, basically cnt>=4x4

    # keep only regions whose min-area rect is large enough
    whiterects = []
    for i in whites:
        rect = cv.minAreaRect(i)
        w, h = rect[1]  # rebinds h; the hue plane is not used past this point
        if w * h > 150:  # 150px area, or roughly 12x12 pixels
            whiterects.append(rect)

    #cv.drawContours(image, whites, -1, COLORS[2 % len(COLORS)], 2)
    #cv.imshow('test', image)
    #cv.waitKey()

    # from here on `whites` holds Rect objects instead of contours
    whites = list(map(lambda r: Rect.from_cvrect(*r[0], *r[1]), whiterects))

    #cv.imshow('test', edged)
    #cv.waitKey()

    # edge contours, largest first, tiny ones dropped
    contours = cv.findContours(edged.copy(), cv.RETR_LIST,
                               cv.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    contours.sort(key=cv.contourArea, reverse=True)
    contours = [cnt for cnt in contours if cv.contourArea(cnt) > 15
                ]  # 15px contour area, basically cnt>=4x4

    rects = list(map(cv.minAreaRect, contours))
    boxes = list(map(lambda r: Rect.from_cvrect(*r[0], *r[1]), rects))

    # drop every edge box that overlaps none of the filtered-region boxes;
    # iterate in reverse so pop(i) does not shift indices still to be visited
    for i in reversed(range(len(boxes))):
        overlaps = False
        for wbox in whites:
            if wbox.overlaps(boxes[i]):
                overlaps = True
                break
        if not overlaps:
            boxes.pop(i)

    boxes = Rect.nms_merge(boxes)

    # debug leftover: the corner points are computed but, with the drawing
    # calls commented out, nothing is done with them
    for i in range(len(contours)):
        #peri = cv.arcLength(contours[i], True)
        #approx = cv.approxPolyDP(contours[i], 0.02 * peri, True)
        rect = cv.minAreaRect(contours[i])
        box = cv.boxPoints(rect)
        box = np.int0(box)
        #cv.drawContours(image, [box], -1, COLORS[i % len(COLORS)], 2)
        #cv.putText(image, f'{i}: {cv.contourArea(contours[i])}px', (int(rect[0][0]), int(rect[0][1])), cv.FONT_HERSHEY_SIMPLEX, 0.6, COLORS[i % len(COLORS)], 1)

    #cv.drawContours(image, contours, -1, COLORS[1], 2)

    # debug output: draws each merged box onto the caller's image (the
    # argument itself, not a copy)
    for b in boxes:
        cv.line(image, (int(b.xmin), int(b.ymin)), (int(b.xmax), int(b.ymin)),
                (0, 255, 255), 2)
        cv.line(image, (int(b.xmin), int(b.ymax)), (int(b.xmax), int(b.ymax)),
                (0, 255, 255), 2)
        cv.line(image, (int(b.xmin), int(b.ymin)), (int(b.xmin), int(b.ymax)),
                (0, 255, 255), 2)
        cv.line(image, (int(b.xmax), int(b.ymin)), (int(b.xmax), int(b.ymax)),
                (0, 255, 255), 2)

    # debug output: edge map, filtered mask and annotated image side by
    # side, shown in a window until a key is pressed, then written to
    # dump.jpg
    stacked = np.hstack((cv.cvtColor(edged, cv.COLOR_GRAY2RGB),
                         cv.cvtColor(opened, cv.COLOR_GRAY2RGB), image))
    cv.namedWindow('test', 0)
    cv.imshow('test', stacked)
    cv.waitKey()

    cv.imwrite('dump.jpg', stacked)

    return boxes or list()