def draw_bboxes_withindex(img, boxes, uids):
    """
    A helper function to draw bounding box rectangles on images
    Args:
        img: image to be drawn on in array format
        boxes: An (N,4) array of bounding boxes
    Output:
        Image with drawn bounding boxes
    """
    source = Image.fromarray(img)
    draw = ImageDraw.Draw(source)
    h2, w2 = img.shape[:2]  # shape is (height, width); these values are unused below

    font = ImageFont.truetype(
        '/usr/share/fonts/truetype/freefont/FreeSerif.ttf', 40)
    #font = ImageFont.truetype('arial.ttf', 24)

    for idx, b in enumerate(boxes):
        xmin, ymin, xmax, ymax = b

        # draw the box three times with a 1-px offset to fake a 3-px outline
        for j in range(3):
            draw.rectangle(((xmin + j, ymin + j), (xmax + j, ymax + j)),
                           outline="red")
        draw.text((xmin + 20, ymin + 70), str(uids[idx]), font=font)
    return source
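# Usage sketch (added for illustration, not from the original source): assumes
# numpy and PIL are imported as in the snippet above, and that the FreeSerif
# font path exists on the host.
import numpy as np

canvas = np.full((600, 800, 3), 255, dtype=np.uint8)           # white RGB canvas
boxes = np.array([[50, 60, 200, 180], [300, 240, 450, 400]])   # (N, 4) corners
annotated = draw_bboxes_withindex(canvas, boxes, uids=[7, 42])
annotated.save('bboxes_with_ids.png')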
Example No. 2
def generate_position_image(element, position_converter):
    img = Image.new("RGB", (100, 100), "white")

    if isinstance(element, E):
        Ep = element.value
        elements = Ep.strip(" ()").split("+")
    elif isinstance(element, str):
        if element == "all":
            elements = position_converter.keys()
        else:
            Ep = element
            elements = Ep.strip(" ()").split("+")
    else:
        return img

    draw = ImageDraw.Draw(img)

    for el in elements:
        x_start, y_start, x_end, y_end = position_converter[el]
        draw.rectangle(((x_start, y_start), (x_end, y_end)),
                       fill="gold",
                       outline=True,
                       width=1)
        draw.text((x_start, y_start), el, fill="black")

    draw.rectangle(((0, 0), (100 - 1, 100 - 1)), outline=True, width=1)
    return img
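# Usage sketch (illustrative; the position_converter layout below is
# hypothetical): keys map element names to (x_start, y_start, x_end, y_end)
# boxes inside the 100x100 canvas. Note that "(H + He)" with inner spaces
# would not match the keys, since only the outer characters are stripped.
position_converter = {"H": (5, 5, 20, 20), "He": (80, 5, 95, 20)}
img = generate_position_image("(H+He)", position_converter)
img.save("positions.png")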
Example No. 3
    def puttext(self,
                cv_image,
                texts,
                point=(30, 30),
                font_path='/IPAexfont00401/ipaexm.ttf',
                font_size=100,
                color=(255, 0, 0)):
        h, w, _ = cv_image.shape  # was img.shape: `img` is undefined here, the parameter is cv_image
        font_path = './font' + font_path
        font = ImageFont.truetype(font_path, font_size)

        cv_rgb_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(cv_rgb_image)

        draw = ImageDraw.Draw(pil_image)
        dx = 0
        dy = (h // 4) // len(texts)
        for i, text in enumerate(texts):
            repoint = (point[0] + dx, point[1] + i * dy)
            draw.text(repoint, text, fill=color, font=font)

        cv_rgb_result_image = np.asarray(pil_image)
        cv_bgr_result_image = cv2.cvtColor(cv_rgb_result_image,
                                           cv2.COLOR_RGB2BGR)

        return cv_bgr_result_image
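# Usage sketch (illustrative): puttext is a method, so `node` below stands in
# for an instance of the enclosing class; the IPAex font path is an assumption
# carried over from the original code.
import cv2

frame = cv2.imread('input.jpg')                                 # BGR frame
out = node.puttext(frame, ['hello', 'world'], font_size=40)
cv2.imwrite('annotated.jpg', out)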
Example No. 4
    def _sample():
        x = np.random.randint(width, size=4).tolist()
        y = np.random.randint(height, size=4).tolist()
        x.sort()
        y.sort()

        # sample intersected bboxes
        fake = Faker()
        if fake.pybool():
            if fake.pybool():
                ax0, bx0, ax1, bx1 = x
            else:
                bx0, ax0, bx1, ax1 = x
            if fake.pybool():
                ay0, by0, ay1, by1 = y
            else:
                by0, ay0, by1, ay1 = y
        else:
            ax0, bx0, bx1, ax1 = x
            ay0, by0, by1, ay1 = y
        bboxes = [(ax0, ay0, ax1, ay1), (bx0, by0, bx1, by1)]

        # sample layers: 0 = solid rectangle, 3 = pasted photo
        # (the text layer, 4, is always added in the final step below)
        layers = [0, 3]
        shuffle(layers)
        im = Image.new("RGB", (width, width))  # note: square canvas; height is only used when sampling y
        draw = ImageDraw.Draw(im)
        im_t = [np.array(im)]
        for layer, bbox in zip(layers, bboxes):
            x0, y0, x1, y1 = bbox
            if layer == 4:
                draw.text((x0, y0), fake.sentence(), fill=fake.hex_color())
            elif layer == 3:
                f = os.path.join(photo_folder,
                                 files[fake.pyint(min=0, max=len(files) - 1)])
                _im = Image.open(f).resize((x1 - x0, y1 - y0))
                im.paste(_im, box=(x0, y0))
            elif layer == 0:
                draw.rectangle((x0, y0, x1, y1), fill=fake.hex_color())
            im_t.append(np.array(im))

        # sample final layer: text
        x0 = np.random.randint(width / 2)
        y0 = np.random.randint(height / 2)
        text = fake.sentence()
        draw.text((x0, y0), text, fill=fake.hex_color())
        w, h = draw.textsize(text)  # textsize was removed in Pillow 10; use draw.textbbox on newer versions
        layers.append(4)
        bboxes.append((x0, y0, w, h))  # note: stored as (x, y, w, h), unlike the corner-format boxes above
        ims = np.stack(
            [np.concatenate([x, np.array(im)], axis=2) for x in im_t])

        return ims, np.array(layers), np.array(bboxes)
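# Context sketch (assumed, not from the source): _sample is a closure, so
# width, height, photo_folder and files must exist in the enclosing scope
# before it can run, e.g.:
import os

width = height = 128
photo_folder = './photos'                  # hypothetical directory of images
files = os.listdir(photo_folder)
ims, layers, bboxes = _sample()            # ims stacks before/after canvas states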
Example No. 5
    def __getitem__(self, idx):

        sample = self.dataset[self.imageids[idx]]
        seg = self.seg[None, :, :]

        # Convert to properly-sized tensors
        img = self.convert(sample["img"][0])

        draw_distractor = False

        if self.mode == "train":
            if self.labels[idx] == 1:
                if (np.random.rand() < self.prob):
                    draw_distractor = True
        else:
            if self.labels[idx] == 0:
                if (np.random.rand() < self.prob):
                    draw_distractor = True
        if draw_distractor:
            draw = ImageDraw.Draw(img)
            font = ImageFont.load_default()  #.truetype("sans-serif.ttf", 16)
            draw.text((np.random.randint(5) + 10, np.random.randint(5) + 10),
                      "R",
                      np.random.randint(10) + 245)

        # Enforce that the image is a square of the target size.
        if not (img.width == img.height == self.new_size):
            img = self.resize(img)

        # Enforce datatype
        img = TF.to_tensor(img).float()
        seg = TF.to_tensor(seg).permute([1, 0, 2]).float()

        if self.mask_all:
            try:
                img *= seg
            except Exception:  # debugging hook left by the original author
                import IPython
                IPython.embed()

        return (img, seg, self.labels[idx])  #self.masks_selector[idx]
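# Usage sketch (illustrative): __getitem__ belongs to a torch Dataset subclass
# configured elsewhere with dataset, imageids, labels, mode and prob; assuming
# such an instance `ds`, items unpack as:
img, seg, label = ds[0]    # img/seg are float tensors, label is 0 or 1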
Example No. 6
def center_point_splash(image, mask, output_path=None):
    draw = ImageDraw.Draw(image)
    if mask.shape[-1] > 0:
        # number each instance at the centroid of its eroded mask
        font = ImageFont.truetype('simsun.ttc', 40)
        shape = mask.shape
        dim = shape[2]
        count = 1
        for i in range(dim):
            mask1 = mask[:, :, i]
            mask1 = mask1 + 0
            gray = np.array(mask1, dtype='uint8')
            kernel = np.ones((20, 20), np.uint8)
            erosion = cv.erode(gray, kernel)  # erode to clean up the mask

            # three-value unpacking follows the OpenCV 3.x API;
            # OpenCV 4.x returns only (contours, hierarchy)
            im, contours, hierarchy = cv.findContours(erosion, cv.RETR_LIST,
                                                      cv.CHAIN_APPROX_SIMPLE)
            if len(contours):
                cnt = contours[0]
                M = cv.moments(cnt)
                # print(M)
                cx = int(M['m10'] / (M['m00'] + 1))
                cy = int(M['m01'] / (M['m00'] + 1))
                """
                if mask1[cy, cx] == 0:
                    num = 1
                    avg = 0
                    for j in range(shape[0]):
                        if mask1[cy, j] == 1:
                            avg += j
                            num += 1
                    cx = round(avg/num)
                """
                draw.text((cx, cy), str(count), fill=(255, 0, 0), font=font)
                count += 1
    image.save(output_path, 'jpeg')
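# Usage sketch (illustrative): mask is the (H, W, N) instance stack produced
# by a Mask R-CNN style detector; simsun.ttc must be resolvable by Pillow.
from PIL import Image
import numpy as np

image = Image.open('wheat.jpg')
mask = np.load('masks.npy')                # hypothetical saved detection masks
center_point_splash(image, mask, output_path='numbered.jpg')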
Example No. 7
def draw_text(image, text_message):
    # get a plain foreground layer filled with dark yellow
    foreground = Image.new('RGB', (image.shape[1], image.shape[0]),
                           (128, 128, 0))  # alpha dropped: an 'RGB' image has no alpha band

    background = Image.fromarray(image)

    mask = Image.new('L', (image.shape[1], image.shape[0]), 255)
    draw = ImageDraw.Draw(mask)
    fnt = ImageFont.truetype("Pillow/Tests/fonts/FreeMono.ttf",
                             round(120 * scale_video))
    w, h = draw.textsize(text_message, font=fnt)
    W, H = mask.size

    # draw subtitle in the bottom of the frame, aligned to the right, in an alpha layer, with alpha = 200/255
    draw.text(
        ((W - w) - round(30 * scale_video), (H) - round(150 * scale_video)),
        text_message,
        fill=(200),
        font=fnt)

    #draw foreground on background respecting the alpha layer
    result = Image.composite(background, foreground, mask)
    return np.array(result)
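# Usage sketch (illustrative): draw_text reads a module-level scale_video
# factor and Pillow's bundled FreeMono test font, both assumptions carried
# over from the original snippet.
import numpy as np

scale_video = 0.5                          # hypothetical global scale factor
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
subtitled = draw_text(frame, 'Hello, subtitles')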
Example No. 8
def main():
    args = get_args()
    if args.resume is None:
        raise ValueError('Must provide --resume when testing.')

    support_architectures = [
        'ksevendet',
    ]
    support_architectures += [f'efficientdet-d{i}' for i in range(8)]
    support_architectures += [
        f'retinanet-res{i}' for i in [18, 34, 50, 101, 152]
    ]

    support_architectures.append('retinanet-p45p6')

    print(support_architectures)

    if args.architecture == 'ksevendet':
        ksevendet_cfg = args.model_cfg
        if ksevendet_cfg.get('variant'):
            network_name = f'{args.architecture}-{ksevendet_cfg["variant"]}-{ksevendet_cfg["neck"]}'
        else:
            assert 0, 'not supported yet.'
            assert isinstance(ksevendet_cfg, dict)
            network_name = f'{args.architecture}-{ksevendet_cfg["backbone"]}_specifical-{ksevendet_cfg["neck"]}'
    elif args.architecture in support_architectures:
        network_name = args.architecture
    else:
        raise ValueError('Architecture {} is not supported.'.format(
            args.architecture))

    args.network_name = network_name
    net_logger = get_logger(name='Network Logger', args=args)
    net_logger.info('Positive Threshold: {:.2f}'.format(args.threshold))

    _shape_1, _shape_2 = tuple(map(int, args.input_shape.split(',')))
    _normalizer = Normalizer(inference_mode=True)
    if args.resize_mode == 0:
        _resizer = Resizer(min_side=_shape_1,
                           max_side=_shape_2,
                           resize_mode=args.resize_mode,
                           logger=net_logger,
                           inference_mode=True)
    elif args.resize_mode == 1:
        _resizer = Resizer(height=_shape_1,
                           width=_shape_2,
                           resize_mode=args.resize_mode,
                           logger=net_logger,
                           inference_mode=True)
    else:
        raise ValueError('Illegal resize mode.')

    transform_funcs_valid = [
        _normalizer,
        _resizer,
    ]
    transform = transforms.Compose(transform_funcs_valid)

    net_logger.info('Number of Classes: {:>3}'.format(args.num_classes))

    build_param = {'logger': net_logger}
    if args.architecture == 'ksevendet':
        net_model = ksevendet.KSevenDet(ksevendet_cfg,
                                        num_classes=args.num_classes,
                                        pretrained=False,
                                        **build_param)
    elif args.architecture == 'retinanet-p45p6':
        net_model = retinanet.retinanet_p45p6(num_classes=args.num_classes,
                                              **build_param)
    elif args.architecture.split('-')[0] == 'retinanet':
        net_model = retinanet.build_retinanet(args.architecture,
                                              num_classes=args.num_classes,
                                              pretrained=False,
                                              **build_param)
    elif args.architecture.split('-')[0] == 'efficientdet':
        net_model = efficientdet.build_efficientdet(
            args.architecture,
            num_classes=args.num_classes,
            pretrained=False,
            **build_param)
    else:
        assert 0, 'architecture error'

    net_logger.info('Loading Weights from Checkpoint : {}'.format(args.resume))
    net_model.load_state_dict(torch.load(args.resume))
    #model = torch.load(args.resume)

    # move to GPU (DataParallel handles the .cuda() placement)
    if torch.cuda.is_available():
        net_model = torch.nn.DataParallel(net_model).cuda()
    else:
        net_model = torch.nn.DataParallel(net_model)

    #net_model.eval()
    net_model.module.eval()

    img_array = []

    cap = cv2.VideoCapture(args.input_path)
    fontsize = 12
    score_font = ImageFont.truetype("DejaVuSans.ttf", size=fontsize)

    cap_i = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        #if cap_i > 20:
        #    break
        #img = skimage.io.imread(os.path.join(args.demo_path, f))
        #if len(img.shape) == 2:
        #    img = skimage.color.gray2rgb(img)
        a_img = np.copy(frame)
        img = Image.fromarray(np.uint8(frame))
        a_img = a_img.astype(np.float32) / 255.0
        a_img = transform(a_img)
        a_img = torch.unsqueeze(a_img, 0)
        a_img = a_img.permute(0, 3, 1, 2)

        # print('predict...')
        scores, labels, boxes = net_model(a_img, return_loss=False)

        scores = scores.cpu()
        labels = labels.cpu()
        boxes = boxes.cpu()

        # change to (x, y, w, h) (MS COCO standard)
        boxes[:, 2] -= boxes[:, 0]
        boxes[:, 3] -= boxes[:, 1]

        print(f'{cap_i}   inference ...', end="\r")

        draw = ImageDraw.Draw(img)
        for box_id in range(boxes.shape[0]):
            score = float(scores[box_id])
            label = int(labels[box_id])
            box = boxes[box_id, :]

            # scores are sorted, so we can break
            if score < args.threshold:
                break

            x, y, w, h = box
            color_ = COLOR_LABEL[label]
            _text_offset_x, _text_offset_y = 2, 3
            draw.rectangle(tuple([x, y, x + w, y + h]),
                           width=1,
                           outline=color_)
            draw.text(tuple(
                [int(x) + _text_offset_x + 1,
                 int(y) + _text_offset_y]),
                      '{:.3f}'.format(score),
                      fill='#000000',
                      font=score_font)
            draw.text(tuple([int(x) + _text_offset_x,
                             int(y) + _text_offset_y]),
                      '{:.3f}'.format(score),
                      fill=color_,
                      font=score_font)

        img_array.append(np.asarray(img))
        cap_i += 1

    cap.release()

    height, width, layers = img_array[0].shape
    size = (width, height)
    fps = 30
    #fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')

    input_video_name = os.path.basename(args.input_path)
    input_video_dir = os.path.dirname(args.input_path)
    out_video_path = os.path.join(
        'trash', '{}_{}_thr{}.avi'.format(
            input_video_name[:-4],
            network_name if not args.model_name else args.model_name,
            int(args.threshold * 100)))
    print('Convert to video... {}'.format(out_video_path))
    out = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                          size)

    for i in range(len(img_array)):
        out.write(img_array[i])

    out.release()

    print('Done')
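# Invocation sketch (illustrative; the flag names below are assumptions about
# what get_args() defines, not confirmed by the source):
#
#   python demo_video.py --architecture retinanet-res50 --num_classes 2 \
#       --input_shape 512,512 --resize_mode 1 --threshold 0.5 \
#       --input_path clip.mp4 --resume checkpoint.pt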
Example No. 9
def load_verify_contour(data_path, phase='train'):
    data_path = os.path.join(data_path, phase)
    annotation_path = os.path.join(data_path, 'annotations')
    rgb_path = os.path.join(data_path, 'images')
    updated_mask = os.path.join(data_path, 'masks')
    cache_path = os.path.join(data_path, 'cache')
    verify_path = os.path.join(data_path, 'verify')

    # verify the extracted contour and bounding box, image saved in "verify"
    do_verification = False

    ground_truth_cache = os.path.join(cache_path, 'ground_truth_cache.pkl')
    if os.path.isfile(ground_truth_cache):
        print('Loading gt_labels from: ' + ground_truth_cache)
        with open(ground_truth_cache, 'rb') as f:
            gt_data = cPickle.load(f)
        return gt_data

    f_wrect = open(os.path.join(cache_path, phase + '.txt'),
                   'w')  # create image ID list
    annotations = []
    imgfile = os.listdir(rgb_path)
    error_mask = 0
    for i in range(len(imgfile)):
        file = imgfile[i]
        filename = os.path.splitext(file)[0]
        print(filename)
        f_wrect.write(filename + '\n')

        #Load image, load bounding box info from XML file in PASCAL VOC format
        annoname = os.path.join(annotation_path, filename + '.xml')
        if os.path.exists(annoname):
            objects = []
            tree = ET.parse(annoname)
            objs = tree.findall('object')
            for obj in objs:
                obj_struct = {}
                cls_name = obj.find('name').text.lower().strip()
                obj_struct['class'] = cls_name
                bbox = obj.find('bndbox')
                x1 = float(bbox.find('xmin').text) - 1
                y1 = float(bbox.find('ymin').text) - 1
                x2 = float(bbox.find('xmax').text) - 1
                y2 = float(bbox.find('ymax').text) - 1
                obj_struct['bbox'] = [x1, y1, x2, y2]
                objects.append(obj_struct)
            # extract 'merge' box in list[[x1, y1, x2, y2],[x1, y1, x2, y2]...]
            object_merge = [
                obj['bbox'] for obj in objects if obj['class'] == 'merge'
            ]

        rgb_file = os.path.join(rgb_path, filename + '.jpg')
        spallmask_file = os.path.join(updated_mask,
                                      filename + 'spall' + '.jpg')
        rebarmask_file = os.path.join(updated_mask,
                                      filename + 'rebar' + '.jpg')
        crackmask_file = os.path.join(updated_mask,
                                      filename + 'crack' + '.jpg')

        # load contours from mask file
        spall_contours = []
        rebar_contours = []
        crack_contours = []
        if os.path.exists(rebarmask_file):
            img_binary = cv2.imread(rebarmask_file, cv2.IMREAD_GRAYSCALE)
            ret, rebarthresh = cv2.threshold(img_binary, 127, 255, 0)
            im2, rebar_contours, rebar_hierarchy = cv2.findContours(
                rebarthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        if os.path.exists(spallmask_file):
            img_binary = cv2.imread(spallmask_file, cv2.IMREAD_GRAYSCALE)
            ret, spallthresh = cv2.threshold(img_binary, 127, 255, 0)
            im2, spall_contours, spall_hierarchy = cv2.findContours(
                spallthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        if os.path.exists(crackmask_file):
            img_binary = cv2.imread(crackmask_file, cv2.IMREAD_GRAYSCALE)
            ret, crackthresh = cv2.threshold(img_binary, 127, 255, 0)
            im2, crack_contours, crack_hierarchy = cv2.findContours(
                crackthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

        # generate contour cache
        regions = {}
        count = 0
        pair = {}
        if crack_contours:
            classname = 'crack'
            for j in range(len(crack_contours)):
                shape_groups = []
                one_contour = crack_contours[j][:, 0, :]
                all_x = np.array(one_contour[:, 0]).tolist()
                all_y = np.array(one_contour[:, 1]).tolist()
                if crack_hierarchy[0, j, 3] == -1:
                    shape_groups.append({
                        'type': 'out',
                        'all_points_x': all_x,
                        'all_points_y': all_y
                    })
                    regions[str(count)] = {
                        'region_attributes': classname,
                        'shape_attributes': shape_groups
                    }
                    pair[str(j)] = count
                    count = count + 1
                else:
                    indexvalue = crack_hierarchy[0, j, 3]
                    if not crack_hierarchy[0, indexvalue, 3] == -1:
                        print('There may be errors in mask ' + filename +
                              'crack' + '.jpg')
                        error_mask = error_mask + 1
                        continue
                    index = pair[str(indexvalue)]
                    shape_groups = regions[str(index)]['shape_attributes']
                    shape_groups.append({
                        'type': 'in',
                        'all_points_x': all_x,
                        'all_points_y': all_y
                    })
                    regions[str(index)] = {
                        'region_attributes': classname,
                        'shape_attributes': shape_groups
                    }
        pair = {}
        if spall_contours:
            classname = 'spall'
            for j in range(len(spall_contours)):
                shape_groups = []
                one_contour = spall_contours[j][:, 0, :]
                all_x = np.array(one_contour[:, 0]).tolist()
                all_y = np.array(one_contour[:, 1]).tolist()
                # check if the contour is inside another (hierarchy parent index [0, j, 3] != -1)
                if spall_hierarchy[0, j, 3] == -1:
                    shape_groups.append({
                        'type': 'out',
                        'all_points_x': all_x,
                        'all_points_y': all_y
                    })
                    regions[str(count)] = {
                        'region_attributes': classname,
                        'shape_attributes': shape_groups
                    }
                    pair[str(j)] = count
                    count = count + 1
                else:
                    indexvalue = spall_hierarchy[0, j, 3]
                    # a contour nested two levels deep should not normally happen
                    if not spall_hierarchy[0, indexvalue, 3] == -1:
                        print('There may be errors in mask ' + filename +
                              'spall' + '.jpg')
                        error_mask = error_mask + 1
                        continue
                    index = pair[str(indexvalue)]  # count index of the parent contour
                    shape_groups = regions[str(index)]['shape_attributes']
                    shape_groups.append({
                        'type': 'in',
                        'all_points_x': all_x,
                        'all_points_y': all_y
                    })
                    regions[str(index)] = {
                        'region_attributes': classname,
                        'shape_attributes': shape_groups
                    }
        pair = {}
        if rebar_contours:
            classname = 'rebar'
            for j in range(len(rebar_contours)):
                shape_groups = []
                one_contour = rebar_contours[j][:, 0, :]
                all_x = np.array(one_contour[:, 0]).tolist()
                all_y = np.array(one_contour[:, 1]).tolist()
                if rebar_hierarchy[0, j, 3] == -1:
                    shape_groups.append({
                        'type': 'out',
                        'all_points_x': all_x,
                        'all_points_y': all_y
                    })
                    regions[str(count)] = {
                        'region_attributes': classname,
                        'shape_attributes': shape_groups
                    }
                    pair[str(j)] = count
                    count = count + 1
                else:
                    indexvalue = rebar_hierarchy[0, j, 3]
                    if not rebar_hierarchy[0, indexvalue, 3] == -1:
                        print('There may be errors in mask ' + filename +
                              'rebar' + '.jpg')
                        error_mask = error_mask + 1
                        continue
                    else:
                        index = pair[str(indexvalue)]
                        shape_groups = regions[str(index)]['shape_attributes']
                        shape_groups.append({
                            'type': 'in',
                            'all_points_x': all_x,
                            'all_points_y': all_y
                        })
                        regions[str(index)] = {
                            'region_attributes': classname,
                            'shape_attributes': shape_groups
                        }

        # merge instances according to "object_merge"
        if os.path.exists(annoname):
            merge_groups = {}
            name_list = {}
            for jj in range(len(object_merge)):
                merge_groups[str(jj)] = []
                name_list[str(jj)] = []

            # assign each instance to merge_groups
            instance_num = len(regions)
            for k in range(instance_num):
                one_region = regions[str(k)]
                polygons = one_region['shape_attributes']
                classname = one_region['region_attributes']

                check = 0
                old_center_dis = 4000

                polygon = polygons[0]  # only the outermost contour needs to be considered
                all_x1 = polygon['all_points_x']
                all_y1 = polygon['all_points_y']
                rr, cc = skimage.draw.polygon(all_y1, all_x1)
                all_p1 = np.column_stack([np.array(all_x1), np.array(all_y1)])
                contour = np.expand_dims(all_p1, axis=1)
                M = cv2.moments(contour)
                cX = int(M["m10"] / M["m00"])
                cY = int(M["m01"] / M["m00"])
                for ii in range(len(object_merge)):
                    [x1, y1, x2, y2] = object_merge[ii]
                    if cX <= x1 or cX >= x2 or cY <= y1 or cY >= y2:
                        continue
                    center_disx = (x1 + x2) / 2 - cX
                    center_dixy = (y1 + y2) / 2 - cY
                    new_center_dis = (center_disx**2 + center_dixy**2)**0.5
                    if new_center_dis < old_center_dis:
                        dis_index = ii
                        old_center_dis = new_center_dis
                # assign the instance to the closest containing merge box, if one was found
                if old_center_dis < 4000:
                    [x1, y1, x2, y2] = object_merge[dis_index]
                    if x1 <= cX <= x2 and y1 <= cY <= y2:
                        merge_groups[str(dis_index)].extend(polygons)
                        name_list[str(dis_index)].extend([classname])
                        check = 1
                if not check == 1:
                    print('No merged box belongs to the defect in ' + file)

            # update "regions"
            new_regions = {}
            count = 0
            for jj in range(len(object_merge)):
                if merge_groups[str(jj)] == []:
                    print('No defect belongs to this merged box ' + file)
                else:
                    # determine the class name for this merge box: [crack, spall, rebar] or [crack, spall]
                    namelist = name_list[str(jj)]
                    if 'crack' in namelist:
                        classname = 'crack'
                    elif 'spall' in namelist and 'rebar' not in namelist:
                        classname = 'spall'
                    elif 'rebar' in namelist:
                        classname = 'rebar'
                    new_regions[str(count)] = {
                        'region_attributes': classname,
                        'shape_attributes': merge_groups[str(jj)]
                    }
                    count = count + 1

        damage_bgr = cv2.imread(rgb_file)  # read and save in BGR mode
        height, width, _ = damage_bgr.shape
        if os.path.exists(annoname):
            copy_regions = new_regions
        else:
            copy_regions = regions
        # save in annotations list
        annotations.append({
            'filename': file,
            'regions': copy_regions,
            'size': [height, width]
        })

        # verify the annotation for each image
        if do_verification:
            damage_rgb = cv2.cvtColor(damage_bgr, cv2.COLOR_BGR2RGB)
            instance_num = len(copy_regions)
            boxes = np.zeros([instance_num, 4], dtype=np.int32)
            boxes_name = []
            instance_mask = []

            for k in range(instance_num):
                one_region = copy_regions[str(k)]
                class_name = one_region['region_attributes']
                polygons = one_region['shape_attributes']
                each_mask = np.zeros([height, width], dtype=bool)  # np.bool was removed in NumPy 1.24
                for each_poly in polygons:
                    subtype = each_poly['type']
                    x_points = each_poly['all_points_x']
                    y_points = each_poly['all_points_y']
                    rr, cc = skimage.draw.polygon(y_points, x_points)
                    if subtype == 'out':
                        each_mask[rr, cc] = True
                        each_mask[np.array(y_points),
                                  np.array(x_points)] = True
                    else:
                        # remove the inside background region
                        each_mask[rr, cc] = False
                        each_mask[np.array(y_points),
                                  np.array(x_points)] = True

                instance_mask.append(each_mask)
                # extract the box from mask
                y1, x1, y2, x2 = extract_bboxes(each_mask)
                boxes[k] = np.array([y1, x1, y2, x2]).astype(np.int32)
                boxes_name.append(class_name)

            # create a merged mask for each class of each image
            crack_mask = np.zeros([height, width], dtype=np.uint8)
            spall_mask = np.zeros([height, width], dtype=np.uint8)
            rebar_mask = np.zeros([height, width], dtype=np.uint8)
            for k in range(instance_num):
                defectname = boxes_name[k]
                defectmask = instance_mask[k]
                defectmask = (defectmask * 255).astype(np.uint8)

                if defectname == 'crack':
                    crack_mask = np.where(defectmask == 255, 255, crack_mask)
                elif defectname == 'spall':
                    spall_mask = np.where(defectmask == 255, 255, spall_mask)
                elif defectname == 'rebar':
                    rebar_mask = np.where(defectmask == 255, 255, rebar_mask)

            # plot masks on original image
            if np.max(crack_mask) == 255:
                color = [255, 255, 0]  # yellow
                for c in range(3):
                    damage_rgb[:, :, c] = np.where(
                        crack_mask == 255,
                        damage_rgb[:, :, c] * 0.8 + 0.2 * color[c],
                        damage_rgb[:, :, c])
            if np.max(spall_mask) == 255:
                color = [0, 255, 255]  # Cyan
                for c in range(3):
                    damage_rgb[:, :, c] = np.where(
                        spall_mask == 255,
                        damage_rgb[:, :, c] * 0.85 + 0.15 * color[c],
                        damage_rgb[:, :, c])
            if np.max(rebar_mask) == 255:
                color = [255, 0, 255]  # Magenta
                for c in range(3):
                    damage_rgb[:, :, c] = np.where(
                        rebar_mask == 255,
                        damage_rgb[:, :, c] * 0.8 + 0.2 * color[c],
                        damage_rgb[:, :, c])

            # draw bounding boxes
            img_draw = Image.fromarray(damage_rgb)
            draw = ImageDraw.Draw(img_draw)
            font = ImageFont.truetype(font='fonttype/FiraMono-Medium.otf',
                                      size=int(0.02 * height))
            for j in range(instance_num):
                y1, x1, y2, x2 = boxes[j, :]
                draw.line([x1, y1, x1, y2], fill=(255, 0, 0), width=2)
                draw.line([x2, y1, x2, y2], fill=(255, 0, 0), width=2)
                draw.line([x1, y1, x2, y1], fill=(255, 0, 0), width=2)
                draw.line([x1, y2, x2, y2], fill=(255, 0, 0), width=2)
                text_str = str(j) + ' ' + boxes_name[j]
                draw.text((x1, y1),
                          text_str,
                          font=font,
                          fill=(0, 0, 255))
            del draw

            imagedir = os.path.join(verify_path, filename + '.jpg')
            img_draw.save(imagedir)

    print('Number of error masks: ' + str(error_mask))

    print('Saving gt_labels to: ' + ground_truth_cache)
    with open(ground_truth_cache, 'wb') as f:
        cPickle.dump(annotations, f)

    return annotations
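# Usage sketch (illustrative): the loader expects the directory layout named
# inside the function (images/, annotations/, masks/, cache/, verify/) under
# data_path/<phase>.
annotations = load_verify_contour('./dataset', phase='train')
print(len(annotations), 'annotated images')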
Example No. 10
        def display_instances(image,
                              boxes,
                              masks,
                              class_ids,
                              class_names,
                              scores,
                              image_name,
                              save_dir,
                              title="",
                              figsize=(16, 16),
                              ax=None,
                              show_mask=True,
                              show_bbox=True,
                              colors=None,
                              captions=None):
            """
            boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
            masks: [height, width, num_instances]
            class_ids: [num_instances]
            class_names: list of class names of the dataset
            scores: (optional) confidence scores for each box
            title: (optional) Figure title
            show_mask, show_bbox: To show masks and bounding boxes or not
            figsize: (optional) the size of the image
            colors: (optional) A list of colors to use with each object
            captions: (optional) A list of strings to use as captions for each object
            """

            N = boxes.shape[0]
            colors = colors or random_colors(N)

            if not N:
                print("\n*** No instances in image %s to draw *** \n" %
                      (image_name))
                masked_image = image.astype(np.uint8).copy()
                cv2.imwrite(os.path.join(save_dir, '%s' % (image_name)),
                            masked_image)
                return
            else:
                assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

            useful_mask_indices = []

            for i in range(N):
                if not np.any(boxes[i]):
                    # Skip this instance. Has no bbox. Likely lost in image cropping.
                    continue
                useful_mask_indices.append(i)

            masked_image = image.astype(np.uint8).copy()
            for index, value in enumerate(useful_mask_indices):
                class_id = class_ids[value]
                label = class_names[class_id]

                # Skip hand and mouth masking
                if label not in ('hand', 'mouth'):
                    masked_image = apply_mask(masked_image, masks[:, :, value],
                                              colors[index])

            masked_image = Image.fromarray(masked_image)
            draw = ImageDraw.Draw(masked_image)
            colors = (np.array(colors) * 255).astype(int)  # scale the 0-1 floats before truncating

            for index, value in enumerate(useful_mask_indices):
                class_id = class_ids[value]
                score = scores[value]
                label = class_names[class_id]

                # object timeline
                if label not in sec_object:
                    sec_object.append(label)

                # object result count
                if label in count_obj:
                    count_obj[label] += 1
                else:
                    count_obj[label] = 1

                # hand and mouth are skipped; everything else gets a box and label
                if label not in ('hand', 'mouth'):
                    y1, x1, y2, x2 = boxes[value]
                    color = tuple(colors[index])
                    draw.rectangle((x1, y1, x2, y2), outline=color)

                    # Label
                    # font = ImageFont.truetype('/Library/Fonts/Arial.ttf', 15)
                    draw.text((x1, y1), "%s %f" % (label, score),
                              (255, 255, 255))

                masked_image.save(os.path.join(save_dir, '%s' % (image_name)))  # note: saved once per instance
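# Context sketch (assumed): display_instances is nested inside a caller that
# maintains sec_object (an object timeline) and count_obj (running class
# counts) in the enclosing scope, e.g.:
sec_object = []
count_obj = {}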
Example No. 11
def generate_text_data(width=64, n_sample=1000, n_strokes=1):
    print("Generating datasets...")

    fake = Faker()

    width = 256
    space_x, space_y = 4, 0  # in pixel
    im = Image.new("RGB", (width, width))
    draw = ImageDraw.Draw(im)

    x, y, w, h = 30, 50, 0, 0
    _y = y
    for i in range(fake.pyint(min=1, max=5)):
        dx = 0
        for j in range(fake.pyint(min=1, max=5)):
            word = fake.word()
            draw.text((x + dx, _y), word, fill=(255, 255, 255))
            _w, _h = draw.textsize(word)  # size of token
            dx += _w + space_x
        w = max(w, dx - space_x)
        _y += _h + space_y
    h = _y - y

    draw.rectangle([x, y, x + w, y + h])

    tokens = fake.sentence().split()

    label, bbox, im = [], [], []
    for _ in range(n_sample):
        _im, _labels = skimage.draw.random_shapes((64, 64),
                                                  min_shapes=1,
                                                  max_shapes=n_strokes,
                                                  min_size=10)
        _label, ((r0, r1), (c0, c1)) = _labels[0]
        _class = LABEL_CLASS[_label]
        if r0 < r1:
            y0, y1 = r0, r1
            x0, x1 = c0, c1
        else:
            y0, y1 = r1, r0
            x0, x1 = c1, c0

        if x0 > x1 or y0 > y1:
            print((r0, r1), (c0, c1))

        label.append(np.array((_class), dtype="uint8"))
        bbox.append(np.array((x0, y0, x1, y1), dtype="uint8"))
        im.append(_im)
    label = np.stack(label)
    bbox = np.stack(bbox)  # (N, 4) = (x0, y0, x1, y1)
    im = np.stack(im).transpose(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

    indices = np.arange(0, len(label), dtype="int32")
    train, test = train_test_split(indices, test_size=0.2, random_state=0)

    if not os.path.exists("data-multi-shape/"):
        os.makedirs("data-multi-shape/")

    np.save("data-multi-shape/train_label.npy", label[train])
    np.save("data-multi-shape/train_bbox.npy", bbox[train])
    np.save("data-multi-shape/train_images.npy", im[train])
    np.save("data-multi-shape/test_label.npy", label[train])
    np.save("data-multi-shape/test_bbox.npy", bbox[train])
    np.save("data-multi-shape/test_images.npy", im[test])
Example No. 12
def detect_and_color_splash(model, image_path=None, video_path=None, save_path=None):
    assert image_path or video_path
    num_spikes = []
    pixel_count = []
    spike_height = []
    spike_width = []
    center_mask = []
    # Image or video?
    if image_path:
        # Run model detection and generate the color splash effect
        print("Running on {}".format(image_path))
        # Read image
        image = skimage.io.imread(image_path)
        # Detect objects
        r = model.detect([image], verbose=1)[0]
        # Color splash
        splash = color_splash(image, r['masks'])
        print(type(r['masks']))

        file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now())
        bbInformationName = os.path.join(save_path, file_name[0:-3] + 'txt')
        # save bb information
        with open(bbInformationName, 'w') as file:
            # <class_name> <left> <top> <right> <bottom> [<difficult>]
            # bb information top left bottom right
            for each_roi, each_score in zip(r['rois'], r['scores']):
                file.write(f'spike {each_score}')
                file.write(f' {each_roi[1]} {each_roi[0]} {each_roi[3]} {each_roi[2]}')
                file.write('\n')

        # draw bb
        spike_cnt = 0  # number of spikes
        for eachBB in r['rois']:
            splash = drawBoundingBox(eachBB, splash)
            spike_cnt += 1
            spike_height.append(eachBB[2] - eachBB[0])
            spike_width.append(eachBB[3] - eachBB[1])
        for _ in range(len(spike_height)):
            num_spikes.append(spike_cnt)

        # draw center of mask by k mean
        for eachBB, maskIndex in zip(r['rois'], range(0, len(r['rois']))):
            topBotList = []
            leftRightList = []
            maskCenter = {}
            for topBot in range(eachBB[0], eachBB[2]):
                for leftRight in range(eachBB[1], eachBB[3]):
                    if r['masks'][topBot][leftRight][maskIndex]:
                        topBotList.append(topBot)
                        leftRightList.append(leftRight)
            maskCenter[maskIndex] = (sum(topBotList) // len(topBotList), sum(leftRightList) // len(leftRightList))
            center_mask.append(f'({maskCenter[maskIndex][0]}, {maskCenter[maskIndex][1]})')
            splash = drawCenterMask(maskCenter[maskIndex], splash)

        # write confidence level
        pilImage = Image.fromarray(splash, 'RGB')
        # fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 40)
        draw = ImageDraw.Draw(pilImage)
        for eachBB, eachText in zip(r['rois'], r['scores']):
            draw.text((eachBB[1], eachBB[0]), '{:.3f}'.format(eachText), fill=(255,255,255,255))

        # write number of pixel
        for eachBB, maskIndex in zip(r['rois'], range(0, len(r['rois']))):
            pixelSum = 0
            topBotList = []
            leftRightList = []
            maskCenter = {}
            for topBot in range(eachBB[0], eachBB[2]):
                for leftRight in range(eachBB[1], eachBB[3]):
                    if r['masks'][topBot][leftRight][maskIndex]:
                        pixelSum += 1
                        topBotList.append(topBot)
                        leftRightList.append(leftRight)
            pixel_count.append(pixelSum)
            draw.text((eachBB[1], eachBB[2]), '{}'.format(pixelSum), fill=(255,255,255,255))

        # Save output
        pilImage.save(os.path.join(save_path, file_name))
        # skimage.io.imsave(file_name, splash)
    elif video_path:
        import cv2
        # Video capture
        vcapture = cv2.VideoCapture(video_path)
        width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = vcapture.get(cv2.CAP_PROP_FPS)

        # Define codec and create video writer
        file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now())
        vwriter = cv2.VideoWriter(file_name,
                                  cv2.VideoWriter_fourcc(*'MJPG'),
                                  fps, (width, height))

        count = 0
        success = True
        while success:
            print("frame: ", count)
            # Read next image
            success, image = vcapture.read()
            if success:
                # OpenCV returns images as BGR, convert to RGB
                image = image[..., ::-1]
                # Detect objects
                r = model.detect([image], verbose=0)[0]
                # Color splash
                splash = color_splash(image, r['masks'])
                # RGB -> BGR to save image to video
                splash = splash[..., ::-1]
                # Add image to video writer
                vwriter.write(splash)
                count += 1
        vwriter.release()
    print("Saved to ", file_name)
    return num_spikes, pixel_count, spike_height, spike_width, center_mask
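# Usage sketch (illustrative): model is a Mask R-CNN style detector exposing
# detect(), as in the Matterport implementation; save_path must exist.
stats = detect_and_color_splash(model, image_path='field.jpg', save_path='out')
num_spikes, pixel_count, spike_height, spike_width, center_mask = stats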
Example No. 13
import pytesseract
from pytesseract import Output
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import cv2
img = Image.open("desktop/base_img.png")
imgF= Image.open("desktop/edit_img.png")
#imgg = color.xyz2rgb(img)
draw = ImageDraw.Draw(imgF)
font = ImageFont.truetype("Desktop/Roboto-Light.ttf", 20)
d = pytesseract.image_to_data(img, output_type=Output.DICT)
d2= pytesseract.image_to_data(imgF, output_type=Output.DICT)
n_boxes = len(d2['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d2['left'][i], d2['top'][i], d2['width'][i], d2['height'][i])
    if(d['text'][i]!=d2['text'][i]):
        border(y,x,y+h,x+w)
        draw.text((d2['left'][i], d2['top'][i]-20), text = d['text'][i], font = font,  fill = "black")

plt.figure(figsize=(120,120))
plt.imshow(imgF)
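# Note (assumption): border() is not defined in this snippet; a plausible
# sketch that outlines the changed word on imgF would be:
def border(top, left, bottom, right):
    draw.rectangle([left, top, right, bottom], outline="red", width=2)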


import pytesseract
import numpy as np
from skimage import color
from pytesseract import Output
from PIL import Image, ImageFilter, ImageOps
import cv2
img = Image.open("desktop/img5.png").convert("L")
img = ImageOps.invert(img)
print(np.shape(img))

def main():
    args = get_args()
    assert args.dataset, 'dataset must be provided'
    if args.resume is None:
        raise ValueError('Must provide --resume when testing.')

    support_architectures = [
        'ksevendet',
    ]
    support_architectures += [f'efficientdet-d{i}' for i in range(8)]
    support_architectures += [
        f'retinanet-res{i}' for i in [18, 34, 50, 101, 152]
    ]

    support_architectures.append('retinanet-p45p6')

    print(support_architectures)

    if args.architecture == 'ksevendet':
        ksevendet_cfg = args.model_cfg
        if ksevendet_cfg.get('variant'):
            network_name = f'{args.architecture}-{ksevendet_cfg["variant"]}-{ksevendet_cfg["neck"]}'
        else:
            assert 0, 'not supported yet.'
            assert isinstance(ksevendet_cfg, dict)
            network_name = f'{args.architecture}-{ksevendet_cfg["backbone"]}_specifical-{ksevendet_cfg["neck"]}'
    elif args.architecture in support_architectures:
        network_name = args.architecture
    else:
        raise ValueError('Architecture {} is not supported.'.format(
            args.architecture))

    args.network_name = network_name
    net_logger = get_logger(name='Network Logger', args=args)
    net_logger.info('Positive Threshold: {:.2f}'.format(args.threshold))

    _shape_1, _shape_2 = tuple(map(int, args.input_shape.split(',')))
    _normalizer = Normalizer(inference_mode=True)
    if args.resize_mode == 0:
        _resizer = Resizer(min_side=_shape_1,
                           max_side=_shape_2,
                           resize_mode=args.resize_mode,
                           logger=net_logger,
                           inference_mode=True)
    elif args.resize_mode == 1:
        _resizer = Resizer(height=_shape_1,
                           width=_shape_2,
                           resize_mode=args.resize_mode,
                           logger=net_logger,
                           inference_mode=True)
    else:
        raise ValueError('Illegal resize mode.')

    transform_funcs_valid = [
        _normalizer,
        _resizer,
    ]
    transform = transforms.Compose(transform_funcs_valid)

    net_logger.info('Number of Classes: {:>3}'.format(args.num_classes))

    build_param = {'logger': net_logger}
    if args.architecture == 'ksevendet':
        net_model = ksevendet.KSevenDet(ksevendet_cfg,
                                        num_classes=args.num_classes,
                                        pretrained=False,
                                        **build_param)
    elif args.architecture == 'retinanet-p45p6':
        net_model = retinanet.retinanet_p45p6(num_classes=args.num_classes,
                                              **build_param)
    elif args.architecture.split('-')[0] == 'retinanet':
        net_model = retinanet.build_retinanet(args.architecture,
                                              num_classes=args.num_classes,
                                              pretrained=False,
                                              **build_param)
    elif args.architecture.split('-')[0] == 'efficientdet':
        net_model = efficientdet.build_efficientdet(
            args.architecture,
            num_classes=args.num_classes,
            pretrained=False,
            **build_param)
    else:
        assert 0, 'architecture error'

    net_logger.info('Loading Weights from Checkpoint : {}'.format(args.resume))
    net_model.load_state_dict(torch.load(args.resume))
    #model = torch.load(args.resume)

    # move to GPU (DataParallel handles the .cuda() placement)
    if torch.cuda.is_available():
        net_model = torch.nn.DataParallel(net_model).cuda()
    else:
        net_model = torch.nn.DataParallel(net_model)

    demo_image_files = os.listdir(args.demo_path)
    demo_image_files.sort()
    #if len(demo_image_files) > CONVERT_FILE_LIMIT:
    #    print('WARNING: Too many files...    total {} files.'.format(len(demo_image_files)))
    fontsize = 12
    score_font = ImageFont.truetype("DejaVuSans.ttf", size=fontsize)

    net_model.eval()

    img_array = []
    # print(net_model)

    for f in demo_image_files:
        #for f in demo_image_files[:1]:
        # for f in demo_image_files[:100]:
        #for f in demo_image_files[:min(len(demo_image_files), CONVERT_FILE_LIMIT)]:
        print(f'inference {f}', end="\r")
        if f[-3:] not in ['png', 'jpg']:
            continue
        #img = skimage.io.imread(os.path.join(args.demo_path, f))
        #if len(img.shape) == 2:
        #    img = skimage.color.gray2rgb(img)
        #print(np.sum(img - a_pil_img))
        img = Image.open(os.path.join(args.demo_path, f)).convert('RGB')
        a_img = np.array(img)
        # print(a_img)
        a_img = a_img.astype(np.float32) / 255.0
        # print(a_img.shape)
        a_img = transform(a_img)
        # print(a_img.shape)
        a_img = torch.unsqueeze(a_img, 0)
        # print(a_img.shape)
        a_img = a_img.permute(0, 3, 1, 2)
        # print(a_img.shape)

        # print('predict...')
        scores, labels, boxes = net_model(a_img, return_loss=False)

        scores = scores.cpu()
        labels = labels.cpu()
        boxes = boxes.cpu()

        # change to (x, y, w, h) (MS COCO standard)
        boxes[:, 2] -= boxes[:, 0]
        boxes[:, 3] -= boxes[:, 1]

        #if args.dataset == 'thermal':
        #    img = img.resize((80, 60))

        draw = ImageDraw.Draw(img)
        for box_id in range(boxes.shape[0]):
            score = float(scores[box_id])
            label = int(labels[box_id])
            box = boxes[box_id, :]

            # scores are sorted, so we can break
            if score < args.threshold:
                break

            x, y, w, h = box
            color_ = COLOR_LABEL[label]
            _text_offset_x, _text_offset_y = 2, 3
            #draw.rectangle(tuple([x, y, x+w, y+h]), width = 1, outline ='green')
            draw.rectangle(tuple([x, y, x + w, y + h]),
                           width=1,
                           outline=color_)
            draw.text(tuple(
                [int(x) + _text_offset_x + 1,
                 int(y) + _text_offset_y + 1]),
                      '{:.3f}'.format(score),
                      fill='#000000',
                      font=score_font)
            draw.text(tuple([int(x) + _text_offset_x,
                             int(y) + _text_offset_y]),
                      '{:.3f}'.format(score),
                      fill=color_,
                      font=score_font)

            # append detection to results
            # results.append(image_result)
        #plt.figure()
        #plt.imshow(img)
        #plt.axis('off')
        #plt.show()
        img_array.append(np.array(img))

    height, width, layers = img_array[0].shape
    size = (width, height)
    fps = 30
    #fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')

    out_video_file = os.path.join(
        args.output_path, '{}.avi'.format(
            os.path.basename(args.demo_path)
            if not args.output_name else args.output_name))
    print('Convert to video... {}'.format(out_video_file))
    out = cv2.VideoWriter(out_video_file, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                          size)

    for i in range(len(img_array)):
        out.write(img_array[i])

    out.release()

    print('Done')
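# Invocation sketch (illustrative; the flag names below are assumptions about
# what get_args() defines, not confirmed by the source):
#
#   python demo_images.py --architecture retinanet-res50 --num_classes 2 \
#       --input_shape 512,512 --resize_mode 1 --threshold 0.5 \
#       --dataset mydata --demo_path ./frames --output_path ./videos \
#       --resume checkpoint.pt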