Exemplo n.º 1
0
def transform(img, boxes, labels):
    """Resize an image with its boxes and convert the image to a normalized tensor.

    The image and boxes are resized together to a square of side
    ``img_size`` (a name defined outside this function). The image is then
    passed through ``ToTensor`` and per-channel ``Normalize``. ``labels`` is
    returned untouched.

    Returns:
        The tuple ``(img, boxes, labels)``.
    """
    square = (img_size, img_size)
    img, boxes = resize(img, boxes, size=square)

    # Per-channel mean / std (presumably the ImageNet statistics -- confirm).
    channel_mean = (0.485,0.456,0.406)
    channel_std = (0.229,0.224,0.225)
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    return to_normalized_tensor(img), boxes, labels
Exemplo n.º 2
0
def transform_test(img, boxes, labels):
    """Prepare one evaluation sample: resize, normalize, then encode targets.

    The image and boxes are resized together to a square of side
    ``img_size`` (defined outside this function). The image is converted to
    a tensor and normalized per channel, and the boxes/labels are passed
    through ``box_coder.encode``.

    Returns:
        The tuple ``(img, boxes, labels)`` where ``boxes`` and ``labels``
        are whatever ``box_coder.encode`` produced.
    """
    square = (img_size, img_size)
    img, boxes = resize(img, boxes, size=square)

    # Per-channel mean / std (presumably dataset-specific -- confirm).
    channel_mean = (0.4140, 0.4265, 0.4172)
    channel_std = (0.2646, 0.2683, 0.2751)
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    img = to_normalized_tensor(img)

    encoded_boxes, encoded_labels = box_coder.encode(boxes, labels)
    return img, encoded_boxes, encoded_labels
Exemplo n.º 3
0
def transform_test(img, boxes, labels):
    """Prepare one evaluation sample: resize, pad, normalize, then encode.

    The image and boxes are resized with both ``size`` and ``max_size`` set
    to ``img_size`` (defined outside this function), after which the image
    is padded to ``(img_size, img_size)``. The image is then converted to a
    tensor and normalized per channel, and the boxes/labels are passed
    through ``box_coder.encode``.

    Returns:
        The tuple ``(img, boxes, labels)`` where ``boxes`` and ``labels``
        are whatever ``box_coder.encode`` produced.
    """
    img, boxes = resize(img, boxes, size=img_size, max_size=img_size)
    padded_size = (img_size, img_size)
    img = pad(img, padded_size)

    # Per-channel mean / std (presumably the ImageNet statistics -- confirm).
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    img = to_normalized_tensor(img)

    encoded_boxes, encoded_labels = box_coder.encode(boxes, labels)
    return img, encoded_boxes, encoded_labels
Exemplo n.º 4
0
def transform_train(img, boxes, labels):
    """Augment one training sample, normalize the image and encode the targets.

    Steps, in order: ``random_distort`` on the image, ``random_crop`` on
    image/boxes/labels, ``resize`` to a square of side ``img_size`` (defined
    outside this function) with ``random_interpolation=True``,
    ``random_flip`` on image/boxes, ``ToTensor`` + per-channel ``Normalize``,
    and finally ``box_coder.encode`` on the boxes and labels.

    Returns:
        The tuple ``(img, boxes, labels)`` where ``boxes`` and ``labels``
        are whatever ``box_coder.encode`` produced.
    """
    img = random_distort(img)
    # NOTE: the random-paste augmentation is currently disabled:
    # if random.random() < 0.5:
    #     img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)

    square = (img_size, img_size)
    img, boxes = resize(img, boxes, size=square, random_interpolation=True)
    img, boxes = random_flip(img, boxes)

    # Per-channel mean / std (presumably dataset-specific -- confirm).
    channel_mean = (0.4140, 0.4265, 0.4172)
    channel_std = (0.2646, 0.2683, 0.2751)
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    img = to_normalized_tensor(img)

    encoded_boxes, encoded_labels = box_coder.encode(boxes, labels)
    return img, encoded_boxes, encoded_labels
Exemplo n.º 5
0
Arquivo: io.py Projeto: nupurkmr9/detr
def create_wandb_img(classes, img_path, target, preds, att_map, f_map,
                     dec_att):
    """Build three ``wandb.Image`` objects visualizing one image's predictions.

    The three images are:

    1. The image (resized for logging) with predicted and ground-truth
       boxes attached as wandb box overlays.
    2. A figure showing encoder self-attention maps taken at the centers of
       up to four randomly sampled ground-truth boxes, next to the image with
       those centers marked.
    3. A figure showing the decoder attention of the (up to) four
       highest-scoring predictions, next to the image with their boxes drawn.

    Args:
        classes: Mapping/sequence indexed by class id, used for captions and
            passed to wandb as ``class_labels``.
        img_path: Path of the image file, opened with ``Image.open``.
        target: Dict providing ``"labels"``, ``"boxes"`` and ``"image_id"``
            tensors. Boxes are unpacked as ``(cx, cy, width, height)`` and
            the centers are scaled by the image size, so they are presumably
            normalized to [0, 1] -- confirm against the dataset.
        preds: Dict providing ``"pred_logits"`` and ``"pred_boxes"`` for a
            single image (boxes unpacked as ``(cx, cy, width, height)``).
        att_map: Encoder self-attention tensor; indexed on its last two
            dimensions by a feature-map location.
        f_map: Feature map; only its last two dimensions ``(h, w)`` are used.
        dec_att: Decoder attention; ``dec_att[0, i]`` is reshaped to
            ``(h, w)`` for prediction ``i``.

    Returns:
        Tuple ``(boxes_image, self_attention_image, decoder_attention_image)``
        of ``wandb.Image`` objects.
    """
    prob = F.softmax(preds["pred_logits"], -1)
    # The last class column is dropped before taking the best class
    # (presumably the "no object" class -- confirm against the model).
    scores, labels = prob[..., :-1].max(-1)
    img = Image.open(img_path)
    caption = "Image: " + str(target["image_id"].item())

    # size for logging purposes
    tensor_img = ToTensor()(resize(img, size=(1500, 1333), target=None)[0])

    boxes_data = [
        {
            "position": {
                "middle": (cx, cy),
                "width": width,
                "height": height
            },
            "box_caption": f"{classes[cl]}: {sc:0.2f}",
            "class_id": cl,
            "scores": {
                "score": sc
            }
        }
        for sc, cl, (cx, cy, width, height) in zip(
            scores.tolist(), labels.tolist(), preds["pred_boxes"].tolist())
    ]

    gt_data = [
        {
            "position": {
                "middle": (cx, cy),
                "width": width,
                "height": height
            },
            "box_caption": f"{classes[cl]}",
            "class_id": cl,
            "scores": {
                "score": 1.0
            }
        }
        for cl, (cx, cy, width, height) in zip(target["labels"].tolist(),
                                               target["boxes"].tolist())
    ]

    boxes = {
        "predictions": {"box_data": boxes_data, "class_labels": classes},
        "ground_truth": {"box_data": gt_data, "class_labels": classes},
    }

    wimg = wandb.Image(tensor_img, boxes=boxes, caption=caption)

    # resize to feedforward size
    tensor_img = ToTensor()(resize(img, size=800, target=None,
                                   max_size=1333)[0])

    # Taken from https://colab.research.google.com/github/facebookresearch/detr/blob/colab/notebooks/detr_attention.ipynb
    # visualize encoder self attention
    # Downsampling factor between the resized image and the attention map,
    # rounded to the nearest power of two.
    fact = 2**round(math.log2(tensor_img.shape[-1] / att_map.shape[-1]))

    # how much was the original image upsampled before feeding it to the model
    scale_y = img.height / tensor_img.shape[-2]
    scale_x = img.width / tensor_img.shape[-1]

    # visualize attention around gt's center
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size for images with fewer than four gt boxes.
    sample = random.sample(gt_data, min(4, len(gt_data)))
    # (row, col) pixel position of each sampled center in the resized image.
    idxs = [(int(data["position"]["middle"][1] * tensor_img.shape[-2]),
             int(data["position"]["middle"][0] * tensor_img.shape[-1]))
            for data in sample]
    captions = [data["box_caption"] for data in sample]
    colors = ['lime', 'deepskyblue', 'orange', 'red']

    fig = plt.figure(constrained_layout=True, figsize=(25 * 0.7, 8.5 * 0.7))
    gs = fig.add_gridspec(2, 4)
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[1, 0]),
        fig.add_subplot(gs[0, -1]),
        fig.add_subplot(gs[1, -1]),
    ]

    for idx_o, ax, col, caption_txt in zip(idxs, axs, colors, captions):
        # Convert the pixel position to a feature-map location.
        idx = ((idx_o[0] // fact), idx_o[1] // fact)
        ax.imshow(att_map[..., idx[0], idx[1]],
                  cmap='cividis',
                  interpolation='nearest')
        ax.axis('off')
        ax.set_title(f'self-attention: {col} ({caption_txt})')

    fcenter_ax = fig.add_subplot(gs[:, 1:-1])
    fcenter_ax.imshow(img)
    fcenter_ax.axis('off')

    for (y, x), col in zip(idxs, colors):
        # Snap to the center of the feature-map cell, then map back to the
        # original image's pixel coordinates.
        x = ((x // fact) + 0.5) * fact
        y = ((y // fact) + 0.5) * fact
        fcenter_ax.add_patch(
            plt.Circle((x * scale_x, y * scale_y), fact // 4, color=col))

    self_att = wandb.Image(fig, caption=caption)
    # Close this figure explicitly: a bare plt.close() later only closes the
    # current figure, which would leave this one open on every call.
    plt.close(fig)

    h, w = f_map.shape[-2:]

    # select 4 highest scores
    keep = torch.sort(scores, 0, descending=True)[1][:4]

    bboxes_scaled = rescale_bboxes(preds["pred_boxes"][keep].cpu(),
                                   (img.width, img.height))

    fig = plt.figure(constrained_layout=True, figsize=(25 * 0.7, 8.5 * 0.7))
    gs = fig.add_gridspec(2, 4)
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[1, 0]),
        fig.add_subplot(gs[0, -1]),
        fig.add_subplot(gs[1, -1]),
    ]
    for idx, ax, col in zip(keep, axs, colors):
        ax.imshow(dec_att[0, idx].view(h, w))
        ax.axis('off')
        ax.set_title(f'Attention: {col} ({classes[labels[idx].item()]})')

    fcenter_ax = fig.add_subplot(gs[:, 1:-1])
    fcenter_ax.imshow(img)

    for col, (xmin, ymin, xmax, ymax) in zip(colors, bboxes_scaled):
        fcenter_ax.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          fill=False,
                          color=col,
                          linewidth=2))

    # Use a distinct local name so the ``att_map`` parameter is not rebound.
    dec_att_img = wandb.Image(fig, caption=caption)
    plt.close(fig)

    return wimg, self_att, dec_att_img