예제 #1
0
# --- Data pipeline -----------------------------------------------------------
# NOTE(review): `compsed` looks like a typo for `composed` (cf. the pipeline
# built as transforms.Compose([...]) elsewhere in this project) — confirm where
# it is defined before renaming.
train_data = VOC(root=data_path, transform=compsed, class_path=class_path)

# detection_collate stacks variable-length detection labels into one batch.
train_loader = dataloader.DataLoader(dataset=train_data,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     collate_fn=detection_collate)

# --- Model / optimisation hyper-parameters -----------------------------------
dropout = 0.4
num_class = 20       # PASCAL VOC defines 20 object classes
learning_rate = 0.4  # NOTE(review): unusually large for Adam — confirm intent
num_epochs = 6

net = yolov1.YOLOv1(params={"dropout": dropout, "num_class": num_class})

# Wrap in DataParallel on both paths so state_dict keys stay consistent
# ("module."-prefixed) regardless of device.
# NOTE(review): if `device` is a torch.device rather than a str, compare
# against device.type instead — confirm how `device` is produced upstream.
if device == 'cpu':
    model = nn.DataParallel(net).cpu()
else:
    model = torch.nn.DataParallel(net).cuda()

# Fixed: renamed misspelled `optimaizer` -> `optimizer` (only referenced here).
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)

# Exponential LR decay; the training loop is responsible for scheduler.step().
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# Number of batches per epoch.
total_step = len(train_loader)
예제 #2
0
def test(params):
    """Run YOLOv1 inference on every image under <data_path>/JPEGImages.

    For each image, prints the raw 7x7 grid predictions, draws the grid and
    any detection whose objectness and weighted class score clear the
    thresholds, and shows the annotated image with matplotlib.

    Args:
        params (dict): expects keys "input_height", "input_width",
            "data_path", "class_path", "num_gpus", "checkpoint_path",
            "use_summary", and "num_class".
    """
    input_height = params["input_height"]
    input_width = params["input_width"]

    data_path = params["data_path"]
    class_path = params["class_path"]
    num_gpus = [i for i in range(params["num_gpus"])]
    checkpoint_path = params["checkpoint_path"]

    USE_SUMMARY = params["use_summary"]

    num_class = params["num_class"]

    # One class name per line in the class file.
    with open(class_path) as f:
        class_list = f.read().splitlines()

    # A cell must clear BOTH thresholds to produce a drawn detection.
    objness_threshold = 0.3
    class_threshold = 0.3

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # dropout=1.0 is this project's "disable dropout" setting for eval.
    net = yolov1.YOLOv1(params={"dropout": 1.0, "num_class": num_class})
    print("device : ", device)
    # Fixed: original read `if device is "cpu":` — identity comparison against
    # a str literal is always False for a torch.device, so the CPU branch was
    # dead and the .cuda() path ran even without a GPU.
    if device.type == 'cpu':
        model = torch.nn.DataParallel(net)
    else:
        model = torch.nn.DataParallel(net, device_ids=num_gpus).cuda()

    # map_location lets a GPU-trained checkpoint load on a CPU-only host.
    model.load_state_dict(
        torch.load(checkpoint_path, map_location=device)["state_dict"])
    model.eval()

    if USE_SUMMARY:
        summary(model, (3, 448, 448))

    image_path = os.path.join(data_path, "JPEGImages")
    # `dirs` (was `dir`) avoids shadowing the builtin.
    root, dirs, files = next(os.walk(os.path.abspath(image_path)))

    for file in files:
        extension = file.split(".")[-1]
        if extension not in ["jpeg", "jpg", "png", "JPEG", "JPG", "PNG"]:
            continue

        img = Image.open(os.path.join(image_path, file)).convert('RGB')

        # PRE-PROCESSING: resize to the fixed network input, then to a tensor.
        input_img = img.resize((input_width, input_height))
        input_img = transforms.ToTensor()(input_img)
        # NOTE(review): ToTensor yields (C, H, W); the names below read it as
        # (c, w, h). Harmless while the input is square (448x448) — confirm.
        c, w, h = input_img.shape

        # Boxes and the grid overlay are drawn on the ORIGINAL-size image.
        W, H = img.size
        draw = ImageDraw.Draw(img)

        # One 7x7 grid cell, in original-image pixels.
        dx = W // 7
        dy = H // 7

        input_img = input_img.view(1, c, w, h)  # add batch dimension
        input_img = input_img.to(device)

        # INFERENCE (no autograd bookkeeping needed at test time)
        with torch.no_grad():
            outputs = model(input_img)
        b, w, h, c = outputs.shape

        outputs = outputs.view(w, h, c)  # drop the batch dimension
        outputs_np = outputs.cpu().data.numpy()

        # Channel 0 = objectness; channels 1..4 = box; channels 5.. = classes.
        objness = outputs[:, :, 0].unsqueeze(-1).cpu().data.numpy()

        cls_map = outputs[:, :, 5:].cpu().data.numpy()

        print("obj : {}".format(objness.shape))
        print("cls : {}".format(cls_map.shape))

        # Class scores weighted by objectness (broadcast over class channels).
        threshold_map = np.multiply(objness, cls_map)

        print("OBJECTNESS : {}".format(objness.shape))
        print(objness)
        print("\n\n\n")
        print("CLS MAP : {}".format(cls_map.shape))
        print(cls_map[0])
        print("\n\n\n")
        print("MULTIPLICATION : {}".format(threshold_map.shape))
        print(threshold_map[:, :, 0])
        print("\n\n\n")

        print("IMAGE SIZE")
        print("width : {}, height : {}".format(W, H))
        print("\n\n\n\n")

        try:

            for i in range(7):
                for j in range(7):
                    # Always draw the grid cell outline.
                    draw.rectangle(
                        ((dx * i, dy * j), (dx * i + dx, dy * j + dy)),
                        outline='#00ff88')

                    if objness[i][j] >= objness_threshold:
                        block = outputs_np[i][j]

                        x_start_point = dx * i
                        y_start_point = dy * j

                        # Box center offsets are relative to the grid cell.
                        x_shift = block[1]
                        y_shift = block[2]

                        center_x = int((block[1] * W / 7.0) + (i * W / 7.0))
                        center_y = int((block[2] * H / 7.0) + (j * H / 7.0))
                        # Width/height are predicted as sqrt ratios; square
                        # them back before scaling to image size.
                        w_ratio = block[3]
                        h_ratio = block[4]
                        w_ratio = w_ratio * w_ratio
                        h_ratio = h_ratio * h_ratio
                        width = int(w_ratio * W)
                        height = int(h_ratio * H)

                        xmin = center_x - (width // 2)
                        ymin = center_y - (height // 2)
                        xmax = xmin + width
                        ymax = ymin + height

                        # Objectness-weighted class probabilities.
                        clsprob = block[5:] * objness[i][j]
                        cls_idx = np.argmax(clsprob)

                        if clsprob[cls_idx] > class_threshold:

                            draw.rectangle(
                                ((xmin + 2, ymin + 2), (xmax - 2, ymax - 2)),
                                outline="blue")
                            draw.text(
                                (xmin + 5, ymin + 5),
                                "{}: {:.2f}".format(class_list[cls_idx],
                                                    clsprob[cls_idx]))
                            draw.ellipse(((center_x - 2, center_y - 2),
                                          (center_x + 2, center_y + 2)),
                                         fill='blue')

                        # LOG
                        print("idx : [{}][{}]".format(i, j))
                        print("x shift : {}, y shift : {}".format(
                            x_shift, y_shift))
                        print("w ratio : {}, h ratio : {}".format(
                            w_ratio, h_ratio))
                        print("cls prob : {}".format(
                            np.around(clsprob, decimals=2)))

                        print("xmin : {}, ymin : {}, xmax : {}, ymax : {}".
                              format(xmin, ymin, xmax, ymax))
                        print("width : {} height : {}".format(width, height))
                        print("class list : {}".format(class_list))
                        print("\n\n\n")

            plt.figure(figsize=(24, 18))
            plt.imshow(img)
            plt.show()
            plt.close()

        except Exception as e:
            # Best-effort per-image display: report and continue to the next
            # file instead of aborting the whole sweep.
            print("ERROR")
            print("Message : {}".format(e))
예제 #3
0
def train(params):
    """Train YOLOv1 on PASCAL VOC.

    Builds the augmentation pipeline, dataset, and model from ``params``,
    runs the optimisation loop, optionally streams losses to visdom/wandb,
    and writes a checkpoint every 1000 epochs.

    Args:
        params (dict): expects keys "dataset", "input_height", "input_width",
            "data_path", "class_path", "batch_size", "num_epochs", "lr",
            "dropout", "num_gpus", "checkpoint_path", "use_visdom",
            "use_wandb", "use_summary", "use_augmentation", "use_gtcheck",
            "use_githash", and "num_class".
    """
    # future work variable
    dataset = params["dataset"]
    input_height = params["input_height"]
    input_width = params["input_width"]

    data_path = params["data_path"]
    class_path = params["class_path"]
    batch_size = params["batch_size"]
    num_epochs = params["num_epochs"]
    learning_rate = params["lr"]
    dropout = params["dropout"]
    num_gpus = [i for i in range(params["num_gpus"])]
    checkpoint_path = params["checkpoint_path"]

    USE_VISDOM = params["use_visdom"]
    USE_WANDB = params["use_wandb"]
    USE_SUMMARY = params["use_summary"]
    USE_AUGMENTATION = params["use_augmentation"]
    USE_GTCHECKER = params["use_gtcheck"]

    USE_GITHASH = params["use_githash"]
    num_class = params["num_class"]

    if USE_WANDB:
        wandb.init()
        # adds all of the arguments as config variables
        wandb.config.update(params)

    # One class name per line in the class file.
    with open(class_path) as f:
        class_list = f.read().splitlines()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if USE_GITHASH:
        # Tag checkpoint filenames with the current commit for traceability.
        repo = git.Repo(search_parent_directories=True)
        sha = repo.head.object.hexsha
        short_sha = repo.git.rev_parse(sha, short=7)

    if USE_VISDOM:
        viz = visdom.Visdom(use_incoming_socket=False)
        vis_title = 'Yolo V1 Deepbaksu_vision (feat. martin, visionNoob) PyTorch on ' + 'VOC'
        vis_legend = ['Train Loss']
        # One line plot per loss component.
        iter_plot = create_vis_plot(viz, 'Iteration', 'Total Loss', vis_title,
                                    vis_legend)
        coord1_plot = create_vis_plot(viz, 'Iteration', 'coord1', vis_title,
                                      vis_legend)
        size1_plot = create_vis_plot(viz, 'Iteration', 'size1', vis_title,
                                     vis_legend)
        noobjectness1_plot = create_vis_plot(viz, 'Iteration', 'noobjectness1',
                                             vis_title, vis_legend)
        objectness1_plot = create_vis_plot(viz, 'Iteration', 'objectness1',
                                           vis_title, vis_legend)
        obj_cls_plot = create_vis_plot(viz, 'Iteration', 'obj_cls', vis_title,
                                       vis_legend)

    # 2. Data augmentation setting
    if USE_AUGMENTATION:
        # Apply two randomly chosen augmenters per sample.
        seq = iaa.SomeOf(
            2,
            [
                iaa.Multiply(
                    (1.2, 1.5)),  # change brightness, doesn't affect BBs
                iaa.Affine(
                    translate_px={
                        "x": 3,
                        "y": 10
                    }, scale=(0.9, 0.9)
                ),  # translate by 40/60px on x/y axis, and scale to 50-70%, affects BBs
                iaa.AdditiveGaussianNoise(scale=0.1 * 255),
                iaa.CoarseDropout(0.02, size_percent=0.15, per_channel=0.5),
                iaa.Affine(rotate=45),
                iaa.Sharpen(alpha=0.5)
            ])
    else:
        seq = iaa.Sequential([])  # identity pipeline

    composed = transforms.Compose([Augmenter(seq)])

    # 3. Load Dataset
    train_dataset = VOC(root=data_path,
                        transform=composed,
                        class_path=class_path)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               collate_fn=detection_collate)

    # 5. Load YOLOv1
    net = yolov1.YOLOv1(params={"dropout": dropout, "num_class": num_class})

    print("device : ", device)
    # DataParallel is applied on both paths so state_dict keys match.
    if device.type == 'cpu':
        model = torch.nn.DataParallel(net)
    else:
        model = torch.nn.DataParallel(net, device_ids=num_gpus).cuda()

    if USE_SUMMARY:
        summary(model, (3, 448, 448))

    # 7. Train the model
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

    total_step = len(train_loader)  # batches per epoch
    total_train_step = num_epochs * total_step

    # Epochs are 1-based throughout this loop.
    for epoch in range(1, num_epochs + 1):

        # Decay the LR only at these milestone epochs.
        if epoch in (200, 400, 600, 20000, 30000):
            scheduler.step()

        for i, (images, labels, sizes) in enumerate(train_loader):

            # Fixed: the epoch loop is 1-based, so the global step is
            # (epoch - 1) * total_step + (i + 1); the original over-counted
            # by one epoch and overshot total_train_step.
            current_train_step = (epoch - 1) * total_step + (i + 1)

            if USE_GTCHECKER:
                visualize_GT(images, labels, class_list)

            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)

            # Calc Loss
            loss, \
            obj_coord1_loss, \
            obj_size1_loss, \
            obj_class_loss, \
            noobjness1_loss, \
            objness1_loss = detection_loss_4_yolo(outputs, labels, device.type)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Log every 100 steps (every 10 during warm-up, below step 100).
            if (current_train_step % 100 == 0) or (
                    current_train_step % 10 == 0 and current_train_step < 100):
                # Fixed: print `epoch`, not `epoch + 1` — the loop is already
                # 1-based, so the original displayed e.g. 2/6 on epoch 1.
                print(
                    'epoch: [{}/{}], total step: [{}/{}], batch step [{}/{}], lr: {}, total_loss: {:.4f}, coord1: {:.4f}, size1: {:.4f}, noobj_clss: {:.4f}, objness1: {:.4f}, class_loss: {:.4f}'
                    .format(epoch, num_epochs, current_train_step,
                            total_train_step, i + 1, total_step, ([
                                param_group['lr']
                                for param_group in optimizer.param_groups
                            ])[0], loss.item(), obj_coord1_loss,
                            obj_size1_loss, noobjness1_loss, objness1_loss,
                            obj_class_loss))

                if USE_VISDOM:
                    # Fixed: plot against current_train_step so the x-axis
                    # matches the step printed above (the original used
                    # (epoch + 1) * total_step + (i + 1), a different count).
                    update_vis_plot(viz, current_train_step, loss.item(),
                                    iter_plot, None, 'append')
                    update_vis_plot(viz, current_train_step, obj_coord1_loss,
                                    coord1_plot, None, 'append')
                    update_vis_plot(viz, current_train_step, obj_size1_loss,
                                    size1_plot, None, 'append')
                    update_vis_plot(viz, current_train_step, obj_class_loss,
                                    obj_cls_plot, None, 'append')
                    update_vis_plot(viz, current_train_step, noobjness1_loss,
                                    noobjectness1_plot, None, 'append')
                    update_vis_plot(viz, current_train_step, objness1_loss,
                                    objectness1_plot, None, 'append')

                if USE_WANDB:
                    wandb.log({
                        'total_loss': loss.item(),
                        'obj_coord1_loss': obj_coord1_loss,
                        'obj_size1_loss': obj_size1_loss,
                        'obj_class_loss': obj_class_loss,
                        'noobjness1_loss': noobjness1_loss,
                        'objness1_loss': objness1_loss
                    })

        if not USE_GITHASH:
            short_sha = 'noHash'

        # Checkpoint every 1000 epochs (epoch is 1-based, so never at start).
        if (epoch % 1000) == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': "YOLOv1",
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                False,
                filename=os.path.join(
                    checkpoint_path,
                    'ckpt_{}_ep{:05d}_loss{:.04f}_lr{}.pth.tar'.format(
                        short_sha, epoch, loss.item(), ([
                            param_group['lr']
                            for param_group in optimizer.param_groups
                        ])[0])))
예제 #4
0
File: train.py  Project: HeegonJin/yolov1
        label = torch.from_numpy(np_label)
        targets.append(label)
    return torch.stack(imgs, 0), torch.stack(targets, 0)


# Resolve the VOC2007 root relative to this file so the script runs from any
# working directory.
data_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "VOCdevkit", "VOC2007")
train_dataset = VOC(root=data_path,
                    transform=transforms.ToTensor())

# detection_collate stacks variable-length detection labels into one batch.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=1,
                                           shuffle=True,
                                           collate_fn=detection_collate)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = yolov1.YOLOv1(params={"dropout": 0.5, "num_class": 20})
# DataParallel is applied on both paths so state_dict keys match either way.
if device.type == 'cpu':
    model = torch.nn.DataParallel(net)
else:
    # Fixed: DataParallel expects a *list* of device ids; the original passed
    # the bare int 0, which fails when the ids are iterated.
    model = torch.nn.DataParallel(net, device_ids=[0]).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Exponential LR decay; step() must be called by the training loop below.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

num_epochs = 1000
total_step = len(train_loader)  # batches per epoch
total_train_step = num_epochs * total_step

for epoch in range(num_epochs):

    for i, (images, labels) in enumerate(train_loader):