Example No. 1
def test(args, io):
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points,
                                        pt_norm=False),
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models:
    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args).to(device)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args).to(device)
    else:
        raise NotImplementedError("Unknown arch: %s" % args.arch)

    io.cprint(str(model))

    model = nn.DataParallel(model)
    model.load_state_dict(
        torch.load("checkpoints/%s/best_model.t7" % args.exp_name))
    model = model.eval()
    test_true = []
    test_pred = []
    for data, label in test_loader:
        data, label = data.to(device), label.to(device).squeeze()
        data = data.permute(0, 2, 1)
        with torch.no_grad():
            logits = model(data)
        preds = logits.max(dim=1)[1]
        test_true.append(label.cpu().numpy())
        test_pred.append(preds.detach().cpu().numpy())
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    test_acc = metrics.accuracy_score(test_true, test_pred)
    avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f' % (test_acc,
                                                             avg_per_class_acc)
    io.cprint(outstr)
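
For context, here is a minimal sketch of how this test(args, io) function could be driven. Every flag name and default below is an assumption inferred from how args is used above, and SimpleIO is a hypothetical stand-in for the repository's IOStream logger:

# Hypothetical driver for test(args, io); flag names/defaults are assumptions.
import argparse

class SimpleIO:
    """Minimal stand-in for the repository's IOStream logger."""
    def cprint(self, text):
        print(text)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_name', type=str, default='paconv_cls')
    parser.add_argument('--arch', type=str, default='dgcnn',
                        choices=['dgcnn', 'pointnet'])
    parser.add_argument('--num_points', type=int, default=1024)
    parser.add_argument('--test_batch_size', type=int, default=16)
    parser.add_argument('--cuda', action='store_true')
    test(parser.parse_args(), SimpleIO())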
Example No. 2
def test(args, io):
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points,
                                        pt_norm=False),
                             num_workers=args.workers,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")
    NUM_REPEAT = 300  # number of independent voting rounds
    NUM_VOTE = 10     # predictions averaged per round

    # Try to load models:
    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args).to(device)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args).to(device)
    else:
        raise NotImplementedError("Unknown arch: %s" % args.arch)

    model = nn.DataParallel(model)
    model.load_state_dict(
        torch.load("checkpoints/%s/best_model.t7" % args.exp_name))
    model = model.eval()
    best_acc = 0

    pointscale = PointcloudScale(scale_low=0.8,
                                 scale_high=1.18)  # set the range of scaling

    for i in range(NUM_REPEAT):
        test_true = []
        test_pred = []

        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            pred = 0
            for v in range(NUM_VOTE):
                new_data = data.clone()  # copy, so repeated scaling does not compound across votes
                if v > 0:
                    new_data = pointscale(new_data)
                with torch.no_grad():
                    pred += F.softmax(model(new_data.permute(0, 2, 1)),
                                      dim=1)  # sum 10 preds
            pred /= NUM_VOTE  # avg the preds!
            label = label.view(-1)
            pred_choice = pred.max(dim=1)[1]
            test_true.append(label.cpu().numpy())
            test_pred.append(pred_choice.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        if test_acc > best_acc:
            best_acc = test_acc
        outstr = 'Voting %d, test acc: %.6f' % (i, test_acc * 100)
        io.cprint(outstr)

    final_outstr = 'Final voting test acc: %.6f' % (best_acc * 100)
    io.cprint(final_outstr)
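
PointcloudScale is imported from the repository's augmentation utilities and is not shown here. Below is a minimal sketch consistent with how the voting loop uses it (one random uniform rescale per call); the real class may differ, e.g. by scaling each axis independently:

import torch

class PointcloudScale:
    """Sketch: rescale each point cloud in a batch by one random factor."""
    def __init__(self, scale_low=0.8, scale_high=1.18):
        self.scale_low = scale_low
        self.scale_high = scale_high

    def __call__(self, points):
        # points: (batch, num_points, 3); draw one scale factor per cloud
        scales = torch.empty(points.size(0), 1, 1,
                             device=points.device).uniform_(self.scale_low,
                                                            self.scale_high)
        return points * scales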
Example No. 3
def train(args, io):

    # ============= Model ===================
    num_part = 50
    device = torch.device("cuda" if args.cuda else "cpu")

    model = PAConv(args, num_part).to(device)
    io.cprint(str(model))

    model.apply(weight_init)
    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # Use pretrained weights or not:
    if args.get('pretrain', False):
        checkpoint = torch.load("checkpoints/%s/best_insiou_model.pth" % args.exp_name,
                                map_location=torch.device('cpu'))
        state_dict = checkpoint['model']
        # Prepend the 'module.' prefix expected by nn.DataParallel if the
        # checkpoint was saved from an unwrapped model:
        if not next(iter(state_dict)).startswith('module.'):
            from collections import OrderedDict
            state_dict = OrderedDict(('module.' + k, v) for k, v in state_dict.items())
        model.load_state_dict(state_dict)

        print("Using pretrained model...")
        print(checkpoint.keys())
    else:
        print("Training from scratch...")

    # =========== Dataloader =================
    train_data = PartNormalDataset(npoints=2048, split='trainval', normalize=False)
    print("The number of training data is:%d", len(train_data))

    test_data = PartNormalDataset(npoints=2048, split='test', normalize=False)
    print("The number of test data is:%d", len(test_data))

    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers,
                              drop_last=True)

    test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False, num_workers=args.workers,
                             drop_last=False)

    # ============= Optimizer ================
    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.weight_decay)

    if args.scheduler == 'cos':
        print("Use CosLR")
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr / 100)
    else:
        print("Use StepLR")
        scheduler = StepLR(opt, step_size=args.step, gamma=0.5)

    # ============= Training =================
    best_acc = 0
    best_class_iou = 0
    best_instance_iou = 0
    num_classes = 16

    for epoch in range(args.epochs):

        train_epoch(train_loader, model, opt, scheduler, epoch, num_part, num_classes, io)

        test_metrics, total_per_cat_iou = test_epoch(test_loader, model, epoch, num_part, num_classes, io)

        # 1. Save the model whenever it reaches a new best accuracy:
        if test_metrics['accuracy'] > best_acc:
            best_acc = test_metrics['accuracy']
            io.cprint('Max Acc:%.5f' % best_acc)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_acc': best_acc}
            torch.save(state, 'checkpoints/%s/best_acc_model.pth' % args.exp_name)

        # 2. Save the model whenever it reaches a new best instance IoU:
        if test_metrics['shape_avg_iou'] > best_instance_iou:
            best_instance_iou = test_metrics['shape_avg_iou']
            io.cprint('Max instance iou:%.5f' % best_instance_iou)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_instance_iou': best_instance_iou}
            torch.save(state, 'checkpoints/%s/best_insiou_model.pth' % args.exp_name)

        # 3. Save the model whenever it reaches a new best class IoU;
        # first compute the average per-class IoU:
        class_iou = 0
        for cat_idx in range(16):
            class_iou += total_per_cat_iou[cat_idx]
        avg_class_iou = class_iou / 16
        if avg_class_iou > best_class_iou:
            best_class_iou = avg_class_iou
            # print the iou of each class:
            for cat_idx in range(16):
                io.cprint(classes_str[cat_idx] + ' iou: ' + str(total_per_cat_iou[cat_idx]))
            io.cprint('Max class iou:%.5f' % best_class_iou)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_class_iou': best_class_iou}
            torch.save(state, 'checkpoints/%s/best_clsiou_model.pth' % args.exp_name)

    # report best acc, ins_iou, cls_iou
    io.cprint('Final Max Acc:%.5f' % best_acc)
    io.cprint('Final Max instance iou:%.5f' % best_instance_iou)
    io.cprint('Final Max class iou:%.5f' % best_class_iou)
    # save last model
    state = {
        'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
        'optimizer': opt.state_dict(), 'epoch': args.epochs - 1, 'test_iou': best_instance_iou}
    torch.save(state, 'checkpoints/%s/model_ep%d.pth' % (args.exp_name, args.epochs))
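
The 'module.' prefix juggling above recurs whenever a checkpoint saved from a bare module is loaded into an nn.DataParallel wrapper, or vice versa. A small hypothetical helper (the name match_module_prefix is not from the repository) that normalizes a state dict either way:

from collections import OrderedDict

def match_module_prefix(state_dict, wrapped):
    # wrapped=True: ensure every key carries the 'module.' prefix that
    # nn.DataParallel/DDP expect; wrapped=False: ensure it is stripped.
    out = OrderedDict()
    for key, value in state_dict.items():
        has_prefix = key.startswith('module.')
        if wrapped and not has_prefix:
            key = 'module.' + key
        elif not wrapped and has_prefix:
            key = key[len('module.'):]
        out[key] = value
    return out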
Example No. 4
def test(args, io):
    # Dataloader
    test_data = PartNormalDataset(npoints=2048, split='test', normalize=False)
    print("The number of test data is:%d", len(test_data))

    test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False, num_workers=args.workers,
                             drop_last=False)

    # Try to load models
    num_part = 50
    device = torch.device("cuda" if args.cuda else "cpu")

    model = PAConv(args, num_part).to(device)
    io.cprint(str(model))

    from collections import OrderedDict
    state_dict = torch.load("checkpoints/%s/best_%s_model.pth" % (args.exp_name, args.model_type),
                            map_location=torch.device('cpu'))['model']

    new_state_dict = OrderedDict()
    for layer in state_dict:
        new_state_dict[layer.replace('module.', '')] = state_dict[layer]
    model.load_state_dict(new_state_dict)

    model.eval()
    num_classes = 16
    metrics = defaultdict(list)
    hist_acc = []
    shape_ious = []
    total_per_cat_iou = np.zeros(16, dtype=np.float32)
    total_per_cat_seen = np.zeros(16, dtype=np.int32)

    for batch_id, (points, label, target, norm_plt) in tqdm(enumerate(test_loader), total=len(test_loader), smoothing=0.9):
        batch_size, num_point, _ = points.size()
        # torch.autograd.Variable is deprecated; plain tensors carry autograd state
        points, label, target, norm_plt = points.float(), label.long(), target.long(), norm_plt.float()
        points = points.transpose(2, 1)
        norm_plt = norm_plt.transpose(2, 1)
        points, label, target, norm_plt = points.cuda(non_blocking=True), label.squeeze().cuda(
            non_blocking=True), target.cuda(non_blocking=True), norm_plt.cuda(non_blocking=True)

        with torch.no_grad():
            seg_pred = model(points, norm_plt, to_categorical(label, num_classes))  # b,n,50

        # instance iou without considering the class average at each batch_size:
        batch_shapeious = compute_overall_iou(seg_pred, target, num_part)  # [b]
        shape_ious += batch_shapeious  # list concatenation: extend with this batch's per-sample IoUs

        # per category iou at each batch_size:
        for shape_idx in range(seg_pred.size(0)):  # sample_idx
            cur_gt_label = label[shape_idx]  # label[sample_idx]
            total_per_cat_iou[cur_gt_label] += batch_shapeious[shape_idx]
            total_per_cat_seen[cur_gt_label] += 1

        # accuracy:
        seg_pred = seg_pred.contiguous().view(-1, num_part)
        target = target.view(-1, 1)[:, 0]
        pred_choice = seg_pred.data.max(1)[1]
        correct = pred_choice.eq(target.data).cpu().sum()
        metrics['accuracy'].append(correct.item() / (batch_size * num_point))

    hist_acc += metrics['accuracy']
    metrics['accuracy'] = np.mean(hist_acc)
    metrics['shape_avg_iou'] = np.mean(shape_ious)
    for cat_idx in range(16):
        if total_per_cat_seen[cat_idx] > 0:
            total_per_cat_iou[cat_idx] = total_per_cat_iou[cat_idx] / total_per_cat_seen[cat_idx]

    # Compute each class's IoU and the average class IoU:
    class_iou = 0
    for cat_idx in range(16):
        class_iou += total_per_cat_iou[cat_idx]
        io.cprint(classes_str[cat_idx] + ' iou: ' + str(total_per_cat_iou[cat_idx]))  # print the iou of each class
    avg_class_iou = class_iou / 16
    outstr = 'Test :: test acc: %f, test class mIOU: %f, test instance mIOU: %f' % (metrics['accuracy'], avg_class_iou, metrics['shape_avg_iou'])
    io.cprint(outstr)
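
compute_overall_iou comes from elsewhere in the repository. Judging only from its use above (it returns one IoU value per sample in the batch), a sketch could look like this, with the caveat that the real implementation may instead average over the fixed part set of each shape's category:

import torch

def compute_overall_iou(pred, target, num_part):
    # pred: (batch, num_points, num_part) logits; target: (batch, num_points).
    # num_part is kept for signature compatibility; only parts that actually
    # occur in a shape (in prediction or ground truth) are scored here.
    ious = []
    pred_labels = pred.argmax(dim=2)
    for b in range(pred.size(0)):
        parts = torch.unique(torch.cat([pred_labels[b], target[b]]))
        part_ious = []
        for part in parts.tolist():
            inter = ((pred_labels[b] == part) & (target[b] == part)).sum().item()
            union = ((pred_labels[b] == part) | (target[b] == part)).sum().item()
            part_ious.append(inter / union)  # union > 0 by construction
        ious.append(sum(part_ious) / len(part_ious))
    return ious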
Example No. 5
def train(args, io):
    train_loader = DataLoader(ModelNet40(partition='train',
                                         num_points=args.num_points,
                                         pt_norm=args.pt_norm),
                              num_workers=args.workers,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points,
                                        pt_norm=False),
                             num_workers=args.workers,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args).to(device)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args).to(device)
    else:
        raise NotImplementedError("Unknown arch: %s" % args.arch)

    io.cprint(str(model))

    model.apply(weight_init)
    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    print("Use SGD")
    opt = optim.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=1e-4)
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr / 100)

    criterion = cal_loss

    best_test_acc = 0

    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        train_acc = metrics.accuracy_score(train_true, train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f' % (
            epoch, train_loss * 1.0 / count, train_acc)
        io.cprint(outstr)

        writer.add_scalar('loss_train', train_loss * 1.0 / count, epoch + 1)
        writer.add_scalar('Acc_train', train_acc, epoch + 1)

        # Step the LR scheduler once per epoch, after the optimizer updates
        # (the ordering recommended since PyTorch 1.1):
        scheduler.step()

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            with torch.no_grad():
                logits = model(data)
                loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc)
        io.cprint(outstr)

        writer.add_scalar('loss_test', test_loss * 1.0 / count, epoch + 1)
        writer.add_scalar('Acc_test', test_acc, epoch + 1)

        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            io.cprint('Max Acc:%.6f' % best_test_acc)
            torch.save(model.state_dict(),
                       'checkpoints/%s/best_model.t7' % args.exp_name)
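
The criterion here, cal_loss, is imported from the repository's utilities. In DGCNN-style codebases it is typically cross entropy with label smoothing, along these lines (an educated guess, not the verbatim helper):

import torch
import torch.nn.functional as F

def cal_loss(pred, gold, smoothing=True):
    """Sketch: cross entropy with optional label smoothing."""
    gold = gold.contiguous().view(-1)
    if smoothing:
        eps = 0.2
        n_class = pred.size(1)
        # Soft targets: 1 - eps on the true class, eps spread over the rest
        one_hot = torch.zeros_like(pred).scatter(1, gold.view(-1, 1), 1)
        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = F.log_softmax(pred, dim=1)
        loss = -(one_hot * log_prb).sum(dim=1).mean()
    else:
        loss = F.cross_entropy(pred, gold, reduction='mean')
    return loss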
Example No. 6
def train(gpu, ngpus_per_node):

    # ============= Model ===================
    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args)
    else:
        raise NotImplementedError("Unknown arch: %s" % args.arch)

    model.apply(weight_init)

    if main_process():
        logger.info(model)

    if args.sync_bn and args.distributed:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.test_batch_size = int(args.test_batch_size / ngpus_per_node)
        args.workers = int(
            (args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(
            model.cuda(), device_ids=[gpu], find_unused_parameters=True)
    else:
        model = torch.nn.DataParallel(model.cuda())

    # =========== Dataloader =================
    train_data = ModelNet40(partition='train',
                            num_points=args.num_points,
                            pt_norm=args.pt_norm)
    test_data = ModelNet40(partition='test',
                           num_points=args.num_points,
                           pt_norm=False)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            test_data)
    else:
        train_sampler = None
        test_sampler = None

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              sampler=test_sampler)

    # ============= Optimizer ===================
    if main_process():
        logger.info("Use SGD")
    opt = optim.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=1e-4)
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr / 100)

    criterion = cal_loss
    best_test_acc = 0
    start_epoch = 0

    # ============= Training from scratch=================
    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        train_epoch(train_loader, model, opt, scheduler, epoch, criterion)

        test_acc = test_epoch(test_loader, model, epoch, criterion)

        if test_acc >= best_test_acc and main_process():
            best_test_acc = test_acc
            logger.info('Max Acc:%.6f' % best_test_acc)
            torch.save(model.state_dict(), 'checkpoints/%s/best_model.t7' %
                       args.exp_name)  # save the best model
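
This train(gpu, ngpus_per_node) signature matches the torch.multiprocessing.spawn launch pattern. A minimal launcher is sketched below; the backend, address, port, and single-node world size are illustrative values, and the global args used by train() is assumed to be configured before spawning:

# Hypothetical launcher for the distributed entry point above.
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def main_worker(gpu, ngpus_per_node):
    # One process per GPU; rank equals the local GPU index on a single node.
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:23456',
                            world_size=ngpus_per_node,
                            rank=gpu)
    train(gpu, ngpus_per_node)

if __name__ == '__main__':
    ngpus = torch.cuda.device_count()
    mp.spawn(main_worker, nprocs=ngpus, args=(ngpus,))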
Example No. 7
def test(gpu, ngpus_per_node):
    if main_process():
        logger.info('<<<<<<<<<<<<<<<<< Start Evaluation <<<<<<<<<<<<<<<<<')

    # ============= Model ===================
    if args.arch == 'dgcnn':
        from model.DGCNN_PAConv import PAConv
        model = PAConv(args)
    elif args.arch == 'pointnet':
        from model.PointNet_PAConv import PAConv
        model = PAConv(args)
    else:
        raise NotImplementedError("Unknown arch: %s" % args.arch)

    if main_process():
        logger.info(model)

    if args.sync_bn:
        assert args.distributed
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.test_batch_size = int(args.test_batch_size / ngpus_per_node)
        args.workers = int(
            (args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(
            model.cuda(), device_ids=[gpu], find_unused_parameters=True)
    else:
        model = torch.nn.DataParallel(model.cuda())

    state_dict = torch.load("checkpoints/%s/best_model.t7" % args.exp_name,
                            map_location=torch.device('cpu'))

    # Prepend the 'module.' prefix expected by DataParallel/DDP if the
    # checkpoint was saved from an unwrapped model:
    if not next(iter(state_dict)).startswith('module.'):
        from collections import OrderedDict
        state_dict = OrderedDict(('module.' + k, v) for k, v in state_dict.items())

    model.load_state_dict(state_dict)

    # Dataloader
    test_data = ModelNet40(partition='test', num_points=args.num_points)
    if args.distributed:
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            test_data)
    else:
        test_sampler = None
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              sampler=test_sampler)

    model.eval()

    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    for data, label in test_loader:

        data, label = data.cuda(non_blocking=True), label.cuda(
            non_blocking=True).squeeze(1)
        data = data.permute(0, 2, 1)
        with torch.no_grad():
            logits = model(data)
        preds = logits.max(dim=1)[1]

        intersection, union, target = intersectionAndUnionGPU(
            preds, label, args.classes)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection)
            dist.all_reduce(union)
            dist.all_reduce(target)
        intersection = intersection.cpu().numpy()
        union = union.cpu().numpy()
        target = target.cpu().numpy()
        intersection_meter.update(intersection)
        union_meter.update(union)
        target_meter.update(target)

    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Test result: mAcc/allAcc {:.4f}/{:.4f}.'.format(
            mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: accuracy {:.4f}.'.format(
                i, accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<')
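
intersectionAndUnionGPU is a common semantic-segmentation utility. This sketch is consistent with the meters above (per-class intersection, union, and target counts for args.classes classes), though the repository's version may also accept an ignore label:

import torch

def intersectionAndUnionGPU(output, target, K):
    # output/target: integer class tensors of identical shape, on GPU.
    output = output.reshape(-1).float()
    target = target.reshape(-1).float()
    intersection = output[output == target]
    # Histogram each set of labels into K per-class bins:
    area_intersection = torch.histc(intersection, bins=K, min=0, max=K - 1)
    area_output = torch.histc(output, bins=K, min=0, max=K - 1)
    area_target = torch.histc(target, bins=K, min=0, max=K - 1)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target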
Example No. 8
def train(gpu, ngpus_per_node):
    # ============= Model ===================
    num_part = 50
    model = PAConv(args, num_part)

    model.apply(weight_init)

    if main_process():
        logger.info(model)

    if args.sync_bn and args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.test_batch_size = int(args.test_batch_size / ngpus_per_node)
        args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu], find_unused_parameters=True)
    else:
        model = torch.nn.DataParallel(model.cuda())

    # Use pretrained weights or not:
    if args.get('pretrain', False):
        state_dict = torch.load("checkpoints/%s/best_insiou_model.pth" % args.exp_name,
                                map_location=torch.device('cpu'))['model']
        # Prepend the 'module.' prefix expected by DataParallel/DDP if the
        # checkpoint was saved from an unwrapped model:
        if not next(iter(state_dict)).startswith('module.'):
            from collections import OrderedDict
            state_dict = OrderedDict(('module.' + k, v) for k, v in state_dict.items())
        model.load_state_dict(state_dict)
        if main_process():
            logger.info("Using pretrained model...")
            logger.info(torch.load("checkpoints/%s/best_insiou_model.pth" % args.exp_name).keys())
    else:
        if main_process():
            logger.info("Training from scratch...")

    # =========== Dataloader =================
    train_data = PartNormalDataset(npoints=2048, split='trainval', normalize=False)
    if main_process():
        logger.info("The number of training data is:%d", len(train_data))

    test_data = PartNormalDataset(npoints=2048, split='test', normalize=False)
    if main_process():
        logger.info("The number of test data is:%d", len(test_data))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
        test_sampler = torch.utils.data.distributed.DistributedSampler(test_data)
    else:
        train_sampler = None
        test_sampler = None

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None),
                                               num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False,
                                              num_workers=args.workers, pin_memory=True, sampler=test_sampler)

    # ============= Optimizer ===================
    if args.use_sgd:
        if main_process():
            logger.info("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    else:
        if main_process():
            logger.info("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.weight_decay)

    if args.scheduler == 'cos':
        if main_process():
            logger.info("Use CosLR")
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr / 100)
    else:
        if main_process():
            logger.info("Use StepLR")
        scheduler = StepLR(opt, step_size=args.step, gamma=0.5)

    # ============= Training =================
    best_acc = 0
    best_class_iou = 0
    best_instance_iou = 0
    num_classes = 16

    for epoch in range(args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        train_epoch(train_loader, model, opt, scheduler, epoch, num_part, num_classes)

        test_metrics, total_per_cat_iou = test_epoch(test_loader, model, epoch, num_part, num_classes)

        # 1. Save the model whenever it reaches a new best accuracy:
        if test_metrics['accuracy'] > best_acc and main_process():
            best_acc = test_metrics['accuracy']
            logger.info('Max Acc:%.5f' % best_acc)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_acc': best_acc}
            torch.save(state, 'checkpoints/%s/best_acc_model.pth' % args.exp_name)

        # 2. Save the model whenever it reaches a new best instance IoU:
        if test_metrics['shape_avg_iou'] > best_instance_iou and main_process():
            best_instance_iou = test_metrics['shape_avg_iou']
            logger.info('Max instance iou:%.5f' % best_instance_iou)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_instance_iou': best_instance_iou}
            torch.save(state, 'checkpoints/%s/best_insiou_model.pth' % args.exp_name)

        # 3. Save the model whenever it reaches a new best class IoU;
        # first compute the average per-class IoU:
        class_iou = 0
        for cat_idx in range(16):
            class_iou += total_per_cat_iou[cat_idx]
        avg_class_iou = class_iou / 16
        if avg_class_iou > best_class_iou and main_process():
            best_class_iou = avg_class_iou
            # print the iou of each class:
            for cat_idx in range(16):
                logger.info(classes_str[cat_idx] + ' iou: ' + str(total_per_cat_iou[cat_idx]))
            logger.info('Max class iou:%.5f' % best_class_iou)
            state = {
                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_class_iou': best_class_iou}
            torch.save(state, 'checkpoints/%s/best_clsiou_model.pth' % args.exp_name)

    if main_process():
        # report best acc, ins_iou, cls_iou
        logger.info('Final Max Acc:%.5f' % best_acc)
        logger.info('Final Max instance iou:%.5f' % best_instance_iou)
        logger.info('Final Max class iou:%.5f' % best_class_iou)
        # save last model
        state = {
            'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
            'optimizer': opt.state_dict(), 'epoch': args.epochs - 1, 'test_iou': best_instance_iou}
        torch.save(state, 'checkpoints/%s/model_ep%d.pth' % (args.exp_name, args.epochs))
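
The checkpoints saved by these training loops bundle model weights, optimizer state, and the epoch under fixed keys. Resuming from one could look like the following hypothetical helper, which mirrors the save format shown above:

import torch

def resume_from_checkpoint(model, opt, path):
    """Hypothetical helper: restore a state bundle saved by the loops above."""
    checkpoint = torch.load(path, map_location=torch.device('cpu'))
    # Weights were taken from model.module when multiple GPUs were used,
    # so load into the unwrapped module if the model is wrapped.
    target = model.module if hasattr(model, 'module') else model
    target.load_state_dict(checkpoint['model'])
    opt.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch'] + 1  # first epoch to run after resuming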