Example #1
def main(log_dir, augmentation, dataset, batch_size, num_workers, num_point,
         normal, num_votes, gpu):
    '''HYPER PARAMETER'''
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu


    torch.backends.cudnn.benchmark = True

    # Increasing `repeat` will generate more cached files

    transform = torchvision.transforms.Compose(
        [
            # ToMesh(random_rotations=True, random_translation=0.1),  # transform data to a mesh
            # ToPoints(random_rotations=True, random_translation=0.1),
            # Needs modification: originally the value on the sphere is based on rays cast from
            # points on the spherical surface. Since we want to process point cloud data directly,
            # the script can be modified so that rays are cast from the point cloud to the sphere.
            # ProjectOnSphere(bandwidth=bw)
            ProjectFromPointsOnSphere(bandwidth=64)
        ]
    )
    #transform = KeepName(transform)

    #test_set = Shrec17("data", dataset, perturbed=True, download=True, transform=transform)

    DATA_PATH = 'data/modelnet40_normal_resampled/'
    TEST_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=num_point, split='test',
                                      normal_channel=normal, transform=transform)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=batch_size,
                                                 shuffle=False, num_workers=num_workers)

    loader = importlib.machinery.SourceFileLoader('model', os.path.join(log_dir, "model.py"))
    mod = types.ModuleType(loader.name)
    loader.exec_module(mod)

    model = mod.Model(40)
    model.cuda()

    model.load_state_dict(torch.load(os.path.join(log_dir, "state.pkl")))

    with torch.no_grad():
        instance_acc, class_acc = test(model.eval(), testDataLoader, vote_num=num_votes)
        print('Test Instance Accuracy: %f, Class Accuracy: %f' % (instance_acc, class_acc))
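
The `test` helper is referenced above but not shown. A minimal sketch of vote-based evaluation consistent with its call signature, modeled on the common ModelNet40 voting loop (the log-probability output and the per-class bookkeeping are assumptions, not the example's confirmed implementation):

import numpy as np
import torch

def test(model, loader, vote_num=1, num_class=40):
    mean_correct = []
    class_acc = np.zeros((num_class, 3))
    for points, target in loader:
        target = target[:, 0]
        points, target = points.transpose(2, 1).cuda(), target.cuda()
        # Average the predictions of `vote_num` forward passes.
        vote_pool = torch.zeros(target.size(0), num_class).cuda()
        for _ in range(vote_num):
            vote_pool += model(points)  # assumed: (batch, num_class) log-probabilities
        pred_choice = (vote_pool / vote_num).max(1)[1]
        for cat in np.unique(target.cpu()):
            cat_mask = target == cat
            hits = pred_choice[cat_mask].eq(target[cat_mask]).cpu().sum().item()
            class_acc[cat, 0] += hits / float(cat_mask.sum().item())
            class_acc[cat, 1] += 1
        mean_correct.append(pred_choice.eq(target).cpu().sum().item() / float(points.size(0)))
    class_acc[:, 2] = class_acc[:, 0] / class_acc[:, 1]  # NaN for classes absent from the split
    return np.mean(mean_correct), np.mean(class_acc[:, 2])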
Example #2
def main(args):

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    ''' === Create Loggers and Backup Scripts === '''
    MyLogger = TrainLogger(args, name=args.model.upper(), subfold='cls')
    writer = SummaryWriter(os.path.join(MyLogger.experiment_dir, 'runs'))
    shutil.copy(os.path.join('models', 'dgcnn_cls.py'), MyLogger.log_dir)
    shutil.copy(os.path.abspath(__file__), MyLogger.log_dir)
    shutil.copy(args.nfl_cfg, MyLogger.log_dir)
    ''' === Load Data (excludes normals) === '''
    MyLogger.logger.info('Load dataset ...')
    DATA_PATH = 'data/modelnet40_normal_resampled/'
    TRAIN_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                       npoint=args.num_point,
                                       split='train',
                                       normal_channel=False)
    TEST_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                      npoint=args.num_point,
                                      split='test',
                                      normal_channel=False)
    train_loader = torch.utils.data.DataLoader(TRAIN_DATASET,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)
    test_loader = torch.utils.data.DataLoader(TEST_DATASET,
                                              batch_size=6,
                                              shuffle=False,
                                              num_workers=4)
    # Use a smaller batch size in test_loader (no effect on training) so evaluation fits on a single GTX 1080 (8 GB memory).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("===================")
    print("Let's use", torch.cuda.device_count(), "GPUs: %s!" % args.gpu)
    print("===================")
    ''' === Load Models, Allow Multiple GPUs === '''
    if args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'dgcnn_nrs':
        model = DGCNN_NRS(args).to(device)
    else:
        raise NotImplementedError("Specified model is not implemented")
    model = nn.DataParallel(model)

    try:
        checkpoint = torch.load(MyLogger.savepath)
        model.load_state_dict(checkpoint['model_state_dict'])
        MyLogger.update_from_checkpoints(checkpoint)
    except Exception:
        MyLogger.logger.info(
            'No pre-trained model, start training from scratch...')

    if args.use_sgd:
        print("Use SGD Optimiser")
        opt = torch.optim.SGD(model.parameters(),
                              lr=args.lr * 100,  # DGCNN convention: SGD runs at 100x the base lr
                              momentum=args.momentum,
                              weight_decay=1e-4)
    else:
        print("Use Adam Optimiser")
        opt = torch.optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt,
                                                           args.epoch,
                                                           eta_min=args.lr)

    for epoch in range(1, args.epoch + 1):
        '''=== Train ==='''
        MyLogger.cls_epoch_init()
        scheduler.step()  # note: PyTorch >= 1.1 recommends stepping the scheduler after the optimizer
        model.train()

        for _, data in tqdm(enumerate(train_loader, 0),
                            total=len(train_loader),
                            smoothing=0.9):
            points, label = data  # points: (batch_size, num_points, 3)

            if args.data_aug:
                points = random_point_dropout(points.numpy())
                points[:, :, 0:3] = random_scale_point_cloud(points[:, :, 0:3])
                points[:, :, 0:3] = random_shift_point_cloud(points[:, :, 0:3])

            points = torch.Tensor(points).transpose(2, 1).cuda()  # (batch_size, 3, num_points)
            label = label[:, 0].type(torch.int64).cuda()
            # batch_size = points.size()[0]  # the last batch is smaller than args.batch_size

            opt.zero_grad()
            logits = model(points)
            loss = cal_loss(logits, label)
            loss.backward()
            opt.step()

            MyLogger.cls_step_update(
                logits.max(1)[1].cpu().numpy(),
                label.long().cpu().numpy(),
                loss.cpu().detach().numpy())
        MyLogger.cls_epoch_summary(writer=writer, training=True)
        '''=== Test ==='''
        MyLogger.cls_epoch_init(training=False)
        model.eval()

        for _, data in tqdm(enumerate(test_loader, 0),
                            total=len(test_loader),
                            smoothing=0.9):
            points, label = data
            points = points.transpose(2, 1).cuda()
            label = label[:, 0].type(torch.int64).cuda()
            logits = model(points)
            loss = cal_loss(logits, label)
            MyLogger.cls_step_update(
                logits.max(1)[1].cpu().numpy(),
                label.long().cpu().numpy(),
                loss.cpu().detach().numpy())
        MyLogger.cls_epoch_summary(writer=writer, training=False)

        if MyLogger.save_model:
            state = {
                'step': MyLogger.step,
                'epoch': MyLogger.best_instance_epoch,
                'instance_acc': MyLogger.best_instance_acc,
                'best_class_acc': MyLogger.best_class_acc,
                'best_class_epoch': MyLogger.best_class_epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': opt.state_dict(),
            }
            torch.save(state, MyLogger.savepath)

    MyLogger.cls_train_summary()
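
The saved `state` dictionary makes warm restarts possible; the `try` block near the top of `main` relies on it. A minimal sketch of resuming by hand (key names follow the dict above; treating `'epoch'` as the point to restart from is an assumption, since it records the best-instance epoch):

checkpoint = torch.load(MyLogger.savepath, map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
opt.load_state_dict(checkpoint['optimizer_state_dict'])  # valid now that opt.state_dict() is saved
start_epoch = checkpoint['epoch'] + 1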
Example #3
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(opt,
                                                     milestones=[120, 160],
                                                     gamma=0.1)
elif args.opt == 'sgd':
    # NOTE: despite the 'sgd' flag, this branch follows the optimizer
    # strategy proposed by https://github.com/qq456cvb/Point-Transformers,
    # which uses Adam:
    opt = torch.optim.Adam(net.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=50, gamma=0.3)

train_dataset = ModelNetDataLoader(local_path, 1024, split='train')
test_dataset = ModelNetDataLoader(local_path, 1024, split='test')
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=num_workers,
                                          drop_last=True)
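
# NOTE: drop_last=True on the test loader silently discards the final partial
# batch, so accuracy is reported on a subset of the test set. A sketch of the
# usual evaluation setup (an assumption, not part of the original fragment):
# test_loader = torch.utils.data.DataLoader(test_dataset,
#                                           batch_size=batch_size,
#                                           shuffle=False,
#                                           num_workers=num_workers)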

best_test_acc = 0

for epoch in range(args.num_epochs):
Example #4
File: main.py Project: yuchongY/ddn
def main():
    # Download dataset for point cloud classification
    modelnet_dir = 'modelnet40_ply_hdf5_2048'
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(BASE_DIR)
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    if not os.path.exists(DATA_DIR):
        os.mkdir(DATA_DIR)
    if not os.path.exists(os.path.join(DATA_DIR, modelnet_dir)):
        www = 'https://shapenet.cs.stanford.edu/media/' + modelnet_dir + '.zip'
        zipfile = os.path.basename(www)
        os.system('wget %s; unzip %s' % (www, zipfile))
        os.system('mv %s %s' % (zipfile[:-4], DATA_DIR))
        os.system('rm %s' % (zipfile))

    datapath = './data/' + modelnet_dir + '/'

    args = parse_args()

    type_strings = {
        'Q': 'quadratic',
        'PH': 'pseudohuber',
        'H': 'huber',
        'W': 'welsch',
        'TQ': 'truncatedquadratic',
    }
    type_string = type_strings.get(args.robust_type, 'max')
    outlier_string = 'outliers_' + str(args.outlier_fraction)

    if args.rotation is not None:
        ROTATION = (int(args.rotation[0:2]), int(args.rotation[3:5]))
    else:
        ROTATION = None
    '''CREATE DIRS'''
    checkpoints_dir = Path('./tests') / type_string / outlier_string
    checkpoints_dir.mkdir(parents=True, exist_ok=True)
    '''LOG'''
    logger = logging.getLogger("PointNet")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(
        str(checkpoints_dir) + '/' + 'train_%s_' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')) + '.txt')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '---------------------------------------------------TRAINING---------------------------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)
    '''DATA LOADING'''
    logger.info('Load dataset ...')
    train_data, train_label, test_data, test_label = load_data(
        datapath, classification=True)
    logger.info("The number of training data is: %d", train_data.shape[0])
    logger.info("The number of test data is: %d", test_data.shape[0])

    # Replace a fraction of the points with outliers drawn uniformly from the unit sphere
    if args.outlier_fraction > 0.0:
        # Training set
        num_outliers = int(args.outlier_fraction * train_data.shape[1])
        print('Number of training set outliers per point cloud: {}'.format(num_outliers))
        for i in range(train_data.shape[0]):  # for each point cloud in the set
            random_indices = np.random.choice(train_data.shape[1],
                                              num_outliers,
                                              replace=False)
            for j in range(num_outliers):  # for each point in the outlier subset
                # Rejection-sample from [-1, 1]^3 until the point lies inside
                # the unit sphere, i.e. uniform on the unit ball:
                random_point = 2.0 * np.random.rand(3) - 1.0
                while np.linalg.norm(random_point) > 1.0:
                    random_point = 2.0 * np.random.rand(3) - 1.0
                train_data[i, random_indices[j], :] = random_point
        # Testing set
        num_outliers = int(args.outlier_fraction * test_data.shape[1])
        print('Number of test set outliers per point cloud: {}'.format(num_outliers))
        for i in range(test_data.shape[0]):
            random_indices = np.random.choice(test_data.shape[1],
                                              num_outliers,
                                              replace=False)
            for j in range(num_outliers):
                random_point = 2.0 * np.random.rand(3) - 1.0
                while np.linalg.norm(random_point) > 1.0:
                    random_point = 2.0 * np.random.rand(3) - 1.0
                test_data[i, random_indices[j], :] = random_point

    trainDataset = ModelNetDataLoader(train_data,
                                      train_label,
                                      rotation=ROTATION)
    if ROTATION is not None:
        print('The range of training rotation is', ROTATION)
    testDataset = ModelNetDataLoader(test_data, test_label, rotation=ROTATION)
    trainDataLoader = torch.utils.data.DataLoader(trainDataset,
                                                  batch_size=args.batchsize,
                                                  shuffle=True)
    testDataLoader = torch.utils.data.DataLoader(testDataset,
                                                 batch_size=args.batchsize,
                                                 shuffle=False)
    '''MODEL LOADING'''
    num_class = 40
    classifier = PointNetCls(num_class, args.input_transform,
                             args.feature_transform, args.robust_type,
                             args.alpha).cuda()
    if args.pretrain is not None:
        print('Use pretrain model...')
        logger.info('Use pretrain model')
        checkpoint = torch.load(args.pretrain)
        start_epoch = checkpoint['epoch']
        classifier.load_state_dict(checkpoint['model_state_dict'])
    else:
        print('No existing model, starting training from scratch...')
        start_epoch = 0

    if args.evaluate:
        acc, map, _ = test(classifier, testDataLoader, do_map=True)
        logger.info('Test Accuracy: %f', acc)
        logger.info('mAP: %f', map)
        logger.info('%f,%f' % (acc, map))
        print('Test Accuracy:\n%f' % acc)
        print('mAP:\n%f' % map)
        # print('%f,%f'%(acc, map))
        return

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=0.01,
                                    momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.decay_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20,
                                                gamma=0.5)
    global_epoch = 0
    global_step = 0
    best_tst_accuracy = 0.0
    blue = lambda x: '\033[94m' + x + '\033[0m'
    '''TRAINING'''
    logger.info('Start training...')
    for epoch in range(start_epoch, args.epoch):
        print('Epoch %d (%d/%s):' % (global_epoch + 1, epoch + 1, args.epoch))
        logger.info('Epoch %d (%d/%s):', global_epoch + 1, epoch + 1,
                    args.epoch)

        scheduler.step()
        for batch_id, data in tqdm(enumerate(trainDataLoader, 0),
                                   total=len(trainDataLoader),
                                   smoothing=0.9):
            points, target = data
            target = target[:, 0]
            points = points.transpose(2, 1)
            points, target = points.cuda(), target.cuda()
            optimizer.zero_grad()
            classifier = classifier.train()
            pred, trans_feat = classifier(points)
            loss = F.nll_loss(pred, target.long())
            if args.feature_transform and args.model_name == 'pointnet':
                loss += feature_transform_regularizer(trans_feat) * 0.001
            loss.backward()
            optimizer.step()
            global_step += 1

        train_acc = test(classifier.eval(),
                         trainDataLoader) if args.train_metric else None
        acc, map, _ = test(classifier, testDataLoader, do_map=True)

        print('\r Loss: %f' % loss.data)
        logger.info('Loss: %f', loss.data)
        if args.train_metric:
            print('Train Accuracy: %f' % train_acc)
            logger.info('Train Accuracy: %f', (train_acc))
        logger.info('Test Accuracy: %f', acc)
        logger.info('Test mAP: %f', map)
        print('\r Test %s: %f' % (blue('Accuracy'), acc))
        print('\r Test %s: %f' % (blue('mAP'), map))
        if args.train_metric:
            logger.info('%f,%f,%f' % (train_acc, acc, map))
            print('\r%f,%f,%f' % (train_acc, acc, map))
        else:
            logger.info('%f,%f' % (acc, map))
            print('\r%f,%f' % (acc, map))

        if (acc >= best_tst_accuracy):
            best_tst_accuracy = acc
        # Save every 10
        if (epoch + 1) % 10 == 0:
            logger.info('Save model...')
            save_checkpoint(global_epoch + 1,
                            train_acc if args.train_metric else 0.0,
                            acc, map, classifier, optimizer,
                            str(checkpoints_dir), args.model_name)
            print('Saving model....')
        global_epoch += 1
    print('Best Accuracy: %f' % best_tst_accuracy)

    logger.info('Save final model...')
    save_checkpoint(global_epoch, train_acc if args.train_metric else 0.0, acc,
                    map, classifier, optimizer, str(checkpoints_dir),
                    args.model_name)
    print('Saving final model....')

    logger.info('End of training...')
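
The two rejection-sampling loops in Example #4 are identical apart from the array they modify. A hedged sketch of the same injection factored into a vectorized helper (the function name is illustrative; assumes `numpy` imported as `np`):

def inject_sphere_outliers(data, outlier_fraction):
    """Replace a random fraction of each cloud's points, in place, with
    outliers drawn uniformly from the unit ball."""
    num_clouds, num_points, _ = data.shape
    num_outliers = int(outlier_fraction * num_points)
    for i in range(num_clouds):
        idx = np.random.choice(num_points, num_outliers, replace=False)
        # Oversample candidates from [-1, 1]^3 and keep those inside the ball
        # (acceptance rate is pi/6, about 0.52, so 4x oversampling almost
        # always suffices; the loop covers the rare shortfall).
        accepted = np.empty((0, 3))
        while accepted.shape[0] < num_outliers:
            cand = 2.0 * np.random.rand(4 * max(num_outliers, 1), 3) - 1.0
            accepted = np.vstack([accepted, cand[np.linalg.norm(cand, axis=1) <= 1.0]])
        data[i, idx, :] = accepted[:num_outliers]
    return data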
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='grid_gcn')
    parser.add_argument('--dataset-path', type=str, default='')
    parser.add_argument('--load-model-path', type=str, default='')
    parser.add_argument('--save-model-path', type=str, default='')
    parser.add_argument('--num-epochs', type=int, default=200)
    parser.add_argument('--num-workers', type=int, default=8)
    parser.add_argument('--batch-size', type=int, default=32)
    args = parser.parse_args()

    num_workers = args.num_workers
    batch_size = args.batch_size

    data_filename = 'modelnet40_normal_resampled.zip'
    download_path = os.path.join(get_download_dir(), data_filename)
    local_path = args.dataset_path or os.path.join(
        get_download_dir(), 'modelnet40_normal_resampled')

    if not os.path.exists(local_path):
        download(
            'https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip',
            download_path)
        from zipfile import ZipFile
        with ZipFile(download_path) as z:
            z.extractall(path=get_download_dir())

    CustomDataLoader = partial(DataLoader,
                               num_workers=num_workers,
                               batch_size=batch_size,
                               shuffle=True,
                               drop_last=True)  # NOTE: unused; the loaders below are built directly

    #dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dev = torch.device("cpu")

    #if args.model == 'pointnet':
    #    net = PointNetCls(40, input_dims=6)
    #elif args.model == 'pointnet2_ssg':
    #    net = PointNet2SSGCls(40, batch_size, input_dims=6)
    #elif args.model == 'pointnet2_msg':
    #    net = PointNet2MSGCls(40, batch_size, input_dims=6)
    #elif args.model == 'grid_gcn':
    #    net = Grid_GCN(40, batch_size, input_dims=6)

    net = Grid_GCN(40, batch_size, input_dims=6)

    net = net.to(dev)
    if args.load_model_path:
        net.load_state_dict(torch.load(args.load_model_path, map_location=dev))

    opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.7)

    train_dataset = ModelNetDataLoader(local_path, 1024, split='train')
    test_dataset = ModelNetDataLoader(local_path, 1024, split='test')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers,
                                              drop_last=True)

    best_test_acc = 0

    for epoch in range(args.num_epochs):
        train(net, opt, scheduler, train_loader, dev)
        if (epoch + 1) % 1 == 0:
            print('Epoch #%d Testing' % epoch)
            test_acc = evaluate(net, test_loader, dev)
            if test_acc > best_test_acc:
                best_test_acc = test_acc
                if args.save_model_path:
                    torch.save(net.state_dict(), args.save_model_path)
            print('Current test acc: %.5f (best: %.5f)' %
                  (test_acc, best_test_acc))
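
`train` and `evaluate` are used above but not shown. A minimal sketch of `evaluate` consistent with the call `evaluate(net, test_loader, dev)`, assuming batches of `(points, label)` with `label` shaped `(batch, 1)` as in the other examples:

def evaluate(net, test_loader, dev):
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for points, label in test_loader:
            points, label = points.to(dev), label[:, 0].long().to(dev)
            logits = net(points)  # assumed: (batch, num_class) scores
            correct += (logits.max(1)[1] == label).sum().item()
            total += label.size(0)
    return correct / total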
Example #6
def main(log_dir, model_path, augmentation, dataset, batch_size, learning_rate,
         num_workers, num_point, normal, gpu):
    '''HYPER PARAMETER'''
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    arguments = copy.deepcopy(locals())

    os.mkdir(log_dir)
    shutil.copy2(__file__, os.path.join(log_dir, "script.py"))
    shutil.copy2(model_path, os.path.join(log_dir, "model.py"))

    logger = logging.getLogger("train")
    logger.setLevel(logging.DEBUG)
    logger.handlers = []
    ch = logging.StreamHandler()
    logger.addHandler(ch)
    fh = logging.FileHandler(os.path.join(log_dir, "log.txt"))
    logger.addHandler(fh)

    logger.info("%s", repr(arguments))

    torch.backends.cudnn.benchmark = True

    # Load the model
    loader = importlib.machinery.SourceFileLoader(
        'model', os.path.join(log_dir, "model.py"))
    mod = types.ModuleType(loader.name)
    loader.exec_module(mod)

    model = mod.Model(40)
    model.cuda()

    logger.info("{} paramerters in total".format(
        sum(x.numel() for x in model.parameters())))
    logger.info("{} paramerters in the last layer".format(
        sum(x.numel() for x in model.out_layer.parameters())))

    bw = model.bandwidths[0]
    _file = 'libmatrix.so'
    _path = '/data2/tzf/s2cnn-master/draft/' + _file
    lib = ctypes.cdll.LoadLibrary(_path)
    # Load the dataset
    # Increasing `repeat` will generate more cached files
    transform = torchvision.transforms.Compose([
        # ToMesh(random_rotations=True, random_translation=0.1),  # transform data to a mesh
        # ToPoints(random_rotations=True, random_translation=0.1),
        # Needs modification: originally the value on the sphere is based on rays cast from
        # points on the spherical surface. Since we want to process point cloud data directly,
        # the script can be modified so that rays are cast from the point cloud to the sphere.
        # ProjectOnSphere(bandwidth=bw)
        ProjectFromPointsOnSphere(bandwidth=bw, lib=lib)
    ])

    #train_set = Shrec17("/data2/tzf/s2cnn-master/examples/shrec17/data", dataset, perturbed=True, download=False, transform=transform, target_transform=target_transform)

    #train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True)

    DATA_PATH = 'data/modelnet40_normal_resampled/'

    TRAIN_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                       npoint=num_point,
                                       split='train',
                                       normal_channel=normal,
                                       transform=transform)
    TEST_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                      npoint=num_point,
                                      split='test',
                                      normal_channel=normal,
                                      transform=transform)
    train_loader = torch.utils.data.DataLoader(TRAIN_DATASET,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers,
                                               pin_memory=True)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=num_workers,
                                                 pin_memory=True)

    optimizer = torch.optim.SGD(model.parameters(), lr=0, momentum=0.9)  # lr is set per epoch via get_learning_rate

    def train_step(data, target):
        model.train()
        data, target = data.cuda(), target.cuda()

        prediction = model(data)
        loss = F.nll_loss(prediction, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        correct = prediction.data.max(1)[1].eq(target.data).long().cpu().sum()

        return loss.item(), correct.item()

    def get_learning_rate(epoch):
        limits = [100, 200]
        lrs = [1, 0.1, 0.01]
        assert len(lrs) == len(limits) + 1
        for lim, lr in zip(limits, lrs):
            if epoch < lim:
                return lr * learning_rate
        return lrs[-1] * learning_rate

    def train():  # NOTE: defined but never called; the epoch loop below supersedes it
        for epoch in range(300):

            lr = get_learning_rate(epoch)
            logger.info("learning rate = {} and batch size = {}".format(
                lr, train_loader.batch_size))
            for p in optimizer.param_groups:
                p['lr'] = lr

            total_loss = 0
            total_correct = 0
            time_before_load = time.perf_counter()
            for batch_idx, (data, target) in enumerate(train_loader):  # x
                time_after_load = time.perf_counter()
                time_before_step = time.perf_counter()
                loss, correct = train_step(data, target)

                total_loss += loss
                total_correct += correct

                logger.info(
                    "[{}:{}/{}] LOSS={:.2} <LOSS>={:.2} ACC={:.2} <ACC>={:.2} time={:.2}+{:.2}"
                    .format(epoch, batch_idx, len(train_loader), loss,
                            total_loss / (batch_idx + 1), correct / len(data),
                            total_correct / len(data) / (batch_idx + 1),
                            time_after_load - time_before_load,
                            time.perf_counter() - time_before_step))
                time_before_load = time.perf_counter()

            torch.save(model.state_dict(), os.path.join(log_dir, "state.pkl"))

    best_instance_acc = 0.0
    best_class_acc = 0.0
    for epoch in range(200):

        lr = get_learning_rate(epoch)
        logger.info("learning rate = {} and batch size = {}".format(
            lr, train_loader.batch_size))
        for p in optimizer.param_groups:
            p['lr'] = lr

        total_loss = 0
        total_correct = 0
        time_before_load = time.perf_counter()
        for batch_idx, (data, target) in enumerate(train_loader):  #x
            time_after_load = time.perf_counter()
            time_before_step = time.perf_counter()

            target = target[:, 0]
            data, target = data.cuda(), target.cuda()
            loss, correct = train_step(data, target.long())

            total_loss += loss
            total_correct += correct

            logger.info(
                "[{}:{}/{}] LOSS={:.2} <LOSS>={:.2} ACC={:.2} <ACC>={:.2} time={:.2}+{:.2}"
                .format(epoch, batch_idx, len(train_loader), loss,
                        total_loss / (batch_idx + 1), correct / len(data),
                        total_correct / len(data) / (batch_idx + 1),
                        time_after_load - time_before_load,
                        time.perf_counter() - time_before_step))
            time_before_load = time.perf_counter()

        torch.save(model.state_dict(), os.path.join(log_dir, "state.pkl"))

        with torch.no_grad():
            instance_acc, class_acc = test(model.eval(), testDataLoader)

            if (instance_acc >= best_instance_acc):
                best_instance_acc = instance_acc
                best_epoch = epoch + 1

            if (class_acc >= best_class_acc):
                best_class_acc = class_acc
            print('Test Instance Accuracy: %f, Class Accuracy: %f' %
                  (instance_acc, class_acc))
            print('Best Instance Accuracy: %f, Class Accuracy: %f' %
                  (best_instance_acc, best_class_acc))

            if (instance_acc >= best_instance_acc):
                logger.info('Save model...')
                savepath = '/data2/tzf/s2cnn-master/examples/shrec17/best_model.pth'
                print('Saving at %s' % savepath)
                state = {
                    'epoch': best_epoch,
                    'instance_acc': instance_acc,
                    'class_acc': class_acc,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }
                torch.save(state, savepath)
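
The manual `get_learning_rate` schedule above (1x, 0.1x, 0.01x of `learning_rate`, switching at epochs 100 and 200) matches PyTorch's built-in `MultiStepLR`. A sketch of that alternative, assuming the optimizer is created with the base rate instead of `lr=0`:

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[100, 200],
                                                 gamma=0.1)
for epoch in range(200):
    ...  # one epoch of training, as in the loop above
    scheduler.step()  # multiplies lr by 0.1 when a milestone is passed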
Example #7
def main(args):

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    ''' === Inference Time Calculation === '''
    if args.inference_timer:
        MyTimer = Inference_Timer(args)
        args = MyTimer.update_args()  # sets batch size to 1 and epochs to 3
    ''' === Set up Loggers and Load Data === '''
    DATA_PATH = 'data/modelnet40_normal_resampled/'
    TRAIN_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                       npoint=args.num_point,
                                       split='train',
                                       normal_channel=args.normal)
    TEST_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                      npoint=args.num_point,
                                      split='test',
                                      normal_channel=args.normal)
    trainDataLoader = DataLoader(TRAIN_DATASET,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 drop_last=True)
    testDataLoader = DataLoader(TEST_DATASET,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=4)
    MyLogger = TrainLogger(args, name=args.model.upper(), subfold='cls')
    ''' === Model Loading and Files Backup === '''
    MODEL = importlib.import_module(args.model)
    shutil.copy(args.nrs_cfg, MyLogger.log_dir)
    shutil.copy(os.path.abspath(__file__), MyLogger.log_dir)
    shutil.copy('./models/%s.py' % args.model, MyLogger.log_dir)
    writer = SummaryWriter(os.path.join(MyLogger.experiment_dir, 'runs'))

    # allow multiple GPUs
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier = MODEL.get_model(num_class=40,
                                 normal_channel=args.normal,
                                 nrs_cfg=Dict2Object(args.nrs_cfg)).to(device)
    criterion = MODEL.get_loss().to(device)
    classifier = torch.nn.DataParallel(classifier)
    print("=" * 33, "\n", "Number of GPU(s):", torch.cuda.device_count(),
          "Indices: %s!\n" % args.gpu, "=" * 33)
    # nn.DataParallel has its own issues (slow, memory expensive), here are some advanced solutions:
    # https://zhuanlan.zhihu.com/p/145427849

    try:
        checkpoint = torch.load(MyLogger.savepath)
        classifier.load_state_dict(checkpoint['model_state_dict'])
        MyLogger.update_from_checkpoints(checkpoint)
    except Exception:
        MyLogger.logger.info(
            'No pre-trained model, start training from scratch...')
    ''' === Optimiser and Scheduler === '''
    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.decay_rate)
    else:
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.lr * 100,
                                    momentum=0.9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20,
                                                gamma=0.7)

    for epoch in range(MyLogger.epoch, args.epoch + 1):
        ''' === Training === '''
        scheduler.step()
        classifier.train()
        MyLogger.cls_epoch_init()
        # writer.add_scalar('learning rate', scheduler.get_lr()[-1], global_step)

        for points, target in tqdm(trainDataLoader,
                                   total=len(trainDataLoader),
                                   smoothing=0.9):
            # Data Augmentation
            points = random_point_dropout(points.data.numpy())
            points[:, :, 0:3] = random_scale_point_cloud(points[:, :, 0:3])
            points[:, :, 0:3] = random_shift_point_cloud(points[:, :, 0:3])

            if args.gpu == 'None':
                points, target = torch.Tensor(points).transpose(2, 1), target[:, 0]
            else:
                points, target = torch.Tensor(points).transpose(2, 1).cuda(), target[:, 0].cuda()

            # FP and BP
            optimizer.zero_grad()
            if args.inference_timer:
                pred, trans_feat = MyTimer.single_step(classifier, points)
            else:
                pred, trans_feat = classifier(points)

            loss = criterion(pred, target.long(), trans_feat)
            loss.backward()
            optimizer.step()
            MyLogger.cls_step_update(
                pred.data.max(1)[1].cpu().numpy(),
                target.long().cpu().numpy(),
                loss.cpu().detach().numpy())
        MyLogger.cls_epoch_summary(writer=writer, training=True)
        if args.inference_timer:
            MyTimer.update_single_epoch(MyLogger.logger)
            break
        '''Validating'''
        with torch.no_grad():
            classifier.eval()
            MyLogger.cls_epoch_init(training=False)
            for points, target in tqdm(testDataLoader,
                                       total=len(testDataLoader),
                                       smoothing=0.9):
                points, target = points.transpose(2, 1).cuda(), target[:, 0].cuda()
                pred, trans_feat = classifier(points)
                loss = criterion(pred, target.long(), trans_feat)
                MyLogger.cls_step_update(
                    pred.data.max(1)[1].cpu().numpy(),
                    target.long().cpu().numpy(),
                    loss.cpu().detach().numpy())

            MyLogger.cls_epoch_summary(writer=writer, training=False)
            if MyLogger.save_model:
                state = {
                    'step': MyLogger.step,
                    'epoch': MyLogger.best_instance_epoch,
                    'instance_acc': MyLogger.best_instance_acc,
                    'best_class_acc': MyLogger.best_class_acc,
                    'best_class_epoch': MyLogger.best_class_epoch,
                    'model_state_dict': classifier.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }
                torch.save(state, MyLogger.savepath)
    MyLogger.cls_train_summary()
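
Example #7's comment notes that nn.DataParallel is slow and memory-hungry. A minimal DistributedDataParallel sketch of the usual replacement (assumes a single node launched with `torchrun --nproc_per_node=N`; the helper name is illustrative):

import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

def setup_ddp(model, dataset, batch_size):
    # torchrun sets RANK, LOCAL_RANK and WORLD_SIZE in the environment.
    dist.init_process_group(backend='nccl')
    local_rank = int(os.environ['LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    model = DDP(model.cuda(local_rank), device_ids=[local_rank])
    # Each process loads a distinct shard of the dataset.
    sampler = DistributedSampler(dataset)
    loader = DataLoader(dataset, batch_size=batch_size, sampler=sampler,
                        num_workers=4, drop_last=True)
    return model, loader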