def train_process():
    global global_step
    summary_writer = tensorboardX.SummaryWriter(
        log_dir=config.result_sub_folder, comment=config.comment)

    # prepare data
    if config.dataset == "ModelNet40":
        train_set = ds.ModelNet40.ModelNet40(config.train.root_dir, type="train")
        valid_set = ds.ModelNet40.ModelNet40(config.validation.root_dir, type="val")
    elif config.dataset == "ShapeNetParts":
        train_set = ds.shapenet_partseg.ShapeNetParts(config.train.root_dir)
        valid_set = ds.shapenet_partseg.ShapeNetParts(config.validation.root_dir)
    else:
        raise NotImplementedError

    train_loader = DataLoader(train_set,
                              batch_size=config.train.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers,
                              drop_last=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=config.validation.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers,
                              drop_last=False)
    print('train set size: {}'.format(len(train_set)))
    print('valid set size: {}'.format(len(valid_set)))

    # prepare model
    net = create_model(config.base_model).to(config.device)

    # prepare optimizer
    if config.train.optimizer == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              config.train.learning_rate_base,
                              momentum=config.train.momentum)
    elif config.train.optimizer == 'ADAM':
        optimizer = optim.Adam(net.parameters(),
                               lr=config.train.learning_rate_base,
                               eps=config.train.epsilon,
                               weight_decay=config.train.weight_decay)
    else:
        raise NotImplementedError

    net = DataParallel(net)
    model_recorder = ModelRecorder(config.ckpt_file,
                                   optimizer,
                                   summary_writer=summary_writer)

    start_epoch = 0
    if config.train.resume:
        start_epoch = model_recorder.resume(net.module,
                                            optimizer,
                                            from_measurement='acc')
        if config.train.resume_epoch is not None:
            start_epoch = config.train.resume_epoch
            print("Force resume at {}".format(start_epoch))
        else:
            print("Resume at {}".format(start_epoch))

    # prepare the criterion
    criterion = nn.CrossEntropyLoss()

    # start to train
    for epoch in range(start_epoch, config.train.num_epochs):
        train_epoch(train_loader, net, criterion, optimizer, epoch)
        if (epoch % config.validation.step_val == 0) or (
                epoch == config.train.num_epochs - 1):
            with torch.no_grad():
                acc = evaluate(valid_loader, net)
            model_recorder.add(epoch, net, dict(acc=acc))
            model_recorder.print_curr_stat()

    print('\nTrain Finished: {}'.format(
        time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())))
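# A minimal sketch of the config object the variant above reads, assuming an
# attribute-style namespace. Every field name below appears in the code above;
# the values (and the "pointnet" model name) are illustrative only -- the real
# project presumably loads this from a config file.
from types import SimpleNamespace

example_config = SimpleNamespace(
    dataset="ModelNet40",  # or "ShapeNetParts"
    result_sub_folder="results/run0",
    comment="",
    num_workers=4,
    base_model="pointnet",  # hypothetical model name
    device="cuda",
    ckpt_file="results/run0/ckpt.pth",
    train=SimpleNamespace(
        root_dir="data/modelnet40",
        batch_size=32,
        optimizer="ADAM",  # or "SGD"
        learning_rate_base=1e-3,
        momentum=0.9,  # used by SGD only
        epsilon=1e-8,  # Adam eps
        weight_decay=1e-4,
        resume=False,
        resume_epoch=None,
        num_epochs=250,
    ),
    validation=SimpleNamespace(
        root_dir="data/modelnet40",
        batch_size=32,
        step_val=1,
    ),
)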
def train_process():
    global global_step
    summary_writer = tensorboardX.SummaryWriter(
        log_dir=config.result_sub_folder, comment=config.comment)
    train_tfs = compose(
        [rotate_y(), rand_scale(), rand_translate(), jitter(), normalize()])
    test_tfs = normalize()
    scene_tfs = compose([normalize(), to_tensor()])

    # prepare data
    print(config.dataset)  # fixed: was print("config.dataset"), which printed the literal string
    if config.dataset == "ModelNet40":
        train_set = ModelNet40(partition='train', transforms=train_tfs)
        valid_set = ModelNet40(partition='test', transforms=test_tfs)
    elif config.dataset == "Mnist":
        train_set = Mnist(partition='train')
        valid_set = Mnist(partition='test')
    elif config.dataset == "ScanNet":
        train_set = ScanNet(partition='train', transforms=train_tfs)
        valid_set = ScanNet(partition='test', transforms=test_tfs)
    elif config.dataset == "ModelNet10":
        train_set = ModelNet10(partition='train')
        valid_set = ModelNet10(partition='test')
    elif config.dataset == "S3DIS":
        train_set = S3DIS(partition='train', transforms=train_tfs)
        valid_set = S3DIS(partition='test', transforms=test_tfs)
        scene_set = S3DIS(partition='data/zero_0.h5', transforms=test_tfs)
    elif config.dataset == "ShapeNetParts":
        train_set = ShapeNetPart(partition='trainval', transforms=train_tfs)
        valid_set = ShapeNetPart(partition='test', transforms=test_tfs)
    elif config.dataset == "Cifar10":
        train_set = Cifar10(partition='train')
        valid_set = Cifar10(partition='test')
    else:
        raise NotImplementedError

    train_loader = DataLoader(train_set,
                              batch_size=config.train.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers,
                              drop_last=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=config.validation.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers,
                              drop_last=False)
    if config.dataset == "S3DIS":
        scene_loader = DataLoader(scene_set,
                                  batch_size=config.validation.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  drop_last=False)
    print('train set size: {}'.format(len(train_set)))
    print('valid set size: {}'.format(len(valid_set)))
    if config.dataset == "S3DIS":
        print('scene set size: {}'.format(len(scene_set)))

    # prepare model
    net = create_model(config.base_model).to(config.device)

    # prepare optimizer
    if config.train.optimizer == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              config.train.learning_rate_base,
                              momentum=config.train.momentum)
    elif config.train.optimizer == 'ADAM':
        optimizer = optim.Adam(net.parameters(),
                               lr=config.train.learning_rate_base,
                               eps=1e-08,
                               weight_decay=1e-4)
    else:
        raise NotImplementedError

    net = DataParallel(net)
    if config.train.resume:
        model_recorder = ModelRecorder(config.resume_ckpt_file,
                                       optimizer,
                                       summary_writer=summary_writer)
    else:
        model_recorder = ModelRecorder(config.ckpt_file,
                                       optimizer,
                                       summary_writer=summary_writer)

    start_epoch = 0
    if config.train.resume:
        if config.task != "seg":
            start_epoch = model_recorder.resume(net.module,
                                                optimizer,
                                                from_measurement='acc')
        else:
            start_epoch = model_recorder.resume(net.module,
                                                optimizer,
                                                from_measurement='iou')
        if config.train.resume_epoch is not None:
            start_epoch = config.train.resume_epoch
            print("Force resume at {}".format(start_epoch))
        else:
            print("Resume at {}".format(start_epoch))

    # prepare the criterion
    criterion = nn.CrossEntropyLoss()

    # start to train
    for epoch in range(start_epoch, config.train.num_epochs):
        # step decay: scale the base LR by decay_rate every 10 epochs,
        # clamped at learning_rate_min
        lr = config.train.learning_rate_base * (math.pow(
            config.train.decay_rate, epoch // 10))
        if lr < config.train.learning_rate_min:
            lr = config.train.learning_rate_min
        for g in optimizer.param_groups:
            g['lr'] = lr
        summary_writer.add_scalar('Learning rate', lr, global_step=epoch)
        if config.task == "seg":
            training_loss, training_acc, avg_per_class_acc, train_ious = train_epoch(
                train_loader, net, criterion, optimizer, epoch)
            summary_writer.add_scalar('Training Loss', training_loss,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Accuracy', training_acc,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Avg Per-class Accuracy',
                                      avg_per_class_acc, global_step=epoch)
            summary_writer.add_scalar('Training IOUs', train_ious,
                                      global_step=epoch)
        else:
            training_loss, training_acc = train_epoch(train_loader, net,
                                                      criterion, optimizer,
                                                      epoch)
            summary_writer.add_scalar('Training Accuracy', training_acc,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Loss', training_loss,
                                      global_step=epoch)

        if (epoch % config.validation.step_val == 0) or (
                epoch == config.train.num_epochs - 1):
            with torch.no_grad():
                if config.task == "seg":
                    validation_loss, validation_acc, avg_per_class_acc, val_ious = evaluate(
                        valid_loader, net, html_path="training_output")
                    summary_writer.add_scalar('Validation Loss',
                                              validation_loss,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Accuracy',
                                              validation_acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Avg Per-class Accuracy',
                                              avg_per_class_acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation IOUs', val_ious,
                                              global_step=epoch)
                    if config.dataset == "ScanNet":
                        net.eval()
                        print('Scene Validation')
                        y_true = []
                        y_pred = []
                        sample_num = 2048
                        max_point_num = 8192
                        batch_size = math.ceil(max_point_num / sample_num)
                        indices_batch_indices = np.tile(  # (unused)
                            np.reshape(np.arange(batch_size),
                                       (batch_size, 1, 1)),
                            (1, sample_num, 1))
                        data_h5 = h5py.File("zero_0.h5", 'r')  # read-only access is sufficient
                        data = data_h5['data'][...].astype(np.float32)
                        data_num = data_h5['data_num'][...].astype(np.int32)
                        data_labels_seg = data_h5['label_seg'][...].astype(np.int64)
                        data_h5.close()
                        batch_num = data.shape[0]
                        labels_pred = np.full((batch_num, max_point_num), -1,
                                              dtype=np.int32)
                        confidences_pred = np.zeros((batch_num, max_point_num),
                                                    dtype=np.float32)
                        for batch_idx in range(batch_num):
                            if batch_idx % 10 == 0:
                                print('{}-Processing {} of {} batches.'.format(
                                    datetime.now(), batch_idx, batch_num))
                            points_batch = data[batch_idx]
                            point_num = data_num[batch_idx]
                            seg_np = (data_labels_seg[batch_idx])[:point_num]
                            y_true.append(seg_np.reshape(-1, 1))
                            # tile the real point indices so batch_size blocks of
                            # sample_num points can be filled, then shuffle
                            tile_num = math.ceil(
                                (sample_num * batch_size) / point_num)
                            indices_shuffle = np.tile(
                                np.arange(point_num),
                                tile_num)[0:sample_num * batch_size]
                            np.random.shuffle(indices_shuffle)
                            input_points = scene_tfs(
                                (points_batch[indices_shuffle]).reshape(
                                    (batch_size, sample_num,
                                     -1))).to(config.device)
                            seg_probs = net(input_points)
                            probs_2d = np.reshape(
                                seg_probs.detach().cpu().numpy(),
                                (sample_num * batch_size, -1))
                            # per-point vote: keep the highest-confidence
                            # prediction each real point received
                            predictions = [(-1, 0.0)] * point_num
                            for idx in range(sample_num * batch_size):
                                point_idx = indices_shuffle[idx]
                                probs = probs_2d[idx, :]
                                confidence = np.amax(probs)
                                label = np.argmax(probs)
                                if confidence > predictions[point_idx][1]:
                                    predictions[point_idx] = [label, confidence]
                            pred_np = np.array(predictions)[:, 0]
                            y_pred.append(pred_np.reshape(-1, 1))
                        print(
                            metrics.classification_report(
                                np.concatenate(y_true, axis=0),
                                np.concatenate(y_pred, axis=0)))
                else:
                    validation_loss, acc = evaluate(valid_loader, net)
                    summary_writer.add_scalar('Validation Accuracy', acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Loss',
                                              validation_loss,
                                              global_step=epoch)
            if config.task == "seg":
                model_recorder.add(
                    epoch, net,
                    dict(acc=validation_acc,
                         iou=val_ious,
                         avg_acc=avg_per_class_acc))
            else:
                model_recorder.add(epoch, net, dict(acc=acc))
            model_recorder.print_curr_stat()

    print('\nTrain Finished: {}'.format(
        time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())))
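# The ScanNet scene validation above covers each scene block with overlapping
# network passes: real point indices are tiled out to sample_num * batch_size
# slots, shuffled, and every real point keeps the single highest-confidence
# prediction it received across those slots. A minimal NumPy-only sketch of
# that voting step, with the network replaced by a random stub (probs_2d),
# since only the reduction is of interest here:
import math
import numpy as np

def vote_max_confidence(probs_2d, indices_shuffle, point_num):
    """Keep, for every real point, the most confident of its (possibly
    repeated) predictions. probs_2d holds one probability row per sampled slot;
    indices_shuffle maps each slot back to its real point index."""
    labels = np.full(point_num, -1, dtype=np.int64)
    confidences = np.zeros(point_num, dtype=np.float32)
    for idx, point_idx in enumerate(indices_shuffle):
        probs = probs_2d[idx]
        confidence = probs.max()
        if confidence > confidences[point_idx]:
            confidences[point_idx] = confidence
            labels[point_idx] = probs.argmax()
    return labels

# toy usage: 100 real points, tiled to fill 4 blocks of 64 samples
point_num, sample_num, batch_size, num_classes = 100, 64, 4, 21
tile_num = math.ceil((sample_num * batch_size) / point_num)
indices_shuffle = np.tile(np.arange(point_num), tile_num)[:sample_num * batch_size]
np.random.shuffle(indices_shuffle)
probs_2d = np.random.rand(sample_num * batch_size, num_classes)  # stand-in for net output
print(vote_max_confidence(probs_2d, indices_shuffle, point_num)[:10])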
def train_process():
    global global_step
    summary_writer = tensorboardX.SummaryWriter(
        log_dir=config.result_sub_folder, comment=config.comment)

    # prepare data
    print(config.dataset)  # fixed: was print("config.dataset")
    if config.dataset == "ModelNet40":
        train_set = ModelNet40(partition='train')
        valid_set = ModelNet40(partition='test')
    elif config.dataset == "Mnist":  # fixed: a second `if` here sent ModelNet40 into the final `else`
        train_set = Mnist(partition='train')
        valid_set = Mnist(partition='test')
    elif config.dataset == "ModelNet10":
        train_set = ModelNet10(partition='train')
        valid_set = ModelNet10(partition='test')
    elif config.dataset == "S3DIS":
        train_set = S3DIS(partition='train')
        valid_set = S3DIS(partition='test')
    elif config.dataset == "ShapeNetParts":
        train_set = ShapeNetPart(partition='trainval')
        valid_set = ShapeNetPart(partition='test')
    elif config.dataset == "Cifar10":
        train_set = Cifar10(partition='train')
        valid_set = Cifar10(partition='test')
    else:
        raise NotImplementedError

    train_loader = DataLoader(train_set,
                              batch_size=config.train.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers,
                              drop_last=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=config.validation.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers,
                              drop_last=False)
    print('train set size: {}'.format(len(train_set)))
    print('valid set size: {}'.format(len(valid_set)))

    # prepare model
    net = create_model(config.base_model).to(config.device)

    # prepare optimizer
    if config.train.optimizer == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              config.train.learning_rate_base,
                              momentum=config.train.momentum)
    elif config.train.optimizer == 'ADAM':
        optimizer = optim.Adam(net.parameters(),
                               lr=config.train.learning_rate_base,
                               eps=config.train.epsilon,
                               weight_decay=config.train.weight_decay)
    else:
        raise NotImplementedError

    net = DataParallel(net)
    if config.train.resume:
        model_recorder = ModelRecorder(config.resume_ckpt_file,
                                       optimizer,
                                       summary_writer=summary_writer)
    else:
        model_recorder = ModelRecorder(config.ckpt_file,
                                       optimizer,
                                       summary_writer=summary_writer)

    start_epoch = 0
    if config.train.resume:
        start_epoch = model_recorder.resume(net.module,
                                            optimizer,
                                            from_measurement='acc')
        if config.train.resume_epoch is not None:
            start_epoch = config.train.resume_epoch
            print("Force resume at {}".format(start_epoch))
        else:
            print("Resume at {}".format(start_epoch))

    # prepare the criterion
    criterion = nn.CrossEntropyLoss()

    # start to train
    for epoch in range(start_epoch, config.train.num_epochs):
        if config.task == "seg":
            training_loss, training_acc, avg_per_class_acc, train_ious = train_epoch(
                train_loader, net, criterion, optimizer, epoch)
            summary_writer.add_scalar('Training Loss', training_loss,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Accuracy', training_acc,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Avg Per-class Accuracy',
                                      avg_per_class_acc, global_step=epoch)
            summary_writer.add_scalar('Training IOUs', train_ious,
                                      global_step=epoch)
        else:
            training_loss, training_acc = train_epoch(train_loader, net,
                                                      criterion, optimizer,
                                                      epoch)
            summary_writer.add_scalar('Training Accuracy', training_acc,
                                      global_step=epoch)
            summary_writer.add_scalar('Training Loss', training_loss,
                                      global_step=epoch)

        if (epoch % config.validation.step_val == 0) or (
                epoch == config.train.num_epochs - 1):
            with torch.no_grad():
                if config.task == "seg":
                    validation_loss, validation_acc, avg_per_class_acc, val_ious = evaluate(
                        valid_loader, net, html_path="training_output")
                    summary_writer.add_scalar('Validation Loss',
                                              validation_loss,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Accuracy',
                                              validation_acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Avg Per-class Accuracy',
                                              avg_per_class_acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation IOUs', val_ious,
                                              global_step=epoch)
                else:
                    validation_loss, acc = evaluate(valid_loader, net)
                    summary_writer.add_scalar('Validation Accuracy', acc,
                                              global_step=epoch)
                    summary_writer.add_scalar('Validation Loss',
                                              validation_loss,
                                              global_step=epoch)
            if config.task == "seg":
                # seg: the best model is tracked by IoU, stored under the 'acc' key
                model_recorder.add(epoch, net, dict(acc=val_ious))
            else:
                model_recorder.add(epoch, net, dict(acc=acc))
            model_recorder.print_curr_stat()

    print('\nTrain Finished: {}'.format(
        time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())))
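# The second variant of train_process above steps the learning rate down by
# config.train.decay_rate every 10 epochs and clamps it at
# config.train.learning_rate_min. A standalone sketch of that schedule (the
# 10-epoch step size is hard-coded there as well):
import math

def step_decay_lr(epoch, base_lr, decay_rate, lr_min, step=10):
    """lr = base_lr * decay_rate ** (epoch // step), floored at lr_min."""
    return max(base_lr * math.pow(decay_rate, epoch // step), lr_min)

# e.g. base 1e-3 with decay 0.5: 1e-3 for epochs 0-9, 5e-4 for 10-19, ...
# and the floor of 1e-5 takes over once the decayed value drops below it.
for epoch in (0, 10, 20, 100):
    print(epoch, step_decay_lr(epoch, 1e-3, 0.5, 1e-5))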