def train(cfg, local_rank, distributed, logger=None, tblogger=None,
          transfer_weight=False, change_lr=False):
    device = torch.device('cuda')

    # create model
    logger.info('Creating model "{}"'.format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)
    optimizer = make_optimizer(cfg, model)
    # model, optimizer = apex.amp.initialize(model, optimizer, opt_level='O2')
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        # model = apex.parallel.DistributedDataParallel(model)
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )

    save_to_disk = get_rank() == 0

    # checkpoint
    arguments = {}
    arguments['iteration'] = 0
    arguments['best_iou'] = 0
    checkpointer = Checkpointer(model, optimizer, scheduler, cfg.LOGS.DIR,
                                save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load(
        f=cfg.MODEL.WEIGHT,
        model_weight_only=transfer_weight,
        change_scheduler=change_lr)
    arguments.update(extra_checkpoint_data)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TRAIN))
    data_loader = make_data_loader(cfg, 'train', distributed)
    data_loader_val = make_data_loader(cfg, 'val', distributed)

    do_train(cfg,
             model=model,
             data_loader=data_loader,
             optimizer=optimizer,
             scheduler=scheduler,
             criterion=criterion,
             checkpointer=checkpointer,
             device=device,
             arguments=arguments,
             tblogger=tblogger,
             data_loader_val=data_loader_val,
             distributed=distributed)
def test(cfg, local_rank, distributed, logger=None):
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')

    # create model
    logger.info('Creating model "{}"'.format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )

    # checkpoint
    checkpointer = Checkpointer(model, save_dir=cfg.LOGS.DIR, logger=logger)
    _ = checkpointer.load(f=cfg.MODEL.WEIGHT)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TEST))
    stage = cfg.DATASETS.TEST.split('_')[-1]
    data_loader = make_data_loader(cfg, stage, distributed)
    dataset_name = cfg.DATASETS.TEST

    metrics = inference(model, criterion, data_loader, dataset_name, True)
    if is_main_process():
        logger.info("Metrics:")
        for k, v in metrics.items():
            logger.info("{}: {}".format(k, v))
def __init__(self, args):
    kwargs = {'num_workers': 4, 'pin_memory': True}
    self.source_loader, self.target_loader, self.test_loader, self.nclass = \
        make_data_loader(args, **kwargs)
    self.tbar = tqdm(self.test_loader, desc='\r')
    self.trainer = wgan_trainer(args, 2)
    self.evaluator = Evaluator(2)
    self.best_IoU = {'disc': 0.77, 'cup': 0.65}
    self.attempt = 9.5
    self.validation(args, self.trainer.target_model, self.tbar)
    self.trainer_wgan(args)
def train(model: Model,
          X: torch.Tensor, X_val: torch.Tensor,
          YMin: torch.Tensor, YMin_val: torch.Tensor,
          YMax: torch.Tensor, YMax_val: torch.Tensor,
          N: int = 1024, M: int = 1000,
          num_epoch: int = 10, lr: float = 1e-3,
          epoch_size: int = 1000, batch_size: int = 32,
          device: str = 'cuda:0') -> Model:
    """
    Train the model.

    :param model: model to train
    :param X: training data
    :param X_val: validation data
    :param YMin: training labels for minimums
    :param YMin_val: validation labels for minimums
    :param YMax: training labels for maximums
    :param YMax_val: validation labels for maximums
    :param N: length of subsequences in the data
    :param M: number of subsequences in the data
    :param num_epoch: number of epochs to train the model
    :param lr: learning rate
    :param epoch_size: number of batches to feed every epoch
    :param batch_size: batch size
    :param device: device
    :return: trained model
    """
    assert X.shape[0] == N * M
    model.to(device)
    opt = optim.Adam(model.parameters(), lr=lr)
    sh = optim.lr_scheduler.StepLR(opt, 1, 0.5)

    for epoch in range(num_epoch):
        with tqdm(total=epoch_size, desc=f'epoch {epoch} of {num_epoch}') as tq:
            model.train()
            train_loader = make_data_loader(X, YMin, YMax, N=N,
                                            batch_size=batch_size,
                                            num_batches=epoch_size)
            for x in train_loader:
                loss, pred_, true_ = _train_step(model, opt,
                                                 [x_.to(device) for x_ in x])
                tq.set_postfix(loss=loss.item(), lr=sh.get_last_lr())
                tq.update()
            sh.step()

        logging.info(f"Training for epoch {epoch}")
        model.eval()
        loss, pred, true = _eval(model, [x_.to(device)
                                         for x_ in [X_val, YMin_val, YMax_val]])
        true = true.cpu().numpy()
        pred = pred.cpu().numpy()
        print(classification_report(true, pred, labels=[1, 2],
                                    target_names=['Min', 'Max']))
        scores = classification_report(true, pred, labels=[1, 2],
                                       target_names=['Min', 'Max'],
                                       output_dict=True)['micro avg']
        del scores['support']
        scores['loss'] = loss.item()
        logging.info(f"Validation for epoch {epoch} ended, scores are {scores}")

    return model
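# Illustrative usage sketch only (not part of the original code). It assumes
# the generate_x / find_min_max helpers and the make_data_loader signature used
# in the benchmarking script later in this collection, plus a hypothetical
# Model() constructor and window parameters T and k. The shapes follow the
# assert X.shape[0] == N * M in train() above; the validation split size is an
# arbitrary example.
def _example_training_run(T, k, N=1024, M=1000):
    X = generate_x(M, N)                        # N * M training samples
    YMin, YMax = find_min_max(X, T, k)          # labels for minima / maxima
    X_val = generate_x(100, N)                  # smaller validation split
    YMin_val, YMax_val = find_min_max(X_val, T, k)
    model = Model()                             # hypothetical model constructor
    return train(model, X, X_val, YMin, YMin_val, YMax, YMax_val,
                 N=N, M=M, num_epoch=10, device='cuda:0')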
def profile(cfg, logger=None):
    device = torch.device('cuda')

    # create model
    logger.info('Creating model "{}"'.format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    model.eval()

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TRAIN))
    data_loader = make_data_loader(cfg, 'train', False)

    # profile
    locs, feats, targets, metadata = next(iter(data_loader))
    inputs = ME.SparseTensor(feats, coords=locs).to(device)
    targets = targets.to(device, non_blocking=True).long()
    return profiler(model, inputs={'x': inputs, 'y': targets})
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Training") parser.add_argument("--local_rank", type=int, default=0) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 args.distributed = num_gpus > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group( backend="nccl", init_method="env://" ) synchronize() output_dir = cfg.OUTPUT_DIR if output_dir: mkdir(output_dir) logger = setup_logger("train", output_dir, get_rank(), filename='train_log.txt') logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) model = Network() device = torch.device(cfg.MODEL.DEVICE) model.to(device) # scaling policy, only suppose batch_size < SOLVER.IMS_PER_BATCH lr_steps, scale_factor = cfg.SOLVER.STEPS, 1.0 batch_size = num_gpus * cfg.SOLVER.IMS_PER_GPU if batch_size < cfg.SOLVER.IMS_PER_BATCH: assert cfg.SOLVER.IMS_PER_BATCH % batch_size == 0 scale_factor = cfg.SOLVER.IMS_PER_BATCH // batch_size lr_steps = [step * scale_factor for step in lr_steps] optimizer = make_optimizer(cfg, model, 1.0 / scale_factor) scheduler = WarmupMultiStepLR( optimizer, lr_steps, cfg.SOLVER.GAMMA, warmup_factor=cfg.SOLVER.WARMUP_FACTOR, warmup_iters=cfg.SOLVER.WARMUP_ITERS, warmup_method=cfg.SOLVER.WARMUP_METHOD, ) if args.distributed: model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[args.local_rank], output_device=args.local_rank, # this should be removed if we update BatchNorm stats broadcast_buffers=False, ) arguments = {} arguments["iteration"] = 0 checkpoint_dir = os.path.join(cfg.OUTPUT_DIR, 'checkpoints') mkdir(checkpoint_dir) save_to_disk = get_rank() == 0 checkpointer = DetectronCheckpointer( cfg, model, optimizer, scheduler, checkpoint_dir, save_to_disk, logger ) extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT) arguments.update(extra_checkpoint_data) start_iter = arguments["iteration"] data_loader = make_data_loader( num_gpus, is_train=True, is_distributed=args.distributed, start_iter=start_iter) checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) model.train() start_training_time = time.time() end = time.time() rcnn_iou_now = cfg.MODEL.DYNAMIC_RCNN.WARMUP_IOU rcnn_beta_now = cfg.MODEL.DYNAMIC_RCNN.WARMUP_BETA iteration_count = cfg.MODEL.DYNAMIC_RCNN.ITERATION_COUNT S_I, S_E = [], [] for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if any(len(target) < 1 for target in targets): logger.error( "Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}") continue data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict, rcnn_iou_new, rcnn_error_new = model( images, targets, rcnn_iou=rcnn_iou_now, rcnn_beta=rcnn_beta_now) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes def reduce_loss_dict(loss_dict): """ Reduce the loss dictionary from all processes so that process with rank 0 has the averaged results. Returns a dict with the same fields as loss_dict, after reduction. 
""" world_size = get_world_size() if world_size < 2: return loss_dict with torch.no_grad(): loss_names = [] all_losses = [] for k in sorted(loss_dict.keys()): loss_names.append(k) all_losses.append(loss_dict[k]) all_losses = torch.stack(all_losses, dim=0) dist.reduce(all_losses, dst=0) if dist.get_rank() == 0: # only main process gets accumulated, so only divide by # world_size in this case all_losses /= world_size reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} return reduced_losses loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) S_I.append(rcnn_iou_new) S_E.append(rcnn_error_new) if iteration % iteration_count == 0: rcnn_iou_now = max(sum(S_I) / iteration_count, cfg.MODEL.DYNAMIC_RCNN.WARMUP_IOU) rcnn_beta_now = min(sorted(S_E)[iteration_count // 2], cfg.MODEL.DYNAMIC_RCNN.WARMUP_BETA) S_I, S_E = [], [] optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join( [ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if iteration % checkpoint_period == 0 or iteration == max_iter: checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / max_iter ) )
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Inference") parser.add_argument( "--config-file", default="", metavar="FILE", help="path to config file", ) parser.add_argument("--local_rank", type=int, default=0) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 distributed = num_gpus > 1 if distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() # Merge config file. cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() # Print experimental infos. save_dir = "" logger = setup_logger("AlphAction", save_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(cfg) logger.info("Collecting env info (might take some time)") logger.info("\n" + get_pretty_env_info()) # Build the model. model = build_detection_model(cfg) model.to("cuda") # load weight. output_dir = cfg.OUTPUT_DIR checkpointer = ActionCheckpointer(cfg, model, save_dir=output_dir) checkpointer.load(cfg.MODEL.WEIGHT) output_folders = [None] * len(cfg.DATASETS.TEST) dataset_names = cfg.DATASETS.TEST mem_active = has_memory(cfg.IA_STRUCTURE) if cfg.OUTPUT_DIR: for idx, dataset_name in enumerate(dataset_names): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) os.makedirs(output_folder, exist_ok=True) output_folders[idx] = output_folder # Do inference. data_loaders_test = make_data_loader(cfg, is_train=False, is_distributed=distributed) for output_folder, dataset_name, data_loader_test in zip( output_folders, dataset_names, data_loaders_test): inference( model, data_loader_test, dataset_name, mem_active=mem_active, output_folder=output_folder, ) synchronize()
import numpy as np

from dataset import make_data_loader
from helpers import parse_args, timer
from generate_data import generate_x, find_min_max


def sample(loader):
    for x in loader:
        pass


if __name__ == '__main__':
    args = parse_args()
    filename = 'task_3.log'

    x_msg = f"X generation with N = {args.N} and M = {args.M}"
    X = timer(generate_x, filename, x_msg)(args.M, args.N)

    y_msg = "Finding optimums for X"
    YMin, YMax = timer(find_min_max, filename, y_msg)(X, args.T, args.k)

    loader = make_data_loader(X, YMin, YMax, N=args.N,
                              batch_size=args.batch_size,
                              num_batches=args.num_batches)
    timer(
        sample, filename,
        f"{args.num_batches} batches sampling with batch size = {args.batch_size}"
    )(loader)
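# `timer` above is imported from `helpers`, and its real implementation is not
# shown in this collection. Based only on the call pattern
# timer(fn, filename, msg)(*args), a compatible wrapper might look like the
# hypothetical sketch below: it runs fn, measures wall-clock time, and appends
# a line "<msg>: <seconds>" to the given log file.
import time
from functools import wraps


def timer_sketch(fn, filename, msg):
    @wraps(fn)
    def wrapped(*args, **kwargs):
        start = time.perf_counter()
        result = fn(*args, **kwargs)
        elapsed = time.perf_counter() - start
        with open(filename, 'a') as log_file:
            log_file.write(f"{msg}: {elapsed:.3f} s\n")
        return result
    return wrapped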
def __init__(self, args):
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.source_loader, self.target_loader, _, self.nclass = make_data_loader(
        args, **kwargs)

    # Define Target Model
    self.target_model = DeepLab(num_classes=self.nclass,
                                backbone=args.backbone,
                                output_stride=args.out_stride,
                                sync_bn=args.sync_bn,
                                freeze_bn=args.freeze_bn)

    # Using cuda
    self.best_pred = {'disc': 0.0, 'cup': 0.0}
    self.target_model = torch.nn.DataParallel(self.target_model)
    patch_replication_callback(self.target_model)
    self.target_model = self.target_model.cuda()

    model_dict = self.target_model.module.state_dict()
    pretrained_dict = {
        k: v
        for k, v in checkpoint['state_dict'].items() if 'last_conv' not in k
    }
    model_dict.update(pretrained_dict)
    self.target_model.module.load_state_dict(model_dict)
    self.target_model.train()
    self.set_requires_grad('target', True)

    # Define learning rate and optimizer params
    target_params = [{
        'params': self.target_model.module.get_1x_lr_params(),
        'lr': args.lr
    }, {
        'params': self.target_model.module.get_10x_lr_params(),
        'lr': args.lr * 10
    }]
    target_optim = torch.optim.SGD(target_params,
                                   momentum=args.momentum,
                                   weight_decay=args.weight_decay,
                                   nesterov=args.nesterov)
    target_optim.zero_grad()
    self.target_criterion = torch.nn.BCEWithLogitsLoss()
    self.target_optim = target_optim

    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.target_loader))
    self.evaluator = Evaluator(3)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Training GlamPoints detector')
    parser.add_argument('--path_ymlfile', type=str,
                        default='configs/glampoints_training.yml',
                        help='Path to yaml file.')
    opt = parser.parse_args()

    with open(opt.path_ymlfile, 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    _device = settings.initialize_cuda_and_logging(cfg)

    train_loader, val_loader = make_data_loader(cfg)

    model = build_model(cfg)
    model.to(_device)

    optimizer = build_optimizer(cfg, model)

    loss_func = build_loss(cfg)

    logger, tb_logger = build_logger(cfg)

    do_train(cfg, model, train_loader, val_loader, optimizer, loss_func,
             logger, tb_logger, _device)
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Testing") parser.add_argument("--local_rank", type=int, default=0) parser.add_argument("--iter", "-i", type=int, default=-1, help="The iteration number, default -1 which will " "test the latest model") parser.add_argument('--show_res', '-s', default=False, action='store_true') args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 distributed = num_gpus > 1 if args.show_res and num_gpus > 1: print('\033[93m You can\'t specify both show_image (-s) and multiple' ' devices (-d %s) \033[0m' % num_gpus) exit(-1) if distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() output_dir = cfg.OUTPUT_DIR if output_dir: mkdir(output_dir) logger = setup_logger("test.inference", output_dir, get_rank(), filename='test_log.txt') logger.info("Using {} GPUs".format(num_gpus)) logger.info(cfg) model = Network() device = torch.device(cfg.MODEL.DEVICE) model.to(device) checkpoint_dir = os.path.join(cfg.OUTPUT_DIR, 'checkpoints') mkdir(checkpoint_dir) checkpointer = DetectronCheckpointer(cfg, model, save_dir=checkpoint_dir, logger=logger) iou_types = ("bbox", ) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm", ) if cfg.MODEL.KEYPOINT_ON: iou_types = iou_types + ("keypoints", ) output_folders = [None] * len(cfg.DATASETS.TEST) dataset_names = cfg.DATASETS.TEST for idx, dataset_name in enumerate(dataset_names): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) mkdir(output_folder) output_folders[idx] = output_folder data_loaders = make_data_loader(num_gpus, is_train=False, is_distributed=distributed, return_raw=args.show_res) def test_model(model): for output_folder, dataset_name, data_loader_val in zip( output_folders, dataset_names, data_loaders): inference( model, data_loader_val, dataset_name=dataset_name, iou_types=iou_types, device=cfg.MODEL.DEVICE, expected_results=cfg.TEST.EXPECTED_RESULTS, expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, output_folder=output_folder, box_only=False, bbox_aug=cfg.TEST.BBOX_AUG.ENABLED, show_res=args.show_res, logger=logger) synchronize() test_iter = args.iter if args.iter == -1: model_file = os.readlink( os.path.join(checkpoint_dir, 'last_checkpoint')) test_iter = int(model_file.split('/')[-1].split('_')[-1][:-4]) else: model_file = os.path.join(checkpoint_dir, "model_{:07d}.pth".format(args.iter)) if os.path.exists(model_file): logger.info("\n\nstart to evaluate iteration of {}".format(test_iter)) _ = checkpointer.load(model_file) test_model(model)