def main(): os.environ["CUDA_VISIBLE_DEVICES"] = "0" parser = argparse.ArgumentParser( description="PyTorch Query Localization in Videos Training") parser.add_argument( "--config-file", default="experiments/charades_sta_train.yaml", # default="experiments/anet_cap_train.yaml", # default="experiments/tacos_train.yaml", metavar="FILE", help="path to config file", type=str, ) args = parser.parse_args() experiment_name = args.config_file.split("/")[-1] log_directory = args.config_file.replace(experiment_name, "logs/") vis_directory = args.config_file.replace(experiment_name, "visualization/") experiment_name = experiment_name.replace(".yaml", "") cfg.merge_from_list([ 'EXPERIMENT_NAME', experiment_name, 'LOG_DIRECTORY', log_directory, "VISUALIZATION_DIRECTORY", vis_directory ]) cfg.merge_from_file(args.config_file) output_dir = "./{}".format(cfg.LOG_DIRECTORY) if output_dir: mkdir(output_dir) mkdir("./checkpoints/{}".format(cfg.EXPERIMENT_NAME)) logger = setup_logger("mlnlp", output_dir, cfg.EXPERIMENT_NAME + ".txt", 0) logger.info("Starting moment localization with dynamic filters") logger.info(cfg.EXPERIMENT_NAME) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # torch.backends.cudnn.enabled = False if cfg.ENGINE_STAGE == "TRAINER": print('#######') print(cfg.DYNAMIC_FILTER.LSTM_VIDEO.DROPOUT) trainer(cfg) elif cfg.ENGINE_STAGE == "TESTER": tester(cfg)
def __init__(self, cfg, dataset_size, is_train=True):
    self.loss = []
    self.IoU = []
    self.mIoU = []
    self.aux_mIoU = []
    self.individual_loss = {}
    self.vis_dir = "{}{}".format(cfg.VISUALIZATION_DIRECTORY, cfg.EXPERIMENT_NAME)
    print(self.vis_dir)
    mkdir(self.vis_dir)
    self.cfg = cfg
    # track four random samples (plus sample 100) for per-sample loss visualization
    self.s_samples = np.random.randint(dataset_size, size=4)
    self.s_samples = np.insert(self.s_samples, 0, 100)
    for s in self.s_samples:
        self.individual_loss[str(s)] = []
        mkdir("{}/{}".format(self.vis_dir, str(s)))
    if is_train:
        self.state = "training"
    else:
        self.state = "testing"

def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    args = parser.parse_args()

    experiment_name = args.config_file.split("/")[-1]
    log_directory = args.config_file.replace(experiment_name, "logs/")
    vis_directory = args.config_file.replace(experiment_name, "visualization/")
    experiment_name = experiment_name.replace(".yaml", "")

    cfg.merge_from_list([
        'EXPERIMENT_NAME', experiment_name,
        'LOG_DIRECTORY', log_directory,
        "VISUALIZATION_DIRECTORY", vis_directory
    ])
    cfg.merge_from_file(args.config_file)

    output_dir = "./{}".format(cfg.LOG_DIRECTORY)
    if output_dir:
        mkdir(output_dir)
        mkdir("./checkpoints/{}".format(cfg.EXPERIMENT_NAME))

    logger = setup_logger("mlnlp", output_dir, cfg.EXPERIMENT_NAME + ".txt", 0)
    logger.info("Starting moment localization with dynamic filters")
    logger.info(cfg.EXPERIMENT_NAME)

    # reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if cfg.ENGINE_STAGE == "TRAINER":
        trainer(cfg)
    elif cfg.ENGINE_STAGE == "TESTER":
        tester(cfg)

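# Hedged sketch (assumption): `setup_logger(name, save_dir, filename, distributed_rank)`
# matches the call above and is presumably a maskrcnn-benchmark-style helper that logs
# to stdout and to a file under save_dir, attaching handlers only on rank 0. The
# formatter and level choices here are illustrative, not taken from the original project.
import logging
import os
import sys


def setup_logger(name, save_dir, filename, distributed_rank):
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    if distributed_rank > 0:
        # non-master processes keep a silent logger
        return logger
    formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
    stream_handler = logging.StreamHandler(stream=sys.stdout)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    if save_dir:
        file_handler = logging.FileHandler(os.path.join(save_dir, filename))
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    return logger
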
def tsv_writer(values, tsv_file, sep='\t'):
    mkdir(op.dirname(tsv_file))
    lineidx_file = op.splitext(tsv_file)[0] + '.lineidx'
    idx = 0
    tsv_file_tmp = tsv_file + '.tmp'
    lineidx_file_tmp = lineidx_file + '.tmp'
    with open(tsv_file_tmp, 'w') as fp, open(lineidx_file_tmp, 'w') as fpidx:
        assert values is not None
        for value in values:
            assert value is not None
            # Make sure Python 2 and Python 3 encoded image strings end up identical:
            # a Python 2 encoded image string is a str starting with "/", while the
            # Python 3 equivalent is a bytes object starting with b"/". decode('utf-8')
            # converts bytes to str so the written content is the same; it must only be
            # applied to bytes values.
            value = [v if not isinstance(v, bytes) else v.decode('utf-8') for v in value]
            v = '{0}\n'.format(sep.join(map(str, value)))
            fp.write(v)
            fpidx.write(str(idx) + '\n')
            idx = idx + len(v)
    os.rename(tsv_file_tmp, tsv_file)
    os.rename(lineidx_file_tmp, lineidx_file)

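# Hedged companion sketch (not part of the original file): the '.lineidx' file written
# above records the offset of each row, which allows random access to a single TSV row
# without scanning the whole file. `tsv_read_row` is a hypothetical name; it assumes
# ASCII rows so the character offsets recorded by tsv_writer coincide with byte offsets.
import os.path as op


def tsv_read_row(tsv_file, row_idx, sep='\t'):
    lineidx_file = op.splitext(tsv_file)[0] + '.lineidx'
    with open(lineidx_file, 'r') as fpidx:
        offsets = [int(line) for line in fpidx]
    with open(tsv_file, 'rb') as fp:
        fp.seek(offsets[row_idx])  # jump straight to the start of the requested row
        return fp.readline().decode('utf-8').rstrip('\n').split(sep)
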
        is_distributed=cfg.distributed)
    ntrain = len(trainloader.dataset)
    steps_per_epoch = len(trainloader)
    cfg.SOLVER.STEPS_PER_EPOCH = steps_per_epoch
    logs_per_epoch = steps_per_epoch / cfg.LOG_FREQ
    warmup = cfg.OPTIM.WARMUP * steps_per_epoch
    cfg.OPTIM.WARMUP = warmup
    cfg.SOLVER.MAX_ITER = steps_per_epoch * cfg.OPTIM.EPOCHS

    # get the starting checkpoint's iteration
    iteration = config_iteration(cfg.OUTPUT_DIR, steps_per_epoch)

    logging.info("Experiment settings:")
    logging.info(cfg)

    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)
        # save full config to a file in output_dir for future reference
        with open(os.path.join(cfg.OUTPUT_DIR, 'config.yaml'), 'w') as f:
            f.write(str(cfg))
    cfg.freeze()

    # mix-up
    aug = cfg.AUG
    mixup_fn = Mixup(
        mixup_alpha=aug.MIXUP,
        cutmix_alpha=aug.MIXCUT,
        cutmix_minmax=aug.MIXCUT_MINMAX if aug.MIXCUT_MINMAX else None,
        prob=aug.MIXUP_PROB,
        switch_prob=aug.MIXUP_SWITCH_PROB,
        mode=aug.MIXUP_MODE,
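    # Hedged usage sketch (not in the original fragment): `Mixup` here appears to be the
    # timm augmentation helper; it is typically applied per batch inside the training
    # loop and returns mixed inputs together with soft mixed targets, paired with a
    # soft-target loss. `images`, `targets` and `model` below are placeholder names.
    #
    #     images, targets = images.cuda(), targets.cuda()
    #     if mixup_fn is not None:
    #         images, targets = mixup_fn(images, targets)
    #     loss = SoftTargetCrossEntropy()(model(images), targets)
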
def main():
    # Add arguments
    parser = argparse.ArgumentParser(description="Vision Research Toolkit by PyTorch")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    # make config
    cfg = make_config(args.config_file, args.opts)

    # obtain absolute dir of project
    project_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    if cfg.CHECKPOINTER.DIR:
        if cfg.CHECKPOINTER.DIR[0] != os.sep:
            # if the saver_dir is not an absolute dir, make it relative to the project
            cfg.CHECKPOINTER.DIR = os.path.join(project_dir, cfg.CHECKPOINTER.DIR)
    else:
        cfg.CHECKPOINTER.DIR = os.path.join(project_dir, 'log')
    if not cfg.CHECKPOINTER.NAME:
        cfg.CHECKPOINTER.NAME = strftime("%Y-%m-%d-%H-%M-%S", localtime())
    cfg.freeze()

    save_dir = os.path.join(cfg.CHECKPOINTER.DIR, cfg.CHECKPOINTER.NAME)
    mkdir(save_dir)

    # Init logger
    logger = setup_logger(cfg.NAME, save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info ...")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(save_dir, os.path.basename(args.config_file))
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    train(cfg, args.local_rank, args.distributed)
    return

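# Hedged sketch (assumption, not shown in this snippet): `synchronize()` above is
# presumably a maskrcnn-benchmark-style barrier so that all distributed workers reach
# the same point before continuing; a minimal version could look like this.
import torch.distributed as dist


def synchronize():
    """No-op unless torch.distributed is initialized with more than one process."""
    if not dist.is_available() or not dist.is_initialized():
        return
    if dist.get_world_size() == 1:
        return
    dist.barrier()
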
def main(cfgs: DictConfig):
    logger = logging.getLogger(cfgs.arch)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # create model
    logger.info("getting model '{}' from torch hub".format(cfgs.arch))
    model, input_size = initialize_model(
        model_name=cfgs.arch,
        num_classes=cfgs.num_classes,
        feature_extract=cfgs.feature_extract,
        use_pretrained=cfgs.pretrained,
    )
    logger.info("model: '{}' is successfully loaded".format(model.__class__.__name__))
    logger.info("model structure: {}".format(model))

    # Data augmentation and normalization for training;
    # just normalization for validation
    logger.info("Initializing Datasets and Dataloaders...")
    logger.info("loading data {} from {}".format(cfgs.dataset, cfgs.data_path))
    dataloaders_dict = load_data(input_size=input_size,
                                 batch_size=cfgs.batch_size,
                                 data_path=cfgs.data_path,
                                 num_workers=cfgs.workers)

    # Detect if we have a GPU available
    device = torch.device(cfgs.device if torch.cuda.is_available() else "cpu")

    # Gather the parameters to be optimized/updated in this run. If we are
    # fine-tuning, we update all parameters. If we are using the
    # feature-extraction method, we only update the parameters that were just
    # initialized, i.e. the parameters whose requires_grad is True.
    params_to_update = model.parameters()
    param_log_info = ''
    if cfgs.feature_extract:
        params_to_update = []
        for name, param in model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                param_log_info += "\t{}".format(name)
    else:
        for name, param in model.named_parameters():
            if param.requires_grad:
                param_log_info += "\t{}".format(name)
    logger.info("Params to learn:\n" + param_log_info)

    # Observe that all gathered parameters are being optimized
    optimizer_ft = optim.SGD(params_to_update, lr=cfgs.lr, momentum=cfgs.momentum)

    # Setup the loss function
    criterion = nn.CrossEntropyLoss()

    # Train and evaluate
    model_ft = train_model(model,
                           dataloaders_dict,
                           device,
                           criterion,
                           optimizer_ft,
                           logger,
                           print_freq=cfgs.print_freq,
                           num_epochs=cfgs.epochs,
                           is_inception=(cfgs.arch == "inception"))

    mkdir(cfgs.weight_dir)
    weight_path = os.path.join(cfgs.weight_dir, cfgs.arch) + '.ckpt'
    torch.save(model_ft.state_dict(), weight_path)
    logger.info("model is saved at {}".format(os.path.abspath(weight_path)))

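# Hedged sketch (assumption, mirroring the PyTorch fine-tuning tutorial): with
# `feature_extract=True`, `initialize_model` is expected to freeze the pretrained
# backbone so that only the re-initialized head keeps requires_grad=True, which is why
# the loop above finds only a handful of "params to learn". A minimal freezing helper:
def set_parameter_requires_grad(model, feature_extracting):
    # freeze every existing parameter; the classifier head recreated afterwards
    # stays trainable because it is a new module with fresh parameters
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
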
def main(): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" global best_acc1 run_time = str(datetime.datetime.now()) cfgs = load_configs(CONFIG_FILE) # create log dir and weight dir mkdir(cfgs['weight_dir']) mkdir(cfgs['log_dir']) # create logger log_dir = osp.join(cfgs['log_dir'], cfgs['arch']) mkdir(log_dir) cfgs['log_name'] = cfgs['arch'] + '_' + cfgs['dataset'] logger = setup_logger(cfgs['log_name'], log_dir, get_rank(), run_time + '.txt') logger.info("Collecting env info (might take some time)") logger.info("\n" + collect_env_info()) logger.info("Loaded configuration file {}".format(CONFIG_FILE)) logger.info("Running with config:\n{}".format(cfgs)) # create model logger.info("=> creating model '{}'".format(cfgs['arch'])) model = models.__dict__[cfgs['arch']]() if cfgs['arch'].lower().startswith('wideresnet'): # a customized resnet model with last feature map size as 14x14 for better class activation mapping model = wideresnet.resnet50(num_classes=cfgs['num_classes']) else: model = models.__dict__[cfgs['arch']](num_classes=cfgs['num_classes']) if cfgs['arch'].lower().startswith( 'alexnet') or cfgs['arch'].lower().startswith('vgg'): model.features = torch.nn.DataParallel(model.features) model.cuda() else: model = torch.nn.DataParallel(model).cuda() logger.info("=> created model '{}'".format(model.__class__.__name__)) logger.info("model structure: {}".format(model)) num_gpus = torch.cuda.device_count() logger.info("using {} GPUs".format(num_gpus)) # optionally resume from a checkpoint if cfgs['resume']: if osp.isfile(cfgs['resume']): logger.info("=> loading checkpoint '{}'".format(cfgs['resume'])) checkpoint = torch.load(cfgs['resume']) cfgs['start_epoch'] = checkpoint['epoch'] best_acc1 = checkpoint['best_acc1'] model.load_state_dict(checkpoint['state_dict']) logger.info("=> loaded checkpoint '{}' (epoch {})".format( cfgs['resume'], checkpoint['epoch'])) else: logger.info("=> no checkpoint found at '{}'".format( cfgs['resume'])) torch.backends.cudnn.benchmark = True # Data loading code traindir = osp.join(cfgs['data_path'], 'train') valdir = osp.join(cfgs['data_path'], 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_loader = torch.utils.data.DataLoader(datasets.ImageFolder( traindir, transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize, ])), batch_size=cfgs['batch_size'], shuffle=True, num_workers=cfgs['workers'], pin_memory=True) val_loader = torch.utils.data.DataLoader(datasets.ImageFolder( valdir, transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])), batch_size=cfgs['batch_size'], shuffle=False, num_workers=cfgs['workers'], pin_memory=True) # define loss function (criterion) and pptimizer criterion = nn.CrossEntropyLoss().cuda() optimizer = torch.optim.SGD(model.parameters(), cfgs['lr'], momentum=cfgs['momentum'], weight_decay=float(cfgs['weight_decay'])) # if cfgs['evaluate']: # validate(val_loader, model, criterion, cfgs) # return # for epoch in range(cfgs['start_epoch'], cfgs['epochs']): # adjust_learning_rate(optimizer, epoch, cfgs) # # train for one epoch # train(train_loader, model, criterion, optimizer, epoch, cfgs) # # evaluate on validation set # acc1 = validate(val_loader, model, criterion, cfgs) # # remember best acc@1 and save checkpoint # is_best = acc1 > best_acc1 # best_acc1 = max(acc1, best_acc1) # save_checkpoint({ # 'epoch': epoch + 1, # 'arch': cfgs['arch'], # 'state_dict': 
model.state_dict(), # 'best_acc1': best_acc1, # }, is_best, cfgs['weight_dir'] + '/' + cfgs['arch'].lower()) logger.info("start to test the best model") best_weight = cfgs['weight_dir'] + '/' + cfgs['arch'].lower( ) + '_best.pth.tar' if osp.isfile(best_weight): logger.info("=> loading best model '{}'".format(best_weight)) checkpoint = torch.load(best_weight) best_acc1 = checkpoint['best_acc1'] epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) logger.info("=> loaded checkpoint '{}' (val Acc@1 {})".format( best_weight, best_acc1)) else: logger.info("=> no best model found at '{}'".format(best_weight)) acc1 = validate(val_loader, model, criterion, cfgs)
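# Hedged sketch (assumption, not defined in this snippet): the `save_checkpoint` used in
# the commented-out training loop above typically follows the PyTorch ImageNet example,
# writing the latest state and copying it to '<prefix>_best.pth.tar' when `is_best` is
# True, which is the file the test branch above then loads. The '_checkpoint.pth.tar'
# suffix below is a guess.
import shutil

import torch


def save_checkpoint(state, is_best, prefix):
    filename = prefix + '_checkpoint.pth.tar'
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, prefix + '_best.pth.tar')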