def main():
    """Entry point (evaluation variant): build the dataset and model, then run a
    single validation pass.

    Reads configuration from the module-level ``args``/``configs`` and returns
    the populated meters object so callers can inspect validation metrics.
    """
    # Output directories for this run (created eagerly, except the vis dir).
    args.dump_dir = ensure_path(
        osp.join('dumps', args.dataset_name, args.desc_name, args.expr))
    args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
    args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
    # NOTE(review): vis_dir is joined but not passed through ensure_path —
    # presumably created lazily by whatever writes visualizations; confirm.
    args.vis_dir = osp.join(args.dump_dir, 'vis', args.run_name)

    # Build the dataset once, then split it into train/validation halves.
    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)
    dataset = build_dataset(args, configs, args.data_image_root,
                            args.data_scenes_json, args.data_questions_json)
    # data_split <= 1 is treated as a fraction of the dataset; > 1 as an
    # absolute number of training examples.
    dataset_split = int(len(dataset) * args.data_split) if args.data_split <= 1 else int(args.data_split)
    train_dataset, validation_dataset = dataset.split_trainval(dataset_split)

    logger.critical('Building the model.')
    # The vocabulary comes from the (unwrapped) training split.
    model = desc.make_model(args, train_dataset.unwrapped.vocab)

    if args.use_gpu:
        model.cuda()
        # Use the customized data parallel if applicable.
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            # from jactorch.parallel import UserScatteredJacDataParallel as JacDataParallel
            model = JacDataParallel(model, device_ids=args.gpus).cuda()
        # Disable the cudnn benchmark.
        cudnn.benchmark = False

    # Optionally restore pretrained weights before evaluating.
    if args.load:
        from jactorch.io import load_weights
        if load_weights(model, args.load):
            logger.critical(
                'Loaded weights from pretrained model: "{}".'.format(args.load))

    from jacinle.utils.meter import GroupMeters
    meters = GroupMeters()

    # Optional interactive shell for poking at the model before evaluation.
    if args.embed:
        from IPython import embed
        embed()

    logger.critical('Building the data loader.')
    # NOTE(review): shuffle=True on a validation loader is unusual (the sibling
    # scripts use shuffle=False) — confirm this is intentional.
    validation_dataloader = validation_dataset.make_dataloader(
        args.batch_size, shuffle=True, drop_last=False,
        nr_workers=args.data_workers)

    # Run one validation epoch (epoch index 0) and report the non-zero
    # averaged metrics.
    model.eval()
    validate_epoch(0, model, validation_dataloader, meters)
    logger.critical(
        meters.format_simple(
            'Validation',
            {k: v for k, v in meters.avg.items() if v != 0},
            compressed=False))
    return meters
def main():
    """Entry point (training variant): derive the experiment dump directory,
    set up logging/metainfo/tensorboard, build the datasets, and hand off to
    ``main_train``."""
    # The leaf directory name encodes the full training configuration:
    # target, curriculum, optional question-transform/expr tags, and the lr.
    args.dump_dir = ensure_path(
        osp.join('dumps', args.series_name, args.desc_name,
                 (args.training_target
                  + ('-curriculum_' + args.curriculum)
                  + ('-qtrans_' + args.question_transform if args.question_transform is not None else '')
                  + ('-' + args.expr if args.expr is not None else '')
                  + ('-lr_' + str(args.lr)))))

    if not args.debug:
        # Persist checkpoints, metainfo, logs, and meters only for real runs.
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir, args.run_name + '.meter.json')

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(
                osp.join(args.dump_dir, 'tensorboard'))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root, args.run_name))

    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)
    dataset = build_dataset(args, configs, args.data_image_root,
                            args.data_scenes_json, args.data_questions_json)

    # data_trim / data_split <= 1 are fractions of the dataset; > 1 are
    # absolute example counts.
    dataset_trim = int(len(dataset) * args.data_trim) if args.data_trim <= 1 else int(args.data_trim)
    if dataset_trim > 0:
        dataset = dataset.trim_length(dataset_trim)

    dataset_split = int(len(dataset) * args.data_split) if args.data_split <= 1 else int(args.data_split)
    train_dataset, validation_dataset = dataset.split_trainval(dataset_split)

    # Optional extra dataset mixed in during training.
    extra_dataset = None
    if args.extra_data_dir is not None:
        extra_dataset = build_dataset(args, configs, args.extra_data_image_root,
                                      args.extra_data_scenes_json,
                                      args.extra_data_questions_json)

    main_train(train_dataset, validation_dataset, extra_dataset)
def main():
    """Entry point (CLEVRER variant): derive the dump directory from the
    active feature flags, set up logging/metainfo, build the datasets, and
    hand off to ``main_train``."""
    # The dump-directory name encodes the feature configuration of this run.
    args.dump_dir = ensure_path(osp.join(
        'dumps', args.series_name, args.desc_name))
    if args.normalized_boxes:
        args.dump_dir = args.dump_dir + '_norm_box'
    if args.even_smp_flag:
        args.dump_dir = args.dump_dir + '_even_smp' + str(args.frm_img_num)
    # NOTE(review): this repeats the even_smp_flag test above — the
    # '_col_box_ftr' suffix looks like it should be gated on a separate
    # collision-box-feature flag; confirm against the arg parser.
    if args.even_smp_flag:
        args.dump_dir = args.dump_dir + '_col_box_ftr'
    args.dump_dir += '_' + args.version + '_' + args.prefix

    #if args.debug:
    if not args.debug:
        # Persist checkpoints, metainfo, logs, and meters only for real runs.
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir, args.run_name + '.meter.json')
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(osp.join(args.dump_dir, 'tensorboard'))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root, args.run_name))

    initialize_dataset(args.dataset, args.version)
    #validation_dataset = extra_dataset
    # Evaluate on the test split when explicitly testing (or for 'billiards',
    # which is handled the same way here); otherwise use the validation split.
    if args.testing_flag == 1 or args.dataset == 'billiards':
        validation_dataset = build_clevrer_dataset(args, 'test')
    else:
        validation_dataset = build_clevrer_dataset(args, 'validation')
    train_dataset = build_clevrer_dataset(args, 'train')
    extra_dataset = None
    main_train(train_dataset, validation_dataset, extra_dataset)
def main():
    """Entry point (full trainer template): set up directories/logging/
    tensorboard/MLDash, build the model and optimizer, optionally resume or
    load weights, then either evaluate once or run the training loop.

    NOTE(review): this is a scaffold — ``train_dataset``/``validation_dataset``
    are left as ``None`` (see the TODO below) and must be filled in before the
    dataloader construction further down, which would otherwise raise
    ``AttributeError`` on ``None``.
    """
    # directories
    if not args.debug:
        args.dump_dir = ensure_path(osp.join('dumps', args.series_name, args.desc_name, args.run_name))
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.vis_dir = ensure_path(osp.join(args.dump_dir, 'visualizations'))
        args.meta_file = osp.join(args.dump_dir, 'metainfo.json')
        args.log_file = osp.join(args.dump_dir, 'log.log')
        args.meter_file = osp.join(args.dump_dir, 'meter.json')

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir = ensure_path(osp.join(args.dump_dir, 'tensorboard'))
        else:
            args.tb_dir = None

    if not args.debug:
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

    # Tensorboard is meaningless in debug/evaluation mode; turn it off loudly.
    if args.debug and args.use_tb:
        logger.warning('Disabling the tensorboard in the debug mode.')
        args.use_tb = False
    if args.evaluate and args.use_tb:
        logger.warning('Disabling the tensorboard in the evaluation mode.')
        args.use_tb = False

    # TODO(Jiayuan Mao @ 04/23): load the dataset.
    logger.critical('Loading the dataset.')
    train_dataset = None
    validation_dataset = None
    # configs.validate_dataset_compatibility(train_dataset)

    # TODO(Jiayuan Mao @ 04/23): build the model.
    logger.critical('Building the model.')
    model = desc.make_model(args)

    if args.use_gpu:
        model.cuda()
        # Use the customized data parallel if applicable.
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            # Set user_scattered because we will add a multi GPU wrapper to the dataloader. See below.
            model = JacDataParallel(model, device_ids=args.gpus, user_scattered=True).cuda()
        # TODO(Jiayuan Mao @ 04/23): disable the cudnn benchmark.
        # Disable the cudnn benchmark.
        cudnn.benchmark = False

    # Prefer a model-description-provided optimizer; fall back to AdamW over
    # the trainable parameters only.
    if hasattr(desc, 'make_optimizer'):
        logger.critical('Building customized optimizer.')
        optimizer = desc.make_optimizer(model, args.lr)
    else:
        from jactorch.optim import AdamW
        # TODO(Jiayuan Mao @ 04/23): set the default optimizer.
        trainable_parameters = filter(lambda x: x.requires_grad, model.parameters())
        optimizer = AdamW(trainable_parameters, args.lr, weight_decay=configs.train.weight_decay)

    # Gradient accumulation: effective batch size scales with acc_grad.
    if args.acc_grad > 1:
        from jactorch.optim import AccumGrad
        optimizer = AccumGrad(optimizer, args.acc_grad)
        logger.warning('Use accumulated grad={:d}, effective iterations per epoch={:d}.'.format(
            args.acc_grad, int(args.iters_per_epoch / args.acc_grad)))

    trainer = TrainerEnv(model, optimizer)

    # Resume restores optimizer state and epoch counter; load only weights.
    if args.resume:
        extra = trainer.load_checkpoint(args.resume)
        if extra:
            args.start_epoch = extra['epoch']
            logger.critical('Resume from epoch {}.'.format(args.start_epoch))
    elif args.load:
        if trainer.load_weights(args.load):
            logger.critical('Loaded weights from pretrained model: "{}".'.format(args.load))

    # Meters: tensorboard-backed when enabled, plain in-memory otherwise.
    if args.use_tb:
        from jactorch.train.tb import TBLogger, TBGroupMeters
        tb_logger = TBLogger(args.tb_dir)
        meters = TBGroupMeters(tb_logger)
        logger.critical('Writing tensorboard logs to: "{}".'.format(args.tb_dir))
    else:
        from jacinle.utils.meter import GroupMeters
        meters = GroupMeters()

    if not args.debug:
        # NOTE(review): metainfo is written a second time here (it was already
        # written above) — presumably to capture args mutated since; confirm.
        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
        logger.critical('Writing meter logs to file: "{}".'.format(args.meter_file))

        logger.critical('Initializing MLDash.')
        mldash.init(
            desc_name=args.series_name + '/' + args.desc_name,
            expr_name=args.expr,
            run_name=args.run_name,
            args=args,
            highlight_args=parser,
            configs=configs,
        )
        mldash.update(metainfo_file=args.meta_file, log_file=args.log_file,
                      meter_file=args.meter_file, tb_dir=args.tb_dir)

    # Optional interactive shell before training starts.
    if args.embed:
        from IPython import embed; embed()

    if hasattr(desc, 'customize_trainer'):
        desc.customize_trainer(trainer)

    # TODO(Jiayuan Mao @ 04/23): make the data loader.
    logger.critical('Building the data loader.')
    train_dataloader = train_dataset.make_dataloader(
        args.batch_size, shuffle=True, drop_last=True, nr_workers=args.data_workers)
    validation_dataloader = validation_dataset.make_dataloader(
        args.batch_size, shuffle=False, drop_last=False, nr_workers=args.data_workers)

    # Multi-GPU: wrap the dataloaders so each GPU gets its own shard
    # (matches the user_scattered=True JacDataParallel above).
    if args.use_gpu and args.gpu_parallel:
        from jactorch.data.dataloader import JacDataLoaderMultiGPUWrapper
        train_dataloader = JacDataLoaderMultiGPUWrapper(train_dataloader, args.gpus)
        validation_dataloader = JacDataLoaderMultiGPUWrapper(validation_dataloader, args.gpus)

    # Evaluation-only mode: one validation pass, dump meters, and return.
    if args.evaluate:
        epoch = 0
        model.eval()
        validate_epoch(epoch, trainer, validation_dataloader, meters)
        if not args.debug:
            meters.dump(args.meter_file)
        logger.critical(meters.format_simple('Epoch = {}'.format(epoch), compressed=False))
        return

    # Main training loop: epochs are 1-indexed and resume-aware.
    for epoch in range(args.start_epoch + 1, args.epochs + 1):
        meters.reset()

        model.train()
        train_epoch(epoch, trainer, train_dataloader, meters)

        if args.validation_interval > 0 and epoch % args.validation_interval == 0:
            model.eval()
            with torch.no_grad():
                validate_epoch(epoch, trainer, validation_dataloader, meters)

        if not args.debug:
            meters.dump(args.meter_file)

        # TODO(Jiayuan Mao @ 02/15): config the MLDash.
        if not args.debug:
            mldash.log_metric('epoch', epoch, desc=False, expr=False)
            # Track the best (min) losses and best (max) accuracies seen so far.
            for key, value in meters.items():
                if key.startswith('loss') or key.startswith('validation/loss'):
                    mldash.log_metric_min(key, value.avg)
            for key, value in meters.items():
                if key.startswith('acc') or key.startswith('validation/acc'):
                    mldash.log_metric_max(key, value.avg)

        logger.critical(meters.format_simple('Epoch = {}'.format(epoch), compressed=False))

        if not args.debug:
            if epoch % args.save_interval == 0:
                fname = osp.join(args.ckpt_dir, 'epoch_{}.pth'.format(epoch))
                trainer.save_checkpoint(fname, dict(epoch=epoch, meta_file=args.meta_file))
def main():
    """Entry point (depth + one-shot variant): set up directories and logging,
    optionally load per-split index files, build the train/val/test datasets
    plus the prototype and one-shot datasets, and hand off to ``main_train``.

    NOTE(review): the bulk of the trimming/splitting/view-filtering logic is
    commented out below and the loaded ``*_idxs`` sets are currently unused —
    confirm whether the filters should be re-enabled or the index loading
    removed.
    """
    # The leaf directory name encodes the training configuration
    # (target, curriculum, optional question-transform/expr tags).
    args.dump_dir = ensure_path(
        osp.join(
            "dumps",
            args.series_name,
            args.desc_name,
            (args.training_target
             + ("-curriculum_" + args.curriculum)
             + ("-qtrans_" + args.question_transform if args.question_transform is not None else "")
             + ("-" + args.expr if args.expr is not None else "")),
        ))
    if not args.debug:
        # Persist checkpoints, metainfo, logs, and meters only for real runs.
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, "checkpoints"))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, "meta"))
        args.meta_file = osp.join(args.meta_dir, args.run_name + ".json")
        args.log_file = osp.join(args.meta_dir, args.run_name + ".log")
        args.meter_file = osp.join(args.meta_dir, args.run_name + ".meter.json")
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, "w") as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(
                osp.join(args.dump_dir, "tensorboard"))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root, args.run_name))

    # Optional JSON files listing the image indices belonging to each split.
    if args.train_split is not None:
        with open(osp.join(args.data_dir, args.train_split)) as f:
            train_idxs = set(json.load(f))
    else:
        train_idxs = None
    if args.val_split is not None and args.val_data_dir is not None:
        with open(osp.join(args.val_data_dir, args.val_split)) as f:
            val_idxs = set(json.load(f))
    else:
        val_idxs = None
    if args.test_split is not None and args.test_data_dir is not None:
        with open(osp.join(args.test_data_dir, args.test_split)) as f:
            test_idxs = set(json.load(f))
    else:
        test_idxs = None

    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)
    # This variant's builder additionally takes a depth-image root.
    dataset = build_dataset(
        args,
        configs,
        args.data_image_root,
        args.data_depth_root,
        args.data_scenes_json,
        args.data_questions_json,
    )
    # dataset_trim = (
    #     int(len(dataset) * args.data_trim)
    #     if args.data_trim <= 1
    #     else int(args.data_trim)
    # )
    # if dataset_trim > 0:
    #     dataset = dataset.trim_length(dataset_trim)
    # # dataset_split = (
    # #     int(len(dataset) * args.data_split)
    # #     if args.data_split <= 1
    # #     else int(args.data_split)
    # # )
    # # train_dataset, validation_dataset = dataset.split_trainval(dataset_split)
    # if args.mv:
    #     ood_views = set(args.ood_views)
    #     id_views = set(range(args.num_views)) - ood_views

    # The entire main dataset is used for training (no split/filter applied).
    train_dataset = dataset
    # if train_idxs:
    #     train_dataset = dataset.filter(
    #         lambda question: question["image_index"] in train_idxs,
    #         "filter_train_size_{}".format(len(train_idxs)),
    #     )

    val_dataset = None
    if args.val_data_dir is not None:
        val_dataset = build_dataset(
            args,
            configs,
            args.val_data_image_root,
            args.val_data_depth_root,
            args.val_data_scenes_json,
            args.val_data_questions_json,
        )
        # if val_idxs:
        #     val_dataset = val_dataset.filter(
        #         lambda question: question["image_index"] in val_idxs,
        #         "filter_val_size_{}".format(len(val_idxs)),
        #     )

    test_dataset = None
    if args.test_data_dir is not None:
        test_dataset = build_dataset(
            args,
            configs,
            args.test_data_image_root,
            args.test_data_depth_root,
            args.test_data_scenes_json,
            args.test_data_questions_json,
        )
        # if test_idxs:
        #     test_dataset = test_dataset.filter(
        #         lambda question: question["image_index"] in test_idxs,
        #         "filter_val_size_{}".format(len(test_idxs)),
        #     )
        # test_dataset = {"test": test_dataset}

    # if args.mv:
    #     # train_dataset = train_dataset.filter(
    #     #     lambda question: question["view_id"] in id_views, "id_view"
    #     # )
    #     if val_dataset:
    #         val_dataset = val_dataset.filter(
    #             lambda question: question["view_id"] in id_views, "id_view"
    #         )
    #     if test_dataset:
    #         id_test = test_dataset["test"].filter(
    #             lambda question: question["view_id"] in id_views, "id_view"
    #         )
    #         ood_test = test_dataset["test"].filter(
    #             lambda question: question["view_id"] in ood_views, "ood_view"
    #         )
    #         test_dataset = {"id_test": id_test, "ood_test": ood_test}

    # NOTE(review): hard-coded cluster paths — consider promoting these to
    # command-line arguments so the script runs outside this environment.
    prototype_dataset = create_prototype_dataset(
        "/projects/data/clevr_nscl/one_shot_protos")
    one_shot_root = "/projects/data/clevr_nscl/one_shot_test_only"
    one_shot_dataset = build_dataset(
        args,
        configs,
        one_shot_root + "/images",
        one_shot_root + "/depth",
        one_shot_root + "/CLEVR_scenes_annotated_aligned.json",
        one_shot_root + "/CLEVR_questions.json",
    )
    main_train(train_dataset, val_dataset, test_dataset, prototype_dataset,
               one_shot_dataset)
def main():
    """Entry point (training variant): derive the experiment dump directory,
    set up logging/metainfo/tensorboard, build and split the dataset, and
    hand off to ``main_train``."""
    # Assemble the run tag piece by piece; optional pieces are appended only
    # when the corresponding argument is set.
    run_tag_parts = [args.training_target, "-curriculum_" + args.curriculum]
    if args.question_transform is not None:
        run_tag_parts.append("-qtrans_" + args.question_transform)
    if args.expr is not None:
        run_tag_parts.append("-" + args.expr)
    args.dump_dir = ensure_path(
        osp.join("dumps", args.series_name, args.desc_name, "".join(run_tag_parts)))

    if not args.debug:
        # Persist checkpoints, metainfo, logs, and meters only for real runs.
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, "checkpoints"))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, "meta"))
        args.meta_file = osp.join(args.meta_dir, args.run_name + ".json")
        args.log_file = osp.join(args.meta_dir, args.run_name + ".log")
        args.meter_file = osp.join(args.meta_dir, args.run_name + ".meter.json")

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, "w") as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(osp.join(args.dump_dir, "tensorboard"))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root, args.run_name))

    initialize_dataset(args.dataset)
    dataset_builder = get_dataset_builder(args.dataset)
    dataset = dataset_builder(
        args,
        configs,
        args.data_image_root,
        args.data_scenes_json,
        args.data_questions_json,
    )

    # data_trim <= 1 is a fraction of the dataset; > 1 an absolute count.
    if args.data_trim <= 1:
        trim_count = int(len(dataset) * args.data_trim)
    else:
        trim_count = int(args.data_trim)
    if trim_count > 0:
        dataset = dataset.trim_length(trim_count)

    # Same fractional/absolute convention for the train/validation split.
    if args.data_split <= 1:
        split_count = int(len(dataset) * args.data_split)
    else:
        split_count = int(args.data_split)
    train_dataset, validation_dataset = dataset.split_trainval(split_count)

    # Optional extra dataset mixed in during training.
    extra_dataset = None
    if args.extra_data_dir is not None:
        extra_dataset = dataset_builder(
            args,
            configs,
            args.extra_data_image_root,
            args.extra_data_scenes_json,
            args.extra_data_questions_json,
        )

    main_train(train_dataset, validation_dataset, extra_dataset)
def main():
    """Entry point (test-script template): set up directories and logging,
    build the model, load pretrained weights, and run one validation pass.

    NOTE(review): this is a scaffold — ``validation_dataset`` is left as
    ``None`` (see the TODO below) and must be filled in before the dataloader
    construction further down, which would otherwise raise ``AttributeError``
    on ``None``.
    """
    # directories
    if not args.debug:
        args.dump_dir = ensure_path(
            osp.join('dumps', args.series_name, args.desc_name))
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir, args.run_name + '.meter.json')

    if not args.debug:
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)
        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
    else:
        if args.use_tb:
            # NOTE(review): the .format(args.meta_file) here is a no-op — the
            # warning string has no placeholder; likely a copy-paste leftover.
            logger.warning(
                'Disabling the tensorboard in the debug mode.'.format(
                    args.meta_file))
            args.use_tb = False

    # TODO(Jiayuan Mao @ 04/23): load the dataset.
    logger.critical('Loading the dataset.')
    validation_dataset = None
    # configs.validate_dataset_compatibility(train_dataset)

    # TODO(Jiayuan Mao @ 04/23): build the model.
    logger.critical('Building the model.')
    model = desc.make_model(args)

    if args.use_gpu:
        model.cuda()
        # Use the customized data parallel if applicable.
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            # from jactorch.parallel import UserScatteredJacDataParallel as JacDataParallel
            model = JacDataParallel(model, device_ids=args.gpus).cuda()
        # TODO(Jiayuan Mao @ 04/23): disable the cudnn benchmark.
        # Disable the cudnn benchmark.
        cudnn.benchmark = False

    # Restore the pretrained weights to evaluate.
    if load_weights(model, args.load):
        logger.critical('Loaded weights from pretrained model: "{}".'.format(
            args.load))

    # Meters: tensorboard-backed when enabled, plain in-memory otherwise.
    if args.use_tb:
        from jactorch.train.tb import TBLogger, TBGroupMeters
        tb_logger = TBLogger(args.tb_dir)
        meters = TBGroupMeters(tb_logger)
        logger.critical('Writing tensorboard logs to: "{}".'.format(
            args.tb_dir))
    else:
        from jacinle.utils.meter import GroupMeters
        meters = GroupMeters()

    if not args.debug:
        logger.critical('Writing meter logs to file: "{}".'.format(
            args.meter_file))

    # Optional interactive shell before evaluation.
    if args.embed:
        from IPython import embed
        embed()

    # TODO(Jiayuan Mao @ 04/23): make the data loader.
    logger.critical('Building the data loader.')
    validation_dataloader = validation_dataset.make_dataloader(
        args.batch_size, shuffle=False, drop_last=False,
        nr_workers=args.data_workers)

    # One validation pass, then dump and report the meters.
    model.eval()
    validate_epoch(model, validation_dataloader, meters)
    if not args.debug:
        meters.dump(args.meter_file)
    logger.critical(meters.format_simple('Test', compressed=False))