def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode):
    """
    super().__init__()
    logger = logging.getLogger("herbarium")
    if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
        setup_logger()
    cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

    # Assume these objects must be constructed in this order.
    model = self.build_model(cfg)
    optimizer = self.build_optimizer(cfg, model)
    data_loader = self.build_train_loader(cfg)

    model = create_ddp_model(
        model, broadcast_buffers=False, find_unused_parameters=True
    )
    self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
        model, data_loader, optimizer
    )

    self.scheduler = self.build_lr_scheduler(cfg, optimizer)
    self.checkpointer = Checkpointer(
        # Assume you want to save checkpoints together with logs/statistics
        model,
        cfg.OUTPUT_DIR,
        trainer=weakref.proxy(self),
    )
    self.start_iter = 0
    self.max_iter = cfg.SOLVER.MAX_ITER
    self.cfg = cfg

    self.register_hooks(self.build_hooks())
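# Example (a hedged sketch, not part of the codebase): how a training script
# would typically drive a trainer built by the __init__ above, following the
# usual detectron2-style flow. `Trainer`, `setup`, `default_argument_parser`,
# and `launch` are illustrative names assumed here, not confirmed APIs.
#
#     def main(args):
#         cfg = setup(args)                           # build and freeze the config
#         trainer = Trainer(cfg)                      # runs the __init__ above
#         trainer.resume_or_load(resume=args.resume)  # restore from cfg.OUTPUT_DIR if present
#         return trainer.train()                      # hook-driven loop over self._trainer
#
#     if __name__ == "__main__":
#         args = default_argument_parser().parse_args()
#         launch(main, args.num_gpus, args=(args,))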
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the herbarium logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode or omegaconf.DictConfig): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir")
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info(
        "Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())
    )
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info(
            "Contents of args.config_file={}:\n{}".format(
                args.config_file,
                _highlight(PathManager.open(args.config_file, "r").read(), args.config_file),
            )
        )

    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        if isinstance(cfg, CfgNode):
            logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml")))
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())
        else:
            LazyConfig.save(cfg, path)
        logger.info("Full config saved to {}".format(path))

    # make sure each worker has a different, yet deterministic seed if specified
    seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
    seed_all_rng(None if seed < 0 else seed + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = _try_get_key(
            cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
        )
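# Example (a hedged sketch): how default_setup is typically called from a
# training script's setup() helper. The `herbarium.config.get_cfg` import is an
# assumption mirroring the detectron2 layout; adjust it to the actual location.
#
#     from herbarium.config import get_cfg
#
#     def setup(args):
#         cfg = get_cfg()
#         cfg.merge_from_file(args.config_file)  # YAML config given on the command line
#         cfg.merge_from_list(args.opts)         # optional "KEY VALUE" overrides
#         cfg.freeze()
#         default_setup(cfg, args)               # loggers, env info, config backup, rng seed
#         return cfg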
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode):
    """
    super().__init__()
    logger = logging.getLogger("herbarium")
    if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
        setup_logger()
    cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

    # Assume these objects must be constructed in this order.
    model = build_model(cfg)
    optimizer = build_optimizer(cfg, model)
    controller = build_controller(cfg, model)
    train_data_loader = build_general_train_loader(cfg)
    # TODO: Need to change here for validation dataset loader
    val_data_loader = train_data_loader

    model = create_ddp_model(
        model, broadcast_buffers=False, find_unused_parameters=True
    )
    self._trainer = HierarchyTrainLoop(
        model,
        controller,
        train_data_loader,
        val_data_loader,
        optimizer,
    )

    self.scheduler = build_lr_scheduler(cfg, optimizer)
    self.checkpointer = Checkpointer(
        # Assume you want to save checkpoints together with logs/statistics
        model,
        cfg.OUTPUT_DIR,
        trainer=weakref.proxy(self),
    )
    self.start_iter = 0
    self.max_iter = cfg.SOLVER.MAX_ITER
    self.cfg = cfg

    self.register_hooks(self.build_hooks())
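# A minimal sketch (illustrative only; the real HierarchyTrainLoop is defined
# elsewhere) of what one step of a loop wired like the constructor above could
# look like: the controller turns the model's per-level outputs into a single
# hierarchy-aware loss, and the optimizer updates the model. All names and
# call signatures below are assumptions, not the project's API.
#
#     def run_step(model, controller, batch, optimizer):
#         loss_dict = model(batch)        # per-level losses / logits from the model
#         losses = controller(loss_dict)  # controller aggregates / re-weights them
#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step()
#         return losses.detach()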
    update_meta(ann_files, name)


if __name__ == "__main__":
    """
    Test the Herbarium json dataset loader.

    Usage:
        python -m herbarium.data.datasets.herb \
            path/to/json path/to/image_root dataset_name

    "dataset_name" can be "herb_2021_val", or other pre-registered ones
    """
    from herbarium.utils.logger import setup_logger
    import herbarium.data.datasets  # noqa # add pre-defined metadata
    import sys

    logger = setup_logger(name=__name__)
    assert sys.argv[3] in DatasetCatalog.list()
    meta = MetadataCatalog.get(sys.argv[3])

    dicts = load_herb_json(sys.argv[1], sys.argv[2], sys.argv[3])
    logger.info("Done loading {} samples.".format(len(dicts)))

    dirname = "herb-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = np.array(Image.open(d["file_name"]))
        # TODO: do something for visualize in this "herb-data-vis" and implement in util.visualizer
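        # A possible stand-in until util.visualizer supports Herbarium data
        # (illustrative only; mirrors what the analogous COCO loader test does):
        #
        #     fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        #     Image.fromarray(img).save(fpath)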