def test_instantiate_classes(
    classname: str, params: Any, args: Any, kwargs: Any, expected: Any
) -> None:
    full_class = f"{MODULE_NAME}.generated.{classname}Conf"
    schema = OmegaConf.structured(get_class(full_class))
    cfg = OmegaConf.merge(schema, params)
    obj = instantiate(cfg, *args, **kwargs)
    assert obj == expected
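# A minimal, self-contained sketch of the schema -> merge -> instantiate round
# trip exercised above. `User`/`UserConf` are hypothetical stand-ins, not the
# generated configs under test.
from dataclasses import dataclass

from hydra.utils import instantiate
from omegaconf import MISSING, OmegaConf


class User:
    def __init__(self, name: str, age: int = 0):
        self.name = name
        self.age = age

    def __eq__(self, other):
        return (self.name, self.age) == (other.name, other.age)


@dataclass
class UserConf:
    _target_: str = "__main__.User"  # dotted path resolved by instantiate
    name: str = MISSING
    age: int = 0


schema = OmegaConf.structured(UserConf)
merged = OmegaConf.merge(schema, {"name": "ada", "age": 36})
assert instantiate(merged) == User("ada", 36)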
def test_instantiate_classes(
    tmpdir: Path,
    modulepath: str,
    classname: str,
    cfg: Any,
    passthrough_args: Any,
    passthrough_kwargs: Any,
    expected_class: Any,
) -> None:
    # Create a fake dataset and put it in tmpdir for the test:
    tmp_data_root = tmpdir.mkdir("data")
    processed_dir = os.path.join(tmp_data_root, classname, "processed")
    os.makedirs(processed_dir)
    torch.save(torch.tensor([[1.0], [1.0]]), processed_dir + "/training.pt")
    torch.save(torch.tensor([1.0]), processed_dir + "/test.pt")

    # cfg is populated here because it requires the tmpdir test fixture
    cfg["root"] = str(tmp_data_root)

    full_class = f"hydra_configs.torchvision.{modulepath}.{classname}Conf"
    schema = OmegaConf.structured(get_class(full_class))
    cfg = OmegaConf.merge(schema, cfg)
    obj = instantiate(cfg, *passthrough_args, **passthrough_kwargs)
    expected_obj = expected_class(root=tmp_data_root)
    assert isinstance(obj, type(expected_obj))
def __init__(self, *args, **kwargs):
    samplers = None
    self._samplers = kwargs.get("samplers")
    if self._samplers:
        for stage_sampler in self._samplers:
            stage = stage_sampler.stage
            func_name = f"{stage}_dataloader"
            sampling = stage_sampler.sampling
            if sampling == SAMPLING.DataLoader.value:
                func = partial(self.create_dataloader, stage=stage)
                func.__code__ = self.create_dataloader.__code__
                setattr(self, f"{stage}_loader_type", sampling)
            elif sampling == SAMPLING.NeighborSampler.value:
                func = partial(self.create_neighbor_sampler, stage=stage)
                func.__code__ = self.create_neighbor_sampler.__code__
                setattr(self, f"{stage}_loader_type", sampling)
            elif sampling == SAMPLING.LinkPred.value:
                func = partial(self.create_train_test_split_edges, stage=stage)
                func.__code__ = self.create_train_test_split_edges.__code__
                setattr(self, f"{stage}_loader_type", sampling)
            else:
                if hasattr(sampling, "_target_"):
                    samplers = [
                        DictConfig(
                            {
                                "stage": s["stage"],
                                "sampling": find_enum(s["sampling"].name, SAMPLING).value,
                            }
                        )
                        for s in self._samplers
                    ]
                    loader_cls = get_class(sampling._target_)
                    params = {}
                    if hasattr(sampling, "params"):
                        params = sampling.params
                    func = partial(
                        self.create_loader_from_cls,
                        loader_cls=loader_cls,
                        params=params,
                        stage=stage,
                    )
                    func.__code__ = self.create_loader_from_cls.__code__
                else:
                    raise Exception(
                        f"Strategy should be within {[v.value for v in SAMPLING]}"
                    )
            # Bind the stage-specific factory under the hook name, e.g. `train_dataloader`.
            setattr(self, func_name, func)
        if samplers is not None:
            self._samplers = samplers
    self._num_edges = kwargs.get("num_edges")
    self._num_layers = kwargs.get("num_layers")
    if (self._num_edges is not None) and (self._num_layers is not None):
        self._sizes = [self._num_edges, self._num_layers]
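# Minimal sketch of the binding trick above, with a hypothetical `DataModule`:
# stage-specific loader factories are created with functools.partial and attached
# under the hook names a framework expects (e.g. `train_dataloader`).
from functools import partial


class DataModule:
    def create_dataloader(self, stage: str):
        return f"dataloader for {stage}"


dm = DataModule()
for stage in ("train", "val", "test"):
    func = partial(dm.create_dataloader, stage=stage)
    # partial objects accept attributes; exposing the wrapped function's
    # __code__ helps introspection-based "is this hook overridden?" checks.
    func.__code__ = dm.create_dataloader.__code__
    setattr(dm, f"{stage}_dataloader", func)

assert dm.train_dataloader() == "dataloader for train"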
def test_instantiate_classes(
    modulepath: str, classname: str, cfg: Any, passthrough_kwargs: Any, expected: Any
) -> None:
    full_class = f"hydra_configs.torch.{modulepath}.{classname}Conf"
    schema = OmegaConf.structured(get_class(full_class))
    cfg = OmegaConf.merge(schema, cfg)
    obj = instantiate(cfg, **passthrough_kwargs)

    def closure():
        return model(Tensor([10]))

    assert torch.all(torch.eq(obj.step(closure), expected.step(closure)))
def _init_optim(name, params, optimizers_conf=None):
    if optimizers_conf is None:
        raise Exception("Optimizer should be defined within configuration files")
    for optim_conf in optimizers_conf:
        if name == optim_conf["name"]:
            optim_cls = get_class(optim_conf["_target_"])
            return optim_cls(list(params), **optim_conf["params"])
    raise Exception(
        f"The provided name {name} doesn't exist within {[o['name'] for o in optimizers_conf]}"
    )
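# Hedged usage sketch for `_init_optim` above: `optimizers_conf` is assumed to be
# a list of dict-like entries with `name`, `_target_`, and `params` keys (e.g.
# loaded from YAML). The concrete values below are illustrative only.
import torch

optimizers_conf = [
    {"name": "adam", "_target_": "torch.optim.Adam", "params": {"lr": 1e-3}},
    {"name": "sgd", "_target_": "torch.optim.SGD", "params": {"lr": 1e-2}},
]
model = torch.nn.Linear(4, 2)
optimizer = _init_optim("adam", model.parameters(), optimizers_conf)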
def run(config: BenchmarkConfig) -> None:
    backend_factory: Type[Backend] = get_class(config.backend._target_)
    backend = backend_factory.allocate(config)
    benchmark = backend.execute(config)
    backend.clean(config)

    # Save the resolved config
    OmegaConf.save(config, ".hydra/config.yaml", resolve=True)

    df = benchmark.to_pandas()
    df.to_csv("results.csv", index_label="id")
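# Hedged sketch of the interface `run` relies on: `config.backend._target_` must
# resolve to a class exposing `allocate`, `execute`, and `clean`, where `execute`
# returns an object with `to_pandas()`. `EchoBackend` is hypothetical, not part
# of the project.
import pandas as pd


class EchoBenchmark:
    def to_pandas(self) -> pd.DataFrame:
        return pd.DataFrame({"latency_ms": [1.0]})


class EchoBackend:
    @classmethod
    def allocate(cls, config):
        return cls()

    def execute(self, config):
        return EchoBenchmark()

    def clean(self, config):
        pass


# A config would then point at it, e.g. backend._target_: "my_pkg.EchoBackend" (hypothetical).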
def test_instantiate_classes(
    modulepath: str,
    classname: str,
    cfg: Any,
    passthrough_args: Any,
    passthrough_kwargs: Any,
    expected: Any,
) -> None:
    full_class = f"hydra_configs.torchvision.{modulepath}.{classname}Conf"
    schema = OmegaConf.structured(get_class(full_class))
    cfg = OmegaConf.merge(schema, cfg)
    obj = instantiate(cfg, *passthrough_args, **passthrough_kwargs)
    assert isinstance(obj, type(expected))
def __init__(self, config: DictConfig, processors: list):
    self.logger = logging.getLogger(self.__class__.__name__)

    #
    # Config
    #
    default_config = OmegaConf.load(
        os.path.join(os.path.dirname(__file__), "config", "bigpipe_response.yaml")
    )
    self.conf = OmegaConf.merge(default_config, config)

    #
    # Install js dependencies
    #
    self.logger.info("Installing javascript dependencies.")
    BigpipeSettings.validate_rendered_output_path(self.conf)
    javascript_manager = JavascriptManager(self.conf)
    self.javascript_folder = javascript_manager.javascript_folder

    #
    # Validate config. This runs after installing javascript, since processor
    # input folders could be inside the node module folder.
    #
    BigpipeSettings.validate_settings(self.conf)

    #
    # Set default render options
    #
    self.default_render_option = BigpipeRenderOptions(
        js_processor_name=self.conf.javascript.default_processor,
        css_processor_name=self.conf.css.default_processor,
        i18n_processor_name=self.conf.i18n.default_processor,
        js_bundle_link_dependencies=self.conf.javascript.bundle_link_dependencies,
        js_dom_bind=get_class(self.conf.javascript.dom_bind)(),
        css_bundle_link_dependencies=self.conf.css.bundle_link_dependencies,
        css_complete_dependencies_by_js=self.conf.css.complete_dependencies_by_js,
    )

    #
    # Processors manager
    #
    from bigpipe_response.processors_manager import ProcessorsManager

    self.processors_manager = ProcessorsManager(
        self.conf, self.javascript_folder, processors
    )

    self.logger.info("Bigpipe Response Ready.")
def prepare_data(self):
    path = osp.join(
        osp.dirname(osp.realpath(__file__)), "..", "..", "data", self.NAME
    )
    dataset = OmegaConf.to_container(self._dataset)
    self.evaluator = Evaluator(dataset["params"]["name"])
    dataset["params"]["transform"] = self._transform
    dataset["params"]["root"] = path
    dataset_cls = get_class(dataset["_target_"])
    self.dataset = dataset_cls(**dataset["params"])
    self.split_idx = self.dataset.get_idx_split()
    self.data = self.dataset[0]
    self.data.adj_t = self.data.adj_t.to_symmetric()
    self.data.adj_t.storage._row.long()
    self.data.adj_t.storage._col.long()
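# Hedged sketch of the `self._dataset` config consumed above. The keys mirror
# the accesses in `prepare_data`; the concrete target and dataset name are
# assumptions (an OGB node-property-prediction dataset is one plausible fit,
# given the Evaluator / get_idx_split / adj_t usage).
from omegaconf import OmegaConf

_dataset = OmegaConf.create(
    {
        "_target_": "ogb.nodeproppred.PygNodePropPredDataset",
        "params": {"name": "ogbn-arxiv"},
    }
)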
def __init__(
    self,
    downstream_model_type: str,
    backbone: HFBackboneConfig,
    optimizer: OptimizerConfig = OptimizerConfig(),
    scheduler: SchedulerConfig = SchedulerConfig(),
    instantiator: Optional[Instantiator] = None,
    tokenizer: Optional[PreTrainedTokenizerBase] = None,
    pipeline_kwargs: Optional[dict] = None,
    **model_data_kwargs,
) -> None:
    self.save_hyperparameters()
    model_cls: Type["AutoModel"] = get_class(downstream_model_type)
    model = model_cls.from_pretrained(
        backbone.pretrained_model_name_or_path, **model_data_kwargs
    )
    super().__init__(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        instantiator=instantiator,
    )
    self._tokenizer = tokenizer  # necessary for hf_pipeline
    self._hf_pipeline = None
    self._hf_pipeline_kwargs = pipeline_kwargs or {}
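# Hedged sketch of what `downstream_model_type` resolves to: a dotted path that
# `get_class` imports, typically a Hugging Face Transformers auto-class. The
# checkpoint name and kwargs below are illustrative only.
from hydra.utils import get_class

model_cls = get_class("transformers.AutoModelForSequenceClassification")
model = model_cls.from_pretrained("prajjwal1/bert-tiny", num_labels=2)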
def __init__(self, *args, **kwargs):
    defaulTasksMixin = kwargs.get("defaulTasksMixin")
    assert defaulTasksMixin is not None
    mixins = [instantiate(c, *args, **kwargs) for c in defaulTasksMixin]
    named_funcs = {
        mixin: [f for f in dir(mixin) if "__" not in f] for mixin in mixins
    }
    func_names = sum(named_funcs.values(), [])
    assert len(func_names) == len(
        set(func_names)
    ), "The task mixins are overlapping. This should not happen!"
    targets_mixin = [get_class(c._target_) for c in defaulTasksMixin]
    # Reset any previously injected mixins, then extend the instance's class
    # bases so the mixin methods resolve through normal attribute lookup.
    if len(self.__class__.__bases__) > 1:
        self.__class__.__bases__ = (self.__class__.__bases__[0],)
    for t_cls in targets_mixin:
        self.__class__.__bases__ += (t_cls,)
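# Minimal sketch of the dynamic-mixin trick above: rewrite a class's bases at
# runtime so mixin methods resolve via normal attribute lookup. All class names
# here are hypothetical; note that the class needs a non-object base for the
# `__bases__` assignment to be legal in CPython.
class BaseTask:
    pass


class TrackerMixin:
    def log_metric(self, name, value):
        print(f"{name}={value}")


class MyTask(BaseTask):
    pass


task = MyTask()
task.__class__.__bases__ += (TrackerMixin,)
task.log_metric("loss", 0.1)  # now resolves through the injected mixin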
def run(
    x: Any,
    instantiator: Instantiator,
    checkpoint_path: Optional[str] = None,
    task: TaskConfig = TaskConfig(),
    model_data_kwargs: Optional[Dict[str, Any]] = None,
    tokenizer: Optional[HFTokenizerConfig] = None,
    pipeline_kwargs: Optional[dict] = None,  # mostly for the device
    predict_kwargs: Optional[dict] = None,
) -> List[Dict[str, Any]]:
    model: HFTransformer
    if checkpoint_path:
        model = get_class(task._target_).load_from_checkpoint(checkpoint_path)
    else:
        model = instantiator.model(
            task,
            model_data_kwargs=model_data_kwargs,
            tokenizer=tokenizer,
            pipeline_kwargs=pipeline_kwargs,
        )
    predict_kwargs = predict_kwargs or {}
    if isinstance(x, Mapping):
        return model.hf_predict(**x, **predict_kwargs)
    return model.hf_predict(x, **predict_kwargs)
def validate_settings(config):
    BigpipeSettings.validate_rendered_output_path(config)
    BigpipeSettings.validate_folder_name(
        config.rendered_output_container, "rendered_output_container"
    )

    if not isinstance(config.is_production_mode, bool):
        raise InvalidConfiguration("is_production_mode must be of type boolean")

    from bigpipe_response.javascript_dom_bind.javascript_dom_bind import JavascriptDOMBind

    # path, resource = RemoteJsProcessor.build_js_resource(config.processors.js.javascript_handler)
    # if not resource_exists(path, resource):
    #     raise InvalidConfiguration('config.processors.js.javascript_handler must be set to a javascript file')

    if JavascriptDOMBind not in get_class(config.javascript.dom_bind).__bases__:
        raise InvalidConfiguration(
            "config.processors.js.js_dom_bind must be set and an instance of JavascriptDOMBind"
        )

    if config.css.complete_dependencies_by_js is None:
        raise InvalidConfiguration(
            "config.processors.css.complete_dependencies_by_js must be set to a boolean"
        )

    if config.css.bundle_link_dependencies is None:
        raise InvalidConfiguration(
            "config.processors.css.bundle_link_dependencies must be set to a boolean"
        )

    if not config.remote.port_start:
        raise InvalidConfiguration(
            "config.processors.js.remote_port_start must be set to a port number"
        )

    if not config.remote.port_count:
        raise InvalidConfiguration(
            "config.processors.js.remote_port_count must be set to the number of ports to scan"
        )

    for key, conf_processors in config.processors.items():
        if "processor_name" not in conf_processors:
            raise InvalidConfiguration("processor `processor_name` must be set")

        if "_target_" not in conf_processors:
            raise InvalidConfiguration("processor class must be set")

        processor_classes = get_class(conf_processors["_target_"]).__bases__
        if BaseFileProcessor in processor_classes:
            if not conf_processors.source_paths:
                raise InvalidConfiguration(
                    "processor `{}`. `source_paths` is missing".format(conf_processors)
                )

            source_paths = OmegaConf.to_container(
                conf_processors.source_paths, resolve=True
            )
            if source_paths and not isinstance(source_paths, list):
                raise InvalidConfiguration(
                    'processor `{}`. "source_paths" must be a list'.format(conf_processors)
                )

            for source_base_path in source_paths:
                if not os.path.exists(source_base_path):
                    raise InvalidConfiguration(
                        "processor `{}` source_paths directory does not exist. `{}`".format(
                            conf_processors, source_base_path
                        )
                    )

            if not conf_processors.source_ext or not isinstance(
                OmegaConf.to_container(conf_processors.source_ext, resolve=True), list
            ):
                raise InvalidConfiguration(
                    "processor named `{}`. source_ext must be a populated list".format(
                        conf_processors.processor_name
                    )
                )

            if not conf_processors.target_ext:
                raise InvalidConfiguration(
                    "processor named `{}`. target_ext must be set".format(
                        conf_processors.processor_name
                    )
                )

        if RemoteJsFileProcessor in processor_classes or RemoteJsProcessor in processor_classes:
            if not conf_processors.javascript_handler:
                raise InvalidConfiguration(
                    "processor named `{}`. javascript_handler must be set.".format(
                        conf_processors.processor_name
                    )
                )
            if not conf_processors.javascript_handler.strip().lower().endswith(".js"):
                raise InvalidConfiguration(
                    "processor named `{}`. javascript_handler must have a .js extension.".format(
                        conf_processors.javascript_handler
                    )
                )
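# Hedged sketch of a `config.processors` entry that would satisfy the
# file-processor checks above. The keys are inferred from the validation logic;
# the target class and paths are hypothetical.
from omegaconf import OmegaConf

processors_conf = OmegaConf.create(
    {
        "scss": {
            "processor_name": "scss",
            "_target_": "myapp.processors.ScssProcessor",  # hypothetical
            "source_paths": ["client/scss"],  # must exist on disk
            "source_ext": ["scss"],
            "target_ext": "css",
        }
    }
)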
def test_get_class(path: str, expected_type: type) -> None:
    assert utils.get_class(path) == expected_type
def sweep(self, arguments: List[str]) -> None:
    assert self.config is not None
    assert self.launcher is not None
    assert self.job_idx is not None

    parser = OverridesParser.create()
    parsed = parser.parse_overrides(arguments)

    search_space = dict(self.search_space)
    fixed_params = dict()
    for override in parsed:
        value = create_optuna_distribution_from_override(override)
        if isinstance(value, BaseDistribution):
            search_space[override.get_key_element()] = value
        else:
            fixed_params[override.get_key_element()] = value
    # Remove fixed parameters from the Optuna search space.
    for param_name in fixed_params:
        if param_name in search_space:
            del search_space[param_name]

    samplers = {
        "tpe": "optuna.samplers.TPESampler",
        "random": "optuna.samplers.RandomSampler",
        "cmaes": "optuna.samplers.CmaEsSampler",
        "nsgaii": "optuna.samplers.NSGAIISampler",
        "motpe": "optuna.samplers.MOTPESampler",
    }
    if self.optuna_config.sampler.name not in samplers:
        raise NotImplementedError(
            f"{self.optuna_config.sampler} is not supported by the Optuna sweeper."
        )
    sampler_class = get_class(samplers[self.optuna_config.sampler.name])
    sampler = sampler_class(seed=self.optuna_config.seed)

    directions: List[str]
    if isinstance(self.optuna_config.direction, MutableSequence):
        directions = [
            d.name if isinstance(d, Direction) else d
            for d in self.optuna_config.direction
        ]
    else:
        if isinstance(self.optuna_config.direction, str):
            directions = [self.optuna_config.direction]
        else:
            directions = [self.optuna_config.direction.name]

    study = optuna.create_study(
        study_name=self.optuna_config.study_name,
        storage=self.optuna_config.storage,
        sampler=sampler,
        directions=directions,
        load_if_exists=True,
    )
    log.info(f"Study name: {study.study_name}")
    log.info(f"Storage: {self.optuna_config.storage}")
    log.info(f"Sampler: {self.optuna_config.sampler.name}")
    log.info(f"Directions: {directions}")

    batch_size = self.optuna_config.n_jobs
    n_trials_to_go = self.optuna_config.n_trials
    while n_trials_to_go > 0:
        batch_size = min(n_trials_to_go, batch_size)
        trials = [study._ask() for _ in range(batch_size)]
        overrides = []
        for trial in trials:
            for param_name, distribution in search_space.items():
                trial._suggest(param_name, distribution)
            params = dict(trial.params)
            params.update(fixed_params)
            overrides.append(tuple(f"{name}={val}" for name, val in params.items()))

        returns = self.launcher.launch(overrides, initial_job_idx=self.job_idx)
        self.job_idx += len(returns)
        for trial, ret in zip(trials, returns):
            values: Optional[List[float]] = None
            state: optuna.trial.TrialState = optuna.trial.TrialState.COMPLETE
            try:
                if len(directions) == 1:
                    try:
                        values = [float(ret.return_value)]
                    except (ValueError, TypeError):
                        raise ValueError(
                            f"Return value must be float-castable. Got '{ret.return_value}'."
                        ).with_traceback(sys.exc_info()[2])
                else:
                    try:
                        values = [float(v) for v in ret.return_value]
                    except (ValueError, TypeError):
                        raise ValueError(
                            "Return value must be a list or tuple of float-castable values."
                            f" Got '{ret.return_value}'."
                        ).with_traceback(sys.exc_info()[2])
                    if len(values) != len(directions):
                        raise ValueError(
                            "The number of values and the number of objectives are"
                            f" mismatched. Expected {len(directions)}, but got {len(values)}."
                        )
                study._tell(trial, state, values)
            except Exception as e:
                state = optuna.trial.TrialState.FAIL
                study._tell(trial, state, values)
                raise e
        n_trials_to_go -= batch_size

    results_to_serialize: Dict[str, Any]
    if len(directions) < 2:
        best_trial = study.best_trial
        results_to_serialize = {
            "name": "optuna",
            "best_params": best_trial.params,
            "best_value": best_trial.value,
        }
        log.info(f"Best parameters: {best_trial.params}")
        log.info(f"Best value: {best_trial.value}")
    else:
        best_trials = study.best_trials
        pareto_front = [{"params": t.params, "values": t.values} for t in best_trials]
        results_to_serialize = {
            "name": "optuna",
            "solutions": pareto_front,
        }
        log.info(f"Number of Pareto solutions: {len(best_trials)}")
        for t in best_trials:
            log.info(f"    Values: {t.values}, Params: {t.params}")
    OmegaConf.save(
        OmegaConf.create(results_to_serialize),
        f"{self.config.hydra.sweep.dir}/optimization_results.yaml",
    )
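# Hedged sketch of the override syntax this sweeper consumes (following Hydra's
# Optuna sweeper docs; the exact grammar may vary by version). Overrides that
# parse to an Optuna distribution stay in the search space, while plain
# assignments become `fixed_params` applied to every trial:
#
#   python my_app.py --multirun 'x=interval(0.0, 1.0)' 'y=choice(1, 2, 3)' z=42
#
# Here `x` and `y` are swept by Optuna and `z=42` is pinned for all trials.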
def test_get_class(path, expected_type):
    assert utils.get_class(path) == expected_type
def test_discover(plugin_type: Type[Plugin], expected: List[str]) -> None:
    plugins = Plugins.instance().discover(plugin_type)
    expected_classes = [get_class(c) for c in expected]
    for ex in expected_classes:
        assert ex in plugins
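# Hedged sketch of how such a discovery test is typically parametrized. The
# Launcher entry below names Hydra's built-in basic launcher; treat the exact
# dotted path as an assumption about Hydra's internal layout.
import pytest
from hydra.core.plugins import Plugins
from hydra.plugins.launcher import Launcher
from hydra.utils import get_class


@pytest.mark.parametrize(
    "plugin_type, expected",
    [(Launcher, ["hydra._internal.core_plugins.basic_launcher.BasicLauncher"])],
)
def test_discover_core_launcher(plugin_type, expected):
    plugins = Plugins.instance().discover(plugin_type)
    for cls_path in expected:
        assert get_class(cls_path) in plugins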
from hydra.utils import get_class, instantiate
from omegaconf import OmegaConf
from torch import Tensor

cfg = {}
full_class = "gen.configen_tests.utils.data.dataset.TensorDatasetConf"
schema = OmegaConf.structured(get_class(full_class))
cfg = OmegaConf.merge(schema, cfg)
obj = instantiate(cfg, tensors=(Tensor([1]),))  # trailing comma: a tuple of tensors
print(obj)
def sweep(self, arguments: List[str]) -> None:
    assert self.config is not None
    assert self.launcher is not None
    assert self.job_idx is not None

    parser = OverridesParser.create()
    parsed = parser.parse_overrides(arguments)

    search_space = dict(self.search_space)
    fixed_params = dict()
    for override in parsed:
        value = create_optuna_distribution_from_override(override)
        if isinstance(value, BaseDistribution):
            search_space[override.get_key_element()] = value
        else:
            fixed_params[override.get_key_element()] = value
    # Remove fixed parameters from the Optuna search space.
    for param_name in fixed_params:
        if param_name in search_space:
            del search_space[param_name]

    samplers = {
        "tpe": "optuna.samplers.TPESampler",
        "random": "optuna.samplers.RandomSampler",
        "cmaes": "optuna.samplers.CmaEsSampler",
    }
    if self.optuna_config.sampler.name not in samplers:
        raise NotImplementedError(
            f"{self.optuna_config.sampler} is not supported by Optuna sweeper."
        )
    sampler_class = get_class(samplers[self.optuna_config.sampler.name])
    sampler = sampler_class(seed=self.optuna_config.seed)

    # TODO (toshihikoyanase): Remove type-ignore when optuna==2.4.0 is released.
    study = optuna.create_study(  # type: ignore
        study_name=self.optuna_config.study_name,
        storage=self.optuna_config.storage,
        sampler=sampler,
        direction=self.optuna_config.direction.name,
        load_if_exists=True,
    )
    log.info(f"Study name: {study.study_name}")
    log.info(f"Storage: {self.optuna_config.storage}")
    log.info(f"Sampler: {self.optuna_config.sampler.name}")
    log.info(f"Direction: {self.optuna_config.direction.name}")

    batch_size = self.optuna_config.n_jobs
    n_trials_to_go = self.optuna_config.n_trials
    while n_trials_to_go > 0:
        batch_size = min(n_trials_to_go, batch_size)
        trials = [study._ask() for _ in range(batch_size)]
        overrides = []
        for trial in trials:
            for param_name, distribution in search_space.items():
                trial._suggest(param_name, distribution)
            params = dict(trial.params)
            params.update(fixed_params)
            overrides.append(tuple(f"{name}={val}" for name, val in params.items()))

        returns = self.launcher.launch(overrides, initial_job_idx=self.job_idx)
        self.job_idx += len(returns)
        for trial, ret in zip(trials, returns):
            # TODO (toshihikoyanase): Remove type-ignore when optuna==2.4.0 is released.
            study._tell(trial, optuna.trial.TrialState.COMPLETE, ret.return_value)  # type: ignore
        n_trials_to_go -= batch_size

    best_trial = study.best_trial
    results_to_serialize = {
        "name": "optuna",
        "best_params": best_trial.params,
        "best_value": best_trial.value,
    }
    OmegaConf.save(
        OmegaConf.create(results_to_serialize),
        f"{self.config.hydra.sweep.dir}/optimization_results.yaml",
    )
    log.info(f"Best parameters: {best_trial.params}")
    log.info(f"Best value: {best_trial.value}")
def test_discover(plugin_type, expected):
    plugins = Plugins.discover(plugin_type)
    expected_classes = [get_class(c) for c in sorted(expected)]
    for ex in expected_classes:
        assert ex in plugins