示例#1
0
    def init(self):
        """Initialize the data_dir directory.

        Has to be called exactly once, before any other CLI/API command of
        the package is executed.
        """
        # This command takes no optional arguments; the helper is still
        # invoked and its result is discarded.
        self._process_kwargs_optional()

        NerBlackBoxMain("init").main()
示例#2
0
    def download(self):
        """Download and prepare the built-in datasets and the experiment configuration.

        Has to be called exactly once, before any other CLI/API command of
        the package is executed, if built-in datasets are going to be used.
        """
        # This command takes no optional arguments; the helper is still
        # invoked and its result is discarded.
        self._process_kwargs_optional()

        NerBlackBoxMain("download").main()
示例#3
0
def _run_nerblackbox_main(_ctx_obj: Dict[str, Any],
                          _kwargs: Dict[str, str]) -> None:
    """
    given context (_ctx_obj) and all relevant arguments (_kwargs), invoke NerBlackBoxMain
    is used by every nerbb command
    """
    kwargs = dict(**_ctx_obj, **_kwargs)

    nerblackbox_main = NerBlackBoxMain(**kwargs)
    nerblackbox_main.main()
示例#4
0
    def show_experiment_config(self, experiment_name: str):
        """Show one experiment configuration in detail, or an overview of all of them.

        Args:
            experiment_name: e.g. "exp0" or "all"
        """
        main_kwargs = self._process_kwargs_optional()
        main_kwargs.update({"experiment_name": experiment_name})

        NerBlackBoxMain("show_experiment_config", **main_kwargs).main()
示例#5
0
    def analyze_data(self, dataset_name: str, **kwargs_optional: Any):
        """Analyze a dataset.

        Args:
            dataset_name: e.g. "swedish_ner_corpus".
            kwargs_optional: optional key-value pairs {"verbose": [bool]}.
        """
        main_kwargs = self._process_kwargs_optional(kwargs_optional)
        main_kwargs.update({"dataset_name": dataset_name})

        NerBlackBoxMain("analyze_data", **main_kwargs).main()
示例#6
0
    def get_experiments(self, **kwargs_optional: Any) -> pd.DataFrame:
        """Show the list of experiments that have been run.

        Args:
            kwargs_optional: optional key-value pairs
                {"ids": [tuple of int], "as_df": [bool]}

        Returns:
            experiments_overview
        """
        main_kwargs = self._process_kwargs_optional(kwargs_optional)
        main_kwargs.update({"usage": "api"})

        return NerBlackBoxMain("get_experiments", **main_kwargs).main()
示例#7
0
    def predict(self, experiment_name: str, text_input: Union[str, List[str]]):
        """Predict labels for text_input with the best model of a single experiment.

        Args:
            experiment_name: e.g. "exp0"
            text_input: e.g. "this text needs to be tagged"

        Returns:
            the result of NerBlackBoxMain("predict", ...).main()
        """
        main_kwargs = self._process_kwargs_optional()
        main_kwargs.update(
            {
                "usage": "api",
                "experiment_name": experiment_name,
                "text_input": text_input,
            }
        )

        return NerBlackBoxMain("predict", **main_kwargs).main()
示例#8
0
    def get_model_from_experiment(
            self, experiment_name: str) -> Optional[NerModelPredict]:
        """Get the (best) model of an experiment.

        Args:
            experiment_name: e.g. "exp0"

        Returns:
            ner_model_predict
        """
        main_kwargs = self._process_kwargs_optional()
        main_kwargs.update(
            {
                "usage": "api",
                "experiment_name": experiment_name,
            }
        )

        return NerBlackBoxMain("get_model_from_experiment", **main_kwargs).main()
示例#9
0
    def get_experiment_results(
            self, experiment_name: str) -> List[ExperimentResults]:
        """Get the results of a single experiment.

        Args:
            experiment_name: e.g. "exp0"

        Returns:
            see ExperimentResults
        """
        main_kwargs = self._process_kwargs_optional()
        main_kwargs.update(
            {
                "usage": "api",
                "experiment_name": experiment_name,
                "from_config": True,
            }
        )

        return NerBlackBoxMain("get_experiment_results", **main_kwargs).main()
示例#10
0
    def set_up_dataset(self,
                       dataset_name: str,
                       dataset_subset_name: str = "",
                       **kwargs_optional: Any):
        """Set up a dataset by means of its associated Formatter class.

        Args:
            dataset_name: e.g. "swedish_ner_corpus"
            dataset_subset_name: e.g. "simple_cased"
            kwargs_optional: optional key-value pairs
                {"modify": [bool], "val_fraction": [float], "verbose": [bool]}
        """
        main_kwargs = self._process_kwargs_optional(kwargs_optional)
        main_kwargs.update(
            {
                "dataset_name": dataset_name,
                "dataset_subset_name": dataset_subset_name,
            }
        )

        NerBlackBoxMain("set_up_dataset", **main_kwargs).main()
示例#11
0
class TestMain:
    """Tests for NerBlackBoxMain."""

    # Shared instance, created once when the class body is executed.
    main = NerBlackBoxMain(flag="xyz", from_config=True)

    # 1 ################################################################################################################
    @pytest.mark.parametrize(
        "hparams, from_preset, hparams_processed",
        [
            # neither hparams nor preset -> nothing to process
            (
                None,
                None,
                None,
            ),
            # hparams without preset -> expected output equals the input
            (
                {
                    "multiple_runs": "2"
                },
                None,
                {
                    "multiple_runs": "2"
                },
            ),
            # hparams with the "adaptive" preset -> expected output also
            # contains max_epochs, early_stopping and lr_schedule
            (
                {
                    "multiple_runs": "2"
                },
                "adaptive",
                {
                    "multiple_runs": "2",
                    "max_epochs": 250,
                    "early_stopping": True,
                    "lr_schedule": "constant",
                },
            ),
        ],
    )
    def test_process_hparams(
        self,
        hparams: Optional[Dict[str, Union[str, int, bool]]],
        from_preset: Optional[str],
        hparams_processed: Optional[Dict[str, Union[str, int, bool]]],
    ):
        """Check that _process_hparams returns the expected hyperparameters.

        Args:
            hparams: hyperparameters passed to _process_hparams, or None
            from_preset: preset name passed to _process_hparams, or None
            hparams_processed: expected return value; may hold str, int and
                bool values (see the "adaptive" case), or be None
        """
        test_hparams_processed = self.main._process_hparams(
            hparams, from_preset)
        assert (
            test_hparams_processed == hparams_processed
        ), f"ERROR! test_hparams_processed = {test_hparams_processed} != {hparams_processed}"
示例#12
0
    def run_experiment(
        self,
        experiment_name: str,
        from_config: bool = False,
        model: Optional[str] = None,
        dataset: Optional[str] = None,
        from_preset: Optional[str] = "adaptive",
        **kwargs_optional: Any,
    ):
        """Run a single experiment.

        Note:

        - from_config == True -> the experiment config file is used and no
          other optional arguments are taken into account.

        - from_config == False -> the experiment config file is created
          dynamically and the optional arguments are taken into account.

            - model and dataset are mandatory.

            - All other arguments relate to hyperparameters and are optional.
              If not specified, they are resolved in the following order:

              1) optional argument

              2) from_preset (adaptive, original, stable), which specifies
                 e.g. the hyperparameters "max_epochs", "early_stopping",
                 "lr_schedule"

              3) default experiment configuration

        Args:
            experiment_name: e.g. 'exp0'
            from_config: e.g. False
            model: if experiment config file is to be created dynamically, e.g. 'bert-base-uncased'
            dataset: if experiment config file is to be created dynamically, e.g. 'conll-2003'
            from_preset: if experiment config file is to be created dynamically, e.g. 'adaptive'
            kwargs_optional: optional key-value pairs, e.g.
                {"multiple_runs": [int], "run_name": [str], "device": [torch device], "fp16": [bool]}
        """
        main_kwargs = self._process_kwargs_optional(kwargs_optional)

        main_kwargs["experiment_name"] = experiment_name
        # model / dataset are only forwarded when they were actually given
        for target_key, given_value in (
            ("pretrained_model_name", model),
            ("dataset_name", dataset),
        ):
            if given_value is not None:
                main_kwargs[target_key] = given_value

        # hyperparameters are extracted before from_config / from_preset are added
        extracted_hparams = self._extract_hparams(main_kwargs)
        main_kwargs["hparams"] = extracted_hparams
        main_kwargs["from_config"] = from_config
        if not from_config:
            main_kwargs["from_preset"] = from_preset

        # every key that went into the hyperparameters is removed from the top level
        for hparam_name in extracted_hparams:
            main_kwargs.pop(hparam_name)

        # an empty hyperparameter dict is passed on as None
        if main_kwargs["hparams"] == {}:
            main_kwargs["hparams"] = None

        NerBlackBoxMain("run_experiment", **main_kwargs).main()