示例#1
0
def test_exception_on_search_space_file_not_exist():
    """Loading a search space from a non-existent file raises FileNotFoundError."""
    cli_args = ["-ssf", "not_exist_file"]
    with pytest.raises(FileNotFoundError) as exc_info:
        parsed_args = KGEArgParser().get_args(cli_args)
        loader = HyperparameterLoader(parsed_args)
        loader.load_search_space("analogy")

    # The error message is expected to name the file that was passed in.
    expected_message = "Cannot find configuration file not_exist_file"
    assert str(exc_info.value) == expected_message
示例#2
0
def test_exception_on_search_space_file_with_wrong_extension():
    """A search-space file with a ``.txt`` extension is rejected with ValueError."""
    resource_dir = os.path.join(os.path.dirname(__file__), "resource", "custom_hyperparams")
    txt_config_path = os.path.join(resource_dir, "custom.txt")
    with pytest.raises(ValueError) as exc_info:
        parsed_args = KGEArgParser().get_args(["-ssf", txt_config_path])
        loader = HyperparameterLoader(parsed_args)
        loader.load_search_space("analogy")

    # The error message is expected to end with the offending path.
    expected_message = "Configuration file must have .yaml or .yml extension: %s" % txt_config_path
    assert str(exc_info.value) == expected_message
示例#3
0
    def __init__(self, args):
        """Set up the optimizer state from parsed command-line arguments.

        Args:
            args (object): Parsed arguments. This method reads ``model_name``,
                ``dataset_name``, ``dataset_path``, ``debug``, ``device`` and
                ``max_number_trials`` from it, and also hands the whole
                object to ``HyperparameterLoader``.

        Raises:
            Exception: If ``args.model_name`` (case-insensitive) is one of
                the models excluded from hyperparameter tuning.
        """
        unsupported_models = ["tucker", "conve", "convkb", "proje_pointwise"]
        if args.model_name.lower() in unsupported_models:
            # The guard tests ``args.model_name``, so the message reports that
            # same attribute. Formatting ``args.model`` instead would raise
            # AttributeError on an argument object without a ``model``
            # attribute and hide the intended error.
            raise Exception(
                "Model %s has not been supported in tuning hyperparameters!" %
                args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(
            dataset=args.dataset_name, custom_dataset_path=args.dataset_path)

        # Start from the parser's defaults (empty argv) and carry over only
        # the dataset name, debug flag and device from the caller's arguments.
        self.kge_args = KGEArgParser().get_args([])
        self.kge_args.dataset_name = args.dataset_name
        self.kge_args.debug = args.debug
        self.kge_args.device = args.device

        # Debug runs are capped at 3 evaluations.
        self.max_evals = args.max_number_trials if not args.debug else 3

        self.config_obj, self.model_obj = Importer().import_model_config(
            self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(
            self.model_name.lower())

        # Filled in later: nothing has been optimized or trained yet.
        self._best_result = None
        self.trainer = None
示例#4
0
def test_load_default_hyperparameter_file():
    """Check the "analogy" values loaded when no custom files are passed."""
    default_args = KGEArgParser().get_args([])
    loader = HyperparameterLoader(default_args)
    analogy_params = loader.load_hyperparameter("freebase15k", "analogy")
    analogy_space = loader.load_search_space("analogy")

    assert analogy_params["learning_rate"] == 0.1
    assert analogy_params["hidden_size"] == 200

    # The second input of the "epochs" entry is compared by its string form.
    epochs_input = analogy_space["epochs"].inputs()[1]
    assert str(epochs_input) == "0 Literal{10}"
示例#5
0
def test_load_custom_hyperparameter_file():
    """Check the "analogy" values loaded from the custom YAML resource file."""
    resource_dir = os.path.join(os.path.dirname(__file__), "resource", "custom_hyperparams")
    custom_yaml_path = os.path.join(resource_dir, "custom.yaml")

    # The same file is passed for both the "-hpf" and the "-ssf" option.
    cli_args = ["-hpf", custom_yaml_path, "-ssf", custom_yaml_path]
    loader = HyperparameterLoader(KGEArgParser().get_args(cli_args))
    analogy_params = loader.load_hyperparameter("freebase15k", "analogy")
    analogy_space = loader.load_search_space("analogy")

    assert analogy_params["learning_rate"] == 0.01
    assert analogy_params["hidden_size"] == 200

    # The second input of the "epochs" entry is compared by its string form.
    epochs_input = analogy_space["epochs"].inputs()[1]
    assert str(epochs_input) == "0 Literal{100}"
示例#6
0
def test_search_space_loader(model_name):
    """Check that the search space loaded for ``model_name`` has an "epochs" entry."""
    kg = KnowledgeGraph(dataset="freebase15k")
    kg.prepare_data()

    # No command-line overrides: the parser is given an empty argument list.
    default_args = KGEArgParser().get_args([])
    loader = HyperparameterLoader(default_args)
    search_space = loader.load_search_space(model_name)

    assert search_space["epochs"] is not None
示例#7
0
    def __init__(self, args):
        """Build the configuration from parsed arguments and dataset metadata.

        Args:
            args: Parsed argument object. Every attribute found in
                ``vars(args)`` is copied onto this instance;
                ``dataset_name`` and ``dataset_path`` are also read directly,
                as is ``exp`` (and ``model_name`` when ``exp`` is True).
        """
        # Mirror every parsed argument as an instance attribute.
        self.__dict__.update({name: getattr(args, name) for name in vars(args)})

        # Settings for training and evaluation.
        self.hits = [1, 3, 5, 10]
        self.disp_result = False
        self.patience = 3  # hard-coded for now; should become configurable.

        # Settings for visualization
        # (the visualizer is disabled for most KGE methods at the moment).
        self.disp_triple_num = 20
        self.plot_training_result = True
        self.plot_testing_result = True

        # Load the knowledge graph and expose its metadata fields directly
        # on this instance.
        self.knowledge_graph = KnowledgeGraph(
            dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.__dict__.update(self.knowledge_graph.kg_meta.__dict__)

        # Output folders live under the dataset path; each one is created
        # on the spot if it does not exist yet.
        dataset_root = self.knowledge_graph.dataset.dataset_path
        output_dirs = (
            ("path_tmp", 'intermediate'),
            ("path_result", 'results'),
            ("path_figures", 'figures'),
            ("path_embeddings", 'embeddings'),
        )
        for attr_name, folder_name in output_dirs:
            folder = dataset_root / folder_name
            setattr(self, attr_name, folder)
            folder.mkdir(parents=True, exist_ok=True)

        if args.exp is True:
            # Overwrite the current settings with the ones loaded for this
            # dataset/model pair (the settings from the paper).
            loader = HyperparameterLoader(args)
            paper_params = loader.load_hyperparameter(
                args.dataset_name, args.model_name)
            self.__dict__.update(paper_params.items())
示例#8
0
class BaysOptimizer:
    """Bayesian optimizer class for tuning hyperparameter.

      This class implements the Bayesian Optimizer for tuning the
      hyper-parameter.

      Args:
        args (object): The Argument Parser object providing arguments.

      Examples:
        >>> import sys
        >>> from pykg2vec.common import KGEArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> args = KGEArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """
    _logger = Logger().get_logger(__name__)

    def __init__(self, args):
        """Store the dataset, model classes, config and search space to tune.

        Args:
            args (object): Parsed arguments. This method reads ``model_name``,
                ``dataset_name``, ``dataset_path``, ``debug`` and
                ``max_number_trials`` from it; the object is also kept as
                ``self.kge_args`` and passed to the model config and to
                ``HyperparameterLoader``.

        Raises:
            Exception: If ``args.model_name`` (case-insensitive) is one of
                the models excluded from hyperparameter tuning.
        """
        unsupported_models = [
            "conve", "convkb", "proje_pointwise", "interacte", "hyper",
            "acre"
        ]
        if args.model_name.lower() in unsupported_models:
            # The guard tests ``args.model_name``, so the message reports that
            # same attribute. Formatting ``args.model`` instead would raise
            # AttributeError on an argument object without a ``model``
            # attribute and hide the intended error.
            raise Exception(
                "Model %s has not been supported in tuning hyperparameters!" %
                args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(
            dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.kge_args = args

        # Debug runs are capped at 3 evaluations.
        self.max_evals = args.max_number_trials if not args.debug else 3

        self.config_obj, self.model_obj = Importer().import_model_config(
            self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(
            self.model_name.lower())

        # Filled in by optimize() and _get_loss() respectively.
        self._best_result = None
        self.trainer = None

    def optimize(self):
        """Run the search, save every trial to ``trials.csv`` and log the best.

        ``fmin`` is called with ``tpe.suggest`` over ``self.search_space`` for
        ``self.max_evals`` evaluations. The per-trial table is written to
        ``<config_local.path_result>/<model_name>/trials.csv``.
        """
        trials = Trials()

        self._best_result = fmin(fn=self._get_loss,
                                 space=self.search_space,
                                 trials=trials,
                                 algo=tpe.suggest,
                                 max_evals=self.max_evals)

        columns = list(self.search_space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            # Each recorded value is wrapped in a list; unwrap it and let
            # space_eval map it back onto the search space.
            translated_eval = space_eval(
                self.search_space,
                {k: v[0]
                 for k, v in trial['misc']['vals'].items()})
            row = [idx]
            row.extend(translated_eval[k] for k in columns)
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.config_local.path_result / self.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        self._logger.info(results)
        self._logger.info('Found golden setting:')
        self._logger.info(space_eval(self.search_space, self._best_result))

    def return_best(self):
        """Return the best hyper-parameters found by ``optimize()``.

        Returns:
            The result of ``space_eval`` applied to the search space and the
            stored best result.

        Raises:
            AssertionError: If no best result has been stored yet.
        """
        assert self._best_result is not None, 'Cannot find golden setting. Has optimize() been called?'
        return space_eval(self.search_space, self._best_result)

    def _get_loss(self, params):
        """Train one model with ``params`` and report its loss.

        Args:
            params (dict): Hyperparameter values for this trial; each entry
                is written onto ``self.config_local`` before the model is
                built.

        Returns:
            dict: ``{'loss': <value from trainer.tune_model()>,
            'status': STATUS_OK}``.
        """
        # copy the hyperparameters to trainer config and hyperparameter set.
        for key, value in params.items():
            self.config_local.__dict__[key] = value
        self.config_local.__dict__['device'] = self.kge_args.device
        # The model receives every config attribute as a keyword argument.
        model = self.model_obj(**self.config_local.__dict__)

        self.trainer = Trainer(model, self.config_local)

        # configure common setting for a tuning training.
        self.config_local.disp_result = False
        self.config_local.disp_summary = False
        self.config_local.save_model = False

        # do not overwrite test numbers if set
        if self.config_local.test_num is None:
            self.config_local.test_num = 1000

        # Debug runs train for a single epoch per trial.
        if self.kge_args.debug:
            self.config_local.epochs = 1

        # start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()

        return {'loss': loss, 'status': STATUS_OK}