Example #1
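# Module-level imports assumed by this example. The hyperopt, pandas, and standard
# library imports below are exactly what the class calls; the pykg2vec module paths
# are assumptions about the older pykg2vec layout, and model_path, config_path,
# hyper_param_path, modelMap, configMap, and hypMap are lookup tables defined
# elsewhere in the original file (not reproduced here).
import importlib
from pprint import pprint

import pandas as pd
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, space_eval

from pykg2vec.config.global_config import KnowledgeGraph  # path assumed
from pykg2vec.utils.trainer import Trainer                # path assumed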
class BaysOptimizer(object):
    """Bayesian optimizer class for tuning hyperparameter.

      This class implements the Bayesian Optimizer for tuning the 
      hyper-parameter.

      Args:
        args (object): The Argument Parser object providing arguments.
        name_dataset (str): The name of the dataset.
        sampling (str): sampling to be used for generating negative triples


      Examples:
        >>> from pykg2vec.config.hyperparams import KGETuneArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> model = Complex()
        >>> args = KGETuneArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """
    def __init__(self, args=None):
        """store the information of database"""
        model_name = args.model.lower()
        self.args = args
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name,
                                              negative_sample=args.sampling)
        hyper_params = None
        try:
            self.model_obj = getattr(
                importlib.import_module(model_path +
                                        ".%s" % modelMap[model_name]),
                modelMap[model_name])
            self.config_obj = getattr(importlib.import_module(config_path),
                                      configMap[model_name])
            hyper_params = getattr(importlib.import_module(hyper_param_path),
                                   hypMap[model_name])()

        except ModuleNotFoundError:
            print("%s not implemented! Select from: %s" %
                  (model_name, ' '.join(map(str, modelMap.values()))))
        config = self.config_obj()
        config.data = args.dataset_name

        self.trainer = Trainer(model=self.model_obj(config),
                               debug=self.args.debug,
                               tuning=True)
        self.search_space = self.define_search_space(hyper_params)
        self.max_evals = self.args.max_number_trials if not self.args.debug else 1

    def define_search_space(self, hyper_params):
        """Function to perform search space addition"""
        space = {
            k: hp.choice(k, v)
            for k, v in hyper_params.__dict__.items()
            # skip dunder attributes and callable values
            if not k.startswith('__') and not callable(v)
        }
        return space

    def optimize(self):
        """Function that performs bayesian optimization"""
        space = self.search_space
        trials = Trials()

        best_result = fmin(fn=self.get_loss,
                           space=space,
                           algo=tpe.suggest,
                           max_evals=self.max_evals,
                           trials=trials)

        columns = list(space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = []
            row.append(idx)
            translated_eval = space_eval(
                self.search_space,
                {k: v[0]
                 for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.trainer.config.result / self.trainer.model.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        print(results)
        print('Found Golden Setting:')
        pprint(space_eval(space, best_result))

    def get_loss(self, params):
        """Function that defines and acquires the loss"""
        self.trainer.config.L1_flag = params['L1_flag']
        self.trainer.config.batch_size = params['batch_size']
        self.trainer.config.epochs = params['epochs']

        if 'hidden_size' in params:
            self.trainer.config.hidden_size = params['hidden_size']
        if 'ent_hidden_size' in params:
            self.trainer.config.ent_hidden_size = params['ent_hidden_size']
        if 'rel_hidden_size' in params:
            self.trainer.config.rel_hidden_size = params['rel_hidden_size']

        self.trainer.config.learning_rate = params['learning_rate']
        self.trainer.config.margin = params['margin']
        self.trainer.config.disp_result = False
        self.trainer.config.disp_summary = False
        self.trainer.config.save_model = False
        self.trainer.config.debug = True
        self.trainer.config.test_num = 1000

        self.trainer.build_model()
        self.trainer.summary_hyperparameter()

        loss = self.trainer.tune_model()
        # loss = self.trainer.train_model(tuning=True)

        return {'loss': loss, 'status': STATUS_OK}
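The search space built by define_search_space maps each hyper-parameter name to an hp.choice over its candidate values, and space_eval is needed afterwards because fmin returns the chosen indices rather than the values themselves. Below is a minimal, self-contained hyperopt sketch of the same pattern; the parameter names and candidate values are made up for illustration, and the objective function is a stand-in for Trainer.tune_model().

from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, space_eval

# hypothetical candidate values, analogous to a pykg2vec hyper-parameter class
candidates = {'learning_rate': [0.1, 0.01, 0.001], 'hidden_size': [50, 100, 200]}
space = {k: hp.choice(k, v) for k, v in candidates.items()}

def objective(params):
    # stand-in for Trainer.tune_model(): pretend smaller learning rates are better
    loss = params['learning_rate'] + 1.0 / params['hidden_size']
    return {'loss': loss, 'status': STATUS_OK}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

# hp.choice stores indices; space_eval translates them back into the actual values
print(space_eval(space, best))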
Example #2
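# Module-level imports assumed by this example. The hyperopt and pandas imports are
# exactly what the class calls; the pykg2vec module paths are assumptions about the
# newer pykg2vec layout.
import pandas as pd
from hyperopt import fmin, tpe, Trials, STATUS_OK, space_eval

from pykg2vec.common import Importer, HyperparameterLoader  # path assumed
from pykg2vec.data.kgcontroller import KnowledgeGraph       # path assumed
from pykg2vec.utils.trainer import Trainer                  # path assumed
from pykg2vec.utils.logger import Logger                    # path assumed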
class BaysOptimizer:
    """Bayesian optimizer class for tuning hyperparameter.

      This class implements the Bayesian Optimizer for tuning the
      hyper-parameter.

      Args:
        args (object): The Argument Parser object providing arguments.
        name_dataset (str): The name of the dataset.
        sampling (str): sampling to be used for generating negative triples


      Examples:
        >>> from pykg2vec.common import KGEArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> model = Complex()
        >>> args = KGEArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """
    _logger = Logger().get_logger(__name__)

    def __init__(self, args):
        """store the information of database"""
        if args.model_name.lower() in [
                "conve", "convkb", "proje_pointwise", "interacte", "hyper",
                "acre"
        ]:
            raise Exception(
                "Model %s is not supported for hyper-parameter tuning!" %
                args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(
            dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.kge_args = args
        self.max_evals = args.max_number_trials if not args.debug else 3

        self.config_obj, self.model_obj = Importer().import_model_config(
            self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(
            self.model_name.lower())
        self._best_result = None
        self.trainer = None

    def optimize(self):
        """Function that performs bayesian optimization"""
        trials = Trials()

        self._best_result = fmin(fn=self._get_loss,
                                 space=self.search_space,
                                 trials=trials,
                                 algo=tpe.suggest,
                                 max_evals=self.max_evals)

        columns = list(self.search_space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(
                self.search_space,
                {k: v[0]
                 for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.config_local.path_result / self.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        self._logger.info(results)
        self._logger.info('Found golden setting:')
        self._logger.info(space_eval(self.search_space, self._best_result))

    def return_best(self):
        """Function to return the best hyper-parameters"""
        assert self._best_result is not None, 'Cannot find golden setting. Has optimize() been called?'
        return space_eval(self.search_space, self._best_result)

    def _get_loss(self, params):
        """Function that defines and acquires the loss"""

        # copy the sampled hyper-parameters into the local config.
        for key, value in params.items():
            self.config_local.__dict__[key] = value
        self.config_local.__dict__['device'] = self.kge_args.device
        model = self.model_obj(**self.config_local.__dict__)

        self.trainer = Trainer(model, self.config_local)

        # configure common settings for a tuning run.
        self.config_local.disp_result = False
        self.config_local.disp_summary = False
        self.config_local.save_model = False

        # do not overwrite test numbers if set
        if self.config_local.test_num is None:
            self.config_local.test_num = 1000

        if self.kge_args.debug:
            self.config_local.epochs = 1

        # start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()

        return {'loss': loss, 'status': STATUS_OK}
Example #3
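# Module-level imports assumed by this example (same older pykg2vec layout as
# Example #1); model_path, config_path, hyper_param_path, modelMap, configMap, and
# hypMap are lookup tables defined elsewhere in the original file.
import importlib
from pprint import pprint

import pandas as pd
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, space_eval

from pykg2vec.config.global_config import KnowledgeGraph  # path assumed
from pykg2vec.utils.trainer import Trainer                # path assumed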
class BaysOptimizer(object):

    def __init__(self, name_dataset='Freebase15k', sampling="uniform", args=None):
        """store the information of database"""
        model_name = args.model.lower()
        self.args = args
        self.knowledge_graph = KnowledgeGraph(dataset=name_dataset, negative_sample=sampling)
        hyper_params = None
        try:
            self.model_obj = getattr(importlib.import_module(model_path + ".%s" % modelMap[model_name]),
                                     modelMap[model_name])
            self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name])
            hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])()

        except ModuleNotFoundError:
            print("%s not implemented! Select from: %s" % (model_name,
                                                           ' '.join(map(str, modelMap.values()))))
        config = self.config_obj()
        config.data = name_dataset
        # config.set_dataset(name_dataset)
        self.trainer = Trainer(model=self.model_obj(config), debug=self.args.debug, tuning=True)
        self.search_space = self.define_search_space(hyper_params)
        
    def define_search_space(self, hyper_params):
        space = {k: hp.choice(k, v) for k, v in hyper_params.__dict__.items() if not k.startswith('__') and not callable(v)}
        return space

    def optimize(self):
        space = self.search_space
        trials = Trials()
        
        best_result = fmin(fn=self.get_loss, space=space, algo=tpe.suggest, max_evals=2, trials=trials)
        
        columns = list(space.keys())   
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])
        
        for idx, trial in enumerate(trials.trials):
            row = []
            row.append(idx)
            translated_eval = space_eval(self.search_space, {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.trainer.config.result / self.trainer.model.model_name 
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)
        
        print(results)
        print('Found Golden Setting:')
        pprint(space_eval(space, best_result))

    def get_loss(self, params):
        self.trainer.config.L1_flag = params['L1_flag']
        self.trainer.config.batch_size = params['batch_size']
        self.trainer.config.epochs = params['epochs']
        self.trainer.config.hidden_size = params['hidden_size']
        self.trainer.config.learning_rate = params['learning_rate']
        self.trainer.config.margin = params['margin']
        self.trainer.config.disp_result = False
        self.trainer.config.disp_summary = False
        self.trainer.config.save_model = False
        self.trainer.config.debug = True
        self.trainer.config.test_num = 1000

        self.trainer.build_model()
        self.trainer.summary_hyperparameter()
    
        loss = self.trainer.tune_model()
        # loss = self.trainer.train_model(tuning=True)

        return {'loss': loss, 'status': STATUS_OK}
Example #4
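# Module-level imports assumed by this example. The hyperopt, pandas, and standard
# library imports are exactly what the class calls; the pykg2vec module paths are
# assumptions, and model_path, config_path, hyper_param_path, moduleMap, modelMap,
# configMap, and hypMap are lookup tables defined elsewhere in the original file.
import importlib

import pandas as pd
from hyperopt import fmin, tpe, Trials, STATUS_OK, space_eval

from pykg2vec.utils.kgcontroller import KnowledgeGraph  # path assumed
from pykg2vec.utils.trainer import Trainer              # path assumed
from pykg2vec.utils.logger import Logger                # path assumed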
class BaysOptimizer(object):
    """Bayesian optimizer class for tuning hyperparameter.

      This class implements the Bayesian Optimizer for tuning the 
      hyper-parameter.

      Args:
        args (object): The Argument Parser object providing arguments.
        name_dataset (str): The name of the dataset.
        sampling (str): sampling to be used for generating negative triples


      Examples:
        >>> from pykg2vec.config.hyperparams import KGETuneArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> model = Complex()
        >>> args = KGETuneArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """
    _logger = Logger().get_logger(__name__)

    def __init__(self, args=None):
        """store the information of database"""
        if args.model.lower() in ["tucker", "tucker_v2", "conve", "convkb", "proje_pointwise"]:
          raise Exception("Model %s has not been supported in tuning hyperparameters!" % args.model)

        model_name = args.model.lower()
        self.args = args
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        hyper_params = None
        try:
            self.model_obj = getattr(importlib.import_module(model_path + ".%s" % moduleMap[model_name]),
                                     modelMap[model_name])
            self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name])
            hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])()

        except ModuleNotFoundError:
            self._logger.error("%s not implemented! Select from: %s" % \
                               (model_name, ' '.join(map(str, modelMap.values()))))
        
        from pykg2vec.config.config import KGEArgParser
        kge_args = KGEArgParser().get_args([])
        kge_args.dataset_name = args.dataset_name
        kge_args.debug = self.args.debug
        config = self.config_obj(kge_args)
        model = self.model_obj(config)
        
        self.trainer = Trainer(model)
        
        self.search_space = hyper_params.search_space
        self.max_evals = self.args.max_number_trials if not self.args.debug else 1
        
    def optimize(self):
        """Function that performs bayesian optimization"""
        trials = Trials()
        
        self.best_result = fmin(fn=self.get_loss, space=self.search_space, trials=trials,
                                algo=tpe.suggest, max_evals=self.max_evals)
        
        columns = list(self.search_space.keys())   
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])
        
        for idx, trial in enumerate(trials.trials):
            row = []
            row.append(idx)
            translated_eval = space_eval(self.search_space, {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.trainer.config.path_result / self.trainer.model.model_name 
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)
        
        self._logger.info(results)
        self._logger.info('Found Golden Setting:')
        self._logger.info(space_eval(self.search_space, self.best_result))

    def return_best(self):
        """Function to return the best hyper-parameters"""
        return space_eval(self.search_space, self.best_result)

    def get_loss(self, params):
        """Function that defines and acquires the loss"""
        
        # copy the hyperparameters to trainer config and hyperparameter set. 
        for key, value in params.items():
            self.trainer.config.__dict__[key] = value
            self.trainer.config.hyperparameters[key] = value
        
        # configure common settings for a tuning run.
        self.trainer.config.disp_result = False
        self.trainer.config.disp_summary = False
        self.trainer.config.save_model = False

        # do not overwrite test numbers if set
        if self.trainer.config.test_num is None:
            self.trainer.config.test_num = 1000

        if self.args.debug:
            self.trainer.config.epochs = 1
            self.trainer.config.hyperparameters['epochs'] = 1
        
        # start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()

        return {'loss': loss, 'status': STATUS_OK}
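For completeness, a minimal driver script that matches the usage shown in the docstrings above; it assumes the newer pykg2vec entry points from Example #2 (KGEArgParser in pykg2vec.common and BaysOptimizer in pykg2vec.utils.bayesian_optimizer):

import sys

from pykg2vec.common import KGEArgParser
from pykg2vec.utils.bayesian_optimizer import BaysOptimizer


def main():
    # parse the tuning arguments (model name, dataset, number of trials, ...)
    args = KGEArgParser().get_args(sys.argv[1:])

    # run Bayesian optimization and report the best hyper-parameter setting found
    bays_opt = BaysOptimizer(args=args)
    bays_opt.optimize()
    print(bays_opt.return_best())


if __name__ == "__main__":
    main()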