def test_userdefined_dataset():
    """Parse the custom dataset under ``resource/custom_dataset`` and check its metadata.

    The dataset directory is resolved relative to this file. After preparing
    the data, every cached artifact is read back once and the metadata counts
    are compared against the values expected for the fixture.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    custom_dataset_path = os.path.join(this_dir, 'resource', 'custom_dataset')

    kg = KnowledgeGraph(dataset="userdefineddataset",
                        custom_dataset_path=custom_dataset_path)
    kg.prepare_data()
    kg.dump()

    # Each cached artifact must be readable; the returned values are not inspected.
    cache_names = (
        'triplets_train',
        'triplets_test',
        'triplets_valid',
        'hr_t',
        'tr_h',
        'idx2entity',
        'idx2relation',
        'entity2idx',
        'relation2idx',
    )
    for cache_name in cache_names:
        kg.read_cache_data(cache_name)

    kg.dataset.read_metadata()
    kg.dataset.dump()

    expected_counts = {
        'tot_train_triples': 1,
        'tot_test_triples': 1,
        'tot_valid_triples': 1,
        'tot_entity': 6,
        'tot_relation': 3,
    }
    for field, expected in expected_counts.items():
        assert getattr(kg.kg_meta, field) == expected
def test_generator_pairwise():
    """Function to test the generator for pairwise based algorithm.

    Pulls ten batches from a ``Generator`` built for the 'transe' model and
    checks that each batch has six columns of equal length.
    """
    knowledge_graph = KnowledgeGraph(dataset="freebase15k")
    knowledge_graph.force_prepare_data()

    config_def, model_def = Importer().import_model_config('transe')
    config = config_def(KGEArgParser().get_args([]))

    generator = Generator(model_def(**config.__dict__), config)
    generator.start_one_epoch(10)
    try:
        for _ in range(10):
            data = list(next(generator))
            assert len(data) == 6

            # presumably positive (p*) and negative (n*) head/relation/tail
            # columns -- naming taken from the original locals; confirm
            # against Generator.
            ph, pr, pt, nh, nr, nt = data
            for column in (pr, pt, nh, nr, nt):
                assert len(ph) == len(column)
    finally:
        # Always stop the generator, even when an assertion or next() fails,
        # so a failing test does not leave it running.
        generator.stop()
def testing_function(name):
    """Build and train the model called ``name`` for a single epoch.

    Arguments are parsed with ``-exp True -mn <name>``; the resulting config
    is then overridden with small values before training.
    """
    # Parse the arguments as if they were given on the command line.
    cli_args = KGEArgParser().get_args(['-exp', 'True', '-mn', name])

    # Prepare (and cache) the dataset selected by the parsed arguments.
    kg = KnowledgeGraph(dataset=cli_args.dataset_name)
    kg.prepare_data()

    # Look up the config class and model class registered for this name.
    config_cls, model_cls = Importer().import_model_config(name)
    config = config_cls(cli_args)

    # Overrides applied in the same order as individual assignments would be.
    overrides = {
        'epochs': 1,
        'test_step': 1,
        'test_num': 10,
        'save_model': False,
        'debug': True,
        'ent_hidden_size': 10,
        'rel_hidden_size': 10,
        'channels': 2,
    }
    for attribute, value in overrides.items():
        setattr(config, attribute, value)

    # Create the model, then build and train it.
    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()
def test_known_datasets(dataset_name):
    """Check that the dataset called ``dataset_name`` parses into non-empty data.

    After a forced preparation the test verifies the dump size, the presence
    of the cache, that every metadata counter is positive and that every
    cached artifact is non-empty.
    """
    kg = KnowledgeGraph(dataset=dataset_name)
    kg.force_prepare_data()

    assert len(kg.dump()) == 9
    assert kg.is_cache_exists()

    kg_metadata = kg.dataset.read_metadata()
    metadata_fields = (
        'tot_triple',
        'tot_valid_triples',
        'tot_test_triples',
        'tot_train_triples',
        'tot_relation',
        'tot_entity',
    )
    for field in metadata_fields:
        assert getattr(kg_metadata, field) > 0

    cache_names = (
        'triplets_train',
        'triplets_test',
        'triplets_valid',
        'hr_t',
        'tr_h',
        'idx2entity',
        'idx2relation',
        'entity2idx',
        'relation2idx',
    )
    for cache_name in cache_names:
        assert len(kg.read_cache_data(cache_name)) > 0
def main():
    """Train a model from command-line arguments, then open an interactive console.

    The arguments after the program name are parsed with ``KGEArgParser``.
    Once training finishes the trainer enters interactive mode and
    ``code.interact`` is started with this function's locals, so the local
    names below (``args``, ``knowledge_graph``, ``config_def``, ``model_def``,
    ``config``, ``model``, ``trainer``) are what the console user sees.
    Renaming them changes what is available in the console.
    """
    # Get the customized configuration from the command-line arguments.
    args = KGEArgParser().get_args(sys.argv[1:])

    # Prepare the data and cache it for later usage.
    knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
    knowledge_graph.prepare_data()

    # Extract the corresponding model config and definition from Importer().
    config_def, model_def = Importer().import_model_config(
        args.model_name.lower())
    config = config_def(args)
    # NOTE(review): the model is built from the config object itself here,
    # whereas other callers pass ``**config.__dict__`` -- confirm which
    # signature the installed model classes expect.
    model = model_def(config)

    # Create, build and train the model.
    trainer = Trainer(model, config)
    trainer.build_model()
    trainer.train_model()

    # Inference can be performed here after the model has been trained.
    trainer.enter_interactive_mode()
    # Hand every local of this function to the interactive console.
    code.interact(local=locals())
    trainer.exit_interactive_mode()
def __init__(self, args):
    """Store the settings needed to tune hyperparameters for one model.

    Args:
        args: parsed tuning arguments. This method reads ``model_name``,
            ``dataset_name``, ``dataset_path``, ``debug``, ``device`` and
            ``max_number_trials`` from it.

    Raises:
        Exception: if ``args.model_name`` (case-insensitive) is one of the
            models for which tuning is not supported.
    """
    unsupported_models = ["tucker", "conve", "convkb", "proje_pointwise"]
    if args.model_name.lower() in unsupported_models:
        # Format the message with the same attribute that was just checked;
        # reading ``args.model`` here would fail with AttributeError when the
        # namespace only defines ``model_name``.
        raise Exception(
            "Model %s has not been supported in tuning hyperparameters!" %
            args.model_name)

    self.model_name = args.model_name
    self.knowledge_graph = KnowledgeGraph(
        dataset=args.dataset_name, custom_dataset_path=args.dataset_path)

    # Start from default training arguments and copy over the tuning options.
    self.kge_args = KGEArgParser().get_args([])
    self.kge_args.dataset_name = args.dataset_name
    self.kge_args.debug = args.debug
    self.kge_args.device = args.device

    # In debug mode the number of trials is capped at 3.
    self.max_evals = args.max_number_trials if not args.debug else 3

    self.config_obj, self.model_obj = Importer().import_model_config(
        self.model_name.lower())
    self.config_local = self.config_obj(self.kge_args)
    self.search_space = HyperparameterLoader(args).load_search_space(
        self.model_name.lower())

    # Filled in later; nothing has been optimized or trained yet.
    self._best_result = None
    self.trainer = None
def test_hyperparamter_loader(model_name):
    """Check that hyperparameters loaded for ``model_name`` include an optimizer."""
    kg = KnowledgeGraph(dataset="freebase15k")
    kg.prepare_data()

    # Default tuning arguments (nothing passed on the command line).
    tune_args = KGETuneArgParser().get_args([])

    loader = HyperparamterLoader(tune_args)
    hyperparams = loader.load_hyperparameter("freebase15k", model_name)

    assert hyperparams["optimizer"] is not None
def test_fb15k_meta():
    """Check caching and metadata access for the "freebase15k" dataset.

    Forces a fresh preparation, verifies the cache exists, then prepares
    again and reads and dumps the dataset metadata.
    """
    kg = KnowledgeGraph(dataset="freebase15k")

    kg.force_prepare_data()
    kg.dump()
    assert kg.is_cache_exists()

    # A second, non-forced preparation after the cache has been created.
    kg.prepare_data()

    dataset = kg.dataset
    dataset.read_metadata()
    dataset.dump()
def main():
    """Train the model selected by the command-line arguments."""
    cli_args = KGEArgParser().get_args(sys.argv[1:])

    # Prepare and cache the requested dataset.
    kg = KnowledgeGraph(dataset=cli_args.dataset_name,
                        custom_dataset_path=cli_args.dataset_path)
    kg.prepare_data()

    # Resolve config/model classes from the lower-cased model name.
    model_key = cli_args.model_name.lower()
    config_cls, model_cls = Importer().import_model_config(model_key)
    config = config_cls(cli_args)

    # The model receives every config attribute as a keyword argument.
    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()
def test_fb15k_manipulate():
    """Check that every cached artifact of "freebase15k" can be read back.

    The data is force-prepared and dumped first; the values returned by
    ``read_cache_data`` are not inspected.
    """
    kg = KnowledgeGraph(dataset="freebase15k")
    kg.force_prepare_data()
    kg.dump()

    cache_names = (
        'triplets_train',
        'triplets_test',
        'triplets_valid',
        'hr_t',
        'tr_h',
        'idx2entity',
        'idx2relation',
        'entity2idx',
        'relation2idx',
    )
    for cache_name in cache_names:
        kg.read_cache_data(cache_name)
def testing_function_with_args(name, l1_flag, distance_measure=None, bilinear=None, display=False):
    """Train the model called ``name`` for one epoch and check top-k inference.

    Args:
        name: model name passed to ``Importer().import_model_config``.
        l1_flag: value stored in ``config.L1_flag``.
        distance_measure: not used by this function.
        bilinear: not used by this function.
        display: value stored in ``config.disp_result``.
    """
    # Default arguments (nothing passed on the command line).
    default_args = KGEArgParser().get_args([])

    # Prepare and cache the dataset named by the default arguments.
    kg = KnowledgeGraph(dataset=default_args.dataset_name)
    kg.prepare_data()

    # Resolve the config and model classes for this model name.
    config_cls, model_cls = Importer().import_model_config(name)
    config = config_cls(default_args)

    overrides = {
        'epochs': 1,
        'test_step': 1,
        'test_num': 10,
        'disp_result': display,
        'save_model': True,
        'L1_flag': l1_flag,
        'debug': True,
    }
    for attribute, value in overrides.items():
        setattr(config, attribute, value)

    # Build and train the model.
    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()

    # Inference is performed after training, inside interactive mode.
    top_k = 5
    trainer.enter_interactive_mode()

    # Arguments: head, relation.
    predicted_tails = trainer.infer_tails(1, 10, topk=top_k)
    assert len(predicted_tails) == 5

    # Arguments: relation, tail.
    predicted_heads = trainer.infer_heads(10, 20, topk=top_k)
    assert len(predicted_heads) == 5

    # Arguments: head, tail.
    predicted_relations = trainer.infer_rels(1, 20, topk=top_k)
    assert len(predicted_relations) == 5

    trainer.exit_interactive_mode()
def get_model(result_path_dir, configured_epochs, patience, config_key):
    """Create a model and its config for the "Freebase15k" dataset.

    Args:
        result_path_dir: stored in ``config.path_result``.
        configured_epochs: stored in ``config.epochs``.
        patience: stored in ``config.patience``.
        config_key: model name passed to ``Importer().import_model_config``.

    Returns:
        A ``(model, config)`` tuple, where the model is constructed from the
        config's attributes.
    """
    default_args = KGEArgParser().get_args([])

    kg = KnowledgeGraph(dataset="Freebase15k")
    kg.prepare_data()

    config_cls, model_cls = Importer().import_model_config(config_key)
    config = config_cls(default_args)

    overrides = {
        'epochs': configured_epochs,
        'test_step': 1,
        'test_num': 1,
        'disp_result': False,
        'save_model': False,
        'path_result': result_path_dir,
        'debug': True,
        'patience': patience,
    }
    for attribute, value in overrides.items():
        setattr(config, attribute, value)

    model = model_cls(**config.__dict__)
    return model, config
def experiment(model_name):
    """Train ``model_name`` on "fb15k" with the ``exp`` flag switched on."""
    exp_args = KGEArgParser().get_args([])
    exp_args.exp = True
    exp_args.dataset_name = "fb15k"

    # Prepare and cache the dataset.
    kg = KnowledgeGraph(dataset=exp_args.dataset_name,
                        custom_dataset_path=exp_args.dataset_path)
    kg.prepare_data()

    # Resolve the config and model classes for the requested model.
    config_cls, model_cls = Importer().import_model_config(model_name)
    config = config_cls(exp_args)

    # Build and train the model.
    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()
def tunning_function(name):
    """Run hyperparameter tuning for the model ``name`` in debug mode.

    Asserts that a best result is available once ``optimize()`` has run.
    """
    kg = KnowledgeGraph(dataset="freebase15k")
    kg.prepare_data()

    # Default tuning arguments, switched to debug mode for the given model.
    tune_args = KGETuneArgParser().get_args([])
    tune_args.debug = True
    tune_args.model = name

    # Create the optimizer and shrink the number of test samples.
    optimizer = BaysOptimizer(args=tune_args)
    optimizer.config_local.test_num = 10

    # Perform the hyperparameter search.
    optimizer.optimize()

    assert optimizer.return_best() is not None
def test_return_empty_before_optimization(mocked_fmin):
    """Check that ``return_best()`` raises when ``optimize()`` was never called.

    Also verifies that the mocked ``fmin`` has not been invoked.
    """
    kg = KnowledgeGraph(dataset="freebase15k")
    kg.prepare_data()

    # Default tuning arguments, switched to debug mode for 'analogy'.
    tune_args = KGETuneArgParser().get_args([])
    tune_args.debug = True
    tune_args.model = 'analogy'

    optimizer = BaysOptimizer(args=tune_args)
    optimizer.config_local.test_num = 10

    with pytest.raises(Exception) as exc_info:
        optimizer.return_best()

    assert mocked_fmin.called is False
    assert exc_info.value.args[0] == 'Cannot find golden setting. Has optimize() been called?'
def __init__(self, args):
    """Populate the config from ``args``, dataset metadata and fixed defaults.

    Attributes are written in this order, so later writes win over earlier
    ones: every attribute of ``args``, the fixed defaults, the fields of the
    knowledge graph metadata, the output directories and finally (only when
    ``args.exp is True``) the loaded hyperparameters.
    """
    # Mirror every argument onto the instance.
    self.__dict__.update(
        {arg_name: getattr(args, arg_name) for arg_name in vars(args)})

    # Training and evaluation settings.
    self.hits = [1, 3, 5, 10]
    self.disp_result = False
    self.patience = 3  # TODO: make this configurable as well.

    # Visualization settings.
    # p.s. the visualizer is disabled for most of the KGE methods for now.
    self.disp_triple_num = 20
    self.plot_training_result = True
    self.plot_testing_result = True

    # Knowledge graph information: copy every metadata field onto the instance.
    self.knowledge_graph = KnowledgeGraph(
        dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
    self.__dict__.update(self.knowledge_graph.kg_meta.__dict__)

    # Training outputs are stored in sub-folders of the dataset path;
    # each folder is created if it does not exist yet.
    dataset_path = self.knowledge_graph.dataset.dataset_path
    output_folders = (
        ('path_tmp', 'intermediate'),
        ('path_result', 'results'),
        ('path_figures', 'figures'),
        ('path_embeddings', 'embeddings'),
    )
    for attribute, folder_name in output_folders:
        folder = dataset_path / folder_name
        setattr(self, attribute, folder)
        getattr(self, attribute).mkdir(parents=True, exist_ok=True)

    if args.exp is True:
        # Copy all the settings loaded for this dataset/model pair.
        paper_params = HyperparamterLoader().load_hyperparameter(
            args.dataset_name, args.model_name)
        self.__dict__.update(paper_params.items())
def test_generator_pointwise():
    """Function to test the generator for pointwise based algorithm.

    Pulls ten batches from a ``Generator`` built for the "complex" model and
    checks that each batch has four columns, that the first three have equal
    length and that the fourth contains exactly the values 1 and -1.
    """
    knowledge_graph = KnowledgeGraph(dataset="freebase15k")
    knowledge_graph.force_prepare_data()

    config_def, model_def = Importer().import_model_config("complex")
    config = config_def(KGEArgParser().get_args([]))

    generator = Generator(model_def(**config.__dict__), config)
    generator.start_one_epoch(10)
    try:
        for _ in range(10):
            data = list(next(generator))
            assert len(data) == 4

            # presumably head, relation, tail and label columns -- naming
            # taken from the original locals; confirm against Generator.
            h, r, t, y = data
            assert len(h) == len(r)
            assert len(h) == len(t)
            assert set(y) == {1, -1}
    finally:
        # Always stop the generator, even when an assertion or next() fails,
        # so a failing test does not leave it running.
        generator.stop()
def test_visualization(tmpdir):
    """Train "analogy" with plotting enabled and check the produced file names.

    Figures and results are both written to a ``result_path`` directory
    created under ``tmpdir``; the test then looks for a file name containing
    each expected marker.
    """
    result_path_dir = tmpdir.mkdir("result_path")

    default_args = KGEArgParser().get_args([])

    kg = KnowledgeGraph(dataset="Freebase15k")
    kg.prepare_data()

    config_cls, model_cls = Importer().import_model_config("analogy")
    config = config_cls(args=default_args)

    overrides = {
        'epochs': 5,
        'test_step': 1,
        'test_num': 1,
        'disp_result': True,
        'save_model': False,
        'debug': True,
        'patience': -1,
        'plot_embedding': True,
        'plot_training_result': True,
        'plot_testing_result': True,
        'path_figures': result_path_dir,
        'path_result': result_path_dir,
    }
    for attribute, value in overrides.items():
        setattr(config, attribute, value)

    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()

    produced = list(listdir(result_path_dir))

    # Every marker must appear in at least one produced file name.
    expected_markers = (
        "_entity_plot",
        "_rel_plot",
        "_ent_n_rel_plot",
        "_training_loss_plot_",
        "_testing_hits_plot",
        "_testing_latex_table_",
        "_testing_table_",
        "_testing_rank_plot_",
        "_testing_hits_plot_",
    )
    for marker in expected_markers:
        assert any(marker in file_name for file_name in produced)
def main():
    """Tune "transe" on "Freebase15k", then train it with the best setting found."""
    model_name = "transe"
    dataset_name = "Freebase15k"
    selection = ['-mn', model_name, '-ds', dataset_name]

    # 1. Tune the hyperparameters for the selected model and dataset.
    #    p.s. this is using the training and validation sets.
    tune_args = KGETuneArgParser().get_args(list(selection))
    optimizer = BaysOptimizer(args=tune_args)
    optimizer.optimize()
    best = optimizer.return_best()

    # 2. Evaluate the final model with the best hyperparameters on the testing set.
    train_args = KGEArgParser().get_args(list(selection))

    # Prepare and cache the dataset.
    kg = KnowledgeGraph(dataset=train_args.dataset_name)
    kg.prepare_data()

    # Resolve the config and model classes from the lower-cased model name.
    model_key = train_args.model_name.lower()
    config_cls, model_cls = Importer().import_model_config(model_key)
    config = config_cls(train_args)

    # Overwrite the config entries with the tuned values.
    config.__dict__.update(best.items())

    # Build and train the model.
    trainer = Trainer(model_cls(**config.__dict__), config)
    trainer.build_model()
    trainer.train_model()
def test_generator_proje():
    """Function to test the generator for projection based algorithm.

    Pulls ten batches from a ``Generator`` built for the "proje_pointwise"
    model and checks that each batch has five columns, that the first three
    have equal length and that the last two are ``torch.Tensor`` instances.
    """
    knowledge_graph = KnowledgeGraph(dataset="freebase15k")
    knowledge_graph.force_prepare_data()

    config_def, model_def = Importer().import_model_config("proje_pointwise")
    config = config_def(KGEArgParser().get_args([]))

    generator = Generator(model_def(**config.__dict__), config)
    generator.start_one_epoch(10)
    try:
        for _ in range(10):
            data = list(next(generator))
            assert len(data) == 5

            # presumably head, relation and tail columns followed by two
            # lookup tensors -- naming taken from the original locals;
            # confirm against Generator.
            h, r, t, hr_t, tr_h = data
            assert len(h) == len(r)
            assert len(h) == len(t)
            assert isinstance(hr_t, torch.Tensor)
            assert isinstance(tr_h, torch.Tensor)
    finally:
        # Always stop the generator, even when an assertion or next() fails,
        # so a failing test does not leave it running.
        generator.stop()
def test_benchmarks(dataset_name):
    """Force-prepare the dataset called ``dataset_name`` and dump it.

    The test passes as long as neither step raises.
    """
    kg = KnowledgeGraph(dataset=dataset_name)
    kg.force_prepare_data()
    kg.dump()