def create(args_):
    """
    Function for the `create` mode. Generates the ontology.

    Args:
        args_: Parsed args for the `create` mode.
    """
    spec_path = normalize_path(args_.spec)
    dest_path = normalize_path(args_.dest_path)
    spec_paths = (
        [normalize_path(config) for config in args_.spec_paths]
        if args_.spec_paths is not None
        else None
    )
    merged_path = normalize_path(args_.merged_path)
    lenient_prefix = args_.lenient_prefix

    generator = OntologyCodeGenerator(spec_paths, args_.gen_all)

    if args_.no_dry_run is None:
        log.info(
            "Ontology will be generated in a temporary directory as "
            "--no_dry_run is not specified by the user."
        )
        args_.no_dry_run = False

    if lenient_prefix:
        log.info("Will not enforce prefix check.")

    is_dry_run = not args_.no_dry_run
    include_init = not args_.exclude_init
    generated_folder = generator.generate(
        spec_path, dest_path, is_dry_run, include_init, merged_path,
        lenient_prefix
    )
    log.info("Ontology generated in the directory %s.", generated_folder)

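# Hypothetical direct call to `create` above, bypassing any CLI layer: the
# attribute names on this namespace simply mirror the ones the function reads,
# and the spec path is a placeholder, not a real file.
from types import SimpleNamespace

example_args = SimpleNamespace(
    spec="ontology_spec.json",  # assumed spec file name
    dest_path=".",
    spec_paths=None,
    merged_path=None,
    lenient_prefix=True,
    no_dry_run=True,
    exclude_init=False,
    gen_all=False,
)
create(example_args)
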
def build_ontology(sql_db, project_name):
    """
    Find the ontology specification from the project, and then create the
    ontologies.

    Args:
        sql_db: The SQLite database containing the project.
        project_name: The name of the project.
    """
    onto_path = "./stave_test_onto"
    res = query(
        sql_db,
        f"SELECT ontology FROM stave_backend_project "
        f"WHERE stave_backend_project.name = "
        f'"{project_name}"',
    ).fetchone()[0]

    with tempfile.NamedTemporaryFile("w") as onto_file:
        onto_file.write(res)
        onto_file.flush()  # ensure the spec is on disk before it is read back
        OntologyCodeGenerator().generate(
            onto_file.name, onto_path, lenient_prefix=True
        )

    # Make sure the newly created path is in the python path.
    sys.path.append(onto_path)

    # Make sure we can import the newly generated modules.
    try:
        importlib.import_module("edu.cmu")
    except Exception:
        pass

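# Hypothetical invocation of `build_ontology` above from a test: the database
# file name and project name are placeholders for whatever the test fixture
# actually creates.
import sqlite3

with sqlite3.connect("test_db.sqlite3") as conn:
    build_ontology(conn, "example_project")
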
def clean(args_):
    """
    Function for the `clean` mode. Cleans the given directory of generated
    files.

    Args:
        args_: Parsed args for the `clean` mode.
    """
    dir_ = normalize_path(args_.dir)
    generator = OntologyCodeGenerator()
    is_empty, del_dir = generator.cleanup_generated_ontology(dir_, args_.force)
    if not is_empty:
        log.info("Directory %s not empty, cannot delete completely.", dir_)
    else:
        log.info("Directory %s deleted.", dir_)
    if not args_.force:
        log.info("Deleted files moved to %s.", del_dir)

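# A minimal, hypothetical argparse wiring for the `create` and `clean` modes
# above; option names mirror the attributes each handler reads and may differ
# from the real command-line tool.
import argparse

parser = argparse.ArgumentParser(description="Ontology code generation tool.")
subparsers = parser.add_subparsers(dest="mode", required=True)

create_parser = subparsers.add_parser("create")
create_parser.add_argument("spec")
create_parser.add_argument("--dest_path", default=".")
create_parser.add_argument("--spec_paths", nargs="+", default=None)
create_parser.add_argument("--merged_path", default=None)
create_parser.add_argument("--lenient_prefix", action="store_true")
create_parser.add_argument("--no_dry_run", action="store_true", default=None)
create_parser.add_argument("--exclude_init", action="store_true")
create_parser.add_argument("--gen_all", action="store_true")
create_parser.set_defaults(func=create)

clean_parser = subparsers.add_parser("clean")
clean_parser.add_argument("dir")
clean_parser.add_argument("--force", action="store_true")
clean_parser.set_defaults(func=clean)

args = parser.parse_args()
args.func(args)
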
def build_ontology():
    onto_path = "./stave_test_onto"
    res = self._query(
        f'SELECT ontology FROM nlpviewer_backend_project '
        f'WHERE nlpviewer_backend_project.name = '
        f'"{project_name}"').fetchone()[0]

    with tempfile.NamedTemporaryFile('w') as onto_file:
        onto_file.write(res)
        onto_file.flush()  # ensure the spec is on disk before it is read back
        OntologyCodeGenerator().generate(
            onto_file.name, onto_path, lenient_prefix=True)

    # Make sure the newly created path is in the python path.
    sys.path.append(onto_path)

    # Make sure we can import the newly generated modules.
    try:
        importlib.import_module('edu.cmu')
    except Exception:
        pass

class GenerateOntologyTest(unittest.TestCase):
    def setUp(self):
        self.generator = OntologyCodeGenerator()
        self.dir_path = None

        curr_dir = os.path.dirname(__file__)
        self.spec_dir = os.path.join(curr_dir, "test_specs/")
        self.test_output = os.path.join(curr_dir, "test_outputs/")

    def tearDown(self):
        """
        Cleans up the generated files after the test case, if any. Only cleans
        up if generate_ontology passes successfully.
        """
        if self.dir_path is not None:
            self.generator.cleanup_generated_ontology(
                self.dir_path, is_forced=True)

    @data(
        ('example_ontology',
         ['ft/onto/example_import_ontology', 'ft/onto/example_ontology']),
        ('example_complex_ontology',
         ['ft/onto/example_complex_ontology']),
        ('example_multi_module_ontology',
         ['ft/onto/ft_module', 'custom/user/custom_module']),
        ('race_qa_onto',
         ['ft/onto/race_qa_ontology']))
    def test_generated_code(self, value):
        input_file_name, file_paths = value
        file_paths = sorted(file_paths + _get_init_paths(file_paths))

        # Read the json spec and generate the code in a temporary directory.
        with tempfile.TemporaryDirectory() as tempdir:
            json_file_path = os.path.join(
                self.spec_dir, f'{input_file_name}.json')
            folder_path = self.generator.generate(
                json_file_path, tempdir, is_dry_run=True)
            self.dir_path = folder_path

            # Sort the generated files for a stable comparison.
            generated_files = sorted(
                utils.get_generated_files_in_dir(folder_path))
            expected_files = [
                f"{os.path.join(folder_path, file)}.py" for file in file_paths
            ]
            self.assertEqual(generated_files, expected_files)

            for i, generated_file in enumerate(generated_files):
                with open(generated_file, 'r') as f:
                    generated_code = f.read()

                # Assert that the generated code matches the expected code.
                expected_code_path = os.path.join(
                    self.test_output, f'{file_paths[i]}.py')
                with open(expected_code_path, 'r') as f:
                    expected_code = f.read()
                self.assertEqual(generated_code, expected_code)

    def test_dry_run_false(self):
        json_file_path = os.path.join(
            self.spec_dir, "example_import_ontology.json")
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            self.generator.generate(temp_filename, temp_dir, is_dry_run=False)
            folder_path = temp_dir
            for name in ["ft", "onto", "example_import_ontology.py"]:
                self.assertTrue(name in os.listdir(folder_path))
                folder_path = os.path.join(folder_path, name)

    def test_include_and_exclude_init(self):
        json_file_path = os.path.join(
            self.spec_dir, "example_import_ontology.json")
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_filename = _get_temp_filename(json_file_path, temp_dir)

            # Test with include_init = True.
            folder_path = self.generator.generate(
                temp_filename, temp_dir, is_dry_run=False, include_init=True)
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files.
            exp_file_path = [
                'ft/__init__',
                'ft/onto/__init__',
                'ft/onto/example_import_ontology'
            ]
            exp_files = sorted([
                f"{os.path.join(folder_path, file)}.py"
                for file in exp_file_path
            ])
            self.assertEqual(gen_files, exp_files)

            # Now, corrupt one of the init files.
            corrupted_path = os.path.join(folder_path, 'ft/__init__.py')
            with open(corrupted_path, 'w') as f:
                f.write('# ***corrupted file***\n')

            # Re-generate using include_init = False.
            self.generator = OntologyCodeGenerator()
            folder_path = self.generator.generate(
                temp_filename, folder_path,
                is_dry_run=False, include_init=False)
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files after removing the corrupted
            # file, which should not have been regenerated.
            exp_files = [file for file in exp_files if file != corrupted_path]
            self.assertEqual(gen_files, exp_files)

    @data(
        (True, 'test_duplicate_entry.json', DuplicateEntriesWarning),
        (True, 'test_duplicate_attr_name.json', DuplicatedAttributesWarning),
        (False, 'example_ontology.json', OntologySourceNotFoundException),
        (False, 'test_invalid_parent.json', ParentEntryNotSupportedException),
        (False, 'test_invalid_attribute.json', TypeNotDeclaredException),
        (False, 'test_nested_item_type.json', UnsupportedTypeException),
        (False, 'test_no_item_type.json', TypeNotDeclaredException),
        (False, 'test_unknown_item_type.json', TypeNotDeclaredException))
    def test_warnings_errors(self, value):
        expected_warning, file, msg_type = value
        temp_dir = tempfile.mkdtemp()
        json_file_name = os.path.join(self.spec_dir, file)
        temp_filename = _get_temp_filename(json_file_name, temp_dir)
        if expected_warning:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                self.generator.generate(
                    temp_filename, temp_dir, is_dry_run=True)
                self.assertEqual(len(w), 1)
                self.assertEqual(w[0].category, msg_type)
        else:
            with self.assertRaises(msg_type):
                self.generator.generate(
                    temp_filename, temp_dir, is_dry_run=True)

    @log_capture()
    def test_directory_already_present(self):
        json_file_path = os.path.join(
            self.spec_dir, "example_import_ontology.json")
        with tempfile.TemporaryDirectory() as temp_dir:
            os.mkdir(os.path.join(temp_dir, "ft"))
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            with LogCapture() as l:
                self.generator.generate(temp_filename, temp_dir, False)
                l.check_present(
                    ('root', 'WARNING',
                     f'The directory with the name ft is already present in '
                     f'{temp_dir}. New files will be merge into the existing '
                     f'directory.'))

    def test_top_ontology_parsing_imports(self):
        temp_dir = tempfile.mkdtemp()
        temp_filename = os.path.join(temp_dir, 'temp.py')
        sys.path.append(temp_dir)
        with open(temp_filename, 'w') as temp_file:
            temp_file.write('import os.path\n'
                            'import os.path as os_path\n'
                            'from os import path\n')
        temp_module = importlib.import_module('temp')

        manager = ImportManager(None, None)

        gen = OntologyCodeGenerator()
        gen.initialize_top_entries(manager, temp_module)

        imports = manager.get_import_statements()
        expected_imports = ["from os import path"]

        self.assertListEqual(imports, expected_imports)

    @data(
        "example_ontology.json",
        "example_import_ontology.json",
        "example_multi_module_ontology.json",
        "example_complex_ontology.json",
        "test_unknown_item_type.json")
    def test_valid_json(self, input_filepath):
        input_filepath = os.path.join(self.spec_dir, input_filepath)
        utils.validate_json_schema(input_filepath)

    @data(
        ("test_duplicate_attribute.json", "non-unique elements"),
        ("test_additional_properties.json",
         "Additional properties are not allowed"))
    def test_invalid_json(self, value):
        input_filepath, error_msg = value
        input_filepath = os.path.join(self.spec_dir, input_filepath)
        with self.assertRaises(jsonschema.exceptions.ValidationError) as cm:
            utils.validate_json_schema(input_filepath)
        self.assertTrue(error_msg in cm.exception.args[0])

def initialize(self) -> "Pipeline":
    """
    This function should be called before the pipeline can be used to
    process the actual data. This function will call the `initialize` of
    all the components inside this pipeline.

    Returns:
        The pipeline itself.
    """
    # Create an EntryTree object `merged_entry_tree` to store the parsed
    # entry tree from the ontology specification file passed in as part of
    # the resource, and add the result to the resource under the key
    # `merged_entry_tree`.
    merged_entry_tree = EntryTree()
    if self.resource.get("onto_specs_path"):
        OntologyCodeGenerator().parse_schema_for_no_import_onto_specs_file(
            ontology_path=self.resource.get("onto_specs_path"),
            ontology_dict=self.resource.get("onto_specs_dict"),
            merged_entry_tree=merged_entry_tree,
        )
        self.resource.update(merged_entry_tree=merged_entry_tree)

    # The process manager needs to be assigned first.
    self._proc_mgr = ProcessManager(len(self._components))

    if self._initialized:
        # The pipeline has already been initialized, so we are doing
        # re-initialization here.
        logging.info("Re-initializing the Pipeline.")

    # Reset the flags of the components before initializing them.
    self._reader.reset_flags()
    for c in self._components:
        c.reset_flags()

    # Handle the reader.
    if not self._reader.is_initialized:
        self._reader.initialize(self.resource, self._reader_config)
    else:
        logging.info(
            "The reader [%s] has already been initialized, "
            "will skip its initialization.",
            self._reader.name,
        )

    if self._check_type_consistency:
        self.reader.enforce_consistency(enforce=True)
    else:
        self.reader.enforce_consistency(enforce=False)

    # Handle other components.
    self.initialize_components()
    self._initialized = True

    # Create the profiler.
    if self._enable_profiling:
        self.reader.set_profiling(True)
        self._profiler = [0.0] * len(self.components)

    # Check record types and attributes of each pipeline component.
    if self._do_init_type_check:
        current_records: Dict[str, Set[str]] = {}
        self._reader.record(current_records)
        for component in self.components:
            if hasattr(component, "expected_types_and_attributes"):
                record_types_and_attributes_check(
                    component.expected_types_and_attributes(),  # type: ignore
                    current_records,
                )
            if hasattr(component, "record"):
                component.record(current_records)  # type: ignore

    return self

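# Minimal, hypothetical sketch of driving the `initialize` method above; the
# reader choice is a placeholder and processing steps are omitted. If the
# pipeline's resource carries an "onto_specs_path" entry, initialize() will
# also build the merged entry tree as shown in the method.
from forte.data.data_pack import DataPack
from forte.data.readers import StringReader
from forte.pipeline import Pipeline

pipeline = Pipeline[DataPack]()
pipeline.set_reader(StringReader())
pipeline.initialize()  # runs the method shown above before any data is processed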