def test_merge_documents_duplicate_ids():
    """Tests merging documents that share the same @id."""
    # given
    metadata_list = [
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "id": "http://example.org/test1",
            "name": "test_1",
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "id": "http://example.org/test1",
            "name": "test_1b",
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "id": "http://example.org/test2",
            "name": "test_2",
        },
    ]

    # when
    results = merge_documents(metadata_list)

    # then
    expected_results = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "id": "http://example.org/test1",
        "schema:sameAs": "http://example.org/test2",
        "name": ["test_1", "test_1b", "test_2"],
    }
    assert results == expected_results
def test_merge_documents():
    """
    Test the creation of a coherent minimal metadata set
    """
    # given
    metadata_list = [
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "name": "test_1",
            "version": "0.0.2",
            "description": "Simple package.json test for indexer",
            "codeRepository": "git+https://github.com/moranegg/metadata_test",
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "name": "test_0_1",
            "version": "0.0.2",
            "description": "Simple package.json test for indexer",
            "codeRepository": "git+https://github.com/moranegg/metadata_test",
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "name": "test_metadata",
            "version": "0.0.2",
            "author": {
                "type": "Person",
                "name": "moranegg",
            },
        },
    ]

    # when
    results = merge_documents(metadata_list)

    # then
    expected_results = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "version": "0.0.2",
        "description": "Simple package.json test for indexer",
        "name": ["test_1", "test_0_1", "test_metadata"],
        "author": [
            {
                "type": "Person",
                "name": "moranegg",
            }
        ],
        "codeRepository": "git+https://github.com/moranegg/metadata_test",
    }
    assert results == expected_results
def test_merge_documents_lists_duplicates():
    """Tests merging two @list elements with a duplicate subelement."""
    # given
    metadata_list = [
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "author": {
                "@list": [
                    {"name": "test_1"},
                ]
            },
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "author": {
                "@list": [
                    {"name": "test_2"},
                    {"name": "test_1"},
                ]
            },
        },
    ]

    # when
    results = merge_documents(metadata_list)

    # then
    expected_results = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "author": [
            {"name": "test_1"},
            {"name": "test_2"},
        ],
    }
    assert results == expected_results
def test_merge_documents_list_right():
    """Tests merging an @list with a singleton."""
    # given
    metadata_list = [
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "author": {
                "@list": [
                    {"name": "test_1"},
                ]
            },
        },
        {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "author": {"name": "test_2"},
        },
    ]

    # when
    results = merge_documents(metadata_list)

    # then
    expected_results = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "author": [
            {"name": "test_1"},
            {"name": "test_2"},
        ],
    }
    assert results == expected_results
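# The four tests above pin down the observable list-handling behaviour of
# merge_documents() without depending on its implementation. The following is
# a minimal sketch of that behaviour only, not the real
# swh.indexer.codemeta.merge_documents; `_as_list` and `_merge_values` are
# hypothetical helper names introduced here for illustration:


def _as_list(value):
    """Normalize a JSON-LD value (@list wrapper, list, or singleton) to a list."""
    if isinstance(value, dict) and "@list" in value:
        return value["@list"]
    if isinstance(value, list):
        return value
    return [value]


def _merge_values(left, right):
    """Union two JSON-LD values, preserving order and dropping duplicates."""
    merged = []
    for item in _as_list(left) + _as_list(right):
        if item not in merged:
            merged.append(item)
    return merged


# Reproduces the expectation of test_merge_documents_list_right above:
assert _merge_values(
    {"@list": [{"name": "test_1"}]},
    {"name": "test_2"},
) == [{"name": "test_1"}, {"name": "test_2"}]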
def translate_revision_intrinsic_metadata(
    self, detected_files: Dict[str, List[Any]], log_suffix: str
) -> Tuple[List[Any], Any]:
    """
    Determine the plan of action to translate metadata, given one or
    multiple detected files.

    Args:
        detected_files: dictionary mapping context names (e.g. "npm",
          "authors") to lists of sha1s

    Returns:
        (List[str], dict): list of mappings used and dict with
        translated metadata according to the CodeMeta vocabulary
    """
    used_mappings = [MAPPINGS[context].name for context in detected_files]
    metadata = []
    tool = {
        "name": "swh-metadata-translator",
        "version": "0.0.2",
        "configuration": {},
    }
    # TODO: iterate on each context, on each file
    # -> get raw_contents
    # -> translate each content
    config = {k: self.config[k] for k in [INDEXER_CFG_KEY, "objstorage", "storage"]}
    config["tools"] = [tool]
    for context in detected_files.keys():
        cfg = deepcopy(config)
        cfg["tools"][0]["configuration"]["context"] = context
        c_metadata_indexer = ContentMetadataIndexer(config=cfg)
        # sha1s that are already in the content_metadata table
        sha1s_in_storage = []
        metadata_generator = self.idx_storage.content_metadata_get(
            detected_files[context]
        )
        for c in metadata_generator:
            # extracting metadata
            sha1 = c.id
            sha1s_in_storage.append(sha1)
            local_metadata = c.metadata
            # local metadata is aggregated
            if local_metadata:
                metadata.append(local_metadata)

        sha1s_filtered = [
            item for item in detected_files[context] if item not in sha1s_in_storage
        ]

        if sha1s_filtered:
            # index the contents that had no stored metadata yet
            try:
                c_metadata_indexer.run(sha1s_filtered, log_suffix=log_suffix)
                # collect the on-the-fly results from the content indexer
                for result in c_metadata_indexer.results:
                    local_metadata = result.metadata
                    metadata.append(local_metadata)
            except Exception:
                self.log.exception("Exception while indexing metadata on contents")

    metadata = merge_documents(metadata)
    return (used_mappings, metadata)
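# A hypothetical invocation sketch. The class instantiation, configuration,
# and sha1 value below are illustrative assumptions, not part of this module;
# only the method signature and the (used_mappings, metadata) return shape
# come from the code above:
#
#   indexer = RevisionMetadataIndexer(config=...)  # enclosing class assumed
#   detected_files = {"npm": [sha1_of_package_json]}
#   used_mappings, metadata = indexer.translate_revision_intrinsic_metadata(
#       detected_files, log_suffix="revision=deadbeef"
#   )
#   # used_mappings: one mapping name per detected context
#   # metadata: a single CodeMeta dict, merged via merge_documents()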