def parse_source(
    key: str,
    source: dict,
    output_directory: str,
    prefix_map: Dict[str, str] = None,
    node_property_predicates: Set[str] = None,
    predicate_mappings: Dict[str, str] = None,
    checkpoint: bool = False,
) -> Sink:
    """
    Parse a source from a merge config YAML.

    Parameters
    ----------
    key: str
        Source key
    source: Dict
        Source configuration
    output_directory: str
        Location to write output to
    prefix_map: Dict[str, str]
        Non-canonical CURIE mappings
    node_property_predicates: Set[str]
        A set of predicates that ought to be treated as node properties
        (This is applicable for RDF)
    predicate_mappings: Dict[str, str]
        A mapping of predicate IRIs to property names
        (This is applicable for RDF)
    checkpoint: bool
        Whether to serialize each individual source to a TSV

    Returns
    -------
    kgx.sink.sink.Sink
        Returns an instance of Sink

    """
    log.info(f"Processing source '{key}'")
    if not key:
        key = os.path.basename(source["input"]["filename"][0])
    input_args = prepare_input_args(
        key,
        source,
        output_directory,
        prefix_map,
        node_property_predicates,
        predicate_mappings,
    )
    transformer = Transformer(stream=True)
    transformer.transform(input_args)
    transformer.store.graph.name = key
    if checkpoint:
        log.info(f"Writing checkpoint for source '{key}'")
        checkpoint_output = f"{output_directory}/{key}" if output_directory else key
        transformer.save({"filename": checkpoint_output, "format": "tsv"})
    # Current "Callable" metadata not needed at this point
    # but causes peculiar problems downstream, so we clear it.
    transformer.store.clear_graph_metadata()
    return transformer.store
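A minimal sketch of how parse_source might be called for a single merge-config entry. The source dict shape mirrors what the function itself reads (source["input"]["filename"]); the key, filenames, and output directory are hypothetical:

# Hypothetical source entry from a merge config YAML; only the "input"
# block is shown, with illustrative filenames.
source_config = {
    "input": {
        "filename": ["data/my-source_nodes.tsv", "data/my-source_edges.tsv"],
        "format": "tsv",
    }
}

# Parse the source, writing a TSV checkpoint of it under output/.
sink = parse_source(
    key="my-source",
    source=source_config,
    output_directory="output",
    checkpoint=True,
)
print(sink.graph.name)  # -> "my-source"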
def neo4j_download(
    uri: str,
    username: str,
    password: str,
    output: str,
    output_format: str,
    output_compression: Optional[str],
    stream: bool,
    node_filters: Optional[Tuple] = None,
    edge_filters: Optional[Tuple] = None,
) -> Transformer:
    """
    Download nodes and edges from Neo4j database.

    Parameters
    ----------
    uri: str
        Neo4j URI. For example, https://localhost:7474
    username: str
        Username for authentication
    password: str
        Password for authentication
    output: str
        Where to write the output (stdout, by default)
    output_format: str
        The output format (``tsv``, by default)
    output_compression: Optional[str]
        The output compression type
    stream: bool
        Whether to parse input as a stream
    node_filters: Optional[Tuple]
        Node filters
    edge_filters: Optional[Tuple]
        Edge filters

    Returns
    -------
    kgx.Transformer
        The Transformer instance corresponding to the operation

    """
    transformer = Transformer(stream=stream)
    transformer.transform(
        {
            "uri": uri,
            "username": username,
            "password": password,
            "format": "neo4j",
            "node_filters": node_filters,
            "edge_filters": edge_filters,
        }
    )
    if not output_format:
        output_format = "tsv"
    transformer.save(
        {"filename": output, "format": output_format, "compression": output_compression}
    )
    return transformer
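A hedged usage sketch for neo4j_download against a local Neo4j instance; the URI, credentials, and output filename are placeholders. With stream=False the downloaded graph is also held in the Transformer's store, which the node count below relies on:

transformer = neo4j_download(
    uri="http://localhost:7474",  # placeholder local instance
    username="neo4j",             # placeholder credentials
    password="test",
    output="downloaded_graph",    # yields downloaded_graph_nodes.tsv / _edges.tsv
    output_format="tsv",
    output_compression=None,
    stream=False,
)
print(transformer.store.graph.number_of_nodes())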
def neo4j_upload(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    uri: str,
    username: str,
    password: str,
    stream: bool,
    node_filters: Optional[Tuple] = None,
    edge_filters: Optional[Tuple] = None,
) -> Transformer:
    """
    Upload a set of nodes/edges to a Neo4j database.

    Parameters
    ----------
    inputs: List[str]
        A list of files that contain nodes/edges
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    uri: str
        The full HTTP address for Neo4j database
    username: str
        Username for authentication
    password: str
        Password for authentication
    stream: bool
        Whether to parse input as a stream
    node_filters: Optional[Tuple]
        Node filters
    edge_filters: Optional[Tuple]
        Edge filters

    Returns
    -------
    kgx.Transformer
        The Transformer instance corresponding to the operation

    """
    transformer = Transformer(stream=stream)
    transformer.transform(
        {
            "filename": inputs,
            "format": input_format,
            "compression": input_compression,
            "node_filters": node_filters,
            "edge_filters": edge_filters,
        }
    )
    transformer.save(
        {"uri": uri, "username": username, "password": password, "format": "neo4j"}
    )
    return transformer
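And the reverse direction, a sketch of loading KGX TSV files into Neo4j via neo4j_upload; the file names and connection details are again placeholders:

transformer = neo4j_upload(
    inputs=["graph_nodes.tsv", "graph_edges.tsv"],  # placeholder KGX TSV files
    input_format="tsv",
    input_compression=None,
    uri="http://localhost:7474",
    username="neo4j",
    password="test",
    stream=True,
)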
def test_rdf_transform3():
    """
    Test parsing an RDF N-triple and round-trip.
    """
    input_args1 = {
        "filename": [os.path.join(RESOURCE_DIR, "rdf", "test1.nt")],
        "format": "nt",
    }
    t1 = Transformer()
    t1.transform(input_args1)
    assert t1.store.graph.number_of_nodes() == 2
    assert t1.store.graph.number_of_edges() == 1

    output_args1 = {
        "filename": os.path.join(TARGET_DIR, "test1-export.nt"),
        "format": "nt",
    }
    t1.save(output_args1)

    input_args2 = {
        "filename": [os.path.join(TARGET_DIR, "test1-export.nt")],
        "format": "nt",
    }
    t2 = Transformer()
    t2.transform(input_args2)
    assert t2.store.graph.number_of_nodes() == 2
    assert t2.store.graph.number_of_edges() == 1

    n1t1 = t1.store.graph.nodes()["ENSEMBL:ENSG0000000000001"]
    n1t2 = t2.store.graph.nodes()["ENSEMBL:ENSG0000000000001"]
    n1t3 = t2.store.graph.nodes()["ENSEMBL:ENSG0000000000001"]
    assert n1t1["type"] == n1t2["type"] == n1t3["type"] == "SO:0000704"
    assert len(n1t1["category"]) == len(n1t2["category"]) == len(n1t3["category"]) == 4
    assert (
        "biolink:Gene" in n1t1["category"]
        and "biolink:Gene" in n1t2["category"]
        and "biolink:Gene" in n1t3["category"]
    )
    assert (
        "biolink:GenomicEntity" in n1t1["category"]
        and "biolink:GenomicEntity" in n1t2["category"]
        and "biolink:GenomicEntity" in n1t3["category"]
    )
    assert (
        "biolink:NamedThing" in n1t1["category"]
        and "biolink:NamedThing" in n1t2["category"]
        and "biolink:NamedThing" in n1t3["category"]
    )
    assert n1t1["name"] == n1t2["name"] == n1t3["name"] == "Test Gene 123"
    assert (
        n1t1["description"]
        == n1t2["description"]
        == n1t3["description"]
        == "This is a Test Gene 123"
    )
    assert (
        "Test Dataset" in n1t1["provided_by"]
        and "Test Dataset" in n1t2["provided_by"]
        and "Test Dataset" in n1t3["provided_by"]
    )
def test_csv_to_neo4j_load_to_graph_transform(clean_database):
    """
    Test to load a CSV KGX file into Neo4j.
    """
    logger.debug("test_csv_to_neo4j_load...")
    input_args1 = {
        "filename": [
            os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
            os.path.join(RESOURCE_DIR, "cm_edges.csv"),
        ],
        "format": "csv",
    }
    t1 = Transformer()
    t1.transform(input_args1)

    output_args = {
        "uri": DEFAULT_NEO4J_URL,
        "username": DEFAULT_NEO4J_USERNAME,
        "password": DEFAULT_NEO4J_PASSWORD,
        "format": "neo4j",
    }
    t1.save(output_args)

    # Continue sequentially to test reading from Neo4j and writing back out to CSV.
    logger.debug("test_neo4j_to_graph_transform")
    input_args = {
        "uri": DEFAULT_NEO4J_URL,
        "username": DEFAULT_NEO4J_USERNAME,
        "password": DEFAULT_NEO4J_PASSWORD,
        "format": "neo4j",
    }
    output_filename = os.path.join(TARGET_DIR, "neo_graph")
    output_args = {"filename": output_filename, "format": "csv"}
    t = Transformer()
    t.transform(input_args, output_args)
    assert t.store.graph.number_of_nodes() == 10
    assert t.store.graph.number_of_edges() == 11
    assert os.path.exists(f"{output_filename}_nodes.csv")
    assert os.path.exists(f"{output_filename}_edges.csv")
def test_csv_to_neo_load():
    """
    Test to load a CSV to Neo4j.
    """
    input_args1 = {
        "filename": [
            os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
            os.path.join(RESOURCE_DIR, "cm_edges.csv"),
        ],
        "format": "csv",
    }
    t1 = Transformer()
    t1.transform(input_args1)

    output_args = {
        "uri": DEFAULT_NEO4J_URL,
        "username": DEFAULT_NEO4J_USERNAME,
        "password": DEFAULT_NEO4J_PASSWORD,
        "format": "neo4j",
    }
    t1.save(output_args)
def _transform(query):
    """
    Transform an input to an output via Transformer.
    """
    t1 = Transformer()
    t1.transform(query[0])
    t1.save(query[1].copy())
    assert t1.store.graph.number_of_nodes() == query[2]
    assert t1.store.graph.number_of_edges() == query[3]

    output = query[1]
    if output["format"] in {"tsv", "csv", "jsonl"}:
        input_args = {
            "filename": [
                f"{output['filename']}_nodes.{output['format']}",
                f"{output['filename']}_edges.{output['format']}",
            ],
            "format": output["format"],
        }
    elif output["format"] in {"neo4j"}:
        input_args = {
            "uri": DEFAULT_NEO4J_URL,
            "username": DEFAULT_NEO4J_USERNAME,
            "password": DEFAULT_NEO4J_PASSWORD,
            "format": "neo4j",
        }
    else:
        input_args = {
            "filename": [f"{output['filename']}"],
            "format": output["format"],
        }
    t2 = Transformer()
    t2.transform(input_args)
    assert t2.store.graph.number_of_nodes() == query[2]
    assert t2.store.graph.number_of_edges() == query[3]
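For reference, the query tuple that _transform consumes is (input args, output args, expected node count, expected edge count). A sketch with illustrative filenames and counts:

query = (
    {
        "filename": [
            os.path.join(RESOURCE_DIR, "graph_nodes.tsv"),  # illustrative input
            os.path.join(RESOURCE_DIR, "graph_edges.tsv"),
        ],
        "format": "tsv",
    },
    {
        "filename": os.path.join(TARGET_DIR, "graph-export"),  # illustrative output
        "format": "jsonl",
    },
    512,  # expected number of nodes (illustrative)
    532,  # expected number of edges (illustrative)
)
_transform(query)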
def test_rdf_transform5():
    """
    Parse an RDF N-Triple and round-trip, with user defined node property
    predicates and export property types.
    """
    node_property_predicates = {
        f"https://www.example.org/UNKNOWN/{x}"
        for x in ["fusion", "homology", "combined_score", "cooccurence"]
    }
    property_types = {}
    for k in node_property_predicates:
        property_types[k] = "xsd:float"

    input_args1 = {
        "filename": [os.path.join(RESOURCE_DIR, "rdf", "test3.nt")],
        "format": "nt",
        "node_property_predicates": node_property_predicates,
    }
    t1 = Transformer()
    t1.transform(input_args1)
    assert t1.store.graph.number_of_nodes() == 7
    assert t1.store.graph.number_of_edges() == 6

    output_args2 = {
        "filename": os.path.join(TARGET_DIR, "test3-export.nt"),
        "format": "nt",
        "property_types": property_types,
    }
    t1.save(output_args2)

    input_args2 = {
        "filename": [os.path.join(TARGET_DIR, "test3-export.nt")],
        "format": "nt",
    }
    t2 = Transformer()
    t2.transform(input_args2)
    assert t2.store.graph.number_of_nodes() == 7
    assert t2.store.graph.number_of_edges() == 6

    n1t1 = t1.store.graph.nodes()["ENSEMBL:ENSG0000000000001"]
    n1t2 = t2.store.graph.nodes()["ENSEMBL:ENSG0000000000001"]
    assert n1t1["type"] == n1t2["type"] == "SO:0000704"
    assert len(n1t1["category"]) == len(n1t2["category"]) == 4
    assert "biolink:Gene" in n1t1["category"] and "biolink:Gene" in n1t2["category"]
    assert (
        "biolink:GenomicEntity" in n1t1["category"]
        and "biolink:GenomicEntity" in n1t2["category"]
    )
    assert (
        "biolink:NamedThing" in n1t1["category"]
        and "biolink:NamedThing" in n1t2["category"]
    )
    assert n1t1["name"] == n1t2["name"] == "Test Gene 123"
    assert n1t1["description"] == n1t2["description"] == "This is a Test Gene 123"
    assert (
        "Test Dataset" in n1t1["provided_by"]
        and "Test Dataset" in n1t2["provided_by"]
    )

    e1t1 = list(
        t1.store.graph.get_edge(
            "ENSEMBL:ENSP0000000000001", "ENSEMBL:ENSP0000000000002"
        ).values()
    )[0]
    e1t2 = list(
        t2.store.graph.get_edge(
            "ENSEMBL:ENSP0000000000001", "ENSEMBL:ENSP0000000000002"
        ).values()
    )[0]
    assert e1t1["subject"] == e1t2["subject"] == "ENSEMBL:ENSP0000000000001"
    assert e1t1["object"] == e1t2["object"] == "ENSEMBL:ENSP0000000000002"
    assert e1t1["predicate"] == e1t2["predicate"] == "biolink:interacts_with"
    assert e1t1["relation"] == e1t2["relation"] == "biolink:interacts_with"
    assert e1t1["type"] == e1t2["type"] == "biolink:Association"
    assert e1t1["id"] == e1t2["id"] == "urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518"
    assert "test3.nt" in e1t1["knowledge_source"]
    assert e1t2["fusion"] == 0.0
    assert e1t2["homology"] == 0.0
    assert e1t2["combined_score"] == 490.0
    assert e1t2["cooccurence"] == 332.0
    assert "test3.nt" in e1t2["knowledge_source"]
def test_rdf_transform2():
    """
    Test parsing an RDF N-triple, with user-defined prefix map,
    node property predicates, and predicate mappings.
    """
    prefix_map = {
        "HGNC": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/",
        "OMIM": "http://omim.org/entry/",
    }
    node_property_predicates = {
        "http://purl.obolibrary.org/obo/RO_0002558",
        "http://purl.org/dc/elements/1.1/source",
        "https://monarchinitiative.org/frequencyOfPhenotype",
    }
    predicate_mappings = {
        "http://purl.org/dc/elements/1.1/source": "source",
        "https://monarchinitiative.org/frequencyOfPhenotype": "frequency_of_phenotype",
    }
    input_args1 = {
        "filename": [os.path.join(RESOURCE_DIR, "rdf", "oban-test.nt")],
        "format": "nt",
        "prefix_map": prefix_map,
        "node_property_predicates": node_property_predicates,
        "predicate_mappings": predicate_mappings,
    }
    t1 = Transformer()
    t1.transform(input_args1)
    assert t1.store.graph.number_of_nodes() == 14
    assert t1.store.graph.number_of_edges() == 7

    n1t1 = t1.store.graph.nodes()["HP:0000505"]
    assert len(n1t1["category"]) == 1
    assert "biolink:NamedThing" in n1t1["category"]

    e1t1 = list(t1.store.graph.get_edge("OMIM:166400", "HP:0000006").values())[0]
    assert e1t1["subject"] == "OMIM:166400"
    assert e1t1["object"] == "HP:0000006"
    assert e1t1["relation"] == "RO:0000091"
    assert e1t1["type"] == "OBAN:association"
    assert e1t1["has_evidence"] == "ECO:0000501"
    assert e1t1["source"] == "OMIM:166400"

    e2t1 = list(t1.store.graph.get_edge("ORPHA:93262", "HP:0000505").values())[0]
    assert e2t1["subject"] == "ORPHA:93262"
    assert e2t1["object"] == "HP:0000505"
    assert e2t1["relation"] == "RO:0002200"
    assert e2t1["type"] == "OBAN:association"
    assert e2t1["frequency_of_phenotype"] == "HP:0040283"
    assert e2t1["source"] == "ORPHA:93262"

    property_types = {
        "frequency_of_phenotype": "uriorcurie",
        "source": "uriorcurie",
    }
    output_args1 = {
        "filename": os.path.join(TARGET_DIR, "oban-export.nt"),
        "format": "nt",
        "property_types": property_types,
    }
    t1.save(output_args1)

    input_args2 = {
        "filename": [os.path.join(TARGET_DIR, "oban-export.nt")],
        "format": "nt",
    }
    t2 = Transformer()
    t2.transform(input_args2)
    assert t2.store.graph.number_of_nodes() == 14
    assert t2.store.graph.number_of_edges() == 7

    n1t2 = t2.store.graph.nodes()["HP:0000505"]
    assert len(n1t2["category"]) == 1
    assert "biolink:NamedThing" in n1t2["category"]

    e1t2 = list(t2.store.graph.get_edge("OMIM:166400", "HP:0000006").values())[0]
    assert e1t2["subject"] == "OMIM:166400"
    assert e1t2["object"] == "HP:0000006"
    assert e1t2["relation"] == "RO:0000091"
    assert e1t2["type"] == "biolink:Association"
    assert e1t2["has_evidence"] == "ECO:0000501"
    assert e1t2["source"] == "OMIM:166400"

    e2t2 = list(t2.store.graph.get_edge("ORPHA:93262", "HP:0000505").values())[0]
    assert e2t2["subject"] == "ORPHA:93262"
    assert e2t2["object"] == "HP:0000505"
    assert e2t2["relation"] == "RO:0002200"
    assert e2t2["type"] == "biolink:Association"
    assert e2t2["frequency_of_phenotype"] == "HP:0040283"
    assert e2t2["source"] == "ORPHA:93262"

    input_args3 = {
        "filename": [os.path.join(TARGET_DIR, "oban-export.nt")],
        "format": "nt",
    }
    t3 = Transformer()
    t3.transform(input_args3)
    assert t3.store.graph.number_of_nodes() == 14
    assert t3.store.graph.number_of_edges() == 7

    # Check the third round-trip graph (t3), not t1.
    n1t3 = t3.store.graph.nodes()["HP:0000505"]
    assert len(n1t3["category"]) == 1
    assert "biolink:NamedThing" in n1t3["category"]

    e1t3 = list(t3.store.graph.get_edge("OMIM:166400", "HP:0000006").values())[0]
    assert e1t3["subject"] == "OMIM:166400"
    assert e1t3["object"] == "HP:0000006"
    assert e1t3["relation"] == "RO:0000091"
    assert e1t3["type"] == "biolink:Association"
    assert e1t3["has_evidence"] == "ECO:0000501"
    assert e1t3["source"] == "OMIM:166400"

    e2t3 = list(t3.store.graph.get_edge("ORPHA:93262", "HP:0000505").values())[0]
    assert e2t3["subject"] == "ORPHA:93262"
    assert e2t3["object"] == "HP:0000505"
    assert e2t3["relation"] == "RO:0002200"
    assert e2t3["type"] == "biolink:Association"
    assert e2t3["frequency_of_phenotype"] == "HP:0040283"
    assert e2t3["source"] == "ORPHA:93262"