Пример #1
0
    def do_test(self, fname):
        """
        Convert ``<fname>.json`` to RDF and compare it with the Turtle in ``<fname>.ttl``.

        Both files are resolved relative to ``self.base_dir``.
        :param fname: file name stem (no extension) shared by the JSON input and the Turtle expectation
        """
        from fhirtordf.loaders.fhirresourceloader import FHIRResource

        json_path = os.path.join(self.base_dir, fname + ".json")
        turtle_path = os.path.join(self.base_dir, fname + ".ttl")

        # "target" is the graph generated from JSON, "source" the stored Turtle expectation
        target = FHIRResource(self.fhir_ontology, json_path,
                              "http://hl7.org/fhir/")
        source = PrettyGraph()
        source.load(turtle_path, format="turtle")

        # Show the complete diff on failure instead of a truncated one
        self.maxDiff = None
        expected, actual = rdf_compare_split(source,
                                             target.graph,
                                             ignore_owl_version=False)
        self.assertEqual(expected, actual)
Пример #2
0
 def __init__(self,
              vocabulary: Graph,
              json_fname: Optional[str],
              base_uri: str,
              data: Optional[JsonObj] = None,
              target: Optional[Graph] = None,
              add_ontology_header: bool = True,
              replace_narrative_text: bool = False,
              is_root=True,
              resource_uri: Optional[URIRef] = None):
     """
     Construct an RDF representation and immediately generate its triples.

     :param vocabulary: FHIR Metadata Vocabulary (fhir.ttl)
     :param json_fname: URI or file name of resource to convert
     :param base_uri: base of resource URI -- will be combined with the resource id to generate the actual URI
     :param data: if present load this data rather than json_fname
     :param target: target graph -- used for collections, bundles, etc.
     :param add_ontology_header: Add the OWL ontology header to the output
     :param replace_narrative_text: Replace long narrative text section with boilerplate
     :param is_root: True means this is a root node, False a component
     :param resource_uri: If present, this becomes the resource subject
     :raises AssertionError: if neither json_fname nor data is supplied
     :raises ValueError: if the loaded JSON carries no 'resourceType'
     """
     if json_fname:
         self.root = load(json_fname)
     elif data:
         self.root = data
     else:
         # Raised explicitly rather than via ``assert False`` so the check is not stripped by ``python -O``
         raise AssertionError(
             "Either a json file name or actual data image must be supplied")
     # Make sure the base ends in a separator so it can be concatenated with "<type>/<id>"
     self._base_uri = base_uri + ('/' if base_uri[-1] not in '/#' else '')
     if 'resourceType' not in self.root:
         raise ValueError("{} is not a FHIR resource".format(json_fname))
     if resource_uri:
         self._resource_uri = resource_uri
     else:
         # Resources without an id get a random UUID (stored back on the JSON root)
         if 'id' not in self.root:
             self.root.id = str(uuid4())
         self._resource_uri = URIRef(self._base_uri +
                                     self.root.resourceType + '/' +
                                     self.root.id)
     self._meta = FHIRMetaVocEntry(vocabulary, FHIR[self.root.resourceType])
     self._g = PrettyGraph() if target is None else target
     self._vocabulary = vocabulary
     self._addl_namespaces = dict()
     self._add_ontology_header = add_ontology_header
     self._replace_narrative_text = replace_narrative_text
     self.generate(is_root)
Пример #3
0
 def test_data_entry(self):
     """
     Convert the first entry of the Synthea sample collection and compare it with the stored Turtle file.
     """
     from fhirtordf.loaders.fhirresourceloader import FHIRResource

     # Set to True to (re)write the expected Turtle file; the final assertion then fails on purpose
     save_output = False

     data_dir = os.path.join(self.base_dir, 'synthea_data')
     with open(os.path.join(data_dir,
                            'Adams301_Keyshawn30_74.json')) as json_file:
         collection = load(json_file)
     first_resource = collection.entry[0].resource
     source = FHIRResource(self.fhir_ontology,
                           None,
                           "http://standardhealthrecord.org/fhir/",
                           data=first_resource)

     turtle_fname = os.path.join(data_dir,
                                 'Adams301_Keyshawn30_74_entry0.ttl')
     if save_output:
         with open(turtle_fname, 'w') as output:
             output.write(str(source))

     target = PrettyGraph()
     target.load(turtle_fname, format="turtle")

     # Note: This will fail if we use the pure turtle serializer (vs our changes in this package)
     self.maxDiff = None
     differences = rdf_compare(source.graph,
                               target,
                               ignore_owl_version=True)
     self.assertEqual('', differences)
     self.assertFalse(save_output, "Update output file always fails")
Пример #4
0
def complete_definition(subj: Node,
                        source_graph: Graph,
                        target_graph: Optional[Graph] = None) -> PrettyGraph:
    """
    Return the transitive closure of subject.

    Every triple whose subject is ``subj`` is copied into the target graph; whenever the object
    of such a triple is a BNode, the BNode's own triples are copied as well, recursively.
    Each node is expanded at most once, so blank-node cycles terminate instead of recursing
    until a RecursionError.

    :param subj: URI or BNode for subject
    :param source_graph: Graph containing definition
    :param target_graph: graph to add the triples to; a new PrettyGraph is created when omitted
    :return: target_graph
    """
    if target_graph is None:
        target_graph = PrettyGraph()
    expanded = set()

    def copy_closure(node: Node) -> None:
        # A node reachable through a cycle or through several paths is only expanded once
        if node in expanded:
            return
        expanded.add(node)
        for p, o in source_graph.predicate_objects(node):
            target_graph.add((node, p, o))
            if isinstance(o, BNode):
                copy_closure(o)

    copy_closure(subj)
    return target_graph
Пример #5
0
class FHIRResource:
    """ A FHIR RDF representation of a FHIR JSON resource """
    def __init__(self,
                 vocabulary: Graph,
                 json_fname: Optional[str],
                 base_uri: str,
                 data: Optional[JsonObj] = None,
                 target: Optional[Graph] = None,
                 add_ontology_header: bool = True,
                 replace_narrative_text: bool = False,
                 is_root=True,
                 resource_uri: Optional[URIRef] = None):
        """
        Construct an RDF representation and immediately generate its triples.

        :param vocabulary: FHIR Metadata Vocabulary (fhir.ttl)
        :param json_fname: URI or file name of resource to convert
        :param base_uri: base of resource URI -- will be combined with the resource id to generate the actual URI
        :param data: if present load this data rather than json_fname
        :param target: target graph -- used for collections, bundles, etc.
        :param add_ontology_header: Add the OWL ontology header to the output
        :param replace_narrative_text: Replace long narrative text section with boilerplate
        :param is_root: True means this is a root node, False a component
        :param resource_uri: If present, this becomes the resource subject
        :raises AssertionError: if neither json_fname nor data is supplied
        :raises ValueError: if the loaded JSON carries no 'resourceType'
        """
        if json_fname:
            self.root = load(json_fname)
        elif data:
            self.root = data
        else:
            # Raised explicitly rather than via ``assert False`` so the check is not stripped by ``python -O``
            raise AssertionError(
                "Either a json file name or actual data image must be supplied")
        # Make sure the base ends in a separator so it can be concatenated with "<type>/<id>"
        self._base_uri = base_uri + ('/' if base_uri[-1] not in '/#' else '')
        if 'resourceType' not in self.root:
            raise ValueError("{} is not a FHIR resource".format(json_fname))
        if resource_uri:
            self._resource_uri = resource_uri
        else:
            # Resources without an id get a random UUID (stored back on the JSON root)
            if 'id' not in self.root:
                self.root.id = str(uuid4())
            self._resource_uri = URIRef(self._base_uri +
                                        self.root.resourceType + '/' +
                                        self.root.id)
        self._meta = FHIRMetaVocEntry(vocabulary, FHIR[self.root.resourceType])
        self._g = PrettyGraph() if target is None else target
        self._vocabulary = vocabulary
        self._addl_namespaces = dict()
        self._add_ontology_header = add_ontology_header
        self._replace_narrative_text = replace_narrative_text
        self.generate(is_root)

    @property
    def resource_id(self) -> Optional[str]:
        """ The FHIR.Resource.id value recorded in the graph for the resource subject """
        return value(self._g, self._resource_uri, FHIR.Resource.id)

    @property
    def resource_type(self) -> str:
        """ The 'resourceType' of the JSON root """
        return self.root.resourceType

    @property
    def graph(self):
        """ The graph the resource was rendered into """
        return self._g

    def add_prefixes(self, nsmap: Dict[str, Namespace]) -> None:
        """
        Add the required prefix definitions
        :param nsmap: map from prefix to namespace; every entry is bound on the graph
        :return:
        """
        for prefix, namespace in nsmap.items():
            self._g.bind(prefix, namespace)

    def add_ontology_definition(self) -> None:
        """
        Add the OWL ontology header: an owl:Ontology node named "<resource uri>.ttl" that imports
        fhir.ttl, plus an owl:versionIRI when the resource has a meta.versionId.
        """
        ont_uri = URIRef(str(self._resource_uri) + ".ttl")
        self.add(ont_uri, RDF.type, OWL.Ontology)\
            .add(ont_uri, OWL.imports, FHIR['fhir.ttl'])
        if 'meta' in self.root and 'versionId' in self.root.meta:
            ont_uri_str = str(ont_uri)
            # Insert "/_history/<version>" in front of the trailing ".<suffix>", if there is one
            if re.search(r'\.\w+$', ont_uri_str):
                ont_uri_str, suffix = ont_uri_str.rsplit('.', 1)
                suffix = '.' + suffix
            else:
                suffix = ''
            self.add(
                ont_uri, OWL.versionIRI,
                URIRef(ont_uri_str + '/_history/' + self.root.meta.versionId +
                       suffix))

    def add(self, subj: Node, pred: URIRef, obj: Node) -> "FHIRResource":
        """
        Shortcut to rdflib add function
        :param subj:
        :param pred:
        :param obj:
        :return: self for chaining
        """
        self._g.add((subj, pred, obj))
        return self

    def add_value_node(self,
                       subj: Node,
                       pred: URIRef,
                       val: Union[JsonObj, str, List],
                       valuetype: Optional[URIRef] = None) -> None:
        """
        Expand val according to the range of pred and add it to the graph
        :param subj: graph subject
        :param pred: graph predicate
        :param val: JSON representation of target object
        :param valuetype: predicate type if it can't be directly determined
        """
        pred_type = self._meta.predicate_type(
            pred) if not valuetype else valuetype
        # Transform generic resources into specific types
        if pred_type == FHIR.Resource:
            pred_type = FHIR[val.resourceType]

        val_meta = FHIRMetaVocEntry(self._vocabulary, pred_type)
        for k, p in val_meta.predicates().items():
            if isinstance(val, JsonObj) and k in val:
                self.add_val(subj, p, val, k)
                if pred == FHIR.CodeableConcept.coding:
                    self.add_type_arc(subj, val)
            elif k == "value" and val_meta.predicate_type(p) == FHIR.Element:
                # value / Element is the wild card combination -- if there is a "value[x]" in val, emit it where the
                # type comes from 'x'
                for vk in val._as_dict.keys():
                    if vk.startswith(k):
                        self.add_val(subj, FHIR['Extension.' + vk], val, vk,
                                     self._meta.value_predicate_to_type(vk))
            else:
                # Can have an extension only without a primary value
                self.add_extension_val(subj, val, k, p)

    def add_reference(self, subj: Node, val: str) -> None:
        """
        Add a fhir:link and RDF type arc if it can be determined
        :param subj: reference subject
        :param val: reference value
        """
        match = FHIR_RESOURCE_RE.match(val)
        ref_uri_str = res_type = None
        if match:
            # Keep the reference as is when it already carries a base, otherwise resolve against our base
            ref_uri_str = val if match.group(FHIR_RE_BASE) else (
                self._base_uri + urllib.parse.quote(val))
            res_type = match.group(FHIR_RE_RESOURCE)
        elif '://' in val:
            ref_uri_str = val
            res_type = "Resource"
        elif self._base_uri and not val.startswith('#') and not val.startswith(
                '/'):
            ref_uri_str = self._base_uri + urllib.parse.quote(val)
            res_type = val.split('/', 1)[0] if '/' in val else "Resource"
        if ref_uri_str:
            ref_uri = URIRef(ref_uri_str)
            self.add(subj, FHIR.link, ref_uri)
            self.add(ref_uri, RDF.type, FHIR[res_type])

    def add_type_arc(self, subj: Node, val: JsonObj) -> None:
        """
        Add an rdf:type arc for a coding whose system is covered by codesystem_maps
        :param subj: node that receives the type arc
        :param val: JSON object; only processed if it has both "system" and "code"
        """
        if "system" in val and "code" in val:
            for k in codesystem_maps.keys():
                # Keys are either literal system strings or objects with a match() method
                if (isinstance(k, str)
                        and k == val.system) or (not isinstance(k, str)
                                                 and k.match(val.system)):
                    type_uri = codesystem_maps[k](val.system,
                                                  urllib.parse.quote(val.code),
                                                  self._addl_namespaces)
                    if type_uri:
                        self.add(subj, RDF.type, type_uri)
                    # Only the first matching code system is applied
                    break

    def node_subject(self, list_idx: int, subj: Node, pred: URIRef,
                     node: JsonObj) -> Node:
        """
        Return the subject to use for a list member. For FHIR.Bundle.entry an index / fullUrl / resource
        BNode is added to the graph and the entry's fullUrl is returned as a URIRef; any other predicate
        yields a fresh BNode.
        :param list_idx: position of node in its list
        :param subj: subject that owns the list
        :param pred: predicate of the list
        :param node: list member
        :return: subject for the list member
        """
        if pred == FHIR.Bundle.entry:
            entry = BNode()
            self.add(entry, FHIR.index, Literal(list_idx))
            self.add_val(entry, FHIR.Bundle.entry.fullUrl, node, 'fullUrl')
            self.add(entry, FHIR.Bundle.entry.resource, URIRef(node.fullUrl))
            self.add(subj, pred, entry)
            return URIRef(node.fullUrl)
        else:
            return BNode()

    def add_val(self,
                subj: Node,
                pred: URIRef,
                json_obj: JsonObj,
                json_key: str,
                valuetype: Optional[URIRef] = None) -> Optional[BNode]:
        """
        Add the RDF representation of val to the graph as a target of subj, pred.  Note that FHIR lists are
        represented as a list of BNODE objects with a fhir:index discriminant
        :param subj: graph subject
        :param pred: predicate
        :param json_obj: object containing json_key
        :param json_key: name of the value in the JSON resource
        :param valuetype: value type if NOT determinable by predicate
        :return: value node if target is a BNode else None
        """
        if json_key not in json_obj:
            print("Expecting to find object named '{}' in JSON:".format(
                json_key))
            print(json_obj._as_json_dumps())
            print("entry skipped")
            return None
        val = json_obj[json_key]
        if isinstance(val, List):
            for list_idx, lv in enumerate(val):
                entry_bnode = BNode()
                # TODO: this is getting messy. Refactor and clean this up
                if pred == FHIR.Bundle.entry:
                    entry_subj = URIRef(lv.fullUrl)
                    self.add(entry_bnode, FHIR.index, Literal(list_idx))
                    self.add_val(entry_bnode, FHIR.Bundle.entry.fullUrl, lv,
                                 'fullUrl')
                    self.add(entry_bnode, FHIR.Bundle.entry.resource,
                             entry_subj)
                    self.add(subj, pred, entry_bnode)
                    entry_mv = FHIRMetaVocEntry(self._vocabulary,
                                                FHIR.BundleEntryComponent)
                    for k, p in entry_mv.predicates().items():
                        if k not in ['resource', 'fullUrl'] and k in lv:
                            print("---> adding {}".format(k))
                            # NOTE(review): attached to subj (the bundle) rather than entry_bnode -- confirm intended
                            self.add_val(subj, p, lv, k)
                    # Render the contained resource into the same graph as a non-root component;
                    # the constructor generates the triples as a side effect
                    FHIRResource(self._vocabulary,
                                 None,
                                 self._base_uri,
                                 data=lv.resource,
                                 target=self._g,
                                 add_ontology_header=False,
                                 replace_narrative_text=self.
                                 _replace_narrative_text,
                                 is_root=False,
                                 resource_uri=entry_subj)
                else:
                    self.add(entry_bnode, FHIR.index, Literal(list_idx))
                    if isinstance(lv, JsonObj):
                        self.add_value_node(entry_bnode, pred, lv, valuetype)
                    else:
                        vt = self._meta.predicate_type(pred)
                        atom_type = self._meta.primitive_datatype_nostring(
                            vt) if vt else None
                        self.add(entry_bnode, FHIR.value,
                                 Literal(lv, datatype=atom_type))
                    self.add(subj, pred, entry_bnode)
        else:
            vt = self._meta.predicate_type(
                pred) if not valuetype else valuetype
            if self._meta.is_atom(pred):
                if self._replace_narrative_text and pred == FHIR.Narrative.div and len(
                        val) > 120:
                    val = REPLACED_NARRATIVE_TEXT
                self.add(subj, pred, Literal(val))
            else:
                v = BNode()
                if self._meta.is_primitive(vt):
                    self.add(
                        v, FHIR.value,
                        Literal(
                            str(val),
                            datatype=self._meta.primitive_datatype_nostring(
                                vt, val)))
                else:
                    self.add_value_node(v, pred, val, valuetype)
                self.add(subj, pred, v)
                if pred == FHIR.Reference.reference:
                    self.add_reference(subj, val)
                elif pred == FHIR.RelatedArtifact.resource:
                    self.add_reference(v, val)
                self.add_extension_val(v, json_obj, json_key)
                return v
        return None

    def add_extension_val(self,
                          subj: Node,
                          json_obj: Union[JsonObj, List[JsonObjTypes]],
                          key: str,
                          pred: Optional[URIRef] = None) -> None:
        """
        Add any extensions for the supplied object. This can be called in following situations:
        1) Single extended value
                "key" : (value),
                "_key" : {
                    "extension": [
                       {
                        "url": "http://...",
                        "value[x]": "......"
                       }
                    ]
                }
        2) Single extension only
                "_key" : {
                    "extension": [
                       {
                        "url": "http://...",
                        "value[x]": "......"
                       }
                    ]
                }
        3) Multiple extended values:
                (TBD)

        4) Multiple extensions only
                "_key" : [
                  {
                    "extension": [
                       {
                        "url": "http://...",
                        "value[x]": "......"
                       }
                    ]
                  }
                ]

        :param subj: Node containing subject
        :param json_obj: Object (potentially) containing "_key"
        :param key: name of element that is possibly extended (as indicated by "_" prefix)
        :param pred: predicate for the contained elements. Only used in situations 3) (?) and 4
        :raises NotImplementedError: if subj is not a BNode, or "_key" is a list and no pred was supplied
        """
        extendee_name = "_" + key
        if extendee_name in json_obj:
            if not isinstance(subj, BNode):
                raise NotImplementedError(
                    "Extension to something other than a simple BNode")
            if isinstance(json_obj[extendee_name], list):
                if not pred:
                    # NotImplemented is a non-callable constant; NotImplementedError is the exception
                    raise NotImplementedError("Case 3 not implemented")
                for entry_idx, extension in enumerate(json_obj[extendee_name]):
                    entry = BNode()
                    self.add(entry, FHIR.index, Literal(entry_idx))
                    self.add_val(entry, FHIR.Element.extension, extension,
                                 'extension')
                    self.add(subj, pred, entry)
            elif 'fhir_comments' in json_obj[extendee_name] and len(
                    json_obj[extendee_name]) == 1:
                # TODO: determine whether and how fhir comments should be represented in RDF.
                # for the moment we just drop them
                print("fhir_comment ignored")
                print(json_obj[extendee_name]._as_json_dumps())
            else:
                self.add_val(subj, FHIR.Element.extension,
                             json_obj[extendee_name], 'extension')

    def add_resource(self, subj: URIRef, json_obj: JsonObj):
        """
        Add the rdf:type of the resource and a value for every vocabulary predicate present in json_obj
        :param subj: resource subject
        :param json_obj: JSON resource
        """
        self.add(subj, RDF.type, FHIR[json_obj.resourceType])
        for k, p in self._meta.predicates().items():
            if k in json_obj:
                self.add_val(subj, p, json_obj, k)

    def generate(self, is_root: bool) -> Graph:
        """
        Render self.root into the graph
        :param is_root: if True, also bind the standard prefixes, add the ontology header (when enabled)
        and mark the resource as fhir:treeRoot
        :return: the graph
        """
        if is_root:
            self.add_prefixes(namespaces)
            if self._add_ontology_header:
                self.add_ontology_definition()
            self.add(self._resource_uri, FHIR.nodeRole, FHIR.treeRoot)
        self.add_resource(self._resource_uri, self.root)
        # Namespaces collected while adding type arcs are only known after the resource has been added
        self.add_prefixes(self._addl_namespaces)
        return self._g

    def __str__(self):
        return self._g.serialize()
Пример #6
0
def rdf_compare_split(
    g1: Graph,
    g2: Graph,
    ignore_owl_version: bool = False,
    ignore_type_arcs: bool = False,
    compare_filter: Optional[Callable[[Graph, Graph, Graph], None]] = None
) -> Tuple[str, str]:
    """
    Compare graph g1 and g2
    :param g1: expected graph
    :param g2: actual graph
    :param ignore_owl_version: remove the owl:versionIRI of each compared subject before comparing
    :param ignore_type_arcs: remove rdf:type arcs from BNodes to URIs before comparing
    :param compare_filter: Final adjustment for graph difference. Used, for example, to deal with FHIR decimal problems.
    :return: Different elements in first (expected) graph, second (actual) graph
    """
    def graph_for_subject(g: Graph, subj: Node) -> Graph:
        subj_in_g = complete_definition(subj, g)
        if ignore_type_arcs:
            # Materialise first: triples are removed from the graph being iterated
            for ta_s, ta_o in list(subj_in_g.subject_objects(RDF.type)):
                if isinstance(ta_s, BNode) and isinstance(ta_o, URIRef):
                    subj_in_g.remove((ta_s, RDF.type, ta_o))
        if ignore_owl_version:
            subj_in_g.remove(
                (subj, OWL.versionIRI, subj_in_g.value(subj, OWL.versionIRI)))
        return subj_in_g

    def primary_subjects(g: Graph) -> Set[Node]:
        # NOTE(review): the length test wraps a single generator in a list, so it is never 0 and
        # anon_subjs is always empty. Left unchanged on purpose -- counting un-referenced BNodes
        # as primary subjects would need a way to match blank nodes across the two graphs.
        anon_subjs = set(anon_s for anon_s in g.subjects()
                         if isinstance(anon_s, BNode)
                         and len([g.subject_predicates(anon_s)]) == 0)
        # Use the graph passed in (not always g1) so that subjects that only exist in g2 are detected
        return set(s_ for s_ in g.subjects()
                   if isinstance(s_, URIRef)).union(anon_subjs)

    expected = ""
    actual = ""

    # Step 1: Find any subjects in one graph that don't exist in the other
    g1_subjs = primary_subjects(g1)
    g2_subjs = primary_subjects(g2)
    for s in g1_subjs - g2_subjs:
        expected += PrettyGraph.strip_prefixes(complete_definition(s, g1))
    for s in g2_subjs - g1_subjs:
        actual += PrettyGraph.strip_prefixes(complete_definition(s, g2))

    # Step 2: Iterate over all of the remaining subjects comparing their contents
    for s in g1_subjs.intersection(g2_subjs):
        s_in_g1 = graph_for_subject(g1, s)
        s_in_g2 = graph_for_subject(g2, s)
        in_both, in_first, in_second = graph_diff(skolemize(s_in_g1),
                                                  skolemize(s_in_g2))
        if compare_filter:
            compare_filter(in_both, in_first, in_second)
        if len(in_first) or len(in_second):
            header = "\n\nSubject {} DIFFERENCE: ".format(s) + '=' * 30 + '\n'
            expected += header
            actual += header
            if len(in_first):
                expected += '\n'.join(dump_nt_sorted(in_first))
            if len(in_second):
                actual += '\n'.join(dump_nt_sorted(in_second))
            expected += '-' * 40
            actual += '-' * 40
    return expected, actual