def addAttributes(self, id, attributes):
    """
    Adds generic attributes in bulk to object [id] and inserts into the graph

    :param id: subject identifier/URI (a prov record supporting add_attributes)
    :param attributes: Dictionary with keys as prefix:term and value of attribute
        example: {"ncit:age":15, "ncit:gender":"M", Constants.NIDM_FAMILY_NAME:"Keator"}
    :return: None
    :raises TypeError: if a namespace prefix does not exist in the graph
    """
    # iterate through attributes
    for key in attributes.keys():
        # the prefix part of the prefix:term key must already be a namespace
        # registered in the graph (or one of the Constants.py references)
        if not self.checkNamespacePrefix(key.split(':')[0]):
            raise TypeError("Namespace prefix " + key + " not in graph, use addAttributesWithNamespaces or manually add!")
        # figure out datatype of literal
        datatype = self.getDataType(attributes[key])
        if datatype is not None:
            id.add_attributes({key: pm.Literal(attributes[key], datatype=datatype)})
        else:
            id.add_attributes({key: pm.Literal(attributes[key])})
def addLiteralAttribute(self, namespace_prefix, term, object, namespace_uri=None):
    """
    Adds generic literal and inserts into the graph

    :param namespace_prefix: namespace prefix
    :param term: predicate term to associate with tuple
    :param object: literal to add as object of tuple
    :param namespace_uri: If namespace_prefix isn't one already used then use this optional argument to define
    :return: none
    :raises TypeError: if namespace_prefix is new and namespace_uri is not supplied
    """
    # figure out datatype of literal
    datatype = self.getDataType(object)
    # an unknown prefix must come with a URI so it can be registered first
    if not self.checkNamespacePrefix(namespace_prefix):
        if namespace_uri is None:
            raise TypeError("Namespace_uri argument must be defined for new namespaces")
        else:
            self.addNamespace(namespace_prefix, namespace_uri)
    # a KeyError here means the predicate namespace was never registered
    try:
        if datatype is not None:
            self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object, datatype=datatype)})
        else:
            self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object)})
    except KeyError as e:
        print("\nPredicate namespace identifier \" %s \" not found! \n" % (str(e).split("'")[1]))
        print("Use addNamespace method to add namespace before adding literal attribute \n")
        print("No attribute has been added \n")
def addAttributesWithNamespaces(self, id, attributes):
    """
    Adds generic attributes in bulk to object [id] and inserts into the graph

    :param id: subject identifier/URI
    :param attributes: List of dictionaries with keys prefix, uri, term, value
        example: [ {uri:"http://ncitt.ncit.nih.gov/", prefix:"ncit", term:"age", value:15},
                   {uri:"http://ncitt.ncit.nih.gov/", prefix:"ncit", term:"gender", value:"M"}]
    :return: None
    :raises TypeError: if namespace prefix already exists in graph but URI is different
    """
    # iterate through list of attributes (renamed local: `tuple` shadowed the builtin)
    for attr in attributes:
        # check if namespace prefix already exists in graph
        if self.checkNamespacePrefix(attr['prefix']):
            # existing prefix must map to the same namespace URI, otherwise error
            if self.namespaces[attr['prefix']] != attr['uri']:
                # bugfix: message previously lacked the space before "already"
                raise TypeError("Namespace prefix: " + attr['prefix'] + " already exists in document")
        else:
            # register the new prefix/URI pair in the graph
            self.addNamespace(attr['prefix'], attr['uri'])
        # figure out datatype of literal
        datatype = self.getDataType(attr['value'])
        if datatype is not None:
            id.add_attributes({self.namespaces[attr['prefix']][attr['term']]: pm.Literal(attr['value'], datatype=datatype)})
        else:
            id.add_attributes({self.namespaces[attr['prefix']][attr['term']]: pm.Literal(attr['value'])})
def decode_rdf_representation(self, literal, graph):
    """Convert an rdflib term into its prov representation.

    RDF Literals are mapped to ``pm.Literal`` (with special handling for
    QName, dateTime, gYear and gYearMonth datatypes), URIRefs become
    qualified names, and anything else is returned unchanged.
    """
    if isinstance(literal, RDFLiteral):
        # fall back to the raw literal when rdflib could not parse a value
        value = literal.value if literal.value is not None else literal
        datatype = literal.datatype if hasattr(literal, 'datatype') else None
        langtag = literal.language if hasattr(literal, 'language') else None
        if datatype and 'XMLLiteral' in datatype:
            # keep XML literals intact rather than their parsed value
            value = literal
        if datatype and 'base64Binary' in datatype:
            value = base64.standard_b64encode(value)
        if datatype == XSD['QName']:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD['dateTime']:
            return dateutil.parser.parse(literal)
        if datatype == XSD['gYear']:
            return pm.Literal(dateutil.parser.parse(literal).year,
                              datatype=self.valid_identifier(datatype))
        if datatype == XSD['gYearMonth']:
            parsed_info = dateutil.parser.parse(literal)
            return pm.Literal('{0}-{1:02d}'.format(parsed_info.year, parsed_info.month),
                              datatype=self.valid_identifier(datatype))
        else:
            # The literal of standard Python types is not converted here
            # It will be automatically converted when added to a record by
            # _auto_literal_conversion()
            return pm.Literal(value, self.valid_identifier(datatype), langtag)
    elif isinstance(literal, URIRef):
        rval = self.valid_identifier(literal)
        if rval is None:
            # URI is not a known identifier: register its namespace and
            # build a qualified name from the remaining local part
            prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
            ns = self.document.add_namespace(prefix, iri)
            rval = pm.QualifiedName(ns, literal.replace(ns.uri, ''))
        return rval
    else:
        # simple type, just return it
        return literal
def prov_encode(graph, value, create_container=True):
    """Encode a Python value as a prov entity in ``graph``.

    Lists/tuples become prov collections (one entity per item), unless any
    item's encoded value is not a ``file://`` string, in which case the whole
    sequence is re-encoded as a single literal. Existing file paths get a
    location attribute plus (for regular files) a sha512 digest.
    """
    if isinstance(value, (list, tuple)) and create_container:
        value = list(value)
        if len(value) == 0:
            encoded_literal = safe_encode(value)
            attr = {pm.PROV['value']: encoded_literal}
            eid = get_attr_id(attr)
            return graph.entity(eid, attr)
        if len(value) == 1:
            # a one-element sequence is encoded as its single item
            return prov_encode(graph, value[0])
        entities = []
        for item in value:
            item_entity = prov_encode(graph, item)
            entities.append(item_entity)
            if isinstance(item, (list, tuple)):
                continue
            item_entity_val = list(item_entity.value)[0]
            # simplified from `not is_str or (is_str and ...)` -- the second
            # `is_str` was redundant. Only collections of file URIs are kept
            # as containers; otherwise fall back to a single literal.
            if not isinstance(item_entity_val, str) or 'file://' not in item_entity_val:
                return prov_encode(graph, value, create_container=False)
        eid = get_id()
        entity = graph.collection(identifier=eid)
        for item_entity in entities:
            graph.hadMember(eid, item_entity)
        return entity
    else:
        encoded_literal = safe_encode(value)
        attr = {pm.PROV['value']: encoded_literal}
        if isinstance(value, str) and os.path.exists(value):
            attr.update({pm.PROV['location']: encoded_literal})
            if not os.path.isdir(value):
                # regular file: fingerprint contents so identical files share ids
                sha512 = hash_infile(value, crypto=hashlib.sha512)
                attr.update({
                    crypto['sha512']: pm.Literal(sha512, pm.XSD['string'])
                })
                eid = get_attr_id(
                    attr, skip=[pm.PROV['location'], pm.PROV['value']])
            else:
                eid = get_attr_id(attr, skip=[pm.PROV['location']])
        else:
            eid = get_attr_id(attr)
        entity = graph.entity(eid, attr)
        return entity
def test_serialization_example_7(self):
    """
    Test the serialization of example 7 which is a basic activity.
    """
    doc = prov.ProvDocument()
    doc.add_namespace(*EX_NS)
    activity_attrs = [
        (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
        ("ex:host", "server.example.org"),
    ]
    doc.activity("ex:a1", "2011-11-16T16:05:00", "2011-11-16T16:06:00",
                 activity_attrs)
    with io.BytesIO() as actual:
        doc.serialize(format='xml', destination=actual)
        compare_xml(os.path.join(DATA_PATH, "example_07.xml"), actual)
def prov_encode(graph, value, create_container=True):
    """Encode a Python value as a prov entity in ``graph``.

    Multi-item lists become prov collections only when every item encodes to
    a ``file://`` string; otherwise the whole list is encoded as one literal.
    """
    if isinstance(value, list) and create_container:
        if len(value) == 0:
            encoded_literal = safe_encode(value)
            attr = {pm.PROV['value']: encoded_literal}
            id = get_attr_id(attr)
            entity = graph.entity(id, attr)
        elif len(value) > 1:
            # ValueError is used as control flow: any non-file item aborts
            # the container and falls back to single-literal encoding
            try:
                entities = []
                for item in value:
                    item_entity = prov_encode(graph, item)
                    entities.append(item_entity)
                    if isinstance(item, list):
                        continue
                    if not isinstance(
                            list(item_entity.value)[0], string_types):
                        raise ValueError('Not a string literal')
                    if 'file://' not in list(item_entity.value)[0]:
                        raise ValueError('No file found')
                id = get_id()
                entity = graph.collection(identifier=id)
                for item_entity in entities:
                    graph.hadMember(id, item_entity)
            except ValueError as e:
                iflogger.debug(e)
                entity = prov_encode(graph, value, create_container=False)
        else:
            # single-element list: encode the item itself
            entity = prov_encode(graph, value[0])
    else:
        encoded_literal = safe_encode(value)
        attr = {pm.PROV['value']: encoded_literal}
        if isinstance(value, string_types) and os.path.exists(value):
            attr.update({pm.PROV['location']: encoded_literal})
            if not os.path.isdir(value):
                # regular file: add a sha512 fingerprint of the contents
                sha512 = hash_infile(value, crypto=hashlib.sha512)
                attr.update(
                    {crypto['sha512']: pm.Literal(sha512, pm.XSD['string'])})
                id = get_attr_id(attr, skip=[pm.PROV['location'],
                                             pm.PROV['value']])
            else:
                id = get_attr_id(attr, skip=[pm.PROV['location']])
        else:
            id = get_attr_id(attr)
        entity = graph.entity(id, attr)
    return entity
def setEntry(rec, regNS):
    """ interpret value provided via v3 bindings, check if qualified name or value,
    handle datatypes accordingly

    Args:
        rec : a key value pair read from v3 bindings file
        regNS: the namespaces read from the context section of the v3 bindings file

    Returns:
        "prov-ified" value, value as-is as fallback
    #keys: @id (for quali)
    #      @type (for value)
    #      @value (for value)
    """
    out = rec
    try:
        if "@id" in rec:
            toks = rec["@id"].split(":")
            if len(toks) > 2:
                raise BindingFileException(
                    "Invalid Qualified Name " + rec["@id"] +
                    " found in V3 Json Binding " + repr(rec))
            # resolve the prefix against the registered namespaces
            for ns in regNS:
                if ns.prefix == toks[0]:
                    out = prov.QualifiedName(ns, toks[1])
        if "@value" in rec:
            if "@type" in rec:
                dt = rec["@type"]
                if isinstance(rec["@type"], basestring):
                    dt = xsd_datype_to_prov_datatype(dt, regNS)
                out = prov.Literal(rec["@value"], datatype=dt)
            else:
                out = rec["@value"]
    except BindingFileException:
        # bugfix: the previous bare `except:` re-wrapped our own specific
        # error in the generic "Error parsing" message -- let it propagate
        raise
    except Exception:
        # bugfix: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        raise BindingFileException("Error parsing " + repr(rec))
    return out
def safe_encode(x, as_literal=True):
    """Encodes a python value for prov

    Returns a prov Literal/URIRef when ``as_literal`` is True, otherwise the
    plain encoded value. Existing file paths become ``file://host/path`` URIs;
    unsupported objects are pickled.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return prov.Literal(value, prov.XSD['string'])
        else:
            return value
    try:
        if isinstance(x, (str, unicode)):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return prov.URIRef(value)
                except AttributeError:
                    # older prov versions without URIRef fall back to anyURI
                    return prov.Literal(value, prov.XSD['anyURI'])
            else:
                if len(x) > max_text_len:
                    # bugfix: the clip marker must be a string -- concatenating
                    # the previous list literal raised TypeError on long strings
                    value = x[:max_text_len - 13] + '...Clipped...'
                else:
                    value = x
                if not as_literal:
                    return value
                return prov.Literal(value, prov.XSD['string'])
        if isinstance(x, (int,)):
            if not as_literal:
                return x
            return prov.Literal(int(x), prov.XSD['integer'])
        if isinstance(x, (float,)):
            if not as_literal:
                return x
            return prov.Literal(x, prov.XSD['float'])
        # anything else: pickle it into an opaque literal
        if not as_literal:
            return dumps(x)
        return prov.Literal(dumps(x), nidm['pickle'])
    except TypeError as e:
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return prov.Literal(value, prov.XSD['string'])
def _add_cml_entities(g, collection, elements, kind):
    """Create one prov entity per connectome XML element and attach it to
    *collection*, carrying the element's dtype/src/name/fileformat attributes."""
    for el in elements:
        entity = g.entity(cml[get_id()])
        entity.add_extra_attributes({
            prov.PROV['type']: cml[kind],
            cml['dtype']: el.getAttribute('dtype'),
            cml['src']: el.getAttribute('src'),
            cml['name']: el.getAttribute('name'),
            # bugfix: the original connectome-data loop never read this
            # attribute and reused the value left over from the surface loop
            cml['fileformat']: el.getAttribute('fileformat'),
        })
        g.hadMember(collection, entity)


def cff2provn(filename):
    """Parse cml xml file and return a prov bundle object"""
    tree = xml.dom.minidom.parse(filename)
    collections = tree.documentElement
    g = prov.ProvBundle()
    g.add_namespace(xsd)
    g.add_namespace(dcterms)
    g.add_namespace(cml)
    url_entity = g.entity(cml[get_id()])
    url_entity.add_extra_attributes({
        prov.PROV['type']: nidm['nidm:ConnectomeFileFormat'],
        # NOTE(review): 'String' is capitalized here while other code uses
        # xsd:string -- preserved as-is, but verify against consumers
        prov.PROV['location']: prov.Literal(filename, prov.XSD['String'])
    })
    cml_collection = g.collection(cml[get_id()])
    cml_collection.add_extra_attributes({
        prov.PROV['type']: cml['connectome'],
        prov.PROV['label']: filename
    })
    g.wasDerivedFrom(cml_collection, url_entity)

    # get species, subject_name, and subject_timepoint
    species = tree.getElementsByTagName('cml:species')[0].toxml()
    species = species.replace('<cml:species>', '').replace('</cml:species>', '')
    tp = ''
    sub = ''
    tags = collections.getElementsByTagName("cml:tag")
    for t in tags:
        if t.attributes['key'].value == 'subject_name':
            sub = t.toxml()
        if t.attributes['key'].value == 'subject_timepoint':
            tp = t.toxml()
    sub = sub.replace('<cml:tag key="subject_name">', '').replace('</cml:tag>', '')
    tp = tp.replace('<cml:tag key="subject_timepoint">', '').replace('</cml:tag>', '')
    cml_meta = g.entity(cml[get_id()])
    cml_meta.add_extra_attributes({
        prov.PROV['type']: cml['connectome-meta'],
        cml['species']: species,
        cml['timepoint']: tp,
        cml['subject_name']: sub
    })
    g.hadMember(cml_collection, cml_meta)

    # the five connectome element kinds all carry the same four attributes,
    # so one helper replaces the five copy-pasted loops of the original
    for kind in ('connectome-volume', 'connectome-track', 'connectome-network',
                 'connectome-surface', 'connectome-data'):
        _add_cml_entities(g, cml_collection,
                          collections.getElementsByTagName('cml:' + kind), kind)
    return g
def add_results(self, results, keep_provenance=False):
    """Record an interface run (activity, inputs, outputs, runtime, agents)
    in the provenance graph ``self.g`` and return the graph.

    :param results: interface result object (runtime/interface/inputs/outputs)
    :param keep_provenance: when True and results carry provenance, adopt a
        deep copy of it instead of re-deriving anything
    :return: the provenance graph ``self.g``
    """
    if keep_provenance and results.provenance:
        self.g = deepcopy(results.provenance)
        return self.g
    runtime = results.runtime
    interface = results.interface
    inputs = results.inputs
    outputs = results.outputs
    classname = interface.__name__
    modulepath = "{0}.{1}".format(interface.__module__, interface.__name__)
    activitytype = ''.join([i.capitalize() for i in modulepath.split('.')])

    a0_attrs = {
        nipype_ns['module']: interface.__module__,
        nipype_ns["interface"]: classname,
        pm.PROV["type"]: nipype_ns[activitytype],
        pm.PROV["label"]: classname,
        nipype_ns['duration']: safe_encode(runtime.duration),
        nipype_ns['workingDirectory']: safe_encode(runtime.cwd),
        nipype_ns['returnCode']: safe_encode(runtime.returncode),
        nipype_ns['platform']: safe_encode(runtime.platform),
        nipype_ns['version']: safe_encode(runtime.version),
    }
    a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname, pm.XSD['anyURI'])
    # command attributes only exist for command-line interfaces
    try:
        a0_attrs.update(
            {nipype_ns['command']: safe_encode(runtime.cmdline)})
        a0_attrs.update(
            {nipype_ns['commandPath']: safe_encode(runtime.command_path)})
        a0_attrs.update(
            {nipype_ns['dependencies']: safe_encode(runtime.dependencies)})
    except AttributeError:
        pass
    a0 = self.g.activity(get_id(), runtime.startTime, runtime.endTime,
                         a0_attrs)
    # environment
    id = get_id()
    env_collection = self.g.collection(id)
    env_collection.add_attributes({
        pm.PROV['type']: nipype_ns['Environment'],
        pm.PROV['label']: "Environment"
    })
    self.g.used(a0, id)
    # write environment entities (only whitelisted variables)
    for idx, (key, val) in enumerate(sorted(runtime.environ.items())):
        if key not in PROV_ENVVARS:
            continue
        in_attr = {
            pm.PROV["label"]: key,
            nipype_ns["environmentVariable"]: key,
            pm.PROV["value"]: safe_encode(val)
        }
        id = get_attr_id(in_attr)
        self.g.entity(id, in_attr)
        self.g.hadMember(env_collection, id)
    # write input entities
    if inputs:
        id = get_id()
        input_collection = self.g.collection(id)
        input_collection.add_attributes({
            pm.PROV['type']: nipype_ns['Inputs'],
            pm.PROV['label']: "Inputs"
        })
        # write input entities
        for idx, (key, val) in enumerate(sorted(inputs.items())):
            in_entity = prov_encode(self.g, val).identifier
            self.g.hadMember(input_collection, in_entity)
            used_attr = {pm.PROV["label"]: key, nipype_ns["inPort"]: key}
            self.g.used(activity=a0, entity=in_entity,
                        other_attributes=used_attr)
    # write output entities
    if outputs:
        id = get_id()
        output_collection = self.g.collection(id)
        if not isinstance(outputs, dict):
            outputs = outputs.get_traitsfree()
        output_collection.add_attributes({
            pm.PROV['type']: nipype_ns['Outputs'],
            pm.PROV['label']: "Outputs"
        })
        self.g.wasGeneratedBy(output_collection, a0)
        # write output entities
        for idx, (key, val) in enumerate(sorted(outputs.items())):
            out_entity = prov_encode(self.g, val).identifier
            self.g.hadMember(output_collection, out_entity)
            gen_attr = {pm.PROV["label"]: key, nipype_ns["outPort"]: key}
            self.g.generation(out_entity, activity=a0,
                              other_attributes=gen_attr)
    # write runtime entities
    id = get_id()
    runtime_collection = self.g.collection(id)
    runtime_collection.add_attributes({
        pm.PROV['type']: nipype_ns['Runtime'],
        pm.PROV['label']: "RuntimeInfo"
    })
    self.g.wasGeneratedBy(runtime_collection, a0)
    for key, value in sorted(runtime.items()):
        if not value:
            continue
        if key not in ['stdout', 'stderr', 'merged']:
            continue
        attr = {pm.PROV["label"]: key, nipype_ns[key]: safe_encode(value)}
        id = get_id()
        # bugfix: entity was created under a second get_id() while membership
        # used `id`, leaving the collection pointing at a nonexistent entity
        self.g.entity(id, attr)
        self.g.hadMember(runtime_collection, id)
    # create agents
    user_attr = {
        pm.PROV["type"]: pm.PROV["Person"],
        pm.PROV["label"]: getpass.getuser(),
        foaf["name"]: safe_encode(getpass.getuser())
    }
    user_agent = self.g.agent(get_attr_id(user_attr), user_attr)
    agent_attr = {
        pm.PROV["type"]: pm.PROV["SoftwareAgent"],
        pm.PROV["label"]: "Nipype",
        foaf["name"]: safe_encode("Nipype"),
        nipype_ns["version"]: __version__
    }
    for key, value in list(get_info().items()):
        agent_attr.update({nipype_ns[key]: safe_encode(value)})
    software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
    self.g.wasAssociatedWith(
        a0, user_agent, None, None,
        {pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
    self.g.wasAssociatedWith(a0, software_agent)
    return self.g
def safe_encode(x, as_literal=True):
    """ Encodes a python value for prov

    Strings that are existing paths become file:// URIs; long strings are
    clipped; dicts and heterogeneous lists are JSON-encoded (element-wise
    via recursion); other objects fall back to __dict__ JSON or pickle.
    With as_literal=False the plain encoded value is returned instead of a
    pm.Literal.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        else:
            return value
    if isinstance(x, (str, bytes)):
        if isinstance(x, bytes):
            x = str(x, 'utf-8')
        if os.path.exists(x):
            # NOTE(review): comparison against os.pathsep (':' on POSIX) looks
            # like it was meant to be os.sep to test for an absolute path --
            # preserved as-is, confirm intent
            if x[0] != os.pathsep:
                x = os.path.abspath(x)
            value = 'file://{}{}'.format(platform.node().lower(), x)
            if not as_literal:
                return value
            try:
                return pm.URIRef(value)
            except AttributeError:
                # older prov versions without URIRef fall back to anyURI
                return pm.Literal(value, pm.XSD['anyURI'])
        else:
            value = x
            if len(x) > max_text_len:
                cliptxt = '...Clipped...'
                value = x[:max_text_len - len(cliptxt)] + cliptxt
            if not as_literal:
                return value
            return pm.Literal(value, pm.XSD['string'])
    if isinstance(x, int):
        if not as_literal:
            return x
        return pm.Literal(int(x), pm.XSD['integer'])
    if isinstance(x, float):
        if not as_literal:
            return x
        return pm.Literal(x, pm.XSD['float'])
    if isinstance(x, dict):
        outdict = {}
        for key, value in list(x.items()):
            encoded_value = safe_encode(value, as_literal=False)
            if isinstance(encoded_value, pm.Literal):
                outdict[key] = encoded_value.json_representation()
            else:
                outdict[key] = encoded_value
        try:
            jsonstr = json.dumps(outdict)
        except UnicodeDecodeError as excp:
            # NOTE(review): on Python 3 json.dumps raises TypeError, not
            # UnicodeDecodeError, so this handler may be unreachable -- confirm
            jsonstr = "Could not encode dictionary. {}".format(excp)
            iflogger.warn('Prov: %s', jsonstr)
        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])
    if isinstance(x, (list, tuple)):
        x = list(x)
        is_object = False
        try:
            nptype = np.array(x).dtype
            is_object = nptype == np.dtype(object)
        except ValueError:
            # ragged/unconvertible arrays count as heterogeneous
            is_object = True
        # If the array contains an heterogeneous mixture of data types
        # they should be encoded sequentially
        if is_object:
            outlist = []
            for value in x:
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, pm.Literal):
                    outlist.append(encoded_value.json_representation())
                else:
                    outlist.append(encoded_value)
            x = outlist
        try:
            jsonstr = json.dumps(x)
        except UnicodeDecodeError as excp:
            jsonstr = "Could not encode list/tuple. {}".format(excp)
            iflogger.warn('Prov: %s', jsonstr)
        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])
    # If is a literal, and as_literal do nothing.
    # else bring back to json.
    if isinstance(x, pm.Literal):
        if as_literal:
            return x
        return dumps(x.json_representation())
    jsonstr = None
    ltype = pm.XSD['string']
    try:
        jsonstr = json.dumps(x.__dict__)
    except AttributeError:
        pass
    if jsonstr is None:
        # no __dict__: last resort is pickling the object
        try:
            jsonstr = dumps(x)
            ltype = nipype_ns['pickle']
        except TypeError as excp:
            jsonstr = 'Could not encode object. {}'.format(excp)
    if not as_literal:
        return jsonstr
    return pm.Literal(jsonstr, ltype)
def safe_encode(x, as_literal=True):
    """Encodes a python value for prov

    Existing paths become file:// URIs, long strings are clipped, dicts and
    heterogeneous lists are JSON-encoded, everything else is pickled. With
    as_literal=False the plain encoded value is returned instead of a Literal.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        else:
            return value
    try:
        if isinstance(x, (str, unicode)):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return pm.URIRef(value)
                except AttributeError:
                    # older prov versions without URIRef fall back to anyURI
                    return pm.Literal(value, pm.XSD['anyURI'])
            else:
                if len(x) > max_text_len:
                    # bugfix: the clip marker must be a string -- concatenating
                    # the previous list literal raised TypeError on long strings
                    value = x[:max_text_len - 13] + '...Clipped...'
                else:
                    value = x
                if not as_literal:
                    return value
                return pm.Literal(value, pm.XSD['string'])
        if isinstance(x, (int, )):
            if not as_literal:
                return x
            return pm.Literal(int(x), pm.XSD['integer'])
        if isinstance(x, (float, )):
            if not as_literal:
                return x
            return pm.Literal(x, pm.XSD['float'])
        if isinstance(x, dict):
            outdict = {}
            for key, value in x.items():
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, (pm.Literal, )):
                    outdict[key] = encoded_value.json_representation()
                else:
                    outdict[key] = encoded_value
            if not as_literal:
                return json.dumps(outdict)
            return pm.Literal(json.dumps(outdict), pm.XSD['string'])
        if isinstance(x, list):
            # heterogeneous (dtype object / unconvertible) lists are encoded
            # element-wise; homogeneous ones are dumped directly
            try:
                nptype = np.array(x).dtype
                if nptype == np.dtype(object):
                    raise ValueError('dtype object')
            except ValueError:
                outlist = []
                for value in x:
                    encoded_value = safe_encode(value, as_literal=False)
                    if isinstance(encoded_value, (pm.Literal, )):
                        outlist.append(encoded_value.json_representation())
                    else:
                        outlist.append(encoded_value)
            else:
                outlist = x
            if not as_literal:
                return json.dumps(outlist)
            return pm.Literal(json.dumps(outlist), pm.XSD['string'])
        # anything else: pickle into an opaque literal
        if not as_literal:
            return dumps(x)
        return pm.Literal(dumps(x), nipype_ns['pickle'])
    except TypeError as e:
        iflogger.info(e)
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return pm.Literal(value, pm.XSD['string'])
def add_results(self, results):
    """Record an interface run (activity, inputs, outputs, runtime, agents)
    in the provenance graph ``self.g`` and return the graph.

    If the results already carry provenance, that bundle is adopted directly
    (re-identified when a duplicate id clashes).
    """
    if results.provenance:
        try:
            self.g.add_bundle(results.provenance)
        except pm.ProvException:
            # duplicate bundle identifier: add under a fresh id
            self.g.add_bundle(results.provenance, get_id())
        return self.g
    runtime = results.runtime
    interface = results.interface
    inputs = results.inputs
    outputs = results.outputs
    classname = interface.__name__
    a0_attrs = {
        nipype_ns['module']: interface.__module__,
        nipype_ns["interface"]: classname,
        pm.PROV["label"]: classname,
        nipype_ns['duration']: safe_encode(runtime.duration),
        nipype_ns['working_directory']: safe_encode(runtime.cwd),
        nipype_ns['return_code']: safe_encode(runtime.returncode),
        nipype_ns['platform']: safe_encode(runtime.platform),
        nipype_ns['version']: safe_encode(runtime.version),
    }
    try:
        a0_attrs[foaf["host"]] = pm.URIRef(runtime.hostname)
    except AttributeError:
        # prov builds without URIRef: store the host as an anyURI literal
        a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname,
                                            pm.XSD['anyURI'])
    # command attributes only exist for command-line interfaces
    try:
        a0_attrs.update(
            {nipype_ns['command']: safe_encode(runtime.cmdline)})
        a0_attrs.update(
            {nipype_ns['command_path']: safe_encode(runtime.command_path)})
        a0_attrs.update(
            {nipype_ns['dependencies']: safe_encode(runtime.dependencies)})
    except AttributeError:
        pass
    a0 = self.g.activity(get_id(), runtime.startTime, runtime.endTime,
                         a0_attrs)
    # environment
    id = get_id()
    env_collection = self.g.collection(id)
    env_collection.add_extra_attributes({
        pm.PROV['type']: nipype_ns['environment'],
        pm.PROV['label']: "Environment"
    })
    self.g.used(a0, id)
    # write environment entities (only whitelisted variables)
    for idx, (key, val) in enumerate(sorted(runtime.environ.items())):
        if key not in [
                'PATH', 'FSLDIR', 'FREESURFER_HOME', 'ANTSPATH',
                'CAMINOPATH', 'CLASSPATH', 'LD_LIBRARY_PATH',
                'DYLD_LIBRARY_PATH', 'FIX_VERTEX_AREA', 'FSF_OUTPUT_FORMAT',
                'FSLCONFDIR', 'FSLOUTPUTTYPE', 'LOGNAME', 'USER',
                'MKL_NUM_THREADS', 'OMP_NUM_THREADS'
        ]:
            continue
        in_attr = {
            pm.PROV["label"]: key,
            nipype_ns["environment_variable"]: key,
            pm.PROV["value"]: safe_encode(val)
        }
        id = get_attr_id(in_attr)
        self.g.entity(id, in_attr)
        self.g.hadMember(env_collection, id)
    # write input entities
    if inputs:
        id = get_id()
        input_collection = self.g.collection(id)
        input_collection.add_extra_attributes({
            pm.PROV['type']: nipype_ns['inputs'],
            pm.PROV['label']: "Inputs"
        })
        # write input entities
        for idx, (key, val) in enumerate(sorted(inputs.items())):
            in_entity = prov_encode(self.g, val).get_identifier()
            self.g.hadMember(input_collection, in_entity)
            used_attr = {pm.PROV["label"]: key, nipype_ns["in_port"]: key}
            self.g.used(activity=a0, entity=in_entity,
                        other_attributes=used_attr)
    # write output entities
    if outputs:
        id = get_id()
        output_collection = self.g.collection(id)
        if not isinstance(outputs, dict):
            outputs = outputs.get_traitsfree()
        output_collection.add_extra_attributes({
            pm.PROV['type']: nipype_ns['outputs'],
            pm.PROV['label']: "Outputs"
        })
        self.g.wasGeneratedBy(output_collection, a0)
        # write output entities
        for idx, (key, val) in enumerate(sorted(outputs.items())):
            out_entity = prov_encode(self.g, val).get_identifier()
            self.g.hadMember(output_collection, out_entity)
            gen_attr = {pm.PROV["label"]: key, nipype_ns["out_port"]: key}
            self.g.generation(out_entity, activity=a0,
                              other_attributes=gen_attr)
    # write runtime entities
    id = get_id()
    runtime_collection = self.g.collection(id)
    runtime_collection.add_extra_attributes({
        pm.PROV['type']: nipype_ns['runtime'],
        pm.PROV['label']: "RuntimeInfo"
    })
    self.g.wasGeneratedBy(runtime_collection, a0)
    for key, value in sorted(runtime.items()):
        if not value:
            continue
        if key not in ['stdout', 'stderr', 'merged']:
            continue
        attr = {pm.PROV["label"]: key, nipype_ns[key]: safe_encode(value)}
        id = get_id()
        # bugfix: entity was created under a second get_id() while membership
        # used `id`, leaving the collection pointing at a nonexistent entity
        self.g.entity(id, attr)
        self.g.hadMember(runtime_collection, id)
    # create agents
    user_attr = {
        pm.PROV["type"]: pm.PROV["Person"],
        pm.PROV["label"]: pwd.getpwuid(os.geteuid()).pw_name,
        foaf["name"]: safe_encode(pwd.getpwuid(os.geteuid()).pw_name)
    }
    user_agent = self.g.agent(get_attr_id(user_attr), user_attr)
    agent_attr = {
        pm.PROV["type"]: pm.PROV["SoftwareAgent"],
        pm.PROV["label"]: "Nipype",
        foaf["name"]: safe_encode("Nipype")
    }
    for key, value in get_info().items():
        agent_attr.update({nipype_ns[key]: safe_encode(value)})
    software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
    self.g.wasAssociatedWith(
        a0, user_agent, None, None,
        {pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
    self.g.wasAssociatedWith(a0, software_agent)
    return self.g
def parse_stats(g, fs_stat_file, entity_uri):
    """Convert stats file to a nidm object

    :param g: prov bundle/document to populate
    :param fs_stat_file: FreeSurfer stats file to parse
    :param entity_uri: entity the stats collection is derived from
    :return: tuple of (populated graph, rdflib graph of measure definitions)
    """
    header, tableinfo, measures = read_stats(fs_stat_file)

    # idiom fix: named def instead of a lambda bound to a name (PEP 8 / E731)
    def get_id():
        return niiri[uuid.uuid1().hex]

    a0 = g.activity(get_id(), startTime=dt.isoformat(dt.utcnow()))
    user_agent = g.agent(get_id(), {
        prov.PROV["type"]: prov.PROV["Person"],
        prov.PROV["label"]: pwd.getpwuid(os.geteuid()).pw_name,
        foaf["name"]: pwd.getpwuid(os.geteuid()).pw_name})
    g.wasAssociatedWith(a0, user_agent, None, None,
                        {prov.PROV["Role"]: "LoggedInUser"})
    stat_collection = g.collection(get_id())
    stat_collection.add_extra_attributes(
        {prov.PROV['type']: fs['FreeSurferStatsCollection']})
    # header elements
    statheader_collection = g.entity(get_id())
    attributes = {prov.PROV['type']: fs['StatFileHeader']}
    for key, value in header.items():
        attributes[fs[key.replace('.c', '-c')]] = value
    statheader_collection.add_extra_attributes(attributes)
    # measures
    struct_info = {}
    measure_list = []
    measure_graph = rdflib.ConjunctiveGraph()
    measure_graph.namespace_manager.bind('fs', fs.get_uri())
    measure_graph.namespace_manager.bind('nidm', nidm.get_uri())
    unknown_units = {'unitless', 'NA'}
    for measure in measures:
        obj_attr = []
        struct_uri = fs[measure['structure'].replace('.', '-')]
        if measure['source'] == 'Header':
            measure_name = measure['name']
            # register each measure definition only once in the side graph
            if measure_name not in measure_list:
                measure_list.append(measure_name)
                measure_uri = fs[measure_name].rdf_representation()
                measure_graph.add((measure_uri, rdflib.RDF['type'],
                                   fs['Measure'].rdf_representation()))
                measure_graph.add((measure_uri, rdflib.RDFS['label'],
                                   rdflib.Literal(measure['description'])))
                measure_graph.add((measure_uri,
                                   nidm['unitsLabel'].rdf_representation(),
                                   rdflib.Literal(measure['units'])))
            obj_attr.append((nidm["anatomicalAnnotation"], struct_uri))
            # dot-less values with unknown units are treated as integers
            if str(measure['units']) in unknown_units and \
                    '.' not in measure['value']:
                valref = prov.Literal(int(measure['value']),
                                      prov.XSD['integer'])
            else:
                valref = prov.Literal(float(measure['value']),
                                      prov.XSD['float'])
            obj_attr.append((fs[measure_name], valref))
        elif measure['source'] == 'Table':
            obj_attr.append((nidm["anatomicalAnnotation"], struct_uri))
            for column_info in measure['items']:
                measure_name = column_info['name']
                if column_info['units'] in unknown_units and \
                        '.' not in column_info['value']:
                    valref = prov.Literal(int(column_info['value']),
                                          prov.XSD['integer'])
                else:
                    valref = prov.Literal(float(column_info['value']),
                                          prov.XSD['float'])
                obj_attr.append((fs[measure_name], valref))
                if measure_name not in measure_list:
                    measure_list.append(measure_name)
                    measure_uri = fs[measure_name].rdf_representation()
                    measure_graph.add((measure_uri, rdflib.RDF['type'],
                                       fs['Measure'].rdf_representation()))
                    measure_graph.add((measure_uri, rdflib.RDFS['label'],
                                       rdflib.Literal(
                                           column_info['description'])))
                    measure_graph.add((measure_uri,
                                       nidm['unitsLabel'].rdf_representation(),
                                       rdflib.Literal(column_info['units'])))
        id = get_id()
        if struct_uri in struct_info:
            # NOTE(review): for a repeated structure the membership below uses
            # the fresh `id` even though no entity is created under it --
            # preserved as-is, but this looks like it should reference euri
            euri = struct_info[struct_uri]
            euri.add_extra_attributes(obj_attr)
        else:
            euri = g.entity(id, obj_attr)
            struct_info[struct_uri] = euri
        g.hadMember(stat_collection, id)
    g.hadMember(stat_collection, statheader_collection)
    g.derivation(stat_collection, entity_uri)
    g.wasGeneratedBy(stat_collection, a0)
    return g, measure_graph