def add_commit(graph: ProvDocument, package: CommitModelPackage) -> ProvDocument:
    """Record a commit in the graph.

    Declares one agent for the package's author and one for its committer,
    declares the commit activity, then associates the activity with both
    agents. The graph that was passed in is returned.
    """
    agents = (package.author, package.committer)
    commit = package.commit

    for agent in agents:
        graph.agent(*agent)
    graph.activity(*commit)
    for agent in agents:
        graph.wasAssociatedWith(commit.id, agent.id)
    return graph
def job2prov(job):
    """
    Create ProvDocument based on job description

    The job description is read from ``job.jdl.content``. According to the
    notes kept with this function it has the following layout::

        job.jdl.content = {
            'description': description,
            'parameters': parameters,
            'results': results,
            'executionduration': execdur,
            'quote': quote
        }
        parameters[pname] = {
            'type': p.get('type'),
            'required': p.get('required'),
            'default': p.get('default'),
            'description': list(p)[0].text,
        }
        results[r.get('value')] = {
            'mediaType': r.get('mediaType'),
            'default': r.get('default'),
            'description': list(r)[0].text,
        }

    Only ``parameters`` and ``results`` are read here, together with
    ``job.jobname``, ``job.jobid``, ``job.start_time`` and ``job.end_time``.

    :param job: UWS job
    :return: ProvDocument
    """
    pdoc = ProvDocument()
    # Declaring namespaces for various prefixes used in the example
    pdoc.add_namespace('prov', 'http://www.w3.org/ns/prov#')
    pdoc.add_namespace('voprov', 'http://www.ivoa.net/ns/voprov#')
    pdoc.add_namespace('cta', 'http://www.cta-observatory.org#')
    pdoc.add_namespace('uwsdata', 'https://voparis-uws-test.obspm.fr/rest/' + job.jobname + '/' + job.jobid + '/')
    pdoc.add_namespace('ctajobs', 'http://www.cta-observatory.org#')

    # Adding an activity
    ctbin = pdoc.activity('ctajobs:' + job.jobname, job.start_time, job.end_time)
    # TODO: add job description, version, url, ...

    # Agent
    pdoc.agent('cta:consortium', other_attributes={'prov:type': "Organization"})
    pdoc.wasAssociatedWith(ctbin, 'cta:consortium')

    # Entities, in and out with relations
    e_in = []
    # .items() instead of the Python-2-only .iteritems(); works on both.
    for pname, pdict in job.jdl.content['parameters'].items():
        # The type comes from p.get('type') (see docstring) and may be None;
        # treat that as "no type" instead of raising TypeError on `in`.
        ptype = pdict['type'] or ''
        if any(marker in ptype for marker in ('file', 'xs:anyURI')):
            entity = pdoc.entity('uwsdata:parameters/' + pname)
            # TODO: use publisher_did? add prov attributes, add voprov attributes?
            e_in.append(entity)
            ctbin.used(entity)

    e_out = []
    # Only the result names are needed, so iterate the keys.
    for rname in job.jdl.content['results']:
        entity = pdoc.entity('uwsdata:results/' + rname)
        # TODO: use publisher_did? add prov attributes, add voprov attributes?
        e_out.append(entity)
        entity.wasGeneratedBy(ctbin)
        # Every result is recorded as derived from every input entity.
        for source in e_in:
            entity.wasDerivedFrom(source)
    return pdoc
def ctfToProv():
    """Build a PROV document from the events of the global ``trace_collection``.

    For each event, in trace order, the document receives:

    * an entity ``ex:event<N>`` carrying the event's fields (each key
      prefixed with ``ex:``) plus ``ex:timestamp`` (the event timestamp
      divided by 1000000000);
    * an agent for the event's ``producer_id`` and one for its
      ``controller_id``;
    * an activity ``ex:<activity><N>`` that generated the entity and is
      associated with the producer agent;
    * a ``used`` relation from controller to producer, added once per
      distinct (controller, producer) pair;
    * a ``wasAssociatedWith`` relation from the previous event's entity to
      this one, used here as a temporal link between consecutive events.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # namespaces do not need to be explicitly added to a document
    ex = Namespace('ex', 'http://example/')
    #data = event_field(os.path.join(trace_path,'../config.yaml'))

    # (controller_id, producer_id) pairs already linked. A set of id pairs
    # replaces the former list of rendered relation strings: same
    # de-duplication, O(1) lookups, and no throw-away document to fill.
    linked_pairs = set()
    previous_entity = None

    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000
        #dataset.update({'ex:'+'name':event.name})

        e1 = d1.entity(ex['event' + str(counter)], dataset)
        producer_agent = d1.agent('ex:' + event['producer_id'])
        controller_agent = d1.agent('ex:' + event['controller_id'])
        activity = d1.activity('ex:' + event['activity'] + str(counter))
        d1.wasGeneratedBy(e1, activity)

        # Every activity is associated with its producer agent.
        d1.wasAssociatedWith(activity, producer_agent)

        # Add producer agent to controller agent if it has not been added yet.
        pair = (event['controller_id'], event['producer_id'])
        if pair not in linked_pairs:
            d1.used(controller_agent, producer_agent)
            linked_pairs.add(pair)

        # Add temporal relationship between this event and the previous one.
        # NOTE(review): wasAssociatedWith is applied to two entities here;
        # kept as in the original.
        if previous_entity is not None:
            d1.wasAssociatedWith(previous_entity, e1)
        previous_entity = e1

    return d1
def gen_prov_graph(file_path, option):
    '''
    Generate a PROV graph from a form JSON file.

    The JSON document must provide a ``workflow`` list of
    ``[step_name, step]`` pairs and, for every step name, a mapping with
    the keys ``entity_in``, ``entity_out``, ``agent`` and ``activity``;
    each of those carries an ``i_name`` used as the node identifier.

    :param file_path: path of the JSON form file
    :param option: ``"all"`` adds the (prefixed) attribute dictionaries to
        the nodes; any other value creates bare nodes only
    :return: ProvDocument describing the workflow
    '''
    # Context manager so the file is closed even if parsing fails.
    with open(file_path, "r") as form_file:
        sf_dict = json.load(form_file)

    d1 = ProvDocument()
    d1.add_namespace('subm', 'http://www.enes.org/enes_entity/data_submsission')

    global_in_out = d1.entity("subm:" + "form_name_xx")

    print("workflow definition: ", sf_dict['workflow'])
    for [act_name, _act] in sf_dict['workflow']:
        print("adding entities for workflow_step: ", act_name)
        step = sf_dict[act_name]
        entity_in_dict = step['entity_in']
        entity_out_dict = step['entity_out']
        agent_dict = step['agent']
        activity_dict = step['activity']

        # generate nodes
        in_node = d1.entity("subm:" + entity_in_dict['i_name'])
        out_node = d1.entity("subm:" + entity_out_dict['i_name'])
        agent = d1.agent("subm:" + agent_dict['i_name'])
        activity = d1.activity("subm:" + activity_dict['i_name'])

        # clean up and prefix dictionaries
        entity_in_dict = prefix_dict(entity_in_dict, 'subm')
        entity_out_dict = prefix_dict(entity_out_dict, 'subm')
        agent_dict = prefix_dict(agent_dict, 'subm')
        activity_dict = prefix_dict(activity_dict, 'subm')

        if option == "all":
            in_node.add_attributes(entity_in_dict)
            out_node.add_attributes(entity_out_dict)
            agent.add_attributes(agent_dict)
            activity.add_attributes(activity_dict)

        # connect nodes in graph
        d1.wasGeneratedBy(out_node, activity)
        d1.used(activity, in_node)
        d1.wasAssociatedWith(activity, agent)
        # wasDerivedFrom takes (generated entity, used entity): the step's
        # output derives from its input, matching the two relations above.
        d1.wasDerivedFrom(out_node, in_node)

        # Each step both reads and writes the global form entity.
        d1.used(activity, global_in_out)
        d1.wasGeneratedBy(global_in_out, activity)

    return d1
def ctfToProv():
    """Build a PROV document from the events of the global ``trace_collection``.

    For each event, in trace order, the document receives:

    * an entity ``ex:event<N>`` carrying the event's fields (each key
      prefixed with ``ex:``) plus ``ex:timestamp`` (the event timestamp
      divided by 1000000000);
    * an agent for the event's ``producer_id``;
    * a ``wasAssociatedWith`` relation from the previous entity of the
      *same producer* to this entity (skipped for a producer's first
      event), used here as a per-producer temporal link;
    * an agent for the event's ``controller_id``;
    * an activity ``ex:<activity><N>`` that generated the entity and is
      associated with the producer agent;
    * a ``used`` relation from controller to producer, added once per
      distinct (controller, producer) pair.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # namespaces do not need to be explicitly added to a document
    ex = Namespace('ex', 'http://example/')
    #data = event_field(os.path.join(trace_path,'../config.yaml'))

    # (controller_id, producer_id) pairs that already have a `used` relation.
    linked_pairs = set()
    # Most recent entity seen for each producer id.
    last_entity_by_producer = {}

    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000
        #dataset.update({'ex:'+'name':event.name})

        e1 = d1.entity(ex['event' + str(counter)], dataset)
        producer_id = event['producer_id']
        producer_agent = d1.agent('ex:' + producer_id)

        # Chain this event to the previous one from the same producer.
        # The earlier code looked the list up through an undefined name
        # (`events`) and never stored a producer's first event, so the
        # chain could not be built; only the latest entity is needed.
        previous_entity = last_entity_by_producer.get(producer_id)
        if previous_entity is not None:
            # NOTE(review): wasAssociatedWith is applied to two entities
            # here; kept as in the original.
            d1.wasAssociatedWith(previous_entity, e1)
        last_entity_by_producer[producer_id] = e1

        controller_agent = d1.agent('ex:' + event['controller_id'])
        activity = d1.activity('ex:' + event['activity'] + str(counter))
        d1.wasGeneratedBy(e1, activity)

        # Every activity is associated with its producer agent.
        d1.wasAssociatedWith(activity, producer_agent)

        # Add producer agent to controller agent if it has not been added yet.
        pair = (event['controller_id'], producer_id)
        if pair not in linked_pairs:
            d1.used(controller_agent, producer_agent)
            linked_pairs.add(pair)

    return d1
def add_resource_creation(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Record the creation of a resource in the graph.

    ``package.creation`` is unpacked into (creator, creation activity,
    resource, resource version). The activity, both entities and the
    creator agent are declared. The activity is associated with the
    creator; both entities are attributed to the creator and generated by
    the activity; the version is a specialization of the resource.
    The graph that was passed in is returned.
    """
    creator, creation, resource, resource_version = package.creation
    created = (resource, resource_version)

    # Node declarations.
    graph.activity(*creation)
    for item in created:
        graph.entity(*item)
    graph.agent(*creator)

    # Relations.
    graph.wasAssociatedWith(creation.id, creator.id)
    for item in created:
        graph.wasAttributedTo(item.id, creator.id)
    for item in created:
        graph.wasGeneratedBy(item.id, creation.id)
    graph.specializationOf(resource_version.id, resource.id)
    return graph
def _create_trial_info(document: provo.ProvDocument, trial: Trial, suffix=""):
    """Add the agent, activity and association that describe a trial.

    The agent identifier is ``trial.script`` with every "." replaced by
    "_", followed by ``suffix``. The activity is named
    ``trial<id>Execution`` and spans ``trial.start`` to ``trial.finish``.
    The association between the two is identified as
    ``trial<id>ExecutionByScript``. Nothing is returned.
    """
    # Characters that are replaced by "_" in the agent identifier.
    forbidden_chars = ["."]
    script_name = trial.script
    for forbidden in forbidden_chars:
        script_name = script_name.replace(forbidden, "_")
    agent_id = "{}{}".format(script_name, suffix)

    agent_attributes = [
        (provo.PROV_TYPE, provo.PROV["SoftwareAgent"]),
        ("codeHash", trial.code_hash),
        ("script", trial.script),
        ("id", trial.id),
    ]
    document.agent(agent_id, agent_attributes)

    activity_id = "trial{}Execution".format(trial.id)
    activity_attributes = [
        ("nowCommand", trial.command),
        ("parentId", trial.parent_id),
        ("inheritedId", trial.inherited_id),
    ]
    document.activity(activity_id, trial.start, trial.finish, activity_attributes)

    association_id = "trial{}ExecutionByScript".format(trial.id)
    document.wasAssociatedWith(activity_id, agent_id, None, association_id)
def add_event_chain(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Add chain of events beginning at the creation event.

    Every link of ``package.event_chain`` is a (user, event, resource,
    resource version) tuple. For each link the four nodes are declared,
    the event is associated with the user, the version is attributed to
    the user and marked as a specialization of the resource. From the
    second link on, the link is also tied to its predecessor: the version
    is generated by the event, the event used the previous version, the
    version is derived from the previous version, and the event was
    informed by the previous event. The graph that was passed in is
    returned.
    """
    # (event, resource_version) of the link handled before the current one.
    predecessor = None

    for user, event, resource, version in package.event_chain:
        for item in (resource, version):
            graph.entity(*item)
        graph.activity(*event)
        graph.agent(*user)

        graph.wasAssociatedWith(event.id, user.id)
        graph.wasAttributedTo(version.id, user.id)
        graph.specializationOf(version.id, resource.id)

        if predecessor is not None:
            earlier_event, earlier_version = predecessor
            # The predecessor's nodes are declared again, as before.
            graph.entity(*earlier_version)
            graph.activity(*earlier_event)
            graph.wasGeneratedBy(version.id, event.id)
            graph.used(event.id, earlier_version.id)
            graph.wasDerivedFrom(version.id, earlier_version.id)
            graph.wasInformedBy(event.id, earlier_event.id)

        predecessor = (event, version)

    return graph
def release_tag_model(graph: ProvDocument, packages: ReleaseTagPackage):
    """Add releases, tags and tagged commits to the graph.

    Each package may carry a ``release_package``, a ``tag_package`` and a
    ``commit_package``; a part that is ``None`` is skipped. For every
    present part the user agent, the entity and the event activity are
    declared, and the entity is generated by the event, attributed to the
    user, with the event associated with the user. ``hadMember`` relations
    are added for each release asset and the release evidence (with the
    release), for the tag (with the release, when one is present) and for
    the commit (with the tag, when one is present). The graph that was
    passed in is returned.
    """
    for package in packages:
        release_part = package.release_package
        tag_part = package.tag_package
        commit_part = package.commit_package
        has_release = release_part is not None
        has_tag = tag_part is not None

        if has_release:
            r_user, release, release_event, release_evidence, assets = release_part
            graph.agent(*r_user)
            graph.entity(*release)
            graph.activity(*release_event)
            graph.entity(*release_evidence)
            for asset in assets:
                graph.entity(*asset)
                graph.hadMember(asset.id, release.id)
            graph.hadMember(release_evidence.id, release.id)
            graph.wasGeneratedBy(release.id, release_event.id)
            graph.wasAttributedTo(release.id, r_user.id)
            graph.wasAssociatedWith(release_event.id, r_user.id)

        if has_tag:
            t_user, tag, tag_event = tag_part
            graph.agent(*t_user)
            graph.entity(*tag)
            graph.activity(*tag_event)
            if has_release:
                graph.hadMember(tag.id, release.id)
            graph.wasGeneratedBy(tag.id, tag_event.id)
            graph.wasAttributedTo(tag.id, t_user.id)
            graph.wasAssociatedWith(tag_event.id, t_user.id)

        if commit_part is not None:
            # Positions 2 and 4 of the commit package are not used here.
            author, commit_event, _, commit, _ = commit_part
            graph.agent(*author)
            graph.activity(*commit_event)
            graph.entity(*commit)
            if has_tag:
                graph.hadMember(commit.id, tag.id)
            graph.wasGeneratedBy(commit.id, commit_event.id)
            graph.wasAttributedTo(commit.id, author.id)
            graph.wasAssociatedWith(commit_event.id, author.id)

    return graph
def ctfToProv():
    """Build a PROV document from the events of the global ``trace_collection``.

    Events are grouped by ``producer_id``. Each event becomes an entity
    ``ex:event<N>`` carrying the event's fields (keys prefixed with
    ``ex:``) plus ``ex:name``. The entity is stored, together with the
    event's ``activity`` value, in an ``entityActivity`` holder.

    After all events are read, every producer gets an agent and a ``used``
    relation from the controller agent. Within a producer's group each
    entity is linked to the next one with ``wasAssociatedWith`` (used here
    as a temporal link), recorded as generated by its activity, and the
    activity is associated with the producer agent.

    NOTE(review): the controller agent used for every producer is the one
    created for the *last* event read; confirm that a single controller
    per trace is expected.
    NOTE(review): ``getActivity()`` presumably returns the raw
    ``event['activity']`` value stored above, which has no ``ex:`` prefix;
    verify against ``entityActivity`` that it is a usable identifier.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # namespaces do not need to be explicitly added to a document
    ex = Namespace('ex', 'http://example/')
    #data = event_field(os.path.join(trace_path,'../config.yaml'))

    can_events = defaultdict(list)
    controller_agent = None

    # enumerate() gives each entity its own index. The counter was
    # previously never incremented, so every entity was named ex:event0.
    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        dataset['ex:name'] = event.name

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        # create class object to store entity and activity data field.
        entity_activity = entityActivity()
        entity_activity.addEntityActivity(e1, event['activity'])
        can_events[event['producer_id']].append(entity_activity)

        controller_agent = d1.agent('ex:' + event['controller_id'])

    # Producer identifiers that already have a `used` relation.
    linked_producers = set()
    for key, chain in can_events.items():
        producer_name = 'ex:' + str(key)
        producer_agent = d1.agent(producer_name)
        if producer_name not in linked_producers:
            d1.used(controller_agent, producer_agent)
            linked_producers.add(producer_name)

        for position, item in enumerate(chain):
            # Temporal link to the next event of the same producer.
            if position + 1 < len(chain):
                d1.wasAssociatedWith(item.getEntity(),
                                     chain[position + 1].getEntity())
            # Generation and association are recorded for every event,
            # including the last one in the group (as in the earlier
            # two-loop version), and the entity itself is passed rather
            # than its holder object.
            d1.wasGeneratedBy(item.getEntity(), item.getActivity())
            d1.wasAssociatedWith(item.getActivity(), producer_agent)

    return d1
def primer_example():
    """Build the PROV primer example document.

    The statements follow, in order, the PROV-N source at
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn

    :return: the populated ProvDocument
    """
    doc = ProvDocument()

    # prefix ex <http://example/>
    # Namespaces do not need to be explicitly added to a document: the first
    # time the ex namespace is used, it is added to the document automatically.
    ex = Namespace("ex", "http://example/")
    # prefix dcterms <http://purl.org/dc/terms/>
    # prefix foaf <http://xmlns.com/foaf/0.1/>
    for prefix, uri in (
        ("dcterms", "http://purl.org/dc/terms/"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
    ):
        doc.add_namespace(prefix, uri)

    # entity(ex:article, [dcterms:title="Crime rises in cities"])
    doc.entity(ex["article"], {"dcterms:title": "Crime rises in cities"})
    # entity(ex:articleV1) ... entity(ex:blogEntry): entities without attributes
    plain_entities = (
        "articleV1",
        "articleV2",
        "dataSet1",
        "dataSet2",
        "regionList",
        "composition",
        "chart1",
        "chart2",
        "blogEntry",
    )
    for local_name in plain_entities:
        doc.entity(ex[local_name])

    # activity(ex:compile), activity(ex:compile2), activity(ex:compose)
    # since ex is registered, plain "ex:..." strings can be used
    for activity_id in ("ex:compile", "ex:compile2", "ex:compose"):
        doc.activity(activity_id)
    # activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    # date time can be provided as strings
    doc.activity("ex:correct", "2012-03-31T09:21:00", "2012-04-01T15:21:00")
    # activity(ex:illustrate)
    doc.activity("ex:illustrate")

    # used(ex:compose, ex:dataSet1, -, [ prov:role = "ex:dataToCompose"])
    # used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    compose_inputs = (
        ("ex:dataSet1", "ex:dataToCompose"),
        ("ex:regionList", "ex:regionsToAggregateBy"),
    )
    for used_entity, role in compose_inputs:
        doc.used("ex:compose", used_entity, other_attributes={"prov:role": role})

    # wasGeneratedBy(ex:composition, ex:compose, -)
    doc.wasGeneratedBy("ex:composition", "ex:compose")
    # used(ex:illustrate, ex:composition, -)
    doc.used("ex:illustrate", "ex:composition")
    # wasGeneratedBy(ex:chart1, ex:illustrate, -)
    doc.wasGeneratedBy("ex:chart1", "ex:illustrate")
    # wasGeneratedBy(ex:chart1, ex:compile, 2012-03-02T10:30:00)
    doc.wasGeneratedBy("ex:chart1", "ex:compile", "2012-03-02T10:30:00")
    # wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    # NOTE(review): the statement above is listed but has no corresponding call.

    # agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #       foaf:mbox= "<mailto:[email protected]>"])
    derek_attributes = {
        "prov:type": PROV["Person"],
        "foaf:givenName": "Derek",
        "foaf:mbox": "<mailto:[email protected]>",
    }
    doc.agent("ex:derek", derek_attributes)
    # wasAssociatedWith(ex:compose, ex:derek, -)
    # wasAssociatedWith(ex:illustrate, ex:derek, -)
    for activity_id in ("ex:compose", "ex:illustrate"):
        doc.wasAssociatedWith(activity_id, "ex:derek")

    # agent(ex:chartgen, [ prov:type="prov:Organization",
    #       foaf:name = "Chart Generators Inc"])
    chartgen_attributes = {
        "prov:type": PROV["Organization"],
        "foaf:name": "Chart Generators Inc",
    }
    doc.agent("ex:chartgen", chartgen_attributes)
    # actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    doc.actedOnBehalfOf("ex:derek", "ex:chartgen", "ex:compose")
    # wasAttributedTo(ex:chart1, ex:derek)
    doc.wasAttributedTo("ex:chart1", "ex:derek")

    # wasGeneratedBy(ex:dataSet2, ex:correct, -)
    doc.wasGeneratedBy("ex:dataSet2", "ex:correct")
    # used(ex:correct, ex:dataSet1, -)
    doc.used("ex:correct", "ex:dataSet1")
    # wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    doc.wasDerivedFrom(
        "ex:dataSet2", "ex:dataSet1", other_attributes={"prov:type": PROV["Revision"]}
    )
    # wasDerivedFrom(ex:chart2, ex:dataSet2)
    doc.wasDerivedFrom("ex:chart2", "ex:dataSet2")
    # wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    doc.wasDerivedFrom(
        "ex:blogEntry", "ex:article", other_attributes={"prov:type": PROV["Quotation"]}
    )

    # specializationOf(ex:articleV1, ex:article); wasDerivedFrom(ex:articleV1, ex:dataSet1)
    # specializationOf(ex:articleV2, ex:article); wasDerivedFrom(ex:articleV2, ex:dataSet2)
    for article_version, source_data in (
        ("ex:articleV1", "ex:dataSet1"),
        ("ex:articleV2", "ex:dataSet2"),
    ):
        doc.specializationOf(article_version, "ex:article")
        doc.wasDerivedFrom(article_version, source_data)
    # alternateOf(ex:articleV2, ex:articleV1)
    doc.alternateOf("ex:articleV2", "ex:articleV1")

    # endDocument
    return doc
def w3c_publication_2():
    """Build the "w3c-publication2" example document.

    Mirrors the PROV-ASN source at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    which reads::

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle

    :return: the populated ProvDocument
    """
    ex = Namespace('ex', 'http://example.org/')
    rec = Namespace('rec', 'http://example.org/record')
    w3 = Namespace('w3', 'http://www.w3.org/TR/2011/')
    hg = Namespace('hg', 'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/')

    # Identifiers that are referred to by string more than once below.
    copy_activity = 'ex:rcp'
    source_file = 'hg:Overview.html'
    published_draft = 'w3:WD-prov-dm-20111215'

    doc = ProvDocument()

    # Entities and the copy activity are declared through the namespace objects.
    doc.entity(hg['Overview.html'], {'prov:type': "file in hg"})
    doc.entity(w3['WD-prov-dm-20111215'], {'prov:type': "html4"})
    doc.activity(ex['rcp'], None, None, {'prov:type': "copy directory"})

    # Generation record identified as rec:g.
    doc.wasGeneratedBy(published_draft, copy_activity, identifier=rec['g'])

    request_type = Identifier("http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity('ex:req3', {'prov:type': request_type})

    # Usage record identified as rec:u, followed by an anonymous usage.
    doc.used(copy_activity, source_file, identifier='rec:u')
    doc.used(copy_activity, 'ex:req3')

    # Derivation refers back to the activity and the two identified records.
    doc.wasDerivedFrom(published_draft, source_file, copy_activity, 'rec:g', 'rec:u')

    doc.agent('ex:webmaster', {'prov:type': "Person"})
    doc.wasAssociatedWith(copy_activity, 'ex:webmaster')
    return doc
def primer_example():
    """Build the PROV primer example document.

    The statements follow, in order, the PROV-N source at
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn

    :return: the populated ProvDocument
    """
    primer = ProvDocument()

    # prefix ex <http://example/>
    # Namespaces do not need to be explicitly added to a document: the first
    # time the ex namespace is used, it is added to the document automatically.
    ex = Namespace("ex", "http://example/")
    # prefix dcterms <http://purl.org/dc/terms/>
    # prefix foaf <http://xmlns.com/foaf/0.1/>
    registered_prefixes = [
        ("dcterms", "http://purl.org/dc/terms/"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
    ]
    for prefix, uri in registered_prefixes:
        primer.add_namespace(prefix, uri)

    # entity(ex:article, [dcterms:title="Crime rises in cities"])
    primer.entity(ex["article"], {"dcterms:title": "Crime rises in cities"})
    # entity(ex:articleV1) ... entity(ex:blogEntry): entities without attributes
    for local_name in [
        "articleV1",
        "articleV2",
        "dataSet1",
        "dataSet2",
        "regionList",
        "composition",
        "chart1",
        "chart2",
        "blogEntry",
    ]:
        primer.entity(ex[local_name])

    # activity(ex:compile), activity(ex:compile2), activity(ex:compose)
    # since ex is registered, plain "ex:..." strings can be used
    for activity_id in ["ex:compile", "ex:compile2", "ex:compose"]:
        primer.activity(activity_id)
    # activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    # date time can be provided as strings
    correction_start = "2012-03-31T09:21:00"
    correction_end = "2012-04-01T15:21:00"
    primer.activity("ex:correct", correction_start, correction_end)
    # activity(ex:illustrate)
    primer.activity("ex:illustrate")

    # used(ex:compose, ex:dataSet1, -, [ prov:role = "ex:dataToCompose"])
    # used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    for used_entity, role in [
        ("ex:dataSet1", "ex:dataToCompose"),
        ("ex:regionList", "ex:regionsToAggregateBy"),
    ]:
        primer.used("ex:compose", used_entity, other_attributes={"prov:role": role})

    # wasGeneratedBy(ex:composition, ex:compose, -)
    primer.wasGeneratedBy("ex:composition", "ex:compose")
    # used(ex:illustrate, ex:composition, -)
    primer.used("ex:illustrate", "ex:composition")
    # wasGeneratedBy(ex:chart1, ex:illustrate, -)
    primer.wasGeneratedBy("ex:chart1", "ex:illustrate")
    # wasGeneratedBy(ex:chart1, ex:compile, 2012-03-02T10:30:00)
    primer.wasGeneratedBy("ex:chart1", "ex:compile", "2012-03-02T10:30:00")
    # wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    # NOTE(review): the statement above is listed but has no corresponding call.

    # agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #       foaf:mbox= "<mailto:[email protected]>"])
    primer.agent(
        "ex:derek",
        {
            "prov:type": PROV["Person"],
            "foaf:givenName": "Derek",
            "foaf:mbox": "<mailto:[email protected]>",
        },
    )
    # wasAssociatedWith(ex:compose, ex:derek, -)
    # wasAssociatedWith(ex:illustrate, ex:derek, -)
    for activity_id in ["ex:compose", "ex:illustrate"]:
        primer.wasAssociatedWith(activity_id, "ex:derek")

    # agent(ex:chartgen, [ prov:type="prov:Organization",
    #       foaf:name = "Chart Generators Inc"])
    primer.agent(
        "ex:chartgen",
        {"prov:type": PROV["Organization"], "foaf:name": "Chart Generators Inc"},
    )
    # actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    primer.actedOnBehalfOf("ex:derek", "ex:chartgen", "ex:compose")
    # wasAttributedTo(ex:chart1, ex:derek)
    primer.wasAttributedTo("ex:chart1", "ex:derek")

    # wasGeneratedBy(ex:dataSet2, ex:correct, -)
    primer.wasGeneratedBy("ex:dataSet2", "ex:correct")
    # used(ex:correct, ex:dataSet1, -)
    primer.used("ex:correct", "ex:dataSet1")
    # wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    revision = {"prov:type": PROV["Revision"]}
    primer.wasDerivedFrom("ex:dataSet2", "ex:dataSet1", other_attributes=revision)
    # wasDerivedFrom(ex:chart2, ex:dataSet2)
    primer.wasDerivedFrom("ex:chart2", "ex:dataSet2")
    # wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    quotation = {"prov:type": PROV["Quotation"]}
    primer.wasDerivedFrom("ex:blogEntry", "ex:article", other_attributes=quotation)

    # specializationOf(ex:articleV1, ex:article); wasDerivedFrom(ex:articleV1, ex:dataSet1)
    # specializationOf(ex:articleV2, ex:article); wasDerivedFrom(ex:articleV2, ex:dataSet2)
    for article_version, source_data in [
        ("ex:articleV1", "ex:dataSet1"),
        ("ex:articleV2", "ex:dataSet2"),
    ]:
        primer.specializationOf(article_version, "ex:article")
        primer.wasDerivedFrom(article_version, source_data)
    # alternateOf(ex:articleV2, ex:articleV1)
    primer.alternateOf("ex:articleV2", "ex:articleV1")

    # endDocument
    return primer
def ctfToProv():
    """Build a PROV document from the events of the global ``trace_collection``.

    Events are grouped by ``producer_id``. Each event becomes an entity
    ``ex:event<N>`` carrying the event's fields (keys prefixed with
    ``ex:``) plus ``ex:name``. The entity is stored, together with the
    event's ``activity`` value, in an ``entityActivity`` holder.

    After all events are read, every producer gets an agent and a ``used``
    relation from the controller agent. Within a producer's group each
    entity is linked to the next one with ``wasAssociatedWith`` (used here
    as a temporal link), recorded as generated by its activity, and the
    activity is associated with the producer agent.

    NOTE(review): the controller agent used for every producer is the one
    created for the *last* event read; confirm that a single controller
    per trace is expected.
    NOTE(review): ``getActivity()`` presumably returns the raw
    ``event['activity']`` value stored above, which has no ``ex:`` prefix;
    verify against ``entityActivity`` that it is a usable identifier.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # namespaces do not need to be explicitly added to a document
    ex = Namespace('ex', 'http://example/')
    #data = event_field(os.path.join(trace_path,'../config.yaml'))

    can_events = defaultdict(list)
    controller_agent = None

    # enumerate() gives each entity its own index. The counter was
    # previously never incremented, so every entity was named ex:event0.
    for counter, event in enumerate(trace_collection.events):
        event_fields = event.field_list_with_scope(
            babeltrace.CTFScope.EVENT_FIELDS)
        dataset = {'ex:' + k: event[k] for k in event_fields}
        dataset['ex:name'] = event.name

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        # create class object to store entity and activity data field.
        entity_activity = entityActivity()
        entity_activity.addEntityActivity(e1, event['activity'])
        can_events[event['producer_id']].append(entity_activity)

        controller_agent = d1.agent('ex:' + event['controller_id'])

    # Producer identifiers that already have a `used` relation.
    linked_producers = set()
    for key, chain in can_events.items():
        producer_name = 'ex:' + str(key)
        producer_agent = d1.agent(producer_name)
        if producer_name not in linked_producers:
            d1.used(controller_agent, producer_agent)
            linked_producers.add(producer_name)

        last_position = len(chain) - 1
        for position, item in enumerate(chain):
            # Temporal link to the next event of the same producer.
            if position < last_position:
                d1.wasAssociatedWith(item.getEntity(),
                                     chain[position + 1].getEntity())
            # Generation and association are recorded for every event,
            # including the last one in the group (as in the earlier
            # two-loop version), and the entity itself is passed rather
            # than its holder object.
            d1.wasGeneratedBy(item.getEntity(), item.getActivity())
            d1.wasAssociatedWith(item.getActivity(), producer_agent)

    return d1
def w3c_publication_1():
    """Build the "w3c-publication1" example document.

    Mirrors the PROV-ASN source at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    which reads::

        bundle

        prefix ex <http://example.org/>

        prefix w3 <http://www.w3.org/>
        prefix tr <http://www.w3.org/TR/2011/>
        prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
        prefix email <https://lists.w3.org/Archives/Member/w3c-archive/>
        prefix chairs <https://lists.w3.org/Archives/Member/chairs/>
        prefix trans <http://www.w3.org/2005/08/01-transitions.html#>
        prefix rec54 <http://www.w3.org/2001/02pd/rec54#>

        entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
        entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
        entity(process:rec-advance, [ prov:type='prov:Plan' ])

        entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
        entity(email:2011Oct/0141, [ prov:type='trans:pubreq' ])
        entity(email:2011Dec/0111, [ prov:type='trans:pubreq' ])

        wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)

        activity(ex:act1,-,-,[prov:type="publish"])
        activity(ex:act2,-,-,[prov:type="publish"])

        wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
        wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)

        used(ex:act1, chairs:2011OctDec/0004, -)
        used(ex:act1, email:2011Oct/0141, -)
        used(ex:act2, email:2011Dec/0111, -)

        agent(w3:Consortium, [ prov:type='prov:Organization' ])

        wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
        wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)

        endBundle

    :return: the populated ProvDocument
    """
    doc = ProvDocument()

    # All prefixes are registered up front; identifiers below are plain strings.
    namespaces = (
        ('ex', 'http://example.org/'),
        ('w3', 'http://www.w3.org/'),
        ('tr', 'http://www.w3.org/TR/2011/'),
        ('process', 'http://www.w3.org/2005/10/Process-20051014/tr.html#'),
        ('email', 'https://lists.w3.org/Archives/Member/w3c-archive/'),
        ('chairs', 'https://lists.w3.org/Archives/Member/chairs/'),
        ('trans', 'http://www.w3.org/2005/08/01-transitions.html#'),
        ('rec54', 'http://www.w3.org/2001/02pd/rec54#'),
    )
    for prefix, uri in namespaces:
        doc.add_namespace(prefix, uri)

    # (identifier, prov:type) for every entity, in declaration order.
    typed_entities = (
        ('tr:WD-prov-dm-20111018', 'rec54:WD'),
        ('tr:WD-prov-dm-20111215', 'rec54:WD'),
        ('process:rec-advance', 'prov:Plan'),
        ('chairs:2011OctDec/0004', 'trans:transreq'),
        ('email:2011Oct/0141', 'trans:pubreq'),
        ('email:2011Dec/0111', 'trans:pubreq'),
    )
    for identifier, prov_type in typed_entities:
        doc.entity(identifier, {'prov:type': prov_type})

    doc.wasDerivedFrom('tr:WD-prov-dm-20111215', 'tr:WD-prov-dm-20111018')

    publications = ('ex:act1', 'ex:act2')
    for activity_id in publications:
        doc.activity(activity_id, other_attributes={'prov:type': "publish"})

    for draft, activity_id in (
        ('tr:WD-prov-dm-20111018', 'ex:act1'),
        ('tr:WD-prov-dm-20111215', 'ex:act2'),
    ):
        doc.wasGeneratedBy(draft, activity_id)

    for activity_id, request in (
        ('ex:act1', 'chairs:2011OctDec/0004'),
        ('ex:act1', 'email:2011Oct/0141'),
        ('ex:act2', 'email:2011Dec/0111'),
    ):
        doc.used(activity_id, request)

    doc.agent('w3:Consortium', other_attributes={'prov:type': "Organization"})
    # Both publications are associated with the consortium under the same plan.
    for activity_id in publications:
        doc.wasAssociatedWith(activity_id, 'w3:Consortium', 'process:rec-advance')
    return doc
def w3c_publication_2():
    """Build the "W3C publication 2" example as a ``ProvDocument``.

    The first three records are declared with qualified names taken from
    ``Namespace`` objects; every later statement refers to them by their
    ``prefix:local`` string form, exactly as the listing below does.

    :return: the populated ProvDocument
    """
    # Reference listing:
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    # ===========================================================================
    # bundle
    #
    # prefix ex <http://example.org/>
    # prefix rec <http://example.org/record>
    #
    # prefix w3 <http://www.w3.org/TR/2011/>
    # prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>
    #
    #
    # entity(hg:Overview.html, [ prov:type="file in hg" ])
    # entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])
    #
    #
    # activity(ex:rcp,-,-,[prov:type="copy directory"])
    #
    # wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)
    #
    # entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])
    #
    # used(rec:u; ex:rcp,hg:Overview.html,-)
    # used(ex:rcp, ex:req3, -)
    #
    #
    # wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)
    #
    # agent(ex:webmaster, [ prov:type='prov:Person' ])
    #
    # wasAssociatedWith(ex:rcp, ex:webmaster, -)
    #
    # endBundle
    # ===========================================================================
    doc = ProvDocument()

    ns_ex = Namespace('ex', 'http://example.org/')
    ns_rec = Namespace('rec', 'http://example.org/record')
    ns_w3 = Namespace('w3', 'http://www.w3.org/TR/2011/')
    ns_hg = Namespace(
        'hg',
        'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/'
    )

    # String identifiers reused by the relations further down.
    source_file = 'hg:Overview.html'
    published_doc = 'w3:WD-prov-dm-20111215'
    copy_activity = 'ex:rcp'
    request = 'ex:req3'
    webmaster = 'ex:webmaster'

    # Declarations made through the Namespace objects.
    doc.entity(ns_hg['Overview.html'], {'prov:type': "file in hg"})
    doc.entity(ns_w3['WD-prov-dm-20111215'], {'prov:type': "html4"})
    doc.activity(ns_ex['rcp'], None, None, {'prov:type': "copy directory"})

    # Generation record identified as rec:g.
    doc.wasGeneratedBy(published_doc, copy_activity, identifier=ns_rec['g'])

    request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity(request, {'prov:type': request_type})

    # Usage record identified as rec:u, plus an anonymous usage.
    doc.used(copy_activity, source_file, identifier='rec:u')
    doc.used(copy_activity, request)

    # Derivation that names the activity, generation and usage records.
    doc.wasDerivedFrom(published_doc, source_file, copy_activity,
                       'rec:g', 'rec:u')

    # NOTE(review): the listing above types the agent as 'prov:Person'; the
    # code passes the plain string "Person" -- confirm which is intended.
    doc.agent(webmaster, {'prov:type': "Person"})
    doc.wasAssociatedWith(copy_activity, webmaster)

    return doc
def w3c_publication_1():
    """Return a ``ProvDocument`` holding the "W3C publication 1" example.

    Namespaces, entities, activities and relations are declared from small
    lookup tables, in the order of the PROV-N listing kept below.
    """
    # Reference listing:
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    # ===========================================================================
    # bundle
    #
    # prefix ex <http://example.org/>
    #
    # prefix w3 <http://www.w3.org/>
    # prefix tr <http://www.w3.org/TR/2011/>
    # prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
    # prefix email <https://lists.w3.org/Archives/Member/w3c-archive/>
    # prefix chairs <https://lists.w3.org/Archives/Member/chairs/>
    # prefix trans <http://www.w3.org/2005/08/01-transitions.html#>
    # prefix rec54 <http://www.w3.org/2001/02pd/rec54#>
    #
    #
    # entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
    # entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
    # entity(process:rec-advance, [ prov:type='prov:Plan' ])
    #
    #
    # entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
    # entity(email:2011Oct/0141, [ prov:type='trans:pubreq' ])
    # entity(email:2011Dec/0111, [ prov:type='trans:pubreq' ])
    #
    #
    # wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)
    #
    #
    # activity(ex:act1,-,-,[prov:type="publish"])
    # activity(ex:act2,-,-,[prov:type="publish"])
    #
    # wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
    # wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)
    #
    # used(ex:act1, chairs:2011OctDec/0004, -)
    # used(ex:act1, email:2011Oct/0141, -)
    # used(ex:act2, email:2011Dec/0111, -)
    #
    # agent(w3:Consortium, [ prov:type='prov:Organization' ])
    #
    # wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
    # wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)
    #
    # endBundle
    # ===========================================================================
    namespaces = {
        "ex": "http://example.org/",
        "w3": "http://www.w3.org/",
        "tr": "http://www.w3.org/TR/2011/",
        "process": "http://www.w3.org/2005/10/Process-20051014/tr.html#",
        "email": "https://lists.w3.org/Archives/Member/w3c-archive/",
        "chairs": "https://lists.w3.org/Archives/Member/chairs/",
        "trans": "http://www.w3.org/2005/08/01-transitions.html#",
        "rec54": "http://www.w3.org/2001/02pd/rec54#",
    }
    entity_types = [
        ("tr:WD-prov-dm-20111018", "rec54:WD"),
        ("tr:WD-prov-dm-20111215", "rec54:WD"),
        ("process:rec-advance", "prov:Plan"),
        ("chairs:2011OctDec/0004", "trans:transreq"),
        ("email:2011Oct/0141", "trans:pubreq"),
        ("email:2011Dec/0111", "trans:pubreq"),
    ]
    activities = ["ex:act1", "ex:act2"]
    generated_by = [
        ("tr:WD-prov-dm-20111018", "ex:act1"),
        ("tr:WD-prov-dm-20111215", "ex:act2"),
    ]
    usages = [
        ("ex:act1", "chairs:2011OctDec/0004"),
        ("ex:act1", "email:2011Oct/0141"),
        ("ex:act2", "email:2011Dec/0111"),
    ]
    consortium = "w3:Consortium"
    plan = "process:rec-advance"

    document = ProvDocument()

    for prefix, uri in namespaces.items():
        document.add_namespace(prefix, uri)

    for identifier, type_name in entity_types:
        document.entity(identifier, {"prov:type": type_name})

    document.wasDerivedFrom("tr:WD-prov-dm-20111215", "tr:WD-prov-dm-20111018")

    for activity in activities:
        document.activity(activity, other_attributes={"prov:type": "publish"})

    for entity, activity in generated_by:
        document.wasGeneratedBy(entity, activity)

    for activity, entity in usages:
        document.used(activity, entity)

    # NOTE(review): the listing types the agent as 'prov:Organization' while
    # the code uses the plain string "Organization" -- confirm the intent.
    document.agent(consortium, other_attributes={"prov:type": "Organization"})

    for activity in activities:
        document.wasAssociatedWith(activity, consortium, plan)

    return document
class LogProv():
    """Provenance document built from one parsed access-log record.

    The document is created in the constructor and exposed through the
    ``prov_doc`` property.
    """

    def __init__(self, log_dic):
        """Populate a new ProvDocument from ``log_dic``.

        :param log_dic: mapping holding the fields of one log record. The
            keys read unconditionally are ``remote_host``, ``status``,
            ``response_bytes_clf``, ``request_url``, ``request_method``,
            ``request_http_ver`` and ``time_received_tz_isoformat``;
            ``request_url_username`` and ``request_url_hostname`` are
            optional and only used when present and truthy.
        """
        doc = ProvDocument()
        self._prov_doc = doc

        vre = doc.add_namespace('vre', 'https://www.vre4eic.eu/log#')
        prov = doc.add_namespace('prov', 'http://www.w3.org/ns/prov#')

        # Agent ag1: the remote host, with the user name when one is known.
        remote_attrs = {
            prov['type']: PROV["SoftwareAgent"],
            vre['hasIP']: log_dic['remote_host'],
        }
        if log_dic.get('request_url_username'):
            remote_attrs[vre['hasUsername']] = log_dic['request_url_username']
        remote_host = doc.agent(vre['ag1'], remote_attrs)

        # Agent ag2: declared only when the request carried a host name.
        # NOTE(review): ag2 is given the *remote host* IP and the host name
        # itself is never stored -- confirm this is intended.
        if log_dic.get('request_url_hostname'):
            doc.agent(vre['ag2'], {
                prov['type']: PROV["SoftwareAgent"],
                vre['hasIP']: log_dic['remote_host'],
            })

        response_attrs = {
            vre['status']: log_dic['status'],
            vre['responseBytes']: log_dic['response_bytes_clf'],
        }
        request_entity = doc.entity(vre['en1'], response_attrs)

        request_attrs = {
            vre['requestURL']: log_dic['request_url'],
            vre['requestMethod']: log_dic['request_method'],
            vre['httpVersion']: log_dic['request_http_ver'],
        }
        received_activity = doc.activity(vre['ac1'],
                                         other_attributes=request_attrs)

        # NOTE(review): generation() is given the *agent* as the generated
        # thing, and the attribution below points at the *activity* rather
        # than an agent. Both look swapped, but are kept as written --
        # confirm with the author.
        doc.generation(remote_host, activity=received_activity,
                       time=log_dic['time_received_tz_isoformat'])
        doc.wasAttributedTo(request_entity, received_activity)
        doc.wasAssociatedWith(received_activity, remote_host)

    @property
    def prov_doc(self):
        """The underlying ProvDocument."""
        return self._prov_doc

    @prov_doc.setter
    def prov_doc(self, value):
        self._prov_doc = value

    @prov_doc.deleter
    def prov_doc(self):
        del self._prov_doc
class NIDMExporter():
    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.

    The class itself does not know how to read any particular software's
    results: ``parse`` relies on ``_find_software``, ``_find_model_fitting``,
    ``_find_contrasts`` and ``_find_inferences``, and ``_create_bundle``
    relies on ``_get_exporter``; none of these is defined in this class.
    """

    def __init__(self, version, out_dir, zipped=True):
        """
        Prepare the output location and an empty provenance document.

        :param version: NIDM-Results version, either ``"dev"`` or a
            ``"major.minor.revision"`` string with an optional ``-rcN``
            suffix.
        :param out_dir: output path without extension; ``.nidm`` (directory)
            or ``.nidm.zip`` (archive) is appended.
        :param zipped: when true (default) the export ends up as a zip
            archive, otherwise as a directory.

        If the final output path already exists the user is asked on stdin
        whether to overwrite it; anything but ``y`` exits the interpreter.
        """
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname+".nidm"
        else:
            out_dirname = out_dirname+".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir+" already exists, overwrite?"
            if not input("%s (y/N) " % msg).lower() == 'y':
                # `quit` is injected by the `site` module and is not
                # guaranteed to exist; SystemExit is what it raises anyway.
                raise SystemExit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        self.version = self._parse_version(version)

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''

    @staticmethod
    def _parse_version(version):
        """
        Split a version string into the dictionary stored in ``self.version``.

        ``"dev"`` maps to major 10000 (and carries no ``'rc'`` key); any other
        value must look like ``"X.Y.Z"`` or ``"X.Y.Z-rcN"``. ``'rc'`` is -1
        when there is no release-candidate suffix.
        """
        if version == "dev":
            return {'major': 10000, 'minor': 0, 'revision': 0,
                    'num': version}

        major, minor, revision = version.split(".")
        if "-rc" in revision:
            revision, rc = revision.split("-rc")
        else:
            rc = -1
        return {'major': int(major), 'minor': int(minor),
                'revision': int(revision), 'rc': int(rc),
                'num': version}

    def parse(self):
        """
        Parse a result directory to extract the pieces information to be
        stored in NIDM-Results.

        On any exception the temporary export directory is removed and the
        exception is re-raised.
        """
        try:
            # Methods: find_software, find_model_fitting, find_contrasts and
            # find_inferences should be defined in the children classes and
            # return a list of NIDM Objects as specified in the objects module

            # Object of type Software describing the neuroimaging software
            # package used for the analysis
            self.software = self._find_software()

            # List of objects of type ModelFitting describing the
            # model fitting step in NIDM-Results (main activity: Model
            # Parameters Estimation)
            self.model_fittings = self._find_model_fitting()

            # Dictionary of (key, value) pairs where where key is a tuple
            # containing the identifier of a ModelParametersEstimation object
            # and a tuple of identifiers of ParameterEstimateMap objects and
            # value is an object of type Contrast describing the contrast
            # estimation step in NIDM-Results (main activity: Contrast
            # Estimation)
            self.contrasts = self._find_contrasts()

            # Inference activity and entities
            # Dictionary of (key, value) pairs where key is the identifier of a
            # ContrastEstimation object and value is an object of type
            # Inference describing the inference step in NIDM-Results (main
            # activity: Inference)
            self.inferences = self._find_inferences()
        except Exception:
            self.cleanup()
            raise

    def cleanup(self):
        """Remove the temporary export directory if it exists."""
        if os.path.isdir(self.export_dir):
            shutil.rmtree(self.export_dir)

    def add_object(self, nidm_object, export_file=True):
        """
        Add a NIDMObject to a NIDM-Results export.

        The object is first asked to export itself (into the export directory
        unless ``export_file`` is false, in which case ``None`` is passed),
        then declared in the bundle as an activity, entity or agent according
        to its ``prov_type``. Objects with any other ``prov_type`` are
        exported but not declared.
        """
        if not export_file:
            export_dir = None
        else:
            export_dir = self.export_dir

        if not isinstance(nidm_object, NIDMFile):
            nidm_object.export(self.version, export_dir)
        else:
            nidm_object.export(self.version, export_dir, self.prepend_path)
        # ProvDocument: add object to the bundle
        if nidm_object.prov_type == PROV['Activity']:
            self.bundle.activity(nidm_object.id,
                                 other_attributes=nidm_object.attributes)
        elif nidm_object.prov_type == PROV['Entity']:
            self.bundle.entity(nidm_object.id,
                               other_attributes=nidm_object.attributes)
        elif nidm_object.prov_type == PROV['Agent']:
            self.bundle.agent(nidm_object.id,
                              other_attributes=nidm_object.attributes)
        # self.bundle.update(nidm_object.p)

    def export(self):
        """
        Generate a NIDM-Results export.

        Walks the objects collected by ``parse`` (software, model fittings,
        contrasts, inferences), adds each of them and the relations between
        them to the bundle, then writes the serialisations via
        ``save_prov_to_files``.

        :return: path of the final export (``self.out_dir``).

        On any exception the temporary export directory is removed and the
        exception is re-raised.
        """
        try:
            if not os.path.isdir(self.export_dir):
                os.mkdir(self.export_dir)

            # Initialise main bundle
            self._create_bundle(self.version)

            self.add_object(self.software)

            # Add model fitting steps
            if not isinstance(self.model_fittings, list):
                self.model_fittings = list(self.model_fittings.values())

            for model_fitting in self.model_fittings:
                # Design Matrix
                # model_fitting.activity.used(model_fitting.design_matrix)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.design_matrix.id)
                self.add_object(model_fitting.design_matrix)
                # *** Export visualisation of the design matrix
                self.add_object(model_fitting.design_matrix.image)

                if model_fitting.design_matrix.image.file is not None:
                    self.add_object(model_fitting.design_matrix.image.file)

                if model_fitting.design_matrix.hrf_models is not None:
                    # drift model
                    self.add_object(model_fitting.design_matrix.drift_model)

                if self.version['major'] > 1 or \
                        (self.version['major'] == 1 and
                         self.version['minor'] >= 3):
                    # Machine
                    # model_fitting.data.wasAttributedTo(model_fitting.machine)
                    self.bundle.wasAttributedTo(model_fitting.data.id,
                                                model_fitting.machine.id)
                    self.add_object(model_fitting.machine)

                    # Imaged subject or group(s)
                    for sub in model_fitting.subjects:
                        self.add_object(sub)
                        # model_fitting.data.wasAttributedTo(sub)
                        self.bundle.wasAttributedTo(model_fitting.data.id,
                                                    sub.id)

                # Data
                # model_fitting.activity.used(model_fitting.data)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.data.id)
                self.add_object(model_fitting.data)

                # Error Model
                # model_fitting.activity.used(model_fitting.error_model)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.error_model.id)
                self.add_object(model_fitting.error_model)

                # Parameter Estimate Maps
                for param_estimate in model_fitting.param_estimates:
                    # param_estimate.wasGeneratedBy(model_fitting.activity)
                    self.bundle.wasGeneratedBy(param_estimate.id,
                                               model_fitting.activity.id)
                    self.add_object(param_estimate)
                    self.add_object(param_estimate.coord_space)
                    self.add_object(param_estimate.file)

                    if param_estimate.derfrom is not None:
                        self.bundle.wasDerivedFrom(param_estimate.id,
                                                   param_estimate.derfrom.id)
                        self.add_object(param_estimate.derfrom)
                        self.add_object(param_estimate.derfrom.file,
                                        export_file=False)

                # Residual Mean Squares Map
                # model_fitting.rms_map.wasGeneratedBy(model_fitting.activity)
                self.add_object(model_fitting.rms_map)
                self.bundle.wasGeneratedBy(model_fitting.rms_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.rms_map.coord_space)
                self.add_object(model_fitting.rms_map.file)
                if model_fitting.rms_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.rms_map.id,
                        model_fitting.rms_map.derfrom.id)
                    self.add_object(model_fitting.rms_map.derfrom)
                    self.add_object(model_fitting.rms_map.derfrom.file,
                                    export_file=False)

                # Resels per Voxel Map
                if model_fitting.rpv_map is not None:
                    self.add_object(model_fitting.rpv_map)
                    self.bundle.wasGeneratedBy(model_fitting.rpv_map.id,
                                               model_fitting.activity.id)
                    self.add_object(model_fitting.rpv_map.coord_space)
                    self.add_object(model_fitting.rpv_map.file)
                    if model_fitting.rpv_map.inf_id is not None:
                        self.bundle.used(model_fitting.rpv_map.inf_id,
                                         model_fitting.rpv_map.id)
                    if model_fitting.rpv_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            model_fitting.rpv_map.id,
                            model_fitting.rpv_map.derfrom.id)
                        self.add_object(model_fitting.rpv_map.derfrom)
                        self.add_object(model_fitting.rpv_map.derfrom.file,
                                        export_file=False)

                # Mask
                # model_fitting.mask_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.mask_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.mask_map)
                if model_fitting.mask_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.mask_map.id,
                        model_fitting.mask_map.derfrom.id)
                    self.add_object(model_fitting.mask_map.derfrom)
                    self.add_object(model_fitting.mask_map.derfrom.file,
                                    export_file=False)

                # Create coordinate space export
                self.add_object(model_fitting.mask_map.coord_space)
                # Create "Mask map" entity
                self.add_object(model_fitting.mask_map.file)

                # Grand Mean map
                # model_fitting.grand_mean_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.grand_mean_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.grand_mean_map)
                # Coordinate space entity
                self.add_object(model_fitting.grand_mean_map.coord_space)
                # Grand Mean Map entity
                self.add_object(model_fitting.grand_mean_map.file)

                # Model Parameters Estimation activity
                self.add_object(model_fitting.activity)
                self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                              self.software.id)
                # model_fitting.activity.wasAssociatedWith(self.software)
                # self.add_object(model_fitting)

            # Add contrast estimation steps
            # Maps each contrast estimation id to the id of the analysis mask
            # it used; read again by the inference loop below.
            analysis_masks = dict()
            for (model_fitting_id, pe_ids), contrasts in list(
                    self.contrasts.items()):
                for contrast in contrasts:
                    model_fitting = self._get_model_fitting(model_fitting_id)
                    # for contrast in contrasts:
                    # contrast.estimation.used(model_fitting.rms_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.rms_map.id)
                    # contrast.estimation.used(model_fitting.mask_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.mask_map.id)
                    analysis_masks[contrast.estimation.id] = \
                        model_fitting.mask_map.id
                    self.bundle.used(contrast.estimation.id,
                                     contrast.weights.id)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.design_matrix.id)
                    # contrast.estimation.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(contrast.estimation.id,
                                                  self.software.id)

                    for pe_id in pe_ids:
                        # contrast.estimation.used(pe_id)
                        self.bundle.used(contrast.estimation.id, pe_id)

                    # Create estimation activity
                    self.add_object(contrast.estimation)

                    # Create contrast weights
                    self.add_object(contrast.weights)

                    if contrast.contrast_map is not None:
                        # Create contrast Map
                        # contrast.contrast_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.contrast_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.contrast_map)
                        self.add_object(contrast.contrast_map.coord_space)
                        # Copy contrast map in export directory
                        self.add_object(contrast.contrast_map.file)

                        if contrast.contrast_map.derfrom is not None:
                            self.bundle.wasDerivedFrom(
                                contrast.contrast_map.id,
                                contrast.contrast_map.derfrom.id)
                            self.add_object(contrast.contrast_map.derfrom)
                            self.add_object(contrast.contrast_map.derfrom.file,
                                            export_file=False)

                    # Create Std Err. Map (T-tests) or Explained Mean Sq. Map
                    # (F-tests)
                    # contrast.stderr_or_expl_mean_sq_map.wasGeneratedBy
                    # (contrast.estimation)
                    stderr_explmeansq_map = (
                        contrast.stderr_or_expl_mean_sq_map)
                    self.bundle.wasGeneratedBy(
                        stderr_explmeansq_map.id,
                        contrast.estimation.id)
                    self.add_object(stderr_explmeansq_map)
                    self.add_object(
                        stderr_explmeansq_map.coord_space)
                    if isinstance(stderr_explmeansq_map,
                                  ContrastStdErrMap) and \
                            stderr_explmeansq_map.contrast_var:
                        self.add_object(
                            stderr_explmeansq_map.contrast_var)
                        if stderr_explmeansq_map.var_coord_space:
                            self.add_object(
                                stderr_explmeansq_map.var_coord_space)
                        if stderr_explmeansq_map.contrast_var.coord_space:
                            self.add_object(
                                stderr_explmeansq_map.contrast_var.coord_space)
                        self.add_object(
                            stderr_explmeansq_map.contrast_var.file,
                            export_file=False)
                        self.bundle.wasDerivedFrom(
                            stderr_explmeansq_map.id,
                            stderr_explmeansq_map.contrast_var.id)
                    self.add_object(stderr_explmeansq_map.file)

                    # Create Statistic Map
                    # contrast.stat_map.wasGeneratedBy(contrast.estimation)
                    self.bundle.wasGeneratedBy(contrast.stat_map.id,
                                               contrast.estimation.id)
                    self.add_object(contrast.stat_map)
                    self.add_object(contrast.stat_map.coord_space)
                    # Copy Statistical map in export directory
                    self.add_object(contrast.stat_map.file)

                    if contrast.stat_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            contrast.stat_map.id,
                            contrast.stat_map.derfrom.id)
                        self.add_object(contrast.stat_map.derfrom)
                        self.add_object(contrast.stat_map.derfrom.file,
                                        export_file=False)

                    # Create Z Statistic Map
                    if contrast.z_stat_map:
                        # contrast.z_stat_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.z_stat_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.z_stat_map)
                        self.add_object(contrast.z_stat_map.coord_space)
                        # Copy Statistical map in export directory
                        self.add_object(contrast.z_stat_map.file)

                    # self.add_object(contrast)

            # Add inference steps
            for contrast_id, inferences in list(self.inferences.items()):
                contrast = self._get_contrast(contrast_id)

                for inference in inferences:
                    # The Z-statistic map is used when there is one,
                    # otherwise the plain statistic map.
                    if contrast.z_stat_map:
                        used_id = contrast.z_stat_map.id
                    else:
                        used_id = contrast.stat_map.id
                    # inference.inference_act.used(used_id)
                    self.bundle.used(inference.inference_act.id, used_id)
                    # inference.inference_act.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(inference.inference_act.id,
                                                  self.software.id)

                    # self.add_object(inference)
                    # Excursion set
                    # inference.excursion_set.wasGeneratedBy(inference.inference_act)
                    self.bundle.wasGeneratedBy(inference.excursion_set.id,
                                               inference.inference_act.id)
                    self.add_object(inference.excursion_set)
                    self.add_object(inference.excursion_set.coord_space)
                    if inference.excursion_set.visu is not None:
                        self.add_object(inference.excursion_set.visu)
                        if inference.excursion_set.visu.file is not None:
                            self.add_object(inference.excursion_set.visu.file)
                    # Copy "Excursion set map" file in export directory
                    self.add_object(inference.excursion_set.file)
                    if inference.excursion_set.clust_map is not None:
                        self.add_object(inference.excursion_set.clust_map)
                        self.add_object(inference.excursion_set.clust_map.file)
                        self.add_object(
                            inference.excursion_set.clust_map.coord_space)

                    if inference.excursion_set.mip is not None:
                        self.add_object(inference.excursion_set.mip)
                        self.add_object(inference.excursion_set.mip.file)

                    # Height threshold
                    if inference.height_thresh.equiv_thresh is not None:
                        for equiv in inference.height_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.height_thresh)

                    # Extent threshold
                    if inference.extent_thresh.equiv_thresh is not None:
                        for equiv in inference.extent_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.extent_thresh)

                    # Display Mask (potentially more than 1)
                    if inference.disp_mask:
                        for mask in inference.disp_mask:
                            # inference.inference_act.used(mask)
                            self.bundle.used(inference.inference_act.id,
                                             mask.id)
                            self.add_object(mask)
                            # Create coordinate space entity
                            self.add_object(mask.coord_space)
                            # Create "Display Mask Map" entity
                            self.add_object(mask.file)

                            if mask.derfrom is not None:
                                self.bundle.wasDerivedFrom(mask.id,
                                                           mask.derfrom.id)
                                self.add_object(mask.derfrom)
                                self.add_object(mask.derfrom.file,
                                                export_file=False)

                    # Search Space
                    self.bundle.wasGeneratedBy(inference.search_space.id,
                                               inference.inference_act.id)
                    # inference.search_space.wasGeneratedBy(inference.inference_act)
                    self.add_object(inference.search_space)
                    self.add_object(inference.search_space.coord_space)
                    # Copy "Mask map" in export directory
                    self.add_object(inference.search_space.file)

                    # Peak Definition
                    if inference.peak_criteria:
                        # inference.inference_act.used(inference.peak_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.peak_criteria.id)
                        self.add_object(inference.peak_criteria)

                    # Cluster Definition
                    if inference.cluster_criteria:
                        # inference.inference_act.used(inference.cluster_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.cluster_criteria.id)
                        self.add_object(inference.cluster_criteria)

                    if inference.clusters:
                        # Clusters and peaks
                        for cluster in inference.clusters:
                            # cluster.wasDerivedFrom(inference.excursion_set)
                            self.bundle.wasDerivedFrom(
                                cluster.id, inference.excursion_set.id)
                            self.add_object(cluster)
                            for peak in cluster.peaks:
                                self.bundle.wasDerivedFrom(peak.id, cluster.id)
                                self.add_object(peak)
                                self.add_object(peak.coordinate)

                            if cluster.cog is not None:
                                self.bundle.wasDerivedFrom(cluster.cog.id,
                                                           cluster.id)
                                self.add_object(cluster.cog)
                                self.add_object(cluster.cog.coordinate)

                    # Inference activity
                    # inference.inference_act.wasAssociatedWith(inference.software_id)
                    # inference.inference_act.used(inference.height_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.height_thresh.id)
                    # inference.inference_act.used(inference.extent_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.extent_thresh.id)
                    self.bundle.used(inference.inference_act.id,
                                     analysis_masks[contrast.estimation.id])
                    self.add_object(inference.inference_act)

            # Write-out prov file
            self.save_prov_to_files()

            return self.out_dir
        except Exception:
            self.cleanup()
            raise

    def _get_model_fitting(self, mf_id):
        """
        Retrieve model fitting with identifier 'mf_id' from the list of model
        fitting objects stored in self.model_fittings.

        :raises Exception: when no model fitting has that activity id.
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting

        raise Exception("Model fitting activity with id: " + str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Retrieve contrast with identifier 'con_id' from the list of contrast
        objects stored in self.contrasts.

        :raises Exception: when no contrast has that estimation id.
        """
        for contrasts in list(self.contrasts.values()):
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: " + str(con_id) +
                        " not found.")

    def _add_namespaces(self):
        """
        Add namespaces to NIDM document.
        """
        self.doc.add_namespace(NIDM)
        self.doc.add_namespace(NIIRI)
        self.doc.add_namespace(CRYPTO)
        self.doc.add_namespace(DCT)
        self.doc.add_namespace(DC)
        self.doc.add_namespace(NFO)
        self.doc.add_namespace(OBO)
        self.doc.add_namespace(SCR)
        self.doc.add_namespace(NIF)

    def _create_bundle(self, version):
        """
        Initialise NIDM-Results bundle.

        Creates ``self.bundle`` and declares, in ``self.doc``, the bundle
        entity and its generation. For every version other than 1.0.0 and
        1.1.0 the export activity and the exporter software agent are
        declared as well. ``bundle_ent``, ``export_act``, ``export_time`` and
        ``exporter`` are only created when the instance does not already have
        them.
        """
        # *** Bundle entity
        if not hasattr(self, 'bundle_ent'):
            self.bundle_ent = NIDMResultsBundle(nidm_version=version['num'])

        self.bundle = ProvBundle(identifier=self.bundle_ent.id)

        self.bundle_ent.export(self.version, self.export_dir)

        # # provn export
        # self.bundle = ProvBundle(identifier=bundle_id)

        self.doc.entity(self.bundle_ent.id,
                        other_attributes=self.bundle_ent.attributes)

        # *** NIDM-Results Export Activity
        if version['num'] not in ["1.0.0", "1.1.0"]:
            if not hasattr(self, 'export_act'):
                self.export_act = NIDMResultsExport()
            self.export_act.export(self.version, self.export_dir)
            # self.doc.update(self.export_act.p)
            self.doc.activity(self.export_act.id,
                              other_attributes=self.export_act.attributes)

        # *** bundle was Generated by NIDM-Results Export Activity
        if not hasattr(self, 'export_time'):
            # NOTE(review): this keeps the time of day only (no date).
            self.export_time = str(datetime.datetime.now().time())

        if version['num'] in ["1.0.0", "1.1.0"]:
            self.doc.wasGeneratedBy(entity=self.bundle_ent.id,
                                    time=self.export_time)
        else:
            # provn
            self.doc.wasGeneratedBy(
                entity=self.bundle_ent.id, activity=self.export_act.id,
                time=self.export_time)

        # *** NIDM-Results Exporter (Software Agent)
        if version['num'] not in ["1.0.0", "1.1.0"]:
            if not hasattr(self, 'exporter'):
                self.exporter = self._get_exporter()
            self.exporter.export(self.version, self.export_dir)
            # self.doc.update(self.exporter.p)
            self.doc.agent(self.exporter.id,
                           other_attributes=self.exporter.attributes)

            self.doc.wasAssociatedWith(self.export_act.id, self.exporter.id)

    def _get_model_parameters_estimations(self, error_model):
        """
        Infer model estimation method from the 'error_model'. Return an object
        of type ModelParametersEstimation.

        Independent errors give OLS when the variance is homogeneous and WLS
        otherwise; any other dependence gives GLS.
        """
        if error_model.dependance == NIDM_INDEPEDENT_ERROR:
            if error_model.variance_homo:
                estimation_method = STATO_OLS
            else:
                estimation_method = STATO_WLS
        else:
            estimation_method = STATO_GLS

        mpe = ModelParametersEstimation(estimation_method, self.software.id)

        return mpe

    def use_prefixes(self, ttl):
        """
        Replace alphanumeric identifiers in a turtle document by prefixes.

        Reads ``prefixes.csv`` (next to this module; first row is a header,
        remaining rows are ``alphanum_id, prefix, uri``). For every row whose
        identifier or URI occurs in ``ttl`` an ``@prefix`` declaration is
        prepended and occurrences of the identifier are replaced by
        ``prefix:``.

        :param ttl: turtle serialisation as a string.
        :return: tuple ``(ttl, context)`` where ``context`` maps each prefix
            that was used to its URI.
        """
        prefix_file = os.path.join(os.path.dirname(__file__), 'prefixes.csv')
        context = dict()
        with open(prefix_file, encoding="ascii") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the headers
            for alphanum_id, prefix, uri in reader:
                if alphanum_id in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
                    if uri in ttl:
                        ttl = ttl.replace(alphanum_id, prefix + ":")
                elif uri in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
        return (ttl, context)

    def save_prov_to_files(self, showattributes=False):
        """
        Write-out the turtle (nidm.ttl) and JSON-LD (nidm.json)
        serialisations, then move the export to its final location.

        :param showattributes: not used by this method; kept for
            compatibility with existing callers.
        """
        self.doc.add_bundle(self.bundle)
        # provn_file = os.path.join(self.export_dir, 'nidm.provn')
        # provn_fid = open(provn_file, 'w')
        # # FIXME None
        # # provn_fid.write(self.doc.get_provn(4).replace("None", "-"))
        # provn_fid.close()

        ttl_file = os.path.join(self.export_dir, 'nidm.ttl')
        ttl_txt = self.doc.serialize(format='rdf', rdf_format='turtle')
        ttl_txt, json_context = self.use_prefixes(ttl_txt)

        # Add namespaces to json-ld context
        for namespace in self.doc._namespaces.get_registered_namespaces():
            json_context[namespace._prefix] = namespace._uri
        for namespace in \
                list(self.doc._namespaces._default_namespaces.values()):
            json_context[namespace._prefix] = namespace._uri
        # NOTE(review): this maps "xsd" to the rdf-schema URI, not to
        # http://www.w3.org/2001/XMLSchema#. Left unchanged because it alters
        # the emitted JSON-LD context -- confirm before correcting.
        json_context["xsd"] = "http://www.w3.org/2000/01/rdf-schema#"

        # Work-around to issue with INF value in rdflib (reported in
        # https://github.com/RDFLib/rdflib/pull/655)
        ttl_txt = ttl_txt.replace(' inf ', ' "INF"^^xsd:float ')
        with open(ttl_file, 'w') as ttl_fid:
            ttl_fid.write(ttl_txt)

        # print(json_context)
        jsonld_file = os.path.join(self.export_dir, 'nidm.json')
        jsonld_txt = self.doc.serialize(format='rdf', rdf_format='json-ld',
                                        context=json_context)
        with open(jsonld_file, 'w') as jsonld_fid:
            jsonld_fid.write(jsonld_txt)

        # provjsonld_file = os.path.join(self.export_dir, 'nidm.provjsonld')
        # provjsonld_txt = self.doc.serialize(format='jsonld')
        # with open(provjsonld_file, 'w') as provjsonld_fid:
        #     provjsonld_fid.write(provjsonld_txt)

        # provn_file = os.path.join(self.export_dir, 'nidm.provn')
        # provn_txt = self.doc.serialize(format='provn')
        # with open(provn_file, 'w') as provn_fid:
        #     provn_fid.write(provn_txt)

        # Post-processing
        self._finalise_output()

    def _finalise_output(self):
        """
        Move the content of the temp export directory to ``self.out_dir``.

        Not zipped: the temp directory is simply renamed. Zipped: every file
        below the temp directory is stored in the archive under its path
        relative to that directory, then the temp directory is deleted.
        """
        if not self.zipped:
            # Just rename temp directory to output_path
            os.rename(self.export_dir, self.out_dir)
            return

        # Create a zip file that contains the content of the temp directory.
        # Paths are built explicitly instead of changing the working
        # directory, so the process cwd is left untouched and relative
        # locations resolve against the caller's cwd.
        with zipfile.ZipFile(self.out_dir, mode='w') as zf:
            for root, _dirnames, filenames in os.walk(self.export_dir):
                for filename in filenames:
                    file_path = os.path.join(root, filename)
                    zf.write(file_path,
                             arcname=os.path.relpath(file_path,
                                                     self.export_dir))

        shutil.rmtree(self.export_dir)
def useGenDependency(self, aDO, usedList, genList, throughActivity):
    """Record that ``throughActivity`` used some ROs and generated others.

    A fresh provenance document (the "provlet") is built for the event and
    attached to every generated RO; the same statements are added to the
    global graph ``pGlobal``. Upstream/downstream pointers of the ROs are
    updated and the credit manager is notified.

    Reads the enclosing-scope names ``DTns``, ``currentReuseCount``,
    ``pGlobal`` and ``aCreditManager``.

    :param aDO: the acting object; ``str(aDO.id)`` names the agent.
    :param usedList: ROs used by the activity (each has ``id`` and a
        ``downstream`` list).
    :param genList: ROs generated by the activity (each has ``id`` and an
        ``upstream`` list; ``provlet`` is assigned here).
    :param throughActivity: the activity; its ``id`` names the PROV activity.
    :return: the provlet ``ProvDocument`` for this event.
    """
    aID = throughActivity.id

    def _record(doc):
        """Add this event's entities, activity, agent and relations to doc."""
        used_entities = [doc.entity(DTns + ro.id) for ro in usedList]
        gen_entities = [doc.entity(DTns + ro.id) for ro in genList]

        activity = doc.activity(DTns + aID)
        agent = doc.agent(DTns + str(aDO.id))
        doc.wasAssociatedWith(activity, agent)
        for used in used_entities:
            doc.used(activity, used)
        for generated in gen_entities:
            doc.wasAttributedTo(generated, agent)
            doc.wasGeneratedBy(generated, activity)

    # create provlet
    d1 = ProvDocument()  # d1 is now an empty provenance document
    d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
    _record(d1)

    # The log line used to read the loop variable left over from iterating
    # usedList, which fails when usedList is empty and reports only the last
    # used RO. List every used id instead (same text for a single RO).
    used_ids = ", ".join(str(ro.id) for ro in usedList)

    # associate this provlet to each generated RO
    for aRO1 in genList:
        aRO1.provlet = d1
        # print() with one argument behaves the same on Python 2 and 3.
        print("event {n}: DO {do}: {ro1} <- wgby <- {act} <- used {ro}".format(
            n=currentReuseCount, do=aDO.id, ro1=aRO1.id, act=aID, ro=used_ids
        ))

    # update upstream pointers: each generated RO depends on every used RO
    # through the activity.
    # FIXME URGENTLY!!! not designed for M-M
    for genRO in genList:
        for uRO in usedList:
            genRO.upstream.append((uRO, throughActivity))

    # update downstream pointers: each generated RO is downstream from every
    # used RO through the activity.
    for uRO in usedList:
        for genRO in genList:
            uRO.downstream.append((genRO, throughActivity))

    # update global graph
    _record(pGlobal)

    # trigger credit recomputation
    # each used RO needs its credit updated with aRO1.credit for each
    # generated aRO1 through activity aID
    aCreditManager.addGenerationCredit(usedList, genList, throughActivity)

    # self.notify(d1)
    return d1
class Provenance(object):
    """Assemble a PROV document for a run and write it below ``output_dir``."""

    def __init__(self, output_dir):
        """Remember where to write; the document itself is created by ``start``."""
        self.output_dir = output_dir
        self.doc = None
        self.workflow = None

    def start(self, workflow=False):
        """Create a fresh document with its namespaces and agents.

        When ``workflow`` is exactly ``True`` a ``:workflow`` plan entity and
        an ``:orchestrate`` activity associated with the housemartin agent
        are added as well.
        """
        from daops import __version__ as daops_version
        from housemartin import __version__ as housemartin_version

        self.doc = ProvDocument()

        # Namespaces for the prefixes used below
        self.doc.set_default_namespace(uri="http://purl.org/roocs/prov#")
        prefixes = (
            ("prov", "http://www.w3.org/ns/prov#"),
            ("provone", "http://purl.dataone.org/provone/2015/01/15/ontology#"),
            ("dcterms", "http://purl.org/dc/terms/"),
        )
        for prefix, namespace_uri in prefixes:
            self.doc.add_namespace(prefix, uri=namespace_uri)

        # Agents: the organisation and the two software packages
        cds_attributes = {
            "prov:type": "prov:Organization",
            "dcterms:title": "Copernicus Climate Data Store",
        }
        project_cds = self.doc.agent(":copernicus_CDS", cds_attributes)

        housemartin_attributes = {
            "prov:type": "prov:SoftwareAgent",
            "dcterms:source": f"https://github.com/cedadev/housemartin/releases/tag/v{housemartin_version}",
        }
        self.sw_housemartin = self.doc.agent(":housemartin", housemartin_attributes)
        self.doc.wasAttributedTo(self.sw_housemartin, project_cds)

        daops_attributes = {
            "prov:type": "prov:SoftwareAgent",
            "dcterms:source": f"https://github.com/roocs/daops/releases/tag/v{daops_version}",
        }
        self.sw_daops = self.doc.agent(":daops", daops_attributes)

        # Nothing more to declare unless a workflow was explicitly requested
        if workflow is not True:
            return

        self.workflow = self.doc.entity(":workflow", {"prov:type": "provone:Workflow"})
        orchestrate_times = {
            "prov:startedAtTime": "2020-11-26T09:15:00",
            "prov:endedAtTime": "2020-11-26T09:30:00",
        }
        orchestrate = self.doc.activity(
            ":orchestrate", other_attributes=orchestrate_times
        )
        self.doc.wasAssociatedWith(
            orchestrate, agent=self.sw_housemartin, plan=self.workflow
        )

    def add_operator(self, operator, parameters, collection, output):
        """Add one operator run: its activity, input entity and output entity.

        Only the first element of ``collection`` and of ``output`` is recorded,
        each under its base name.
        """
        activity = self.doc.activity(
            f":{operator}",
            other_attributes={
                ":time": parameters.get("time"),
                ":apply_fixes": parameters.get("apply_fixes"),
            },
        )

        # Input dataset
        source_name = os.path.basename(collection[0])
        source_entity = self.doc.entity(f":{source_name}")

        # Without a workflow the operator is recorded as started by daops and
        # triggered by housemartin; with one, daops is associated via the plan.
        if not self.workflow:
            self.doc.start(
                activity, starter=self.sw_daops, trigger=self.sw_housemartin
            )
        else:
            self.doc.wasAssociatedWith(
                activity, agent=self.sw_daops, plan=self.workflow
            )

        # Generated output file
        result_name = os.path.basename(output[0])
        result_entity = self.doc.entity(f":{result_name}")
        self.doc.wasDerivedFrom(result_entity, source_entity, activity=activity)

    def write_json(self):
        """Serialize the document to ``provenance.json`` and return its path."""
        target = os.path.join(self.output_dir, "provenance.json")
        self.doc.serialize(target, format="json")
        return target

    def write_png(self):
        """Render the document to ``provenance.png`` and return its path."""
        target = os.path.join(self.output_dir, "provenance.png")
        prov_to_dot(self.doc).write_png(target)
        return target
"name": "", "email": "" }) add.entity("File", other_attributes={ "prov:type": "file", "path_at_addition": "" }) add.entity("File Version", other_attributes={ "prov:type": "file_version", "old_path": "", "new_path": "" }) add.wasInformedBy("Commit", "Parent Commit") add.wasAssociatedWith("Commit", "Committer") add.wasAssociatedWith("Commit", "Author") add.wasGeneratedBy("File", "Commit") add.wasGeneratedBy("File Version", "Commit") add.wasAttributedTo("File", "Author") add.wasAttributedTo("File Version", "Author") add.specializationOf("File Version", "File") mod = ProvDocument() mod.set_default_namespace("gitlab2prov:") mod.activity( "Commit", other_attributes={ "prov:type": "commit", "title": "", "message": "",
def w3c_publication_2():
    """Build the "w3c-publication2" example as a ``ProvDocument``.

    The PROV-N text this fixture is modelled on is reproduced in the comment
    below.
    """
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    # ===========================================================================
    # bundle
    #
    # prefix ex <http://example.org/>
    # prefix rec <http://example.org/record>
    #
    # prefix w3 <http://www.w3.org/TR/2011/>
    # prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>
    #
    #
    # entity(hg:Overview.html, [ prov:type="file in hg" ])
    # entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])
    #
    #
    # activity(ex:rcp,-,-,[prov:type="copy directory"])
    #
    # wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)
    #
    # entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])
    #
    # used(rec:u; ex:rcp,hg:Overview.html,-)
    # used(ex:rcp, ex:req3, -)
    #
    #
    # wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)
    #
    # agent(ex:webmaster, [ prov:type='prov:Person' ])
    #
    # wasAssociatedWith(ex:rcp, ex:webmaster, -)
    #
    # endBundle
    # ===========================================================================
    ex = Namespace("ex", "http://example.org/")
    rec = Namespace("rec", "http://example.org/record")
    w3 = Namespace("w3", "http://www.w3.org/TR/2011/")
    hg = Namespace(
        "hg",
        "http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/",
    )

    # Prefixed string identifiers reused by several statements below
    working_draft = "w3:WD-prov-dm-20111215"
    overview = "hg:Overview.html"
    copy_activity = "ex:rcp"
    request = "ex:req3"
    webmaster = "ex:webmaster"

    # NOTE(review): the statement order is kept exactly as in the fixture;
    # presumably the string identifiers rely on namespaces first seen through
    # the Namespace-based calls -- verify before reordering anything.
    document = ProvDocument()

    document.entity(hg["Overview.html"], {"prov:type": "file in hg"})
    document.entity(w3["WD-prov-dm-20111215"], {"prov:type": "html4"})

    document.activity(ex["rcp"], None, None, {"prov:type": "copy directory"})

    document.wasGeneratedBy(working_draft, copy_activity, identifier=rec["g"])

    publication_request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq"
    )
    document.entity(request, {"prov:type": publication_request_type})

    document.used(copy_activity, overview, identifier="rec:u")
    document.used(copy_activity, request)

    document.wasDerivedFrom(working_draft, overview, copy_activity, "rec:g", "rec:u")

    document.agent(webmaster, {"prov:type": "Person"})

    document.wasAssociatedWith(copy_activity, webmaster)

    return document
class ProvenanceProfile:
    """
    Provenance profile.

    Populated as the workflow runs: wraps one ``ProvDocument`` and records
    agents, activities and data entities into it, storing data files in the
    associated research object.
    """

    def __init__(
        self,
        research_object: "ResearchObject",
        full_name: str,
        host_provenance: bool,
        user_provenance: bool,
        orcid: str,
        fsaccess: StdFsAccess,
        run_uuid: Optional[uuid.UUID] = None,
    ) -> None:
        """
        Initialize the provenance profile.

        :param research_object: research object that receives data files,
            annotations and the serialized provenance
        :param full_name: creator's full name (may be empty)
        :param host_provenance: record host details when no creator is given
        :param user_provenance: record user account details when no creator
            is given
        :param orcid: creator's ORCID (may be empty)
        :param fsaccess: file system access used to read input files
        :param run_uuid: UUID of this workflow run; a random one is minted
            when omitted
        """
        self.fsaccess = fsaccess
        self.orcid = orcid
        self.research_object = research_object
        self.folder = self.research_object.folder
        self.document = ProvDocument()
        self.host_provenance = host_provenance
        self.user_provenance = user_provenance
        self.engine_uuid = research_object.engine_uuid  # type: str
        self.add_to_manifest = self.research_object.add_to_manifest
        if self.orcid:
            _logger.debug("[provenance] Creator ORCID: %s", self.orcid)
        self.full_name = full_name
        if self.full_name:
            _logger.debug("[provenance] Creator Full name: %s", self.full_name)
        self.workflow_run_uuid = run_uuid or uuid.uuid4()
        self.workflow_run_uri = self.workflow_run_uuid.urn  # type: str
        self.generate_prov_doc()

    def __str__(self) -> str:
        """Represent this Provenance profile as a string."""
        return "ProvenanceProfile <{}> in <{}>".format(
            self.workflow_run_uri,
            self.research_object,
        )

    def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
        """
        Add basic namespaces, agents and the workflow run activity.

        :return: the workflow run URI and the populated document
        """

        def host_provenance(document: ProvDocument) -> None:
            """Record host provenance."""
            document.add_namespace(CWLPROV)
            document.add_namespace(UUID)
            document.add_namespace(FOAF)

            hostname = getfqdn()
            # won't have a foaf:accountServiceHomepage for unix hosts, but
            # we can at least provide hostname
            document.agent(
                ACCOUNT_UUID,
                {
                    PROV_TYPE: FOAF["OnlineAccount"],
                    "prov:location": hostname,
                    CWLPROV["hostname"]: hostname,
                },
            )

        self.cwltool_version = "cwltool %s" % versionstring().split()[-1]
        self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
        # document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
        self.document.add_namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#")
        # TODO: Make this ontology. For now only has cwlprov:image
        self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#")
        self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")
        self.document.add_namespace("schema", "http://schema.org/")
        self.document.add_namespace("orcid", "https://orcid.org/")
        self.document.add_namespace("id", "urn:uuid:")
        # NOTE: Internet draft expired 2004-03-04 (!)
        #  https://tools.ietf.org/html/draft-thiemann-hash-urn-01
        # TODO: Change to nih:sha-256; hashes
        #  https://tools.ietf.org/html/rfc6920#section-7
        self.document.add_namespace("data", "urn:hash::sha1:")
        # Also needed for docker images
        self.document.add_namespace(SHA256, "nih:sha-256;")

        # info only, won't really be used by prov as sub-resources use /
        self.document.add_namespace("researchobject", self.research_object.base_uri)
        # annotations
        self.metadata_ns = self.document.add_namespace(
            "metadata", self.research_object.base_uri + METADATA + "/"
        )
        # Pre-register provenance directory so we can refer to its files
        self.provenance_ns = self.document.add_namespace(
            "provenance",
            self.research_object.base_uri + posix_path(PROVENANCE) + "/",
        )
        ro_identifier_workflow = self.research_object.base_uri + "workflow/packed.cwl#"
        self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
        ro_identifier_input = (
            self.research_object.base_uri + "workflow/primary-job.json#"
        )
        self.document.add_namespace("input", ro_identifier_input)

        # More info about the account (e.g. username, fullname)
        # may or may not have been previously logged by user_provenance()
        # .. but we always know cwltool was launched (directly or indirectly)
        # by a user account, as cwltool is a command line tool
        account = self.document.agent(ACCOUNT_UUID)
        if self.orcid or self.full_name:
            # Two distinct keys (qualified name and plain string) so that both
            # types end up asserted on the same agent.
            person = {PROV_TYPE: PROV["Person"], "prov:type": SCHEMA["Person"]}
            if self.full_name:
                person["prov:label"] = self.full_name
                person["foaf:name"] = self.full_name
                person["schema:name"] = self.full_name
            # TODO: Look up name from ORCID API when no full name is given?
            agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
            self.document.actedOnBehalfOf(account, agent)
        else:
            if self.host_provenance:
                host_provenance(self.document)
            if self.user_provenance:
                self.research_object.user_provenance(self.document)
        # The execution of cwltool
        wfengine = self.document.agent(
            self.engine_uuid,
            {
                PROV_TYPE: PROV["SoftwareAgent"],
                "prov:type": WFPROV["WorkflowEngine"],
                "prov:label": self.cwltool_version,
            },
        )
        # FIXME: This datetime will be a bit too delayed, we should
        # capture when cwltool.py earliest started?
        self.document.wasStartedBy(wfengine, None, account, datetime.datetime.now())
        # define workflow run level activity
        self.document.activity(
            self.workflow_run_uri,
            datetime.datetime.now(),
            None,
            {
                PROV_TYPE: WFPROV["WorkflowRun"],
                "prov:label": "Run of workflow/packed.cwl#main",
            },
        )
        # association between SoftwareAgent and WorkflowRun
        main_workflow = "wf:main"
        self.document.wasAssociatedWith(
            self.workflow_run_uri, self.engine_uuid, main_workflow
        )
        self.document.wasStartedBy(
            self.workflow_run_uri, None, self.engine_uuid, datetime.datetime.now()
        )
        return (self.workflow_run_uri, self.document)

    def evaluate(
        self,
        process: Process,
        job: JobsType,
        job_order_object: CWLObjectType,
        research_obj: "ResearchObject",
    ) -> None:
        """Evaluate the nature of job."""
        if not hasattr(process, "steps"):
            # record provenance of independent commandline tool executions
            self.prospective_prov(job)
            customised_job = copy_job_order(job, job_order_object)
            self.used_artefacts(customised_job, self.workflow_run_uri)
            research_obj.create_job(customised_job)
        elif hasattr(job, "workflow"):
            # record provenance of workflow executions
            self.prospective_prov(job)
            customised_job = copy_job_order(job, job_order_object)
            self.used_artefacts(customised_job, self.workflow_run_uri)

    def record_process_start(
        self, process: Process, job: JobsType, process_run_id: Optional[str] = None
    ) -> Optional[str]:
        """
        Pick or create the run id for a process that is about to start.

        A process without ``steps`` reuses the workflow run URI.  Otherwise,
        when the job has no ``workflow`` attribute, a new process run is
        started and its id returned.  In all other cases the
        ``process_run_id`` passed in is returned unchanged.
        """
        if not hasattr(process, "steps"):
            process_run_id = self.workflow_run_uri
        elif not hasattr(job, "workflow"):
            # commandline tool execution as part of workflow
            name = ""
            if isinstance(job, (CommandLineJob, JobBase, WorkflowJob)):
                name = job.name
            process_name = urllib.parse.quote(name, safe=":/,#")
            process_run_id = self.start_process(process_name, datetime.datetime.now())
        return process_run_id

    def start_process(
        self,
        process_name: str,
        when: datetime.datetime,
        process_run_id: Optional[str] = None,
    ) -> str:
        """
        Record the start of each Process.

        :return: the id of the process run; a fresh UUID URN when none is
            supplied
        """
        if process_run_id is None:
            process_run_id = uuid.uuid4().urn
        prov_label = "Run of workflow/packed.cwl#main/" + process_name
        self.document.activity(
            process_run_id,
            None,
            None,
            {PROV_TYPE: WFPROV["ProcessRun"], PROV_LABEL: prov_label},
        )
        self.document.wasAssociatedWith(
            process_run_id, self.engine_uuid, str("wf:main/" + process_name)
        )
        self.document.wasStartedBy(
            process_run_id, None, self.workflow_run_uri, when, None, None
        )
        return process_run_id

    def record_process_end(
        self,
        process_name: str,
        process_run_id: str,
        outputs: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
        when: datetime.datetime,
    ) -> None:
        """Record the outputs of a process run and mark it as ended at ``when``."""
        self.generate_output_prov(outputs, process_run_id, process_name)
        self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when)

    def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
        """
        Declare a ``class: File`` value and any secondary files it lists.

        The content entity is found or created from, in this order, the
        file's ``checksum``, its ``location`` or its ``contents``.  ``value``
        is updated in place with ``checksum`` and ``@id`` when they are
        missing.

        :return: the specialized (named) file entity, the content entity and
            the checksum
        :raises ValueError: if ``value`` is not a File, none of
            checksum/location/contents yields an entity, or a secondary file
            has an unexpected class
        """
        if value["class"] != "File":
            raise ValueError("Must have class:File: %s" % value)
        # Need to determine file hash aka RO filename
        entity = None  # type: Optional[ProvEntity]
        checksum = None
        if "checksum" in value:
            csum = cast(str, value["checksum"])
            (method, checksum) = csum.split("$", 1)
            if method == SHA1 and self.research_object.has_data_file(checksum):
                entity = self.document.entity("data:" + checksum)

        if not entity and "location" in value:
            location = str(value["location"])
            # If we made it here, we'll have to add it to the RO
            with self.fsaccess.open(location, "rb") as fhandle:
                relative_path = self.research_object.add_data_file(fhandle)
                # FIXME: This naively relies on add_data_file setting hash as filename
                checksum = PurePath(relative_path).name
                entity = self.document.entity(
                    "data:" + checksum, {PROV_TYPE: WFPROV["Artifact"]}
                )
                if "checksum" not in value:
                    value["checksum"] = f"{SHA1}${checksum}"

        if not entity and "contents" in value:
            # Anonymous file, add content as string
            entity, checksum = self.declare_string(cast(str, value["contents"]))

        # By here one of them should have worked!
        if not entity or not checksum:
            raise ValueError(
                "class:File but missing checksum/location/content: %r" % value
            )

        # Track filename and extension, this is generally useful only for
        # secondaryFiles. Note that multiple uses of a file might thus record
        # different names for the same entity, so we'll
        # make/track a specialized entity by UUID
        file_id = value.setdefault("@id", uuid.uuid4().urn)
        # A specialized entity that has just these names
        file_entity = self.document.entity(
            file_id,
            [(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])],
        )  # type: ProvEntity

        if "basename" in value:
            file_entity.add_attributes({CWLPROV["basename"]: value["basename"]})
        if "nameroot" in value:
            file_entity.add_attributes({CWLPROV["nameroot"]: value["nameroot"]})
        if "nameext" in value:
            file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]})
        self.document.specializationOf(file_entity, entity)

        # Check for secondaries
        for sec in cast(
            MutableSequence[CWLObjectType], value.get("secondaryFiles", [])
        ):
            # TODO: Record these in a specializationOf entity with UUID?
            if sec["class"] == "File":
                (sec_entity, _, _) = self.declare_file(sec)
            elif sec["class"] == "Directory":
                sec_entity = self.declare_directory(sec)
            else:
                raise ValueError(f"Got unexpected secondaryFiles value: {sec}")
            # We don't know how/when/where the secondary file was generated,
            # but CWL convention is a kind of summary/index derived
            # from the original file. As its generally in a different format
            # then prov:Quotation is not appropriate.
            self.document.derivation(
                sec_entity,
                file_entity,
                other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]},
            )

        return file_entity, entity, checksum

    def declare_directory(self, value: CWLObjectType) -> ProvEntity:
        """
        Register any nested files/directories.

        Besides the collection entity in the main document, an ORE folder
        description is written to a separate Turtle file in the research
        object and registered as an annotation of the directory.

        :return: the collection entity for the directory
        """
        # FIXME: Calculate a hash-like identifier for directory
        # so we get same value if it's the same filenames/hashes
        # in a different location.
        # For now, mint a new UUID to identify this directory, but
        # attempt to keep it inside the value dictionary
        dir_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))

        # New annotation file to keep the ORE Folder listing
        ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
        dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])

        coll = self.document.entity(
            dir_id,
            [
                (PROV_TYPE, WFPROV["Artifact"]),
                (PROV_TYPE, PROV["Collection"]),
                (PROV_TYPE, PROV["Dictionary"]),
                (PROV_TYPE, RO["Folder"]),
            ],
        )
        # ORE description of ro:Folder, saved separately
        coll_b = dir_bundle.entity(
            dir_id,
            [(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])],
        )
        self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)

        # dir_manifest = dir_bundle.entity(
        #     dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
        #                             ORE["describes"]: coll_b.identifier})

        coll_attribs = [(ORE["isDescribedBy"], dir_bundle.identifier)]
        coll_b_attribs = []  # type: List[Tuple[Identifier, ProvEntity]]

        # FIXME: .listing might not be populated yet - hopefully
        # a later call to this method will sort that
        is_empty = True

        if "listing" not in value:
            get_listing(self.fsaccess, value)
        for entry in cast(MutableSequence[CWLObjectType], value.get("listing", [])):
            is_empty = False
            # Declare child-artifacts
            entity = self.declare_artefact(entry)
            self.document.membership(coll, entity)
            # Membership relation aka our ORE Proxy
            m_id = uuid.uuid4().urn
            m_entity = self.document.entity(m_id)
            m_b = dir_bundle.entity(m_id)

            # PROV-O style Dictionary
            # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
            # ..as prov.py do not currently allow PROV-N extensions
            # like hadDictionaryMember(..)
            m_entity.add_asserted_type(PROV["KeyEntityPair"])

            m_entity.add_attributes(
                {
                    PROV["pairKey"]: entry["basename"],
                    PROV["pairEntity"]: entity,
                }
            )

            # As well as a being a
            # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
            m_b.add_asserted_type(RO["FolderEntry"])
            m_b.add_asserted_type(ORE["Proxy"])
            m_b.add_attributes(
                {
                    RO["entryName"]: entry["basename"],
                    ORE["proxyIn"]: coll,
                    ORE["proxyFor"]: entity,
                }
            )
            coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
            coll_b_attribs.append((ORE["aggregates"], m_b))

        coll.add_attributes(coll_attribs)
        coll_b.add_attributes(coll_b_attribs)

        # Also Save ORE Folder as annotation metadata
        ore_doc = ProvDocument()
        ore_doc.add_namespace(ORE)
        ore_doc.add_namespace(RO)
        ore_doc.add_namespace(UUID)
        ore_doc.add_bundle(dir_bundle)
        ore_doc = ore_doc.flattened()
        ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn))
        with self.research_object.write_bag_file(ore_doc_path) as provenance_file:
            ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
        self.research_object.add_annotation(
            dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri
        )

        if is_empty:
            # Empty directory
            coll.add_asserted_type(PROV["EmptyCollection"])
            coll.add_asserted_type(PROV["EmptyDictionary"])
        self.research_object.add_uri(coll.identifier.uri)
        return coll

    def declare_string(self, value: str) -> Tuple[ProvEntity, str]:
        """
        Save as string in UTF-8.

        :return: the data entity and the checksum, taken from the name of the
            file the research object stored the text under
        """
        byte_s = BytesIO(str(value).encode(ENCODING))
        data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
        checksum = PurePosixPath(data_file).name
        # FIXME: Don't naively assume add_data_file uses hash in filename!
        data_id = "data:%s" % PurePosixPath(data_file).stem
        entity = self.document.entity(
            data_id, {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)}
        )  # type: ProvEntity
        return entity, checksum

    def declare_artefact(self, value: Optional[CWLOutputType]) -> ProvEntity:
        """
        Create data artefact entities for all file objects.

        Dispatches on the type of ``value``: ``None``, scalars, strings,
        bytes, mappings (File, Directory or generic dictionary) and finally
        any other iterable.  Values that cannot be iterated are recorded by
        their ``repr()``.
        """
        if value is None:
            # FIXME: If this can happen in CWL, we'll
            # need a better way to represent this in PROV
            return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"})

        if isinstance(value, (bool, int, float)):
            # Typically used in job documents for flags

            # FIXME: Make consistent hash URIs for these
            # that somehow include the type
            # (so "1" != 1 != "1.0" != true)
            entity = self.document.entity(uuid.uuid4().urn, {PROV_VALUE: value})
            self.research_object.add_uri(entity.identifier.uri)
            return entity

        if isinstance(value, str):
            (entity, _) = self.declare_string(value)
            return entity

        if isinstance(value, bytes):
            # If we got here then we must be in Python 3
            byte_s = BytesIO(value)
            data_file = self.research_object.add_data_file(byte_s)
            # FIXME: Don't naively assume add_data_file uses hash in filename!
            data_id = "data:%s" % PurePosixPath(data_file).stem
            return self.document.entity(
                data_id,
                {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)},
            )

        if isinstance(value, MutableMapping):
            if "@id" in value:
                # Already processed this value, but it might not be in this PROV
                entities = self.document.get_record(value["@id"])
                if entities:
                    return entities[0]
                # else, unknown in PROV, re-add below as if it's fresh

            # Base case - we found a File we need to update
            if value.get("class") == "File":
                (entity, _, _) = self.declare_file(value)
                value["@id"] = entity.identifier.uri
                return entity

            if value.get("class") == "Directory":
                entity = self.declare_directory(value)
                value["@id"] = entity.identifier.uri
                return entity
            coll_id = value.setdefault("@id", uuid.uuid4().urn)
            # some other kind of dictionary?
            # TODO: also Save as JSON
            coll = self.document.entity(
                coll_id,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                    (PROV_TYPE, PROV["Dictionary"]),
                ],
            )

            if value.get("class"):
                _logger.warning("Unknown data class %s.", value["class"])
                # FIXME: The class might be "http://example.com/somethingelse"
                coll.add_asserted_type(CWLPROV[value["class"]])

            # Let's iterate and recurse
            coll_attribs = []  # type: List[Tuple[Identifier, ProvEntity]]
            for (key, val) in value.items():
                v_ent = self.declare_artefact(val)
                self.document.membership(coll, v_ent)
                m_entity = self.document.entity(uuid.uuid4().urn)
                # Note: only support PROV-O style dictionary
                # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
                # as prov.py do not easily allow PROV-N extensions
                m_entity.add_asserted_type(PROV["KeyEntityPair"])
                m_entity.add_attributes(
                    {PROV["pairKey"]: str(key), PROV["pairEntity"]: v_ent}
                )
                coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
            coll.add_attributes(coll_attribs)
            self.research_object.add_uri(coll.identifier.uri)
            return coll

        # some other kind of Collection?
        # TODO: also save as JSON
        try:
            members = []
            for each_input_obj in iter(value):
                # Recurse and register any nested objects
                e = self.declare_artefact(each_input_obj)
                members.append(e)

            # If we reached this, then we were allowed to iterate
            coll = self.document.entity(
                uuid.uuid4().urn,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                ],
            )
            if not members:
                coll.add_asserted_type(PROV["EmptyCollection"])
            else:
                for member in members:
                    # FIXME: This won't preserve order, for that
                    # we would need to use PROV.Dictionary
                    # with numeric keys
                    self.document.membership(coll, member)
            self.research_object.add_uri(coll.identifier.uri)
            # FIXME: list value does not support adding "@id"
            return coll
        except TypeError:
            _logger.warning("Unrecognized type %s of %r", type(value), value)
            # Let's just fall back to Python repr()
            entity = self.document.entity(uuid.uuid4().urn, {PROV_LABEL: repr(value)})
            self.research_object.add_uri(entity.identifier.uri)
            return entity

    def used_artefacts(
        self,
        job_order: Union[CWLObjectType, List[CWLObjectType]],
        process_run_id: str,
        name: Optional[str] = None,
    ) -> None:
        """
        Add used() for each data artefact.

        A list of job orders is handled entry by entry.  An input whose
        artefact cannot be declared because of an ``OSError`` is skipped (and
        logged at debug level) so the remaining inputs are still recorded.
        """
        if isinstance(job_order, list):
            for entry in job_order:
                self.used_artefacts(entry, process_run_id, name)
        else:
            # FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows
            base = "main"
            if name is not None:
                base += "/" + name
            for key, value in job_order.items():
                prov_role = self.wf_ns[f"{base}/{key}"]
                try:
                    entity = self.declare_artefact(value)
                    self.document.used(
                        process_run_id,
                        entity,
                        datetime.datetime.now(),
                        None,
                        {"prov:role": prov_role},
                    )
                except OSError as err:
                    # Best effort: keep going, but leave a trace instead of
                    # dropping the input silently.
                    _logger.debug(
                        "[provenance] Could not record used artefact %s: %s",
                        key,
                        err,
                    )

    def generate_output_prov(
        self,
        final_output: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
        process_run_id: Optional[str],
        name: Optional[str],
    ) -> None:
        """
        Call wasGeneratedBy() for each output,copy the files into the RO.

        :param final_output: one output object, a sequence of them, or None
            (nothing is recorded)
        :param process_run_id: generating activity; the workflow run URI is
            used when this is empty
        :param name: step name used in the role of each output, or None for
            outputs of the main workflow
        """
        if isinstance(final_output, MutableSequence):
            for entry in final_output:
                self.generate_output_prov(entry, process_run_id, name)
        elif final_output is not None:
            # Timestamp should be created at the earliest
            timestamp = datetime.datetime.now()

            # Quote the step name exactly once.  Quoting inside the loop
            # (as was done before) re-escaped the "%" of earlier escapes for
            # every further output, e.g. "%20" became "%2520".
            quoted_name = None  # type: Optional[str]
            if name is not None:
                quoted_name = urllib.parse.quote(str(name), safe=":/,#")

            if not process_run_id:
                process_run_id = self.workflow_run_uri

            # For each output, find/register the corresponding
            # entity (UUID) and document it as generated in
            # a role corresponding to the output
            for output, value in final_output.items():
                entity = self.declare_artefact(value)
                if quoted_name is not None:
                    # FIXME: Probably not "main" in nested workflows
                    role = self.wf_ns[f"main/{quoted_name}/{output}"]
                else:
                    role = self.wf_ns["main/%s" % output]

                self.document.wasGeneratedBy(
                    entity, process_run_id, timestamp, None, {"prov:role": role}
                )

    def prospective_prov(self, job: JobsType) -> None:
        """Create prospective prov recording as wfdesc prov:Plan."""
        if not isinstance(job, WorkflowJob):
            # direct command line tool execution
            self.document.entity(
                "wf:main",
                {
                    PROV_TYPE: WFDESC["Process"],
                    "prov:type": PROV["Plan"],
                    "prov:label": "Prospective provenance",
                },
            )
            return

        self.document.entity(
            "wf:main",
            {
                PROV_TYPE: WFDESC["Workflow"],
                "prov:type": PROV["Plan"],
                "prov:label": "Prospective provenance",
            },
        )

        for step in job.steps:
            # The first five characters of the step name are dropped before
            # it is appended to "wf:main/".
            stepnametemp = "wf:main/" + str(step.name)[5:]
            stepname = urllib.parse.quote(stepnametemp, safe=":/,#")
            provstep = self.document.entity(
                stepname,
                {PROV_TYPE: WFDESC["Process"], "prov:type": PROV["Plan"]},
            )
            self.document.entity(
                "wf:main",
                {
                    "wfdesc:hasSubProcess": provstep,
                    "prov:label": "Prospective provenance",
                },
            )
        # TODO: Declare roles/parameters as well

    def activity_has_provenance(self, activity, prov_ids):
        # type: (str, List[Identifier]) -> None
        """Add http://www.w3.org/TR/prov-aq/ relations to nested PROV files."""
        # NOTE: The below will only work if the corresponding metadata/provenance arcp URI
        # is a pre-registered namespace in the PROV Document
        attribs = [(PROV["has_provenance"], prov_id) for prov_id in prov_ids]
        self.document.activity(activity, other_attributes=attribs)
        # Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
        # as prov:mentionOf() is only for entities, not activities
        uris = [i.uri for i in prov_ids]
        self.research_object.add_annotation(activity, uris, PROV["has_provenance"].uri)

    def finalize_prov_profile(self, name):
        # type: (Optional[str]) -> List[Identifier]
        """
        Transfer the provenance related files to the RO.

        The document is serialized once per supported format.

        :param name: workflow/step name used in the file names, or None for
            the main workflow (fixed file names)
        :return: identifiers of the written provenance files, in the order
            they were written
        """
        # NOTE: Relative posix path
        if name is None:
            # main workflow, fixed filenames
            filename = "primary.cwlprov"
        else:
            # ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json
            wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_")
            # Note that the above could cause overlaps for similarly named
            # workflows, but that's OK as we'll also include run uuid
            # which also covers the case of this step being run in
            # multiple places or iterations
            filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov"

        basename = str(PurePosixPath(PROVENANCE) / filename)

        # TODO: Also support other profiles than CWLProv, e.g. ProvOne

        # (file extension, serialize() keyword arguments), in writing order
        serializations = (
            # https://www.w3.org/TR/prov-xml/
            (".xml", {"format": "xml", "indent": 4}),
            # https://www.w3.org/TR/prov-n/
            (".provn", {"format": "provn", "indent": 2}),
            # https://www.w3.org/Submission/prov-json/
            (".json", {"format": "json", "indent": 2}),
            # "rdf" aka https://www.w3.org/TR/prov-o/
            # which can be serialized to ttl/nt/jsonld (and more!)
            # https://www.w3.org/TR/turtle/
            (".ttl", {"format": "rdf", "rdf_format": "turtle"}),
            # https://www.w3.org/TR/n-triples/
            (".nt", {"format": "rdf", "rdf_format": "ntriples"}),
            # https://www.w3.org/TR/json-ld/
            # TODO: Use a nice JSON-LD context
            # see also https://eprints.soton.ac.uk/395985/
            # 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :(
            (".jsonld", {"format": "rdf", "rdf_format": "json-ld"}),
        )

        # list of prov identifiers of provenance files
        prov_ids = []  # type: List[Identifier]
        for extension, options in serializations:
            with self.research_object.write_bag_file(
                basename + extension
            ) as provenance_file:
                self.document.serialize(provenance_file, **options)
                prov_ids.append(self.provenance_ns[filename + extension])

        _logger.debug("[provenance] added provenance: %s", prov_ids)
        return prov_ids
class NIDMExporter:
    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.

    The methods ``_find_software``, ``_find_model_fitting``,
    ``_find_contrasts``, ``_find_inferences`` and ``_get_exporter`` are
    called by this class but not defined in it; they are expected to be
    provided by the children classes.
    """

    def __init__(self, version, out_dir, zipped=True):
        """
        Set up the output location, the version description and the PROV
        document.

        :param version: either "dev" or a "major.minor.revision" string,
            optionally suffixed with "-rcN".
        :param out_dir: output path; ".nidm" (directory) or ".nidm.zip"
            (archive) is appended to its last component.
        :param zipped: if True the export is written as a zip archive,
            otherwise as a directory.
        :raises SystemExit: if the output path already exists and the user
            does not confirm overwriting it.
        """
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname + ".nidm"
        else:
            out_dirname = out_dirname + ".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to
        # overwrite it
        if os.path.exists(out_dir):
            msg = out_dir + " already exists, overwrite?"
            if input("%s (y/N) " % msg).lower() != 'y':
                # Raised directly rather than through quit(), which is only
                # injected by the site module and may be missing.
                raise SystemExit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            # Development version sorts after every released version
            self.version = {
                'major': 10000,
                'minor': 0,
                'revision': 0,
                'num': version,
            }
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                # -1 marks "not a release candidate"
                rc = -1
            self.version = {
                'major': int(major),
                'minor': int(minor),
                'revision': int(revision),
                'rc': int(rc),
                'num': version,
            }

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''

    def parse(self):
        """
        Parse a result directory to extract the pieces of information to be
        stored in NIDM-Results.

        The temporary export directory is removed if any of the finder
        methods raises; the exception is then re-raised.
        """
        try:
            # Methods: find_software, find_model_fitting, find_contrasts and
            # find_inferences should be defined in the children classes and
            # return a list of NIDM Objects as specified in the objects
            # module

            # Object of type Software describing the neuroimaging software
            # package used for the analysis
            self.software = self._find_software()

            # List of objects of type ModelFitting describing the
            # model fitting step in NIDM-Results (main activity: Model
            # Parameters Estimation)
            self.model_fittings = self._find_model_fitting()

            # Dictionary of (key, value) pairs where key is a tuple
            # containing the identifier of a ModelParametersEstimation
            # object and a tuple of identifiers of ParameterEstimateMap
            # objects and value is an object of type Contrast describing the
            # contrast estimation step in NIDM-Results (main activity:
            # Contrast Estimation)
            self.contrasts = self._find_contrasts()

            # Inference activity and entities
            # Dictionary of (key, value) pairs where key is the identifier
            # of a ContrastEstimation object and value is an object of type
            # Inference describing the inference step in NIDM-Results (main
            # activity: Inference)
            self.inferences = self._find_inferences()
        except Exception:
            self.cleanup()
            raise

    def cleanup(self):
        """
        Remove the temporary export directory if it exists.
        """
        if os.path.isdir(self.export_dir):
            shutil.rmtree(self.export_dir)

    def add_object(self, nidm_object, export_file=True):
        """
        Add a NIDMObject to a NIDM-Results export.

        :param nidm_object: object to export and to register in the bundle.
        :param export_file: if False the object is exported with no export
            directory (``None``) instead of ``self.export_dir``.
        """
        export_dir = self.export_dir if export_file else None

        if isinstance(nidm_object, NIDMFile):
            nidm_object.export(self.version, export_dir, self.prepend_path)
        else:
            nidm_object.export(self.version, export_dir)

        # ProvDocument: add object to the bundle
        if nidm_object.prov_type == PROV['Activity']:
            self.bundle.activity(nidm_object.id,
                                 other_attributes=nidm_object.attributes)
        elif nidm_object.prov_type == PROV['Entity']:
            self.bundle.entity(nidm_object.id,
                               other_attributes=nidm_object.attributes)
        elif nidm_object.prov_type == PROV['Agent']:
            self.bundle.agent(nidm_object.id,
                              other_attributes=nidm_object.attributes)

    def export(self):
        """
        Generate a NIDM-Results export.

        :return: path of the generated export (``self.out_dir``).

        The temporary export directory is removed if anything raises; the
        exception is then re-raised.
        """
        try:
            if not os.path.isdir(self.export_dir):
                os.mkdir(self.export_dir)

            # Initialise main bundle
            self._create_bundle(self.version)

            self.add_object(self.software)

            # Add model fitting steps
            if not isinstance(self.model_fittings, list):
                self.model_fittings = list(self.model_fittings.values())

            for model_fitting in self.model_fittings:
                self._export_model_fitting(model_fitting)

            # Add contrast estimation steps
            # Maps each contrast estimation id to the id of the analysis
            # mask of its model fitting; read back by the inference step.
            analysis_masks = dict()
            for (model_fitting_id, pe_ids), contrasts in \
                    self.contrasts.items():
                for contrast in contrasts:
                    model_fitting = self._get_model_fitting(model_fitting_id)
                    analysis_masks[contrast.estimation.id] = \
                        model_fitting.mask_map.id
                    self._export_contrast(contrast, model_fitting, pe_ids)

            # Add inference steps
            for contrast_id, inferences in self.inferences.items():
                contrast = self._get_contrast(contrast_id)
                for inference in inferences:
                    self._export_inference(inference, contrast,
                                           analysis_masks)

            # Write-out prov file
            self.save_prov_to_files()

            return self.out_dir
        except Exception:
            self.cleanup()
            raise

    def _add_derived_from(self, nidm_object):
        """
        If `nidm_object` has a source (``derfrom``), record the derivation
        and add the source and its file (the file itself is not exported).
        """
        if nidm_object.derfrom is not None:
            self.bundle.wasDerivedFrom(nidm_object.id,
                                       nidm_object.derfrom.id)
            self.add_object(nidm_object.derfrom)
            self.add_object(nidm_object.derfrom.file, export_file=False)

    def _export_model_fitting(self, model_fitting):
        """
        Add one model fitting step (its activity, inputs and outputs) to
        the bundle.
        """
        activity_id = model_fitting.activity.id
        design_matrix = model_fitting.design_matrix

        # Design Matrix
        self.bundle.used(activity_id, design_matrix.id)
        self.add_object(design_matrix)
        # *** Export visualisation of the design matrix
        self.add_object(design_matrix.image)

        if design_matrix.image.file is not None:
            self.add_object(design_matrix.image.file)

        # NOTE(review): the drift model is guarded by `hrf_models`, not by
        # `drift_model` itself -- confirm this is intended.
        if design_matrix.hrf_models is not None:
            # drift model
            self.add_object(design_matrix.drift_model)

        # NOTE(review): machine and subjects are assumed to both be gated
        # on version >= 1.3 -- the original nesting is not recoverable.
        if (self.version['major'], self.version['minor']) >= (1, 3):
            # Machine
            self.bundle.wasAttributedTo(model_fitting.data.id,
                                        model_fitting.machine.id)
            self.add_object(model_fitting.machine)

            # Imaged subject or group(s)
            for sub in model_fitting.subjects:
                self.add_object(sub)
                self.bundle.wasAttributedTo(model_fitting.data.id, sub.id)

        # Data
        self.bundle.used(activity_id, model_fitting.data.id)
        self.add_object(model_fitting.data)

        # Error Model
        self.bundle.used(activity_id, model_fitting.error_model.id)
        self.add_object(model_fitting.error_model)

        # Parameter Estimate Maps
        for param_estimate in model_fitting.param_estimates:
            self.bundle.wasGeneratedBy(param_estimate.id, activity_id)
            self.add_object(param_estimate)
            self.add_object(param_estimate.coord_space)
            self.add_object(param_estimate.file)
            self._add_derived_from(param_estimate)

        # Residual Mean Squares Map
        rms_map = model_fitting.rms_map
        self.add_object(rms_map)
        self.bundle.wasGeneratedBy(rms_map.id, activity_id)
        self.add_object(rms_map.coord_space)
        self.add_object(rms_map.file)
        self._add_derived_from(rms_map)

        # Resels per Voxel Map
        rpv_map = model_fitting.rpv_map
        if rpv_map is not None:
            self.add_object(rpv_map)
            self.bundle.wasGeneratedBy(rpv_map.id, activity_id)
            self.add_object(rpv_map.coord_space)
            self.add_object(rpv_map.file)
            if rpv_map.inf_id is not None:
                self.bundle.used(rpv_map.inf_id, rpv_map.id)
            self._add_derived_from(rpv_map)

        # Mask
        mask_map = model_fitting.mask_map
        self.bundle.wasGeneratedBy(mask_map.id, activity_id)
        self.add_object(mask_map)
        self._add_derived_from(mask_map)
        # Create coordinate space export
        self.add_object(mask_map.coord_space)
        # Create "Mask map" entity
        self.add_object(mask_map.file)

        # Grand Mean map
        grand_mean_map = model_fitting.grand_mean_map
        self.bundle.wasGeneratedBy(grand_mean_map.id, activity_id)
        self.add_object(grand_mean_map)
        # Coordinate space entity
        self.add_object(grand_mean_map.coord_space)
        # Grand Mean Map entity
        self.add_object(grand_mean_map.file)

        # Model Parameters Estimation activity
        self.add_object(model_fitting.activity)
        self.bundle.wasAssociatedWith(activity_id, self.software.id)

    def _export_contrast(self, contrast, model_fitting, pe_ids):
        """
        Add one contrast estimation step to the bundle.

        :param contrast: contrast whose estimation activity is added.
        :param model_fitting: model fitting whose maps the estimation uses.
        :param pe_ids: identifiers of the parameter estimate maps used.
        """
        estimation_id = contrast.estimation.id

        self.bundle.used(estimation_id, model_fitting.rms_map.id)
        self.bundle.used(estimation_id, model_fitting.mask_map.id)
        self.bundle.used(estimation_id, contrast.weights.id)
        self.bundle.used(estimation_id, model_fitting.design_matrix.id)
        self.bundle.wasAssociatedWith(estimation_id, self.software.id)

        for pe_id in pe_ids:
            self.bundle.used(estimation_id, pe_id)

        # Create estimation activity
        self.add_object(contrast.estimation)

        # Create contrast weights
        self.add_object(contrast.weights)

        contrast_map = contrast.contrast_map
        if contrast_map is not None:
            # Create contrast Map
            self.bundle.wasGeneratedBy(contrast_map.id, estimation_id)
            self.add_object(contrast_map)
            self.add_object(contrast_map.coord_space)
            # Copy contrast map in export directory
            self.add_object(contrast_map.file)
            self._add_derived_from(contrast_map)

        # Create Std Err. Map (T-tests) or Explained Mean Sq. Map (F-tests)
        stderr_explmeansq_map = contrast.stderr_or_expl_mean_sq_map
        self.bundle.wasGeneratedBy(stderr_explmeansq_map.id, estimation_id)
        self.add_object(stderr_explmeansq_map)
        self.add_object(stderr_explmeansq_map.coord_space)
        if isinstance(stderr_explmeansq_map, ContrastStdErrMap) and \
                stderr_explmeansq_map.contrast_var:
            contrast_var = stderr_explmeansq_map.contrast_var
            self.add_object(contrast_var)
            if stderr_explmeansq_map.var_coord_space:
                self.add_object(stderr_explmeansq_map.var_coord_space)
            if contrast_var.coord_space:
                self.add_object(contrast_var.coord_space)
            self.add_object(contrast_var.file, export_file=False)
            self.bundle.wasDerivedFrom(stderr_explmeansq_map.id,
                                       contrast_var.id)
        self.add_object(stderr_explmeansq_map.file)

        # Create Statistic Map
        stat_map = contrast.stat_map
        self.bundle.wasGeneratedBy(stat_map.id, estimation_id)
        self.add_object(stat_map)
        self.add_object(stat_map.coord_space)
        # Copy Statistical map in export directory
        self.add_object(stat_map.file)
        self._add_derived_from(stat_map)

        # Create Z Statistic Map
        z_stat_map = contrast.z_stat_map
        if z_stat_map:
            self.bundle.wasGeneratedBy(z_stat_map.id, estimation_id)
            self.add_object(z_stat_map)
            self.add_object(z_stat_map.coord_space)
            # Copy Statistical map in export directory
            self.add_object(z_stat_map.file)

    def _export_inference(self, inference, contrast, analysis_masks):
        """
        Add one inference step to the bundle.

        :param inference: inference whose activity and entities are added.
        :param contrast: contrast the inference is computed from.
        :param analysis_masks: mapping from contrast estimation id to the
            id of the analysis mask used by the inference activity.
        """
        inference_id = inference.inference_act.id
        excursion_set = inference.excursion_set

        # The Z statistic map is used when available, the statistic map
        # otherwise
        if contrast.z_stat_map:
            used_id = contrast.z_stat_map.id
        else:
            used_id = contrast.stat_map.id
        self.bundle.used(inference_id, used_id)
        self.bundle.wasAssociatedWith(inference_id, self.software.id)

        # Excursion set
        self.bundle.wasGeneratedBy(excursion_set.id, inference_id)
        self.add_object(excursion_set)
        self.add_object(excursion_set.coord_space)
        if excursion_set.visu is not None:
            self.add_object(excursion_set.visu)
            if excursion_set.visu.file is not None:
                self.add_object(excursion_set.visu.file)
        # Copy "Excursion set map" file in export directory
        self.add_object(excursion_set.file)
        if excursion_set.clust_map is not None:
            self.add_object(excursion_set.clust_map)
            self.add_object(excursion_set.clust_map.file)
            self.add_object(excursion_set.clust_map.coord_space)

        if excursion_set.mip is not None:
            self.add_object(excursion_set.mip)
            self.add_object(excursion_set.mip.file)

        # Height threshold
        if inference.height_thresh.equiv_thresh is not None:
            for equiv in inference.height_thresh.equiv_thresh:
                self.add_object(equiv)
        self.add_object(inference.height_thresh)

        # Extent threshold
        if inference.extent_thresh.equiv_thresh is not None:
            for equiv in inference.extent_thresh.equiv_thresh:
                self.add_object(equiv)
        self.add_object(inference.extent_thresh)

        # Display Mask (potentially more than 1)
        if inference.disp_mask:
            for mask in inference.disp_mask:
                self.bundle.used(inference_id, mask.id)
                self.add_object(mask)
                # Create coordinate space entity
                self.add_object(mask.coord_space)
                # Create "Display Mask Map" entity
                self.add_object(mask.file)
                self._add_derived_from(mask)

        # Search Space
        self.bundle.wasGeneratedBy(inference.search_space.id, inference_id)
        self.add_object(inference.search_space)
        self.add_object(inference.search_space.coord_space)
        # Copy "Mask map" in export directory
        self.add_object(inference.search_space.file)

        # Peak Definition
        if inference.peak_criteria:
            self.bundle.used(inference_id, inference.peak_criteria.id)
            self.add_object(inference.peak_criteria)

        # Cluster Definition
        if inference.cluster_criteria:
            self.bundle.used(inference_id, inference.cluster_criteria.id)
            self.add_object(inference.cluster_criteria)

        if inference.clusters:
            # Clusters and peaks
            for cluster in inference.clusters:
                self.bundle.wasDerivedFrom(cluster.id, excursion_set.id)
                self.add_object(cluster)
                for peak in cluster.peaks:
                    self.bundle.wasDerivedFrom(peak.id, cluster.id)
                    self.add_object(peak)
                    self.add_object(peak.coordinate)

                if cluster.cog is not None:
                    self.bundle.wasDerivedFrom(cluster.cog.id, cluster.id)
                    self.add_object(cluster.cog)
                    self.add_object(cluster.cog.coordinate)

        # Inference activity
        self.bundle.used(inference_id, inference.height_thresh.id)
        self.bundle.used(inference_id, inference.extent_thresh.id)
        self.bundle.used(inference_id,
                         analysis_masks[contrast.estimation.id])

        self.add_object(inference.inference_act)

    def _get_model_fitting(self, mf_id):
        """
        Retrieve model fitting with identifier 'mf_id' from the list of
        model fitting objects stored in self.model_fittings

        :raises Exception: if no model fitting has this identifier.
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting

        raise Exception("Model fitting activity with id: " + str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Retrieve contrast with identifier 'con_id' from the list of contrast
        objects stored in self.contrasts

        :raises Exception: if no contrast has this identifier.
        """
        for contrasts in self.contrasts.values():
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: " + str(con_id) +
                        " not found.")

    def _add_namespaces(self):
        """
        Add namespaces to NIDM document.
        """
        for namespace in (NIDM, NIIRI, CRYPTO, DCT, DC, NFO, OBO, SCR, NIF):
            self.doc.add_namespace(namespace)

    def _create_bundle(self, version):
        """
        Initialise NIDM-Results bundle.

        :param version: version description (dictionary with a 'num' entry).

        Attributes already present on the instance (``bundle_ent``,
        ``export_act``, ``export_time``, ``exporter``) are reused rather
        than recreated.
        """
        # The export activity and the exporter agent are only described for
        # versions other than 1.0.0 and 1.1.0
        has_export_activity = version['num'] not in ["1.0.0", "1.1.0"]

        # *** Bundle entity
        if not hasattr(self, 'bundle_ent'):
            self.bundle_ent = NIDMResultsBundle(nidm_version=version['num'])

        self.bundle = ProvBundle(identifier=self.bundle_ent.id)

        self.bundle_ent.export(self.version, self.export_dir)

        self.doc.entity(self.bundle_ent.id,
                        other_attributes=self.bundle_ent.attributes)

        # *** NIDM-Results Export Activity
        if has_export_activity:
            if not hasattr(self, 'export_act'):
                self.export_act = NIDMResultsExport()
            self.export_act.export(self.version, self.export_dir)
            self.doc.activity(self.export_act.id,
                              other_attributes=self.export_act.attributes)

        # *** bundle was Generated by NIDM-Results Export Activity
        if not hasattr(self, 'export_time'):
            self.export_time = str(datetime.datetime.now().time())

        if has_export_activity:
            self.doc.wasGeneratedBy(entity=self.bundle_ent.id,
                                    activity=self.export_act.id,
                                    time=self.export_time)
        else:
            self.doc.wasGeneratedBy(entity=self.bundle_ent.id,
                                    time=self.export_time)

        # *** NIDM-Results Exporter (Software Agent)
        if has_export_activity:
            if not hasattr(self, 'exporter'):
                self.exporter = self._get_exporter()
            self.exporter.export(self.version, self.export_dir)
            self.doc.agent(self.exporter.id,
                           other_attributes=self.exporter.attributes)

            self.doc.wasAssociatedWith(self.export_act.id, self.exporter.id)

    def _get_model_parameters_estimations(self, error_model):
        """
        Infer model estimation method from the 'error_model'. Return an
        object of type ModelParametersEstimation.
        """
        if error_model.dependance == NIDM_INDEPEDENT_ERROR:
            if error_model.variance_homo:
                estimation_method = STATO_OLS
            else:
                estimation_method = STATO_WLS
        else:
            estimation_method = STATO_GLS

        mpe = ModelParametersEstimation(estimation_method, self.software.id)

        return mpe

    def use_prefixes(self, ttl):
        """
        Replace identifiers listed in 'prefixes.csv' (a file located next
        to this module) by prefixes in the turtle text `ttl`.

        Each row after the header is read as (identifier, prefix, uri). When
        the identifier or the uri occurs in `ttl`, an "@prefix" declaration
        is prepended and the occurrences of the identifier are replaced by
        "<prefix>:".

        :return: tuple (updated turtle text, dictionary prefix -> uri of
            the prefixes that were declared).
        """
        prefix_file = os.path.join(os.path.dirname(__file__), 'prefixes.csv')
        context = dict()
        with open(prefix_file, encoding="ascii") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the headers
            for alphanum_id, prefix, uri in reader:
                if alphanum_id in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
                    if uri in ttl:
                        ttl = ttl.replace(alphanum_id, prefix + ":")
                elif uri in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
        return (ttl, context)

    def save_prov_to_files(self, showattributes=False):
        """
        Write-out the turtle (nidm.ttl) and JSON-LD (nidm.json)
        serialisations and move the export to its final location.

        If ``self.zipped`` is False the temporary export directory is
        renamed to ``self.out_dir``; otherwise its content is archived into
        the zip file ``self.out_dir`` and the temporary directory is
        removed. The current working directory is left untouched.

        :param showattributes: not used by this method.
        """
        self.doc.add_bundle(self.bundle)

        ttl_file = os.path.join(self.export_dir, 'nidm.ttl')
        ttl_txt = self.doc.serialize(format='rdf', rdf_format='turtle')
        ttl_txt, json_context = self.use_prefixes(ttl_txt)

        # Add namespaces to json-ld context
        for namespace in self.doc._namespaces.get_registered_namespaces():
            json_context[namespace._prefix] = namespace._uri
        for namespace in self.doc._namespaces._default_namespaces.values():
            json_context[namespace._prefix] = namespace._uri
        # NOTE(review): this URI is the RDF Schema namespace, whereas "xsd"
        # usually denotes http://www.w3.org/2001/XMLSchema# -- confirm
        # whether this mapping is intentional.
        json_context["xsd"] = "http://www.w3.org/2000/01/rdf-schema#"

        # Work-around to issue with INF value in rdflib (reported in
        # https://github.com/RDFLib/rdflib/pull/655)
        ttl_txt = ttl_txt.replace(' inf ', ' "INF"^^xsd:float ')
        with open(ttl_file, 'w') as ttl_fid:
            ttl_fid.write(ttl_txt)

        jsonld_file = os.path.join(self.export_dir, 'nidm.json')
        jsonld_txt = self.doc.serialize(format='rdf', rdf_format='json-ld',
                                        context=json_context)
        with open(jsonld_file, 'w') as jsonld_fid:
            jsonld_fid.write(jsonld_txt)

        # Post-processing
        if not self.zipped:
            # Just rename temp directory to output_path
            os.rename(self.export_dir, self.out_dir)
        else:
            # Create a zip file that contains the content of the temp
            # directory. Members are named relative to the temp directory,
            # so files in sub-directories are archived too and there is no
            # need to change the working directory.
            with zipfile.ZipFile(self.out_dir, mode='w') as zf:
                for root, _dirnames, filenames in os.walk(self.export_dir):
                    for filename in filenames:
                        file_path = os.path.join(root, filename)
                        zf.write(
                            file_path,
                            os.path.relpath(file_path, self.export_dir))
            shutil.rmtree(self.export_dir)