def convert_stats_to_nidm(stats):
    """Convert a stats record into a NIDM entity.

    ``stats`` is iterated once; each element is indexed as ``val[0]`` (the
    measure name, appended to the ``kwyk_`` prefix to form the attribute
    name) and ``val[1]`` (the value).  The value is stored as an
    ``xsd:float`` literal when it contains a ``"."`` and as ``xsd:integer``
    otherwise, so ``val[1]`` is presumably a string -- verify against the
    caller.

    Returns the entity and the prov document
    """
    from nidm.core import Constants
    from nidm.experiment.Core import getUUID
    # Import the submodule explicitly: a bare ``import prov`` only exposes
    # ``prov.model`` when some other module happens to have imported it
    # already, which made this function depend on import order.
    import prov.model

    kwyk = prov.model.Namespace("kwyk", str(KWYKNS))
    niiri = prov.model.Namespace("niiri", str(Constants.NIIRI))
    nidm = prov.model.Namespace("nidm", "http://purl.org/nidash/nidm#")

    doc = prov.model.ProvDocument()
    e = doc.entity(identifier=niiri[getUUID()])
    e.add_asserted_type(nidm["KWYKStatsCollection"])
    e.add_attributes({
        kwyk["kwyk_" + val[0]]: prov.model.Literal(
            val[1],
            datatype=prov.model.XSD["float"]
            if "." in val[1] else prov.model.XSD["integer"],
        )
        for val in stats
    })
    return e, doc
def add_seg_data(nidmdoc, subjid, fs_stats_entity_id, add_to_nidm=False, forceagent=False):
    '''
    WIP: add the provenance for an FSL FAST/FIRST segmentation statistics collection to a NIDM graph.

    The function adds a prov:Activity for the segmentation, associates it (through qualified
    associations) with an FSL software agent and with the participant's agent, states that the
    statistics entity was generated by the activity and links the activity to every nidm:Project
    found in the graph via dct:isPartOf.

    :param nidmdoc: graph the triples are added to (must provide bind/add/subjects/query)
    :param subjid: subject ID string used to create or look up the participant agent
    :param fs_stats_entity_id: statistics entity; its ``uri`` attribute is used as the subject of
        prov:wasGeneratedBy
    :param add_to_nidm: if False a new participant agent is always created; if True the agent is
        looked up in nidmdoc by ndar:src_subject_id (first as given, then with leading zeros stripped)
    :param forceagent: only used when add_to_nidm is True and the subject was not found: any value
        other than False creates a new agent, False prints a message and exits
    :return: None
    '''

    niiri = Namespace("http://iri.nidash.org/")
    nidmdoc.bind("niiri", niiri)
    # add namespace for subject id
    ndar = Namespace(Constants.NDAR)
    nidmdoc.bind("ndar", ndar)
    dct = Namespace(Constants.DCT)
    nidmdoc.bind("dct", dct)
    sio = Namespace(Constants.SIO)
    nidmdoc.bind("sio", sio)

    software_activity = niiri[getUUID()]
    nidmdoc.add((software_activity, RDF.type, Constants.PROV['Activity']))
    nidmdoc.add((software_activity, Constants.DCT["description"],
                 Literal("FSL FAST/FIRST segmentation statistics")))

    # create software agent and associate with software activity
    # search and see if a software agent exists for this software, if so use it, if not create it
    for software_uid in nidmdoc.subjects(
            predicate=Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE,
            object=URIRef(Constants.FSL)):
        software_agent = software_uid
        break
    else:
        software_agent = niiri[getUUID()]
        nidmdoc.add((software_agent, RDF.type, Constants.PROV['Agent']))
        nidmdoc.add((software_agent, Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE,
                     URIRef(Constants.FSL)))
        nidmdoc.add((software_agent, RDF.type, Constants.PROV["SoftwareAgent"]))

    association_bnode = BNode()
    nidmdoc.add((software_activity, Constants.PROV['qualifiedAssociation'], association_bnode))
    nidmdoc.add((association_bnode, RDF.type, Constants.PROV['Association']))
    nidmdoc.add((association_bnode, Constants.PROV['hadRole'],
                 Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE))
    nidmdoc.add((association_bnode, Constants.PROV['agent'], software_agent))

    # None means "no agent yet"; it is either found in the existing graph or created below
    participant_agent = None

    if add_to_nidm:
        # query to get agent id for subjid
        agent_query = """
            PREFIX ndar:<https://ndar.nih.gov/api/datadictionary/v2/dataelement/>
            PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX prov:<http://www.w3.org/ns/prov#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

            select distinct ?agent
            where {

                ?agent rdf:type prov:Agent ;
                ndar:src_subject_id "%s"^^xsd:string .

            }"""

        for row in nidmdoc.query(agent_query % subjid):
            print('Found subject ID: %s in NIDM file (agent: %s)' % (subjid, row[0]))
            participant_agent = row[0]

        if participant_agent is None:
            print('Subject ID (%s) was not found in existing NIDM file...' % subjid)
            # added to account for issues with some BIDS datasets that have leading 00's in subject directories
            # but not in participants.tsv files.
            stripped_id = subjid.lstrip('0')
            if stripped_id != subjid:
                print('Trying to find subject ID without leading zeros....')
                for row in nidmdoc.query(agent_query % stripped_id):
                    print('Found subject ID after stripping zeros: %s in NIDM file (agent: %s)'
                          % (stripped_id, row[0]))
                    participant_agent = row[0]
                if participant_agent is None:
                    print("Still can't find subject id after stripping leading zeros...")

        # The original compared the query results to the integer 0, which is never true (and
        # referenced the second result even when the second query was not run), so neither the
        # forced creation nor the exit could happen and participant_agent stayed unbound.
        if participant_agent is None:
            if forceagent is False:
                print('Not explicitly adding agent to NIDM file, no output written')
                exit()
            print('Explicitly creating agent in existing NIDM file...')

    if participant_agent is None:
        # create a new agent for subjid
        participant_agent = niiri[getUUID()]
        nidmdoc.add((participant_agent, RDF.type, Constants.PROV['Agent']))
        nidmdoc.add((participant_agent, URIRef(Constants.NIDM_SUBJECTID.uri),
                     Literal(subjid, datatype=XSD.string)))

    # create a blank node and qualified association with prov:Agent for participant
    association_bnode = BNode()
    nidmdoc.add((software_activity, Constants.PROV['qualifiedAssociation'], association_bnode))
    nidmdoc.add((association_bnode, RDF.type, Constants.PROV['Association']))
    nidmdoc.add((association_bnode, Constants.PROV['hadRole'], Constants.SIO["Subject"]))
    nidmdoc.add((association_bnode, Constants.PROV['agent'], participant_agent))

    # add association between FSStatsCollection and computation activity
    nidmdoc.add((URIRef(fs_stats_entity_id.uri), Constants.PROV['wasGeneratedBy'], software_activity))

    # get project uuid from NIDM doc and make association with software_activity
    query = """
        prefix nidm: <http://purl.org/nidash/nidm#>
        PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        select distinct ?project
        where {

            ?project rdf:type nidm:Project .

        }"""

    qres = nidmdoc.query(query)
    for row in qres:
        nidmdoc.add((software_activity, Constants.DCT["isPartOf"], row['project']))
def add_seg_data(nidmdoc, subjid, measure, add_to_nidm=False):
    '''
    WIP: add an "ANTS segmentation statistics" collection and its provenance to a NIDM graph.

    The function adds a prov:Activity, a software agent, a statistics entity typed
    nidm:ANTSStatsCollection that was generated by the activity, a participant agent (new, or
    looked up in the existing graph), qualified associations linking both agents to the activity,
    and one triple on the statistics entity per measured value.

    :param nidmdoc: graph the triples are added to (must provide bind/add/query)
    :param subjid: subject ID used to create or look up the participant agent
    :param measure: iterable of dicts; each needs a 'structure' entry (zero-padded to 5 characters
        to build the ``fs_`` predicate name) and an 'items' list of dicts with a 'value' entry
    :param add_to_nidm: if False a new participant agent is created; if True the agent is looked up
        by ndar:src_subject_id and the program exits when it is not found
    :return: None
    '''

    niiri = Namespace("http://iri.nidash.org/")
    nidmdoc.bind("niiri", niiri)

    fs = Namespace("https://surfer.nmr.mgh.harvard.edu/")
    nidmdoc.bind("fs", fs)

    software_activity = niiri[getUUID()]
    nidmdoc.add((software_activity, RDF.type, Constants.PROV['Activity']))
    nidmdoc.add((software_activity, Constants.DCT["description"],
                 Literal("ANTS segmentation statistics")))

    # create software agent and associate with software activity
    # NOTE(review): the description says ANTS but the agent is tagged with Constants.FSL --
    # looks like a copy/paste leftover; confirm which software URI is intended.
    software_agent = niiri[getUUID()]
    nidmdoc.add((software_agent, RDF.type, Constants.PROV['Agent']))
    nidmdoc.add((software_agent, Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE,
                 URIRef(Constants.FSL)))
    nidmdoc.add((software_agent, RDF.type, Constants.PROV["SoftwareAgent"]))

    # Qualified association: the blank node is a prov:Association that points at the agent with
    # prov:agent (it was previously typed prov:Agent and linked with prov:wasAssociatedWith).
    association_bnode = BNode()
    nidmdoc.add((software_activity, Constants.PROV['qualifiedAssociation'], association_bnode))
    nidmdoc.add((association_bnode, RDF.type, Constants.PROV['Association']))
    nidmdoc.add((association_bnode, Constants.PROV['hadRole'],
                 Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE))
    nidmdoc.add((association_bnode, Constants.PROV['agent'], software_agent))

    # add ANTS data
    datum_entity = niiri[getUUID()]
    nidmdoc.add((datum_entity, RDF.type, Constants.PROV['Entity']))
    nidmdoc.add((datum_entity, RDF.type, Constants.NIDM["ANTSStatsCollection"]))
    nidmdoc.add((datum_entity, Constants.PROV['wasGeneratedBy'], software_activity))

    if not add_to_nidm:
        # create a new agent for subjid
        # NOTE(review): this literal has no datatype while the lookup query below matches
        # "..."^^xsd:string -- confirm a file written here can be matched later.
        participant_agent = niiri[getUUID()]
        nidmdoc.add((participant_agent, RDF.type, Constants.PROV['Agent']))
        nidmdoc.add((participant_agent, URIRef(Constants.NIDM_SUBJECTID.uri), Literal(subjid)))
    else:
        # search for prov:agent with this subject id
        query = """
            PREFIX ndar:<https://ndar.nih.gov/api/datadictionary/v2/dataelement/>
            PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX prov:<http://www.w3.org/ns/prov#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

            select distinct ?agent
            where {

                ?agent rdf:type prov:Agent ;
                ndar:src_subject_id "%s"^^xsd:string .

            }""" % subjid
        print(query)
        qres = nidmdoc.query(query)
        if len(qres) == 0:
            print('Subject ID (%s) was not found in existing NIDM file.  No output written...' % subjid)
            exit()
        for row in qres:
            print('Found subject ID: %s in NIDM file (agent: %s)' % (subjid, row[0]))
            # if several agents match, the last one returned is used
            participant_agent = row[0]

    # create a blank node and qualified association with prov:Agent for participant
    # (done exactly once; the add-to-existing path used to emit this association twice)
    association_bnode = BNode()
    nidmdoc.add((software_activity, Constants.PROV['qualifiedAssociation'], association_bnode))
    nidmdoc.add((association_bnode, RDF.type, Constants.PROV['Association']))
    nidmdoc.add((association_bnode, Constants.PROV['hadRole'], Constants.SIO["Subject"]))
    nidmdoc.add((association_bnode, Constants.PROV['agent'], participant_agent))

    # iterate over measure dictionary where measures are the lines in the FS stats files which start with '# Measure' and
    # the whole table at the bottom of the FS stats file that starts with '# ColHeaders
    for measures in measure:
        predicate = fs['fs_' + str(measures['structure']).rjust(5, '0')]
        for items in measures["items"]:
            nidmdoc.add((datum_entity, predicate, Literal(items['value'])))
def add_brainvolume_data(nidmdoc, df, id_field, source_row, column_to_terms, png_file=None, output_file=None, root_act=None, nidm_graph=None):
    '''
    Add the brain volume measurements held in df to a NIDM document.

    For each software product named in source_row a computation activity and a software agent are
    created; for each (activity, participant) pair an entity is created, linked to the activity
    with wasGeneratedBy, and every measurement column is stored on it as an attribute.

    :param nidmdoc: NIDM document; records are created through ``nidmdoc.graph``
    :param df: DataFrame with one row per participant and one column per measured variable
    :param id_field: name of the df column holding the participant id
    :param source_row: DataFrame whose column labels name the software behind each df column
        (text after the first "." in a label is ignored)
    :param column_to_terms: dict keyed by variable name (df column label up to the first ".");
        each value must have a "url" entry
    :param png_file: if not None, and nidm_graph is None, the first processed row triggers the
        graph export below; a PDF is only written when the value is also truthy
    :param output_file: base filename of that PDF (".pdf" is appended)
    :param root_act: optional root activity; each computation activity gets dct:isPartOf root_act
    :param nidm_graph: None to create records for every row of df; any other value to add data only
        for participants that already have an agent (and an anatomical entity) in nidmdoc
    :return: None
    '''

    def software_qname():
        # qualified name used as the software agent's prov:type, as the attribute holding the
        # software name, and as the role in the software's qualified association
        return QualifiedName(
            provNamespace(
                Core.safe_string(None, string=str("Neuroimaging Analysis Software")),
                Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),
            "")

    def variable_qname(variable):
        # get column_to_term mapping uri and add as namespace in NIDM document
        return QualifiedName(
            provNamespace(
                Core.safe_string(None, string=str(variable)),
                column_to_terms[variable.split(".")[0]]["url"]),
            "")

    def software_for(variable):
        # get source software matching this column; deals with duplicate variables in
        # source_row and pandas changing duplicate names
        return source_row.columns[[column_index(df, variable)]]._values[0].split(".")[0]

    # dictionaries to store the records created so far
    software_agent = {}
    software_activity = {}
    participant_agent = {}
    entity = {}

    # this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    # existing NIDM file.  The following logic basically determines which route to take...
    if nidm_graph is None:
        first_row = True
        # iterate over rows and store in NIDM file
        for _, csv_row in df.iterrows():
            # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
            for row_variable, row_data in csv_row.items():
                if row_variable == id_field:
                    # store participant id for later use in processing the data for this row
                    participant_id = row_data
                    # if there is no agent for the participant then add one
                    if row_data not in participant_agent:
                        participant_agent[row_data] = nidmdoc.graph.agent(
                            QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
                            other_attributes=({Constants.NIDM_SUBJECTID: row_data}))
                    continue

                software_key = software_for(row_variable)

                # see if we already have a software_activity for this agent
                # NOTE(review): activities are keyed by software only, so the participant
                # association below is made just for the first participant seen -- confirm intent.
                if software_key not in software_activity:
                    # create an activity for the computation...simply a placeholder for more extensive provenance
                    software_activity[software_key] = nidmdoc.graph.activity(
                        QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
                        other_attributes={
                            Constants.NIDM_PROJECT_DESCRIPTION: "brain volume computation"
                        })

                    if root_act is not None:
                        # associate activity with activity of brain volumes creation (root-level activity)
                        software_activity[software_key].add_attributes({
                            QualifiedName(provNamespace("dct", Constants.DCT), 'isPartOf'): root_act
                        })

                    # associate this activity with the participant
                    nidmdoc.graph.association(
                        activity=software_activity[software_key],
                        agent=participant_agent[participant_id],
                        other_attributes={PROV_ROLE: Constants.NIDM_PARTICIPANT})
                    nidmdoc.graph.wasAssociatedWith(
                        activity=software_activity[software_key],
                        agent=participant_agent[participant_id])

                    # check if there's an associated software agent and if not, create one
                    if software_key not in software_agent:
                        software_agent[software_key] = nidmdoc.graph.agent(
                            QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
                            other_attributes={
                                'prov:type': software_qname(),
                                software_qname(): software_key
                            })
                        # create qualified association with brain volume computation activity
                        nidmdoc.graph.association(
                            activity=software_activity[software_key],
                            agent=software_agent[software_key],
                            other_attributes={PROV_ROLE: software_qname()})
                        nidmdoc.graph.wasAssociatedWith(
                            activity=software_activity[software_key],
                            agent=software_agent[software_key])

                # check if we have an entity for storing this particular variable for this subject and software else create one
                activity = software_activity[software_key]
                entity_key = (activity.identifier.localpart
                              + participant_agent[participant_id].identifier.localpart)
                if entity_key not in entity:
                    # create an entity to store brain volume data for this participant
                    entity[entity_key] = nidmdoc.graph.entity(
                        QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()))
                    # add wasGeneratedBy association to activity
                    nidmdoc.graph.wasGeneratedBy(entity=entity[entity_key], activity=activity)

                entity[entity_key].add_attributes({variable_qname(row_variable): row_data})

            # just for debugging.  resulting graph is too big right now for DOT graph creation so here I'm simply creating
            # a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the
            # model
            if png_file is not None and first_row:
                if png_file:
                    nidmdoc.save_DotGraph(str(output_file + ".pdf"), format="pdf")
                first_row = False
    else:
        # logic to add to existing graph
        # use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(nidmdoc.serializeTurtle()), format='turtle')

        # find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?entity prov:wasGeneratedBy ?activity ;
                            nidm:hadImageUsageType nidm:Anatomical .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .

                    }"""
        qres = rdf_graph_parse.query(query)

        for row in qres:
            print('%s \t %s' % (row[2], row[1]))
            # find row in CSV file with matching subject id to the agent in the NIDM file
            # be careful about data types...simply type-change dataframe subject id column and query to strings.
            # here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            # time which column is the subject id....
            # NOTE(review): str.contains does substring (regex) matching, so one NIDM id can
            # select several CSV rows; only the first is used below -- confirm this is acceptable.
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            # if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            # then add this brain volumes data to NIDM file, else skip it....
            if len(csv_row.index) == 0:
                continue

            print("found other data for participant %s" % row[1])

            # Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the
            # brain volumes data.  We don't know which AcquisitionObject (entity) describes the T1-weighted scans
            # used for the project.  Since we don't have the SHA512 sums in the brain volumes data (YET) we can't
            # really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're
            # simply, for the moment, going to assume it's the activity/session returned by the above query
            # where we've specifically asked for the entity which has a nidm:hadImageUsageType nidm:Anatomical

            # this is the entity that is associated with the brain volume report for this participant
            anat_entity_uuid = row[3]
            # the participant's agent is row[2] in the query response
            agent_id = row[2]

            # DataFrame.items() replaces DataFrame.iteritems(), which was removed in pandas 2.0;
            # it yields (column label, column Series) pairs
            for row_variable, row_data in csv_row.items():
                if row_variable == id_field:
                    # store participant id for later use in processing the data for this row
                    participant_id = row_data.values[0]
                    print("participant id: %s" % participant_id)
                    continue

                software_key = software_for(row_variable)
                activity_key = software_key + agent_id

                # see if we already have a software_activity for this agent
                if activity_key not in software_activity:
                    # create an activity for the computation and associate it with the entity containing
                    # the original T1-weighted scan which is stored in anat_entity_uuid
                    software_activity[activity_key] = nidmdoc.graph.activity(
                        QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
                        other_attributes={
                            Constants.NIDM_PROJECT_DESCRIPTION: "brain volume computation",
                            PROV_ATTR_USED_ENTITY: anat_entity_uuid
                        })

                    if root_act is not None:
                        # associate activity with activity of brain volumes creation (root-level activity)
                        software_activity[activity_key].add_attributes({
                            QualifiedName(provNamespace("dct", Constants.DCT), 'isPartOf'): root_act
                        })

                    # associate this activity with the participant
                    nidmdoc.graph.association(
                        activity=software_activity[activity_key],
                        agent=agent_id,
                        other_attributes={PROV_ROLE: Constants.NIDM_PARTICIPANT})
                    nidmdoc.graph.wasAssociatedWith(
                        activity=software_activity[activity_key],
                        agent=agent_id)

                # check if there's an associated software agent and if not, create one
                if software_key not in software_agent:
                    # if we have a URL defined for this software in Constants.py then use it else simply use the string name of the software product
                    if software_key.lower() in Constants.namespaces:
                        software_value = QualifiedName(
                            provNamespace(software_key,
                                          Constants.namespaces[software_key.lower()]),
                            "")
                    else:
                        software_value = software_key

                    software_agent[software_key] = nidmdoc.graph.agent(
                        QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
                        other_attributes={
                            'prov:type': software_qname(),
                            software_qname(): software_value
                        })
                    # create qualified association with brain volume computation activity
                    nidmdoc.graph.association(
                        activity=software_activity[activity_key],
                        agent=software_agent[software_key],
                        other_attributes={PROV_ROLE: software_qname()})
                    nidmdoc.graph.wasAssociatedWith(
                        activity=software_activity[activity_key],
                        agent=software_agent[software_key])

                # check if we have an entity for storing this particular variable for this subject and software else create one
                activity = software_activity[activity_key]
                entity_key = activity.identifier.localpart + agent_id
                if entity_key not in entity:
                    # create an entity to store brain volume data for this participant
                    entity[entity_key] = nidmdoc.graph.entity(
                        QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()))
                    # add wasGeneratedBy association to activity
                    nidmdoc.graph.wasGeneratedBy(entity=entity[entity_key], activity=activity)

                entity[entity_key].add_attributes(
                    {variable_qname(row_variable): row_data.values[0]})
def main(argv):
    """Convert a brain-volumes CSV file into a NIDM file.

    Parses the command line, reads the CSV (first line: software per column,
    second line: variable names), maps the variables to terms, then either
    adds the data to an existing NIDM file (``-nidm``) or creates a new NIDM
    document, and writes the result as Turtle to the ``-out`` file.

    NOTE(review): ``argv`` is accepted but the arguments are read by
    ``parse_args()`` from the process command line -- confirm with callers.
    """
    parser = ArgumentParser(
        description="""This program will load in a CSV file made during simple-2
                brain volumes experiment which has the following organization:
                source	FSL	FSL	FSL
                participant_id	left nucleus accumbens volume	left amygdala volume
                sub-0050002	    796.4723293	    1255.574283	    4449.579039
                sub-0050003	    268.9688215	    878.7860634	    3838.602449
                sub-0050004	    539.0969914	    1195.288168	    3561.518188
                If will use the first row to determine the software used for the segmentations and the
                second row for the variable names.  Then it does a simple NIDM conversion using
                example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit"""
    )
    parser.add_argument('-csv', dest='csv_file', required=True,
                        help="Path to CSV file to convert")
    parser.add_argument('-ilxkey', dest='key', required=True,
                        help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map', required=False,
                        help="User-suppled JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False,
                        help="Optional NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument('-owl', action='store_true', required=False,
                        help='Optionally searches NIDM OWL files...internet connection required')
    parser.add_argument('-png', action='store_true', required=False,
                        help='Optional flag, when set a PNG image file of RDF graph will be produced')
    parser.add_argument('-out', dest='output_file', required=True,
                        help="Filename to save NIDM file")
    args = parser.parse_args()

    # first line of the CSV names the source of each segmentation column
    source_row = pd.read_csv(args.csv_file, nrows=0)
    # second line holds the variable names; duplicate names get a ".X" suffix from pandas
    df = pd.read_csv(args.csv_file, skiprows=0, header=1)
    # NOTE(review): this discards the row at index 0 of the loaded data -- confirm that row is
    # not a participant's data.
    df = df.reindex(df.index.drop(0)).reset_index(drop=True)

    # unique variable / software names (suffix after the first "." removed), in first-seen order
    unique_vars = list(dict.fromkeys(column.split(".")[0] for column in list(df)))
    unique_software = list(dict.fromkeys(column.split(".")[0] for column in list(source_row)))

    # maps variables in CSV file to terms
    out_dir = dirname(args.output_file)
    mapper_kwargs = {
        "df": pd.DataFrame(columns=unique_vars),
        "apikey": args.key,
        "directory": out_dir,
        "output_file": join(out_dir, "json_map.json"),
        "json_file": args.json_map,
    }
    if args.owl:
        mapper_kwargs["owl_file"] = args.owl
    column_to_terms = map_variables_to_terms(**mapper_kwargs)

    # get subjectID field from CSV
    id_field = getSubjIDColumn(column_to_terms, df)

    # If user has added an existing NIDM file as a command line parameter then add to existing
    # file for subjects who exist in the NIDM file, otherwise start from an empty NIDM graph
    extend_existing = args.nidm_file is not None
    if extend_existing:
        print("Adding to NIDM file...")
        nidmdoc = read_nidm(args.nidm_file)
    else:
        print("Creating NIDM file...")
        nidmdoc = Core()

    root_act = nidmdoc.graph.activity(
        QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
        other_attributes={
            Constants.NIDM_PROJECT_DESCRIPTION: "Brain volumes provenance document"
        })

    add_brainvolume_data(
        nidmdoc=nidmdoc,
        df=df,
        id_field=id_field,
        root_act=root_act,
        column_to_terms=column_to_terms,
        png_file=args.png,
        output_file=args.output_file,
        source_row=source_row,
        nidm_graph=True if extend_existing else None)

    # serialize NIDM file
    with open(args.output_file, 'w') as f:
        print("Writing NIDM file...")
        f.write(nidmdoc.serializeTurtle())

    # the graph export is only produced when a new NIDM file was created
    if args.png and not extend_existing:
        nidmdoc.save_DotGraph(str(args.output_file + ".pdf"), format="pdf")
def add_seg_data(nidmdoc, measure, header, tableinfo, json_map, png_file=None, output_file=None, root_act=None, nidm_graph=None):
    '''
    WIP: this function creates a NIDM file of brain volume data and if user supplied a NIDM-E file it will add
    to it (the add-to-existing route is not implemented in this body: nothing happens when nidm_graph is not None).

    NOTE(review): the body reads csv_row, id_field, source_row, df, column_index and column_to_terms, none of
    which are parameters or locals of this function; unless they exist at module level this raises NameError
    on the first iteration.  header, tableinfo and json_map are accepted but never read here.  The nesting
    below was reconstructed from whitespace-damaged text -- confirm it against the intended control flow.

    :param nidmdoc: object whose ``graph`` attribute receives the agents, activities and entities created here
    :param measure: iterable; each element is printed and triggers one pass over csv_row
    :param header: unused in this body
    :param tableinfo: unused in this body
    :param json_map: unused in this body
    :param png_file: when truthy, a DOT graph is saved once (on the first outer iteration)
    :param output_file: base path of the DOT graph; ".pdf" is appended
    :param root_act: optional root activity; each new software activity is linked to it with dct:isPartOf
    :param nidm_graph: work is only done when this is None
    :return: None
    '''

    #read in json_map
    # NOTE(review): no code follows the comment above; json_map is never loaded or used.

    #dictionary to store activities for each software agent
    # software_agent / software_activity are keyed by software_key, participant_agent by participant id,
    # entity by (activity localpart + participant agent localpart)
    software_agent={}
    software_activity={}
    participant_agent={}
    entity={}

    #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    #existing NIDM file.  The following logic basically determines which route to take...

    #no existing NIDM graph was passed as a parameter, so build everything from scratch
    if nidm_graph is None:
        first_row=True
        #iterate over measure dictionary
        for measures in measure:

            #key is
            print(measures)

            #store other data from row with columns_to_term mappings
            # NOTE(review): csv_row is not defined in this function; presumably a pandas Series left over
            # from a CSV-based version of this code -- verify.
            for row_variable,row_data in csv_row.iteritems():

                #check if row_variable is subject id, if so check whether we have an agent for this participant
                if row_variable==id_field:
                    #store participant id for later use in processing the data for this row
                    participant_id = row_data
                    #if there is no agent for the participant then add one
                    if row_data not in participant_agent.keys():
                        #add an agent for this person
                        participant_agent[row_data] = nidmdoc.graph.agent(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes=({Constants.NIDM_SUBJECTID:row_data}))
                    continue
                else:

                    #get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names
                    # the text before the first "." of the matching source_row column label is used as the key
                    software_key = source_row.columns[[column_index(df,row_variable)]]._values[0].split(".")[0]

                    #see if we already have a software_activity for this agent
                    if software_key not in software_activity.keys():

                        #create an activity for the computation...simply a placeholder for more extensive provenance
                        software_activity[software_key] = nidmdoc.graph.activity(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"brain volume computation"})

                        if root_act is not None:
                            #associate activity with activity of brain volumes creation (root-level activity)
                            software_activity[software_key].add_attributes({QualifiedName(provNamespace("dct",Constants.DCT),'isPartOf'):root_act})

                        #associate this activity with the participant
                        # participant_id is whatever the id_field column last set; it is unbound if a
                        # non-id column is visited first
                        nidmdoc.graph.association(activity=software_activity[software_key],agent=participant_agent[participant_id],other_attributes={PROV_ROLE:Constants.NIDM_PARTICIPANT})
                        nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key],agent=participant_agent[participant_id])

                        #check if there's an associated software agent and if not, create one
                        if software_key not in software_agent.keys():
                            #create an agent
                            software_agent[software_key] = nidmdoc.graph.agent(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes={'prov:type':QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""), QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""):software_key } )
                            #create qualified association with brain volume computation activity
                            nidmdoc.graph.association(activity=software_activity[software_key],agent=software_agent[software_key],other_attributes={PROV_ROLE:QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),"")})
                            nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key],agent=software_agent[software_key])

                    #check if we have an entity for storing this particular variable for this subject and software else create one
                    if software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart not in entity.keys():
                        #create an entity to store brain volume data for this participant
                        entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart] = nidmdoc.graph.entity( QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()))
                        #add wasGeneratedBy association to activity
                        nidmdoc.graph.wasGeneratedBy(entity=entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart], activity=software_activity[software_key])

                    #get column_to_term mapping uri and add as namespace in NIDM document
                    # the column name (up to its first ".") selects the term "url" used as the namespace URI
                    entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart].add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable.split(".")[0]]["url"]),""):row_data})
                    #print(project.serializeTurtle())

            #just for debugging.  resulting graph is too big right now for DOT graph creation so here I'm simply creating
            #a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the
            #model
            if png_file is not None:
                if first_row:
                    #serialize NIDM file
                    #with open(args.output_file,'w') as f:
                    #   print("Writing NIDM file...")
                    #   f.write(nidmdoc.serializeTurtle())
                    if png_file:
                        nidmdoc.save_DotGraph(str(output_file + ".pdf"), format="pdf")
                    first_row=False
def main(argv):
    '''
    Command-line entry point: load a Freesurfer aseg.stats file and write it out as a NIDM document.

    Reads <subject_dir>/stats/aseg.stats with read_stats and, when no existing NIDM file is named on the
    command line, builds a new NIDM graph with add_seg_data, writes it as JSON-LD to the output path and
    saves a DOT rendering next to it as "<output>.pdf".

    :param argv: currently unused; parse_args() is called without arguments and so reads sys.argv
    :return: None
    :raises SystemExit: raised by argparse on invalid arguments, or when no output path is given while a
        new NIDM file has to be written
    '''
    import argparse

    parser = argparse.ArgumentParser(
        prog='fs_to_nidm.py',
        description=('This program will load in a aseg.stats file from Freesurfer , augment the '
                     'Freesurfer anatomical region designations with common data element anatomical '
                     'designations, and save the statistics + region designations out as NIDM '
                     'serializations (i.e. TURTLE, JSON-LD RDF))'))
    parser.add_argument('-s', '--subject_dir', dest='subject_dir', type=str, required=True,
                        help='Path to Freesurfer subject directory')
    parser.add_argument('-j', '--json_map', dest='json_file', type=str, required=True,
                        help='JSON mapping file which maps Freesurfer aseg anatomy terms to commond data elements')
    parser.add_argument('-o', '--output_dir', dest='output_file', type=str,
                        help='Output directory')
    # Short flag was declared as '--n', unlike the other options; '-n' is the intended form.
    # '--n' keeps working because argparse accepts it as an unambiguous abbreviation of '--nidm'.
    parser.add_argument('-n', '--nidm', dest='nidm_file', type=str, required=False,
                        help='Optional NIDM file to add segmentation data to.')
    args = parser.parse_args()

    # The output path is opened for writing below; fail fast with a usage error instead of letting
    # open(None) raise TypeError after the stats file has already been parsed.
    if args.nidm_file is None and args.output_file is None:
        parser.error("-o/--output_dir is required when creating a new NIDM file")

    [header, tableinfo, measures] = read_stats(os.path.join(args.subject_dir, "stats", "aseg.stats"))

    #for measures we need to create NIDM structures using anatomy mappings
    #Only the "create a new NIDM file" route is implemented: when an existing NIDM file is named with
    #-n/--nidm nothing further is done.
    if args.nidm_file is None:

        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data

        #create an empty NIDM graph
        nidmdoc = Core()
        # root_act is not passed on, but creating it adds a root-level activity to the graph
        root_act = nidmdoc.graph.activity(QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
                                          other_attributes={Constants.NIDM_PROJECT_DESCRIPTION: "Freesurfer segmentation statistics"})

        #this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file....
        # NOTE(review): json_map receives the mapping file *path*, and tableinfo is passed by keyword;
        # confirm both against the add_seg_data definition that is actually bound at call time.
        add_seg_data(nidmdoc=nidmdoc, measure=measures, header=header, tableinfo=tableinfo, json_map=args.json_file)

        #serialize NIDM file
        with open(args.output_file, 'w') as f:
            print("Writing NIDM file...")
            f.write(nidmdoc.serializeJSONLD())
        nidmdoc.save_DotGraph(str(args.output_file + ".pdf"), format="pdf")
def add_seg_data(nidmdoc, measure, header, json_map, png_file=None, output_file=None, root_act=None, nidm_graph=None):
    '''
    WIP: this function creates a NIDM file of brain volume data and if user supplied a NIDM-E file it will add brain volumes to the
    NIDM-E file for the matching subject ID (that second route is not implemented: nothing is done when
    nidm_graph is not None).

    Adds to nidmdoc.graph one software activity carrying the header fields, one software agent associated
    with it, one "FSStatsCollection" entity generated by the activity and, for every measurement that has
    both an anatomy and a measure mapping, one region entity whose identifier becomes an attribute key on
    the collection entity with the measured value.

    :param nidmdoc: NIDM document whose ``graph`` attribute receives the provenance records
    :param measure: iterable of dicts, each with a "structure" key and an "items" list of dicts holding
        "name" and "value"
    :param header: dict of header fields; every key/value is attached to the software activity in the
        "fs" namespace
    :param json_map: dict with an 'Anatomy' mapping (structure -> 'label', 'isAbout', 'hasLaterality',
        'definition') and a 'Measures' mapping (item name -> 'measureOf', 'datumType')
    :param png_file: unused
    :param output_file: unused
    :param root_act: unused
    :param nidm_graph: work is only done when this is None
    :return: None
    '''

    niiri = prov.Namespace("niiri", "http://iri.nidash.org/")

    def term(prefix, uri, localpart=""):
        # build a qualified name inside a freshly declared namespace
        return QualifiedName(provNamespace(prefix, uri), localpart)

    #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    #existing NIDM file.  The following logic basically determines which route to take...

    #no existing NIDM graph was passed as a parameter, so everything is created from scratch
    if nidm_graph is None:

        #for each of the header items create a dictionary where namespaces are freesurfer
        software_activity = nidmdoc.graph.activity(
            niiri[getUUID()],
            other_attributes={Constants.NIDM_PROJECT_DESCRIPTION: "Freesurfer segmentation statistics"})
        for key, value in header.items():
            software_activity.add_attributes({term("fs", Constants.FREESURFER, key): value})

        #create software agent and associate with software activity
        software_agent = nidmdoc.graph.agent(
            niiri[getUUID()],
            other_attributes={
                term("Neuroimaging_Analysis_Software", Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE): Constants.FREESURFER,
                prov.PROV_TYPE: prov.PROV["SoftwareAgent"],
            })

        #create qualified association with brain volume computation activity
        nidmdoc.graph.association(
            activity=software_activity,
            agent=software_agent,
            other_attributes={PROV_ROLE: Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE})
        nidmdoc.graph.wasAssociatedWith(activity=software_activity, agent=software_agent)

        #dump both inputs as JSON into the current working directory
        with open('measure.json', 'w') as fp:
            json.dump(measure, fp)
        with open('json_map.json', 'w') as fp:
            json.dump(json_map, fp)

        datum_entity = nidmdoc.graph.entity(
            niiri[getUUID()],
            other_attributes={
                prov.PROV_TYPE: term("nidm", "http://purl.org/nidash/nidm#", "FSStatsCollection"),
            })
        # The entity comes first in prov's wasGeneratedBy(entity, activity); the previous positional call
        # had the two swapped, recording the activity as generated by the collection.  Keywords keep the
        # direction explicit.
        nidmdoc.graph.wasGeneratedBy(entity=datum_entity, activity=software_activity)

        # terms that are identical for every region entity
        datum_type = term("measurement_datum", "http://uri.interlex.org/base/ilx_0738269#")
        is_about = term("isAbout", "http://uri.interlex.org/ilx_0381385#")
        has_laterality = term("hasLaterality", "http://uri.interlex.org/ilx_0381387#")
        is_measure_of = term("isMeasureOf", "http://uri.interlex.org/ilx_0381389#")
        gray_matter = term("GrayMatter", "http://uri.interlex.org/ilx_0104768#")
        rdfs_label = term("rdfs", "http://www.w3.org/2000/01/rdf-schema#", "label")
        has_measurement_type = term("hasMeasurementType", "http://uri.interlex.org/ilx_0381388#")
        has_datum_type = term("hasDatumType", "http://uri.interlex.org/ilx_0738262#")

        anatomy_map = json_map['Anatomy']
        measures_map = json_map['Measures']

        #iterate over measure dictionary where measures are the lines in the FS stats files which start with '# Measure' and
        #the whole table at the bottom of the FS stats file that starts with '# ColHeaders
        for measures in measure:

            #check if we have a CDE mapping for the anatomical structure referenced in the FS stats file
            if measures["structure"] not in anatomy_map:
                continue
            anatomy = anatomy_map[measures["structure"]]

            #for the various fields in the FS stats file row starting with '# Measure'...
            for items in measures["items"]:
                # only measurement names that have a mapping are recorded
                if items['name'] not in measures_map:
                    continue
                # structures whose mapping has an empty label are skipped
                if not anatomy['label']:
                    continue
                measure_info = measures_map[items['name']]

                region_entity = nidmdoc.graph.entity(
                    niiri[getUUID()],
                    other_attributes={prov.PROV_TYPE: datum_type})

                #construct the custom CDEs to describe measurements of the various brain regions
                region_entity.add_attributes({
                    is_about: anatomy['isAbout'],
                    has_laterality: anatomy['hasLaterality'],
                    Constants.NIDM_PROJECT_DESCRIPTION: anatomy['definition'],
                    is_measure_of: gray_matter,
                    rdfs_label: anatomy['label'],
                })

                #QualifiedName(provNamespace("hasUnit","http://uri.interlex.org/ilx_0381384#"),""):json_map['Anatomy'][measures["structure"]]['units'],
                region_entity.add_attributes({
                    has_measurement_type: measure_info["measureOf"],
                    has_datum_type: measure_info["datumType"],
                })

                # the region entity's identifier is the key under which the value is stored on the collection
                datum_entity.add_attributes({region_entity.identifier: items['value']})