Exemplo n.º 1
0
def main(argv):
    """Build a small demonstration NIDM-Experiment graph and write it to disk.

    The graph contains one project (with a PI), one session, and three
    acquisitions (MR, assessment, demographics) with their participants.
    Two files are written to the current working directory:
    ``test.ttl`` (Turtle serialization) and ``test.png`` (DOT rendering).

    ``argv`` is accepted but never read in this function.
    """
    # Project with its basic descriptive metadata.
    project_attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(attributes=project_attrs)

    # String attribute in an already-known namespace.
    project.add_attributes({Constants.NIDM["isFun"]: "ForMe"})

    # String attribute in a namespace/term that is introduced here.
    project.addLiteralAttribute("fred", "notFound", "in namespaces", "www.fred.org/")

    # Float attribute.
    project.addLiteralAttribute("nidm", "float", 2.34)

    # Principal investigator, attached to the project via a qualified association.
    pi_attrs = {
        Constants.NIDM_FAMILY_NAME: "Keator",
        Constants.NIDM_GIVEN_NAME: "David",
    }
    principal_investigator = project.add_person(attributes=pi_attrs)
    project.add_qualified_association(
        person=principal_investigator,
        role=Constants.NIDM_PI,
    )

    # Session belonging to the project.
    session = Session(project)
    session.add_attributes({Constants.NIDM: "test"})

    # --- MR acquisition activity + object, with "George" as participant ---
    mr_acquisition = MRAcquisition(session=session)
    mr_object = MRObject(acquisition=mr_acquisition)

    george = mr_acquisition.add_person(
        attributes={Constants.NIDM_GIVEN_NAME: "George"}
    )
    mr_acquisition.add_qualified_association(
        person=george,
        role=Constants.NIDM_PARTICIPANT,
    )

    # --- Assessment acquisition activity + object, same participant ---
    assessment_acquisition = AssessmentAcquisition(session=session)
    assessment_object = AssessmentObject(acquisition=assessment_acquisition)
    assessment_answers = {
        Constants.NIDM["Q1"]: "Q1 Answer",
        Constants.NIDM["Q2"]: "Q2 Answer",
    }
    assessment_object.add_attributes(assessment_answers)
    assessment_acquisition.add_qualified_association(
        person=george,
        role=Constants.NIDM_PARTICIPANT,
    )

    # --- Demographics acquisition activity + object, with a new participant ---
    demographics_acquisition = AssessmentAcquisition(session=session)
    demographics_object = DemographicsObject(acquisition=demographics_acquisition)

    john_doe = demographics_acquisition.add_person(
        attributes={
            Constants.NIDM_FAMILY_NAME: "Doe",
            Constants.NIDM_GIVEN_NAME: "John",
        }
    )
    demographics_acquisition.add_qualified_association(
        person=john_doe,
        role=Constants.NIDM_PARTICIPANT,
    )

    demographics_object.add_attributes(
        {Constants.NIDM_AGE: 60, Constants.NIDM_GENDER: "Male"}
    )

    # Serialize the whole graph as Turtle.
    turtle_text = project.serializeTurtle()
    with open("test.ttl", 'w') as f:
        f.write(turtle_text)

    # Render the graph via DOT to a PNG image.
    project.save_DotGraph("test.png", format="png")
Exemplo n.º 2
0
def _bids_subject_label(participant_id):
    """Return the subject label for a BIDS ``participant_id`` value.

    BIDS datasets are ambiguous here: the value is sometimes ``sub-XXXX`` and
    sometimes just ``XXXX``.  With a dash present the second dash-separated
    field is returned, otherwise the value itself.
    """
    parts = participant_id.split("-")
    return parts[1] if len(parts) > 1 else parts[0]


def bidsmri2project(directory, args):
    """Convert the BIDS dataset rooted at ``directory`` into a NIDM project.

    Steps, in order:

    1. read ``dataset_description.json`` and copy mapped keys onto a new
       ``Project``;
    2. if ``participants.tsv`` exists, create one ``Session`` /
       ``AssessmentAcquisition`` / ``AssessmentObject`` / person per row and
       record the row's variables (fixed BIDS columns go into the CDE graph,
       the rest through ``add_attributes_with_cde``);
    3. for every subject reported by ``BIDSLayout``, add imaging sessions via
       ``addimagingsessions`` and attach matching rows from
       ``phenotype/*.tsv`` files.

    :param directory: path of the BIDS dataset root (used with string
        concatenation below, so it is expected to be a ``str``).
    :param args: parsed command-line options; only ``args.json_map`` and
        ``args.no_concepts`` are read here.
    :return: tuple ``(project, cde)`` -- the populated ``Project`` and the
        CDE graph (either the empty ``Graph`` created here or the one returned
        by ``map_variables_to_terms``).
    :raises SystemExit: with code ``"-1"`` when ``directory`` is not a
        directory or ``dataset_description.json`` cannot be opened.
    """
    # Validate the inputs before creating any graph objects.
    if not os.path.isdir(directory):
        logging.critical("Error: BIDS directory %s does not exist!", directory)
        # Same exception/code as the interactive ``exit("-1")`` helper, but
        # does not depend on the ``site`` module being loaded.
        raise SystemExit("-1")

    # Parse dataset_description.json file in BIDS directory
    try:
        with open(os.path.join(directory, 'dataset_description.json')) as data_file:
            dataset = json.load(data_file)
    except OSError:
        logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec")
        raise SystemExit("-1")

    # initialize empty cde graph...it may get replaced if we're doing variable to term mapping
    cde = Graph()

    # create project / nidm-exp doc
    project = Project()

    # if there are git annex sources then add them, else just add the local path to the dataset
    num_sources = addGitAnnexSources(obj=project.get_uuid(), bids_root=directory)
    if num_sources == 0:
        project.add_attributes({Constants.PROV['Location']: "file:/" + directory})

    # add various attributes if they exist in BIDS dataset
    for key in dataset:
        # only keys mapped to a term in BIDS_Constants.py are added to the NIDM object
        if key in BIDS_Constants.dataset_description:
            if isinstance(dataset[key], list):
                # NOTE(review): list items are concatenated with no separator
                # (e.g. several authors run together) -- confirm this is intended.
                project.add_attributes({BIDS_Constants.dataset_description[key]: "".join(dataset[key])})
            else:
                project.add_attributes({BIDS_Constants.dataset_description[key]: dataset[key]})

    # get BIDS layout
    bids_layout = BIDSLayout(directory)

    # session / participant are keyed by subject label and are used later to
    # link phenotype data and scans to the same subject as the demographics
    session = {}
    participant = {}

    participants_tsv = os.path.join(directory, 'participants.tsv')
    participants_json = os.path.join(directory, 'participants.json')

    # Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(participants_tsv):
        with open(participants_tsv) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # Columns that are not fixed BIDS constants need variable->term mapping.
            mapping_list = [
                field for field in participants_data.fieldnames
                if field not in BIDS_Constants.participants
            ]

            # Build the keyword arguments for the single mapping call.  Optional
            # keywords are only passed in the cases where the caller supplied /
            # the dataset provides them, so helper defaults apply otherwise.
            map_kwargs = {
                'directory': directory,
                'assessment_name': 'participants.tsv',
                'df': DataFrame(columns=mapping_list),
                'output_file': participants_json,
                'bids': True,
            }
            if args.json_map == False:  # noqa: E712 -- keep the original equality semantics
                # no user-supplied mapping: fall back to participants.json when it exists,
                # otherwise run without a json mapping file
                if os.path.isfile(participants_json):
                    map_kwargs['json_file'] = participants_json
            else:
                map_kwargs['json_file'] = args.json_map
            if args.no_concepts:
                map_kwargs['associate_concepts'] = False

            # maps variables in CSV file to terms
            column_to_terms, cde = map_variables_to_terms(**map_kwargs)

            for row in participants_data:
                # create session object for subject to be used for participant metadata and image data
                subjid = _bids_subject_label(row['participant_id'])
                logging.info(subjid)
                session[subjid] = Session(project)

                # add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: row['participant_id']}))

                # add nfo:filename entry to assessment entity to reflect provenance of where this data came from
                acq_entity.add_attributes({Constants.NIDM_FILENAME: getRelPathToBIDS(participants_tsv, directory)})

                # add qualified association of participant with acquisition activity
                acq.add_qualified_association(person=participant[subjid]['person'], role=Constants.NIDM_PARTICIPANT)

                # if there are git annex sources for participants.tsv file then add them,
                # else just add the local path to the file
                # NOTE(review): no filepath is passed here, unlike the sidecar call
                # below -- confirm addGitAnnexSources resolves the TSV itself.
                num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                if num_sources == 0:
                    acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + participants_tsv})

                # if there's a JSON sidecar file then create an entity and associate it with the assessment entity
                if os.path.isfile(participants_json):
                    json_sidecar = AssessmentObject(acquisition=acq)
                    json_sidecar.add_attributes({
                        PROV_TYPE: QualifiedName(Namespace("bids", Constants.BIDS), "sidecar_file"),
                        Constants.NIDM_FILENAME: getRelPathToBIDS(participants_json, directory),
                    })

                    # add Git Annex Sources for the sidecar, else just add its local path
                    num_sources = addGitAnnexSources(obj=json_sidecar.get_uuid(), filepath=participants_json, bids_root=directory)
                    if num_sources == 0:
                        json_sidecar.add_attributes({Constants.PROV['Location']: "file:/" + participants_json})

                    # connect the sidecar created for THIS row with the assessment
                    # entity (previously detected through locals(), which could pick
                    # up a sidecar left over from an earlier row)
                    # NOTE(review): the phenotype branch links by get_uuid() instead
                    # of the object -- confirm which form is intended.
                    acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_sidecar})

                for key, value in row.items():
                    if not value:
                        continue
                    # variables with term mappings in BIDS_Constants.py use those mappings
                    if key in BIDS_Constants.participants:
                        # WIP
                        # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for each BIDS-compliant dataset

                        # the participant_id was already handled above when creating the agent / qualified association
                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):
                            # ID for BIDS variables is always the same bids:[bids variable]
                            cde_id = Constants.BIDS[key]
                            # add the data element to the CDE graph
                            cde.add((cde_id, RDF.type, Constants.NIDM['DataElement']))
                            cde.add((cde_id, RDF.type, Constants.PROV['Entity']))
                            # add some basic information about this data element
                            # NOTE(review): description/comment text is identical for every
                            # fixed BIDS variable, not only the participant id -- confirm.
                            cde.add((cde_id, Constants.RDFS['label'], Literal(BIDS_Constants.participants[key].localpart)))
                            cde.add((cde_id, Constants.NIDM['isAbout'], URIRef(BIDS_Constants.participants[key].uri)))
                            cde.add((cde_id, Constants.NIDM['source_variable'], Literal(key)))
                            cde.add((cde_id, Constants.NIDM['description'], Literal("participant/subject identifier")))
                            cde.add((cde_id, Constants.RDFS['comment'], Literal("BIDS participants_id variable fixed in specification")))
                            cde.add((cde_id, Constants.RDFS['valueType'], URIRef(Constants.XSD["string"])))

                            acq_entity.add_attributes({cde_id: Literal(value)})

                    # otherwise use the variable -> term mapping procedures (also used in CSV2NIDM.py)
                    else:
                        # WIP: trying to add new support for CDEs...
                        add_attributes_with_cde(prov_object=acq_entity, cde=cde, row_variable=key, value=value)

    # The set of phenotype files does not depend on the subject, so list it once.
    phenotype_files = glob.glob(os.path.join(directory, "phenotype", "*.tsv"))

    # create acquisition objects for each scan for each subject
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" % subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # Because we don't know which imaging session the root participants.tsv
        # data may be associated with, imaging acquisitions are linked to their
        # own sessions (the participants.tsv data lives in an
        # AssessmentAcquisition with a separate session).
        imaging_sessions = bids_layout.get_sessions(subject=subject_id)

        # one new session per ses-* label (no iterations when there are none)
        for img_session in imaging_sessions:
            ses = Session(project)
            # add session number as metadata
            ses.add_attributes({Constants.BIDS['session_number']: img_session})
            addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=ses, participant=participant, directory=directory, img_session=img_session)

        # NOTE(review): the original comment calls this the "else" case (no ses-*
        # directories), but the call runs unconditionally, i.e. also after the
        # per-session calls above.  Left as-is because the behaviour of
        # addimagingsessions without img_session is not visible here -- confirm.
        addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=Session(project), participant=participant, directory=directory)

        # Added temporarily to support phenotype files
        # for each *.tsv / *.json file pair in the phenotypes directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in phenotype_files:
            # for now, open the TSV file, extract the row(s) for this subject, store them in an
            # acquisition object and link to the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    # Same "sub-XXXX" / "XXXX" handling as for participants.tsv;
                    # indexing the split result directly raised IndexError for
                    # ids without the "sub-" prefix.
                    pheno_subjid = _bids_subject_label(row['participant_id'])
                    if pheno_subjid != subject_id:
                        continue

                    # add acquisition object
                    # NOTE(review): these lookups raise KeyError when the subject has
                    # no entry from participants.tsv -- confirm that is acceptable.
                    acq = AssessmentAcquisition(session=session[pheno_subjid])
                    # add qualified association with person
                    acq.add_qualified_association(person=participant[subject_id]['person'], role=Constants.NIDM_PARTICIPANT)

                    acq_entity = AssessmentObject(acquisition=acq)

                    for key, value in row.items():
                        if not value:
                            continue
                        # participant_id is represented by the agent, so it is not added as a triple;
                        # BIDS phenotype files may also have an index column with an empty header
                        if key not in ("participant_id", ""):
                            # for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                            acq_entity.add_attributes({Constants.BIDS[key]: value})

                    # link TSV file
                    acq_entity.add_attributes({Constants.NIDM_FILENAME: getRelPathToBIDS(tsv_file, directory)})

                    # if there are git annex sources for this phenotype TSV then add them,
                    # else just add the local path to the file
                    num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                    if num_sources == 0:
                        acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + tsv_file})

                    # link associated JSON data dictionary if it exists
                    data_dict = os.path.join(directory, "phenotype", os.path.splitext(os.path.basename(tsv_file))[0] + ".json")
                    if os.path.isfile(data_dict):
                        # create a new entity for the sidecar on the same activity
                        json_entity = AssessmentObject(acquisition=acq)
                        json_entity.add_attributes({
                            PROV_TYPE: Constants.BIDS["sidecar_file"],
                            Constants.NIDM_FILENAME: getRelPathToBIDS(data_dict, directory),
                        })

                        # add Git Annex Sources for the sidecar, else just add its local path
                        num_sources = addGitAnnexSources(obj=json_entity.get_uuid(), filepath=data_dict, bids_root=directory)
                        if num_sources == 0:
                            json_entity.add_attributes({Constants.PROV['Location']: "file:/" + data_dict})

                        # connect json_entity with acq_entity
                        acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_entity.get_uuid()})

    return project, cde