Example #1
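# This example assumes isatools is installed and the model API has been
# imported at module level, e.g. (an assumption, not shown in the original):
#   from isatools.model import (Investigation, Study, Comment, OntologySource,
#       OntologyAnnotation, Person, Publication, Protocol, Process, Source,
#       Sample, Characteristic, StudyFactor, Assay, Material, DataFile,
#       batch_create_materials, plink)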
def create_descriptor():
    """
    Returns a simple but complete ISA-JSON 1.0 descriptor for illustration.
    """

    # Create an empty Investigation object and set some values on its
    # instance variables.

    investigation = Investigation()
    investigation.identifier = "1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we " \
        "want to demonstrate how to use the object's instance variables " \
        "to set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation
    # by adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor " \
        "to set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # This shows that ISA Comments can be used to annotate ISA objects, in this case the ISA Study
    study.comments.append(Comment(name="Study Start Date", value="Sun"))

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors.
    # A design descriptor is an Ontology Annotation describing the kind of
    # study at hand. Ontology Annotations should typically reference an
    # Ontology Source. We demonstrate a mix of using the class constructors
    # and setting values with instance variables. Note that the
    # OntologyAnnotation object 'intervention_design' links its 'term_source'
    # directly to the 'obi' object instance. To ensure the OntologySource
    # is encapsulated in the descriptor, it is added to a list of
    # 'ontology_source_references' in the Investigation object. The
    # 'intervention_design' object is then added to the list of
    # 'design_descriptors' held by the Study object.

    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)

    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)
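
    # When this investigation is rendered as ISA-Tab, the annotation above is
    # expected to surface in the STUDY DESIGN DESCRIPTORS block of
    # i_investigation.txt, roughly as (a sketch, not output of this code):
    #   Study Design Type                         intervention design
    #   Study Design Type Term Accession Number   http://purl.obolibrary.org/obo/OBI_0000115
    #   Study Design Type Term Source REF         OBI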

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with the organism Homo sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="H**o Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))

    # Adding the organism characteristic to the ISA Source Material:
    source.characteristics.append(characteristic_organism)

    # Declaring a new ontology source and adding it to the list of resources used
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    # Preparing an ISA Characteristic object (~ a material property) to annotate sample materials
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
            "123245"))

    prototype_sample.characteristics.append(characteristic_organ)

    study.samples = batch_create_materials(prototype_sample, n=3)
    # creates a batch of 3 samples
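
    # batch_create_materials() is a convenience helper from isatools.model;
    # the call above is roughly equivalent to this sketch, with each clone
    # keeping the prototype's characteristics and derives_from:
    #   study.samples = [Sample(name='sample_material-{}'.format(i),
    #                           derives_from=[source],
    #                           characteristics=[characteristic_organ])
    #                    for i in range(3)]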

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must be
    # declared before we describe Processes, because every processing event
    # must execute a defined protocol; in the class model, the Protocol
    # therefore has to exist before the Process that links to it.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # adding dummy Comments to the ISA Protocol object
    study.protocols[0].comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    study.protocols[0].comments.append(
        Comment(name="Study End Date", value="2017-08-11"))
    # checking that the ISA Protocol object has been modified
    # print(study.protocols[0])

    # Creation of an ISA Study Factor object
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    # testing serialization to ISA-TAB of Comments attached to ISA objects.
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    print(f.comments[0].name, "|", f.comments[0].value)

    # attach the ISA StudyFactor object to the study
    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.

    study.process_sequence.append(sample_collection_process)
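
    # When serialized to ISA-Tab, this process sequence populates the study
    # table (s_study.txt); each source-to-sample path becomes a row, roughly
    # of the form (a sketch of the expected columns):
    #   Source Name        Protocol REF         Sample Name
    #   source_material    sample collection    sample_material-0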

    # IMPORTANT: remember to populate the list of ontology categories used to annotate ISA Materials in a Study:
    study.characteristic_categories.append(characteristic_organism.category)

    # Next, we build an Assay object and attach two protocols,
    # extraction and sequencing.

    assay = Assay(filename="a_assay.txt")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out the assay graphs, we enumerate the samples from the
    # study level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, so the three
    # graphs are NOT interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked forward and backward
        plink(extraction_process, sequencing_process)
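
        # plink() wires the two Process objects together by setting
        # extraction_process.next_process and sequencing_process.prev_process,
        # so the assay graph can be walked in both directions when the assay
        # table is written out.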

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)
        assay.measurement_type = OntologyAnnotation(term="gene sequencing")
        assay.technology_type = OntologyAnnotation(
            term="nucleotide sequencing")

    # attach the assay to the study
    study.assays.append(assay)

    import json
    from isatools.isajson import ISAJSONEncoder

    # To write JSON out, use the ISAJSONEncoder class with the json package
    # and use dump() or dumps(). Note that the extra parameters sort_keys,
    # indent and separators are to make the output more human-readable.

    return json.dumps(investigation,
                      cls=ISAJSONEncoder,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
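
# A minimal usage sketch (not part of the original example): write the JSON
# descriptor returned by create_descriptor() to a file; the file name is
# illustrative only.
if __name__ == '__main__':
    with open('isa_descriptor.json', 'w') as out_fp:
        out_fp.write(create_descriptor())
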
Example #2
    def _exportISATAB(self, destinationPath, detailsDict):
        """
		Export the dataset's metadata to the directory *destinationPath* as ISATAB
		detailsDict should have the format:
		detailsDict = {
		    'investigation_identifier' : "i1",
		    'investigation_title' : "Give it a title",
		    'investigation_description' : "Add a description",
		    'investigation_submission_date' : "2016-11-03",
		    'investigation_public_release_date' : "2016-11-03",
		    'first_name' : "Noureddin",
		    'last_name' : "Sadawi",
		    'affiliation' : "University",
		    'study_filename' : "my_ms_study",
		    'study_material_type' : "Serum",
		    'study_identifier' : "s1",
		    'study_title' : "Give the study a title",
		    'study_description' : "Add study description",
		    'study_submission_date' : "2016-11-03",
		    'study_public_release_date' : "2016-11-03",
		    'assay_filename' : "my_ms_assay"
		}

		:param str destinationPath: Path to a directory in which the output will be saved
		:param dict detailsDict: Contains several key-value pairs required for ISA-Tab export
		:raises IOError: If writing one of the files fails
		"""

        from isatools.model import Investigation, Study, Assay, OntologyAnnotation, OntologySource, Person, Publication, Protocol, Source
        from isatools.model import Comment, Sample, Characteristic, Process, Material, DataFile, ParameterValue, plink
        from isatools import isatab
        import isaExplorer as ie
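
        # The code below also relies on pandas, os and datetime; in the original
        # module these are presumably imported at module level, but importing
        # them here keeps this excerpt self-contained.
        import os
        import pandas
        from datetime import datetime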

        investigation = Investigation()

        investigation.identifier = detailsDict['investigation_identifier']
        investigation.title = detailsDict['investigation_title']
        investigation.description = detailsDict['investigation_description']
        investigation.submission_date = detailsDict[
            'investigation_submission_date']  # use today if not specified
        investigation.public_release_date = detailsDict[
            'investigation_public_release_date']
        study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
        study.identifier = detailsDict['study_identifier']
        study.title = detailsDict['study_title']
        study.description = detailsDict['study_description']
        study.submission_date = detailsDict['study_submission_date']
        study.public_release_date = detailsDict['study_public_release_date']
        investigation.studies.append(study)
        obi = OntologySource(
            name='OBI', description="Ontology for Biomedical Investigations")
        investigation.ontology_source_references.append(obi)
        intervention_design = OntologyAnnotation(term_source=obi)
        intervention_design.term = "intervention design"
        intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
        study.design_descriptors.append(intervention_design)

        # Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
        # each with lists of corresponding Person and Publication objects.

        contact = Person(first_name=detailsDict['first_name'],
                         last_name=detailsDict['last_name'],
                         affiliation=detailsDict['affiliation'],
                         roles=[OntologyAnnotation(term='submitter')])
        study.contacts.append(contact)
        publication = Publication(title="Experiments with Data",
                                  author_list="Auther 1, Author 2")
        publication.pubmed_id = "12345678"
        publication.status = OntologyAnnotation(term="published")
        study.publications.append(publication)

        # To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
        # to create a process sequence. To do this we use the Process class and attach it to the Study object's
        # 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
        # a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
        # a Source material and as output a Sample material.

        sample_collection_protocol = Protocol(
            id_="sample collection",
            name="sample collection",
            protocol_type=OntologyAnnotation(term="sample collection"))
        aliquoting_protocol = Protocol(
            id_="aliquoting",
            name="aliquoting",
            protocol_type=OntologyAnnotation(term="aliquoting"))

        for index, row in self.sampleMetadata.iterrows():
            src_name = row['Sample File Name']
            source = Source(name=src_name)

            source.comments.append(
                Comment(name='Study Name', value=row['Study']))
            study.sources.append(source)

            sample_name = src_name
            sample = Sample(name=sample_name, derives_from=[source])
            # check if field exists first
            status = row[
                'Status'] if 'Status' in self.sampleMetadata.columns else 'N/A'
            characteristic_material_type = Characteristic(
                category=OntologyAnnotation(term="material type"),
                value=status)
            sample.characteristics.append(characteristic_material_type)

            #characteristic_material_role = Characteristic(category=OntologyAnnotation(term="material role"), value=row['AssayRole'])
            #sample.characteristics.append(characteristic_material_role)

            # check if field exists first
            age = row['Age'] if 'Age' in self.sampleMetadata.columns else 'N/A'
            characteristic_age = Characteristic(
                category=OntologyAnnotation(term="Age"),
                value=age,
                unit='Year')
            sample.characteristics.append(characteristic_age)
            # check if field exists first
            gender = row[
                'Gender'] if 'Gender' in self.sampleMetadata.columns else 'N/A'
            characteristic_gender = Characteristic(
                category=OntologyAnnotation(term="Gender"), value=gender)
            sample.characteristics.append(characteristic_gender)

            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term="H**o Sapiens",
                    term_source=ncbitaxon,
                    term_accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606"))
            sample.characteristics.append(characteristic_organism)

            study.samples.append(sample)

            # check if field exists first
            sampling_date = row['Sampling Date'] if not pandas.isnull(
                row['Sampling Date']) else None
            sample_collection_process = Process(
                id_='sam_coll_proc',
                executes_protocol=sample_collection_protocol,
                date_=sampling_date)
            aliquoting_process = Process(id_='aliquoting_proc',
                                         executes_protocol=aliquoting_protocol,
                                         date_=sampling_date)

            sample_collection_process.inputs = [source]
            aliquoting_process.outputs = [sample]

            # links processes
            plink(sample_collection_process, aliquoting_process)

            study.process_sequence.append(sample_collection_process)
            study.process_sequence.append(aliquoting_process)

        study.protocols.append(sample_collection_protocol)
        study.protocols.append(aliquoting_protocol)

        ### Add NMR Assay ###
        nmr_assay = Assay(
            filename='a_' + detailsDict['assay_filename'] + '.txt',
            measurement_type=OntologyAnnotation(term="metabolite profiling"),
            technology_type=OntologyAnnotation(term="NMR spectroscopy"))
        extraction_protocol = Protocol(
            name='extraction',
            protocol_type=OntologyAnnotation(term="material extraction"))

        study.protocols.append(extraction_protocol)
        nmr_protocol = Protocol(
            name='NMR spectroscopy',
            protocol_type=OntologyAnnotation(term="NMR Assay"))
        nmr_protocol.add_param('Run Order')
        #if 'Instrument' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Instrument')
        #if 'Sample Batch' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Sample Batch')
        nmr_protocol.add_param('Acquisition Batch')

        study.protocols.append(nmr_protocol)
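
        # Protocol.add_param() registers a ProtocolParameter (its name wrapped
        # in an OntologyAnnotation) on the protocol, and get_param() looks the
        # same definition up again by name; the ParameterValue objects created
        # per sample below point at these shared definitions, which is what
        # yields one 'Parameter Value[...]' column per declared parameter in
        # the assay table.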

        #for index, row in sampleMetadata.iterrows():
        for index, sample in enumerate(study.samples):
            row = self.sampleMetadata.loc[
                self.sampleMetadata['Sample File Name'].astype(
                    str) == sample.name]
            # create an extraction process that executes the extraction protocol
            extraction_process = Process(executes_protocol=extraction_protocol)

            # extraction process takes as input a sample, and produces an extract material as output.
            # We reuse the Sample object already attached to the study, so the
            # assay references the same material rather than a new copy.
            # print(row['Acquired Time'].values[0])

            extraction_process.inputs.append(sample)
            material = Material(name="extract-{}".format(index))
            material.type = "Extract Name"
            extraction_process.outputs.append(material)

            # create a ms process that executes the nmr protocol
            nmr_process = Process(executes_protocol=nmr_protocol,
                                  date_=datetime.isoformat(
                                      datetime.strptime(
                                          str(row['Acquired Time'].values[0]),
                                          '%Y-%m-%d %H:%M:%S')))

            nmr_process.name = "assay-name-{}".format(index)
            nmr_process.inputs.append(extraction_process.outputs[0])
            # nmr process usually has an output data file
            # check if field exists first
            assay_data_name = row['Assay data name'].values[
                0] if 'Assay data name' in self.sampleMetadata.columns else 'N/A'
            datafile = DataFile(filename=assay_data_name,
                                label="NMR Assay Name",
                                generated_from=[sample])
            nmr_process.outputs.append(datafile)

            #nmr_process.parameter_values.append(ParameterValue(category='Run Order',value=str(i)))
            nmr_process.parameter_values = [
                ParameterValue(category=nmr_protocol.get_param('Run Order'),
                               value=row['Run Order'].values[0])
            ]
            # check if field exists first
            instrument = row['Instrument'].values[
                0] if 'Instrument' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Instrument'),
                               value=instrument))
            # check if field exists first
            sbatch = row['Sample batch'].values[
                0] if 'Sample batch' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Sample Batch'),
                               value=sbatch))
            nmr_process.parameter_values.append(
                ParameterValue(
                    category=nmr_protocol.get_param('Acquisition Batch'),
                    value=row['Batch'].values[0]))

            # ensure Processes are linked forward and backward
            plink(extraction_process, nmr_process)
            # make sure the extract, data file, and the processes are attached to the assay
            nmr_assay.samples.append(sample)
            nmr_assay.data_files.append(datafile)
            nmr_assay.other_material.append(material)
            nmr_assay.process_sequence.append(extraction_process)
            nmr_assay.process_sequence.append(nmr_process)
            nmr_assay.measurement_type = OntologyAnnotation(
                term="metabolite profiling")
            nmr_assay.technology_type = OntologyAnnotation(
                term="NMR spectroscopy")

        # attach the assay to the study
        study.assays.append(nmr_assay)

        if os.path.exists(os.path.join(destinationPath,
                                       'i_Investigation.txt')):
            ie.appendStudytoISA(study, destinationPath)
        else:
            isatab.dump(isa_obj=investigation, output_path=destinationPath)
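
        # A possible follow-up step (a sketch, not part of the original method):
        # validate the ISA-Tab files that were just written, e.g.
        #   with open(os.path.join(destinationPath, 'i_Investigation.txt')) as fp:
        #       report = isatab.validate(fp)  # report collects errors/warnings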
Example #3
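# The function below assumes module-level imports along these lines (not shown
# in the original excerpt; the import paths for the Django model and the
# project helpers are assumptions):
#   import json
#   import six
#   from isatools import model as itm
#   from isatools.isajson import ISAJSONEncoder
#   from .models import Investigation            # Django model
#   from .utils import add_lcms_untargeted_meta, check_ontology_source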
def create_isa_files(investigation_id):

    # Create investigation
    dj_i = Investigation.objects.get(pk=investigation_id)
    itm_i = itm.Investigation(filename="i_investigation.txt")
    itm_i.identifier = "i1"
    itm_i.title = dj_i.name
    itm_i.description = dj_i.description

    ################################################################################################################
    # STUDIES
    ################################################################################################################
    itm_sample_d = {}  # to traceback from django samples
    for i, dj_s in enumerate(dj_i.study_set.all()):

        itm_s = itm.Study(filename="s_study.txt")
        itm_s.identifier = "s" + str(i)
        itm_s.title = dj_s.name
        itm_s.description = dj_s.description
        itm_s.grant_number = dj_s.grant_number
        itm_s.public_release_date = dj_s.public_release_date.strftime(
            "%Y-%m-%d") if dj_s.public_release_date else ''
        itm_s.submission_date = dj_s.submission_date.strftime(
            "%Y-%m-%d") if dj_s.submission_date else ''

        itm_i.studies.append(itm_s)

        itm_i, itm_s = add_lcms_untargeted_meta(itm_i,
                                                itm_s,
                                                msms_performed=True)

        # Add study samples
        # loop through the study samples

        ################################################################################################################
        # STUDY SAMPLES
        ################################################################################################################
        for j, dj_ss in enumerate(dj_s.studysample_set.all()):
            # We are saying that each sample is derived from a different source material; this might not be true
            # in all cases but should be fine for the resulting ISA-Tab structure for MetaboLights
            source = itm.Source(name='{} source'.format(dj_ss.sample_name))
            itm_s.sources.append(source)

            # Sample material from the source
            itm_sample = itm.Sample(name=dj_ss.sample_name,
                                    derives_from=[source])

            #=====================
            # Add organism for sample
            #=====================
            if dj_ss.organism:
                dj_org_ont = dj_ss.organism.ontologyterm
                source = check_ontology_source(itm_i, dj_org_ont.ontology_name)
                if not source:
                    source = itm.OntologySource(
                        name=dj_org_ont.ontology_prefix,
                        description=dj_org_ont.ontology_name)
                    itm_i.ontology_source_references.append(source)

                val = itm.OntologyAnnotation(term=dj_org_ont.name,
                                             term_source=source,
                                             term_accession=dj_org_ont.iri)
            else:

                val = itm.OntologyAnnotation(term='',
                                             term_source='',
                                             term_accession='')

            char = itm.Characteristic(category=itm.OntologyAnnotation(
                term="Organism",
                term_source="NCIT",
                term_accession="http://purl.obolibrary.org/obo/NCIT_C14250"),
                                      value=val)
            itm_sample.characteristics.append(char)

            # =====================
            # Add organism part
            # =====================
            if dj_ss.organism_part:
                dj_org_ont = dj_ss.organism_part.ontologyterm

                source = check_ontology_source(itm_i, dj_org_ont.ontology_name)
                if not source:
                    source = itm.OntologySource(
                        name=dj_org_ont.ontology_prefix,
                        description=dj_org_ont.ontology_name)
                    itm_i.ontology_source_references.append(source)

                val = itm.OntologyAnnotation(term=dj_org_ont.name,
                                             term_source=source,
                                             term_accession=dj_org_ont.iri)

            else:
                val = itm.OntologyAnnotation(term='',
                                             term_source='',
                                             term_accession='')

            char = itm.Characteristic(category=itm.OntologyAnnotation(
                term="Organism part",
                term_source="NCIT",
                term_accession="http://purl.obolibrary.org/obo/NCIT_C103199"),
                                      value=val)

            # Add organism part for sample (e.g. foot, eye, leg, whole organism...)
            itm_sample.characteristics.append(char)

            # Potential to add technical replicates (repeat extractions of the same material), but this is too
            # confusing to use for DMA because we have extractions at different points (Liquid Extraction, SPE and
            # Fractionation)
            itm_s.samples.append(itm_sample)
            itm_sample_d[dj_ss.id] = itm_sample

        # Create a sample collection protocol (we just use one for all samples for the time being; it should
        # technically be divided into groups, but if the resulting ISA-Tab for MetaboLights is the same we can leave it as is)
        sample_collection_protocol = itm.Protocol(
            name="sample collection",
            protocol_type=itm.OntologyAnnotation(term="sample collection"))
        itm_s.protocols.append(sample_collection_protocol)
        sample_collection_process = itm.Process(
            executes_protocol=sample_collection_protocol)

        # Next, we link our materials to the Process. In this case, we are describing a sample collection
        # process that takes the source materials and produces the corresponding samples, e.g.:
        #
        # (daphnia magna source) -> (sample collection) -> [(daphnia_material0-0), (daphnia_material0-1), (daphnia_material0-2)]
        # (solvent blank source) -> (sample collection) -> [(blank_material1-0), (blank_material1-1), (sample_material1-2)]

        for src in itm_s.sources:
            sample_collection_process.inputs.append(src)
        for sam in itm_s.samples:
            sample_collection_process.outputs.append(sam)

        # Finally, attach the finished Process object to the study process_sequence. This can be done many times to
        # describe multiple sample collection events.
        itm_s.process_sequence.append(sample_collection_process)

        ################################################################################################################
        #  ASSAYS
        ################################################################################################################

        # get dictionary of Django protocols
        dj_p = {'ex': {}, 'spe': {}, 'chr': {}, 'meas': {}}

        for dj_a in dj_s.assay_set.all():
            for dj_ad in dj_a.assaydetail_set.all():
                ex = dj_ad.extractionprocess.extractionprotocol
                dj_p['ex'][ex.id] = ex

                spe = dj_ad.speprocess.speprotocol
                dj_p['spe'][spe.id] = spe

                chrom = dj_ad.chromatographyprocess.chromatographyprotocol
                dj_p['chr'][chrom.id] = chrom

                meas = dj_ad.measurementprocess.measurementprotocol
                dj_p['meas'][meas.id] = meas

        # Create isa tab protocols
        itm_p = {'ex': {}, 'spe': {}, 'chr': {}, 'meas': {}}

        # sequencing_protocol = itm.Protocol(name='sequencing', protocol_type=itm.OntologyAnnotation(term="material sequencing"))
        # itm_s.protocols.append(sequencing_protocol)

        for k, dj_ex in six.iteritems(dj_p['ex']):

            if dj_ex.name:
                nm = dj_ex.name
            else:
                nm = dj_ex.extractiontype.type

            #===========================================
            # Get extraction protocols
            #===========================================
            source = check_ontology_source(itm_i, 'CHMO')
            extraction_protocol = itm.Protocol(
                name='Extraction {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(term="Extraction"),
            )

            param = itm.ProtocolParameter(parameter_name=itm.OntologyAnnotation(
                term="Derivatization",
                term_source=source,
                term_accession='http://purl.obolibrary.org/obo/CHMO_0001485'))
            extraction_protocol.parameters.append(param)

            itm_s.protocols.append(extraction_protocol)

            itm_p['ex'][k] = extraction_protocol

        for k, dj_spe in six.iteritems(dj_p['spe']):
            if dj_spe.name:
                nm = dj_spe.name
            else:
                nm = dj_spe.spetype.type

            #===========================================
            # Get solid phase extraction (SPE) protocols
            #===========================================
            spe_protocol = itm.Protocol(
                name='Solid Phase Extraction {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(
                    term="Solid Phase Extraction"),
                components=itm.OntologyAnnotation(term=nm),
                description=dj_spe.description)
            itm_s.protocols.append(spe_protocol)
            itm_p['spe'][k] = spe_protocol

        for k, dj_chr in six.iteritems(dj_p['chr']):

            #===========================================
            # Get chromatography protocols
            #===========================================
            chromatography_protocol = itm.Protocol(
                name='Chromatography {}'.format(dj_chr.name),
                protocol_type=itm.OntologyAnnotation(term="Chromatography"))

            itm_s.protocols.append(chromatography_protocol)

            itm_p['chr'][k] = chromatography_protocol

        for k, dj_meas in six.iteritems(dj_p['meas']):
            #===========================================
            # Get measurement protocols (just mass spec for now)
            #===========================================
            if dj_meas.name:
                nm = dj_meas.name
            else:
                nm = dj_meas.measurementtechnique.type
            mass_spec_protocol = itm.Protocol(
                name='Mass spectrometry {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(term="Mass spectrometry"))
            itm_s.protocols.append(mass_spec_protocol)
            itm_p['meas'][k] = mass_spec_protocol
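
        # At this point itm_p maps each Django protocol primary key to its ISA
        # Protocol (e.g. itm_p['chr'][<chromatographyprotocol.id>]), so the
        # assay loop below can resolve the right Protocol for every Process it
        # builds.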

        for dj_a in dj_s.assay_set.all():
            itm_a = itm.Assay(filename="a_assay_{}.txt".format(dj_a.name))

            # go through each detail (which is linked to all the relevant processes)
            for dj_ad in dj_a.assaydetail_set.all():

                ####################################
                # Get extraction
                ####################################
                itm_ex_prot = itm_p['ex'][
                    dj_ad.extractionprocess.extractionprotocol.id]

                extraction_process = itm.Process(executes_protocol=itm_ex_prot)
                extraction_process.name = "extract-process-{}".format(
                    dj_ad.code_field)
                material = itm.Material(
                    name="extract-{}".format(dj_ad.code_field))
                material.type = "Extract Name"
                extraction_process.outputs.append(material)

                ############################################################
                ##### IMPORTANT: WE add the sample input here! #############
                itm_samplei = itm_sample_d[dj_ad.studysample_id]
                extraction_process.inputs.append(itm_samplei)

                ####################################
                # Get SPE
                ####################################
                if dj_ad.speprocess:

                    itm_spe_prot = itm_p['spe'][
                        dj_ad.speprocess.speprotocol.id]
                    spe_process = itm.Process(executes_protocol=itm_spe_prot)
                    spe_process.name = "spe-process-{}".format(
                        dj_ad.code_field)
                    spe_process.inputs.append(extraction_process.outputs[0])

                    material = itm.Material(
                        name="SPE-Eluent-{}".format(dj_ad.code_field))
                    material.type = "Extract Name"
                    spe_process.outputs.append(material)

                ####################################
                # Get chromatography
                ####################################
                itm_chr_prot = itm_p['chr'][
                    dj_ad.chromatographyprocess.chromatographyprotocol.id]
                chr_process = itm.Process(executes_protocol=itm_chr_prot)
                chr_process.name = "chr-process-{}".format(dj_ad.code_field)

                if dj_ad.speprocess:
                    chr_process.inputs.append(spe_process.outputs[0])
                else:
                    chr_process.inputs.append(extraction_process.outputs[0])

                material = itm.Material(
                    name="Chromatography-Eluent-{}".format(dj_ad.code_field))
                material.type = "Extract Name"

                chr_process.outputs.append(material)

                ####################################
                # Get measurements (mass spec only)
                ####################################
                itm_meas_prot = itm_p['meas'][
                    dj_ad.measurementprocess.measurementprotocol.id]
                meas_process = itm.Process(executes_protocol=itm_meas_prot)
                meas_process.name = "meas-process-{}".format(dj_ad.code_field)
                meas_process.inputs.append(chr_process.outputs[0])

                # get output file
                for file_details in dj_ad.assayrun_set.all().values(
                        'run__mfile', 'run__mfile__original_filename'):
                    datafile = itm.DataFile(
                        filename=file_details['run__mfile__original_filename'],
                        label="Raw Data File")
                    meas_process.outputs.append(datafile)
                    itm_a.data_files.append(datafile)

                if dj_ad.speprocess:
                    itm.plink(extraction_process, spe_process)
                    itm.plink(spe_process, chr_process)
                else:
                    itm.plink(extraction_process, chr_process)

                itm.plink(chr_process, meas_process)

                itm_a.samples.append(itm_samplei)
                itm_a.other_material.append(material)
                itm_a.process_sequence.append(extraction_process)

                if dj_ad.speprocess:
                    itm_a.process_sequence.append(spe_process)

                itm_a.process_sequence.append(chr_process)
                itm_a.process_sequence.append(meas_process)
                itm_a.measurement_type = itm.OntologyAnnotation(
                    term="gene sequencing")
                itm_a.technology_type = itm.OntologyAnnotation(
                    term="nucleotide sequencing")

            itm_s.assays.append(itm_a)

    # Note we haven't added factors yet

    return itm_i, json.dumps(itm_i,
                             cls=ISAJSONEncoder,
                             sort_keys=True,
                             indent=4,
                             separators=(',', ': '))
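
# A minimal usage sketch (assumes a configured Django environment and that the
# referenced Investigation record exists; the primary key is illustrative):
#   itm_investigation, isa_json = create_isa_files(investigation_id=1)
#   with open('i_investigation.json', 'w') as fh:
#       fh.write(isa_json)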
Example #4
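# The converter below assumes module-level imports along these lines (not shown
# in the original excerpt):
#   import json
#   import os
#   from isatools import isatab
#   from isatools.model import (Investigation, Study, Assay, OntologySource,
#       OntologyAnnotation, Person, Protocol, Process, Source, Sample,
#       Characteristic, StudyFactor, Material, DataFile, plink)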
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                # print(study.samples[0].name)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])
        # for src in investigation.studies[0].materials:
        #
        # for sam in investigation.studies[0].materials:

        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    # plink(material_separation_process, protein_extraction_process)

                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    protein_extraction_process.inputs.append(material_in)

                # for material_in in study.samples:
                #     # print("OHO:", material_in.name)
                #     if material_in.name == sample_json['parentID']:
                #         # print("C:",sample_json['parentID'])
                #         #no need to create, just link to process
                #         protein_extraction_process.inputs.append(x)
                #     else:
                #         # print("D:", sample_json['parentID'])
                #         #create new material and link
                #         material_in = Sample(name=sample_json['parentID'])
                #         protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                # print(study.assays[0].technology_type.term)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            # else:
            #     material_in = Material(name=sample_json['parentID'])
            #     material_out = Material(name=sample_json['id'])
            #     material_type = Characteristic(
            #         category=OntologyAnnotation(term="material_type"),
            #         value=OntologyAnnotation(term=sample_json['type'],
            #                                  term_source=obi,
            #                                  term_accession="http://purl.org/obo/OBI_1"))
            #     material_out.characteristics.append(material_type)
            #     process = Process(executes_protocol=sample_json['protocol.id'])
            #     process.name = sample_json['id']
            #     process.inputs.append(material_in)
            #     process.outputs.append(material_out)
            #
            #     study.assays[0].materials['other_material'].append(material_in)
            #     study.assays[0].materials['other_material'].append(material_out)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # Reuse the Sample already registered on the study as the
                    # input of the bulk tissue process.
                    material_in = next(x for x in study.samples
                                       if x.name == sample_json['parentID'])
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    records = []
    for element in dcc_json['measurement']:
        # metabolite_name: -> compound
        # array['measurement'][element]['signal_intensity']
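        # Note: '\t'.join() only accepts strings, so the DCC JSON fields joined
        # below are assumed to already be serialized as text.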
        record = '\t'.join((dcc_json['measurement'][element]['compound'],
                            dcc_json['measurement'][element]['assignment'],
                            dcc_json['measurement'][element]['raw_intensity'],
                            dcc_json['measurement'][element]['retention_time'],
                            dcc_json['measurement'][element]['corrected_mz'],
                            dcc_json['measurement'][element]['formula'],
                            dcc_json['measurement'][element]['adduct'],
                            dcc_json['measurement'][element]['isotopologue'],
                            dcc_json['measurement'][element]['sample.id']))
        # print(record)
        records.append(record)

    # ensure the output directory exists before writing
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        with open(
                '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.format(
                    output_path=output_path, study_id=studyid), 'w') as fh:
            print("writing 'maf file document' to file from "
                  "'generate_maf_file' method...")
            fh.write(data_rec_header)
            fh.write('\n')
            for item in records:
                fh.write(item)
                fh.write('\n')

        print("writing 'investigation information' to file...")
        print(isatab.dumps(investigation))

        isatab.dump(investigation, output_path=output_path)
    except IOError:
        print("Error: generate_maf_file() could not open the output file "
              "or write data")


def create_data_file(obs_unit, this_assay, sample_collection_process, this_isa_sample, phenotyping_protocol):
    # TODO: reactivate data file generation, one per assay
    # Creating the relevant ISA protocol application / Assay from BRAPI Observation Events:
    # -------------------------------------------------------------------------------------
    # Getting the ISA assay table generated by the 'create_isa_study' method by default
    # DEPRECATED: we aim to have one assay per observation level and one file
    # per level/assay. This file is the direct output of the
    # phenotyping-search/table call.
    for j in range(len(obs_unit['observations'])):
        # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
        phenotyping_process = Process(executes_protocol=phenotyping_protocol)
        phenotyping_process.name = "assay-name_(" + obs_unit["observationUnitName"] + ")_" + \
                                   str(j)
        # print("assay name: ", j, "|", phenotyping_process.name)
        phenotyping_process.inputs.append(this_isa_sample)

        # Creating relevant protocol parameter values associated with the protocol application:
        # -------------------------------------------------------------------------------------
        if 'season' in obs_unit['observations'][j].keys():
            pv = ParameterValue(
                category=ProtocolParameter(parameter_name=OntologyAnnotation(term="season")),
                value=OntologyAnnotation(term=str(obs_unit['observations'][j]['season']),
                                         term_source="",
                                         term_accession=""))
        else:
            pv = ParameterValue(
                category=ProtocolParameter(parameter_name=OntologyAnnotation(term="season")),
                value=OntologyAnnotation(term="none reported", term_source="", term_accession=""))

        phenotyping_process.parameter_values.append(pv)

        # Getting and setting values for performer and date of the protocol application:
        # -------------------------------------------------------------------------------------
        # if obs_unit['observations'][j]['observationTimeStamp'] is not None:
        #     phenotyping_process.date = str(obs_unit['observations'][j]['observationTimeStamp'])
        # else:
        #     # TODO: implement testing and use of datetime.datetime.today().isoformat()
        #     phenotyping_process.date = "not available"
        # if obs_unit['observations'][j]['collector'] is not None:
        #     phenotyping_process.performer = str(obs_unit['observations'][j]['collector'])
        # else:
        #     phenotyping_process.performer = "none reported"

        # Creating the ISA Raw Data Files associated with each ISA phenotyping assay:
        # -----------------------------------------------------------------------
        # datafile = DataFile(filename="phenotyping-data.txt",
        #                     label="Raw Data File",
        #                     generated_from=[this_isa_sample])
        # phenotyping_process.outputs.append(datafile)

        # Creating processes and linking
        this_assay.samples.append(this_isa_sample)
        # this_assay.process_sequence.append(sample_collection_process)
        this_assay.process_sequence.append(phenotyping_process)
        #this_assay.data_files.append(datafile)
        plink(sample_collection_process, phenotyping_process)
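
# A minimal, self-contained usage sketch for create_data_file(); the BRAPI
# observation unit and the ISA objects below are hypothetical fixtures, not
# data from the original pipeline.
if __name__ == '__main__':
    example_protocol = Protocol(
        name="phenotyping",
        protocol_type=OntologyAnnotation(term="phenotyping"))
    example_assay = Assay(filename="a_phenotyping.txt")
    example_sample = Sample(name="plot-1")
    example_collection = Process(
        executes_protocol=Protocol(name="sample collection"))
    example_obs_unit = {"observationUnitName": "plot-1",
                        "observations": [{"season": "summer 2016"}]}
    create_data_file(example_obs_unit, example_assay, example_collection,
                     example_sample, example_protocol)
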
Example #6
0
def create_descriptor():
    """
    Returns a simple but complete ISA-Tab 1.0 descriptor for illustration.
    """

    # Create an empty Investigation object and set some values to the instance
    # variables.

    investigation = Investigation()
    investigation.identifier = "i1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we want " \
        "to demonstrate how to use the object's instance variables to " \
        "set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation by
    # adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor to " \
        "set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors. A
    # design descriptor is an Ontology Annotation describing the kind of study
    # at hand. Ontology Annotations should typically reference an Ontology
    # Source. We demonstrate a mix of using the class constructors and setting
    # values with instance variables. Note that the OntologyAnnotation object
    # 'intervention_design' links its 'term_source' directly to the 'obi'
    # object instance. To ensure the OntologySource is encapsulated in the
    # descriptor, it is added to a list of 'ontology_source_references' in
    # the Investigation object. The 'intervention_design' object is then
    # added to the list of 'design_descriptors' held by the Study object.

    obi = OntologySource(
        name='OBI',
        description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)
    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(
        first_name="Alice",
        last_name="Robertson",
        affiliation="University of Life",
        roles=[
            OntologyAnnotation(
                term='submitter')])
    study.contacts.append(contact)
    publication = Publication(
        title="Experiments with Elephants",
        author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo Sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
                           "9606"))
    prototype_sample.characteristics.append(characteristic_organism)

    study.samples = batch_create_materials(
        prototype_sample, n=3)  # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our
    # sample collection protocol, and attach it to the study object. Protocols
    # must be declared before we describe Processes, as a processing event of
    # some sort must execute some defined protocol. In the case of the class
    # model, Protocols should therefore be declared before Processes in order
    # for the Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Next, we link our materials to the Process. In this particular case,
    # we are describing a sample collection process that takes one
    # source material, and produces three different samples.
    #
    # (source_material)->(sample collection)->[(sample_material-0),
    # (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to
    # describe multiple sample collection events.

    study.process_sequence.append(sample_collection_process)

    # Next, we build an Assay object and attach two protocols, extraction and
    # sequencing.

    assay = Assay(filename="a_assay.txt")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(
            term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(
            term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and a
    # sequencing process. The extraction process takes as input a
    # sample material, and produces an extract material. The sequencing
    # process takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three graphs are NOT
    # interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(
            filename="sequenced-data-{}".format(i),
            label="Raw Data File",
            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked forward and backward

        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)
        assay.measurement_type = OntologyAnnotation(term="gene sequencing")
        assay.technology_type = OntologyAnnotation(
            term="nucleotide sequencing")

    # attach the assay to the study

    study.assays.append(assay)

    from isatools import isatab
    # dumps() writes out the ISA as a string representation of the ISA-Tab
    return isatab.dumps(investigation)
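
# A minimal usage sketch (assumed entry point, not part of the original
# example): print the ISA-Tab serialisation returned by create_descriptor().
if __name__ == '__main__':
    print(create_descriptor())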