Пример #1
0
def create_descriptor():
    """Build a demo investigation from a sample plan and print it as ISA-Tab.

    The plan declares two sample types ('liver' with 5 records, 'blood'
    with 3) and a group size of 2. Three study factors (AGENT, INTENSITY,
    DURATION) are given their candidate values and handed to a
    TreatmentFactory, whose full factorial design becomes the ranked
    treatments of a TreatmentSequence. The study created from plan and
    sequence is named 's_study.txt' and attached to investigation 'I1'.

    The ISA-Tab dump of the investigation is written to stdout; the function
    itself returns nothing.
    """
    investigation = Investigation(identifier='I1')

    # Sample plan: declare each sample type, then how many to collect.
    sample_plan = SampleAssayPlan()
    for sample_type, count in (('liver', 5), ('blood', 3)):
        sample_plan.add_sample_type(sample_type)
        sample_plan.add_sample_plan_record(sample_type, count)
    sample_plan.group_size = 2

    # Study factors, created in the order AGENT, INTENSITY, DURATION.
    agent, intensity, duration = (
        StudyFactor(name=factor_name,
                    factor_type=OntologyAnnotation(term=type_term))
        for factor_name, type_term in (
            ('AGENT', 'pertubation agent'),
            ('INTENSITY', 'intensity'),
            ('DURATION', 'time'),
        )
    )

    factory = TreatmentFactory(factors=[agent, intensity, duration])
    candidate_values = (
        (agent, {'cocaine', 'crack', 'aether'}),
        (intensity, {'low', 'medium', 'high'}),
        (duration, {'short', 'long'}),
    )
    for factor, values in candidate_values:
        factory.add_factor_value(factor, values)

    sequence = TreatmentSequence(
        ranked_treatments=factory.compute_full_factorial_design())

    study_factory = IsaModelObjectFactory(sample_plan, sequence)
    generated_study = study_factory.create_study_from_plan()
    generated_study.filename = 's_study.txt'
    investigation.studies = [generated_study]

    print(isatab.dumps(investigation))
Пример #2
0
 def setUp(self):
     """Create the fixtures shared by the tests of this case.

     Sets up an empty InterventionStudyDesign, the three base study factors
     (agent, intensity, duration), two chemical treatments that differ only
     in their intensity value ('low' vs 'high'), a treatment sequence that
     ranks them 1 and 2, and a sample/assay plan with a group size of 10.
     """
     self.design = InterventionStudyDesign()

     agent_spec = BASE_FACTORS[0]
     self.agent = StudyFactor(name=agent_spec['name'],
                              factor_type=agent_spec['type'])
     intensity_spec = BASE_FACTORS[1]
     self.intensity = StudyFactor(name=intensity_spec['name'],
                                  factor_type=intensity_spec['type'])
     duration_spec = BASE_FACTORS[2]
     self.duration = StudyFactor(name=duration_spec['name'],
                                 factor_type=duration_spec['type'])

     def chemical_treatment(intensity_level):
         # Both treatments share agent and duration; only intensity varies.
         return Treatment(
             treatment_type=INTERVENTIONS['CHEMICAL'],
             factor_values=(
                 FactorValue(factor_name=self.agent, value='crack'),
                 FactorValue(factor_name=self.intensity,
                             value=intensity_level),
                 FactorValue(factor_name=self.duration, value='medium'),
             ))

     self.first_treatment = chemical_treatment('low')
     self.second_treatment = chemical_treatment('high')

     ranked = [(self.first_treatment, 1), (self.second_treatment, 2)]
     self.test_sequence = TreatmentSequence(ranked_treatments=ranked)

     self.sample_plan = SampleAssayPlan(
         group_size=10, sample_plan={}, assay_plan=None)
Пример #3
0
 def setUp(self):
     """Create an investigation 'I1' and three study factors for the tests.

     The factors are stored as ``f1`` (AGENT), ``f2`` (INTENSITY) and
     ``f3`` (DURATION), each typed with its own ontology annotation.
     """
     self.investigation = Investigation(identifier='I1')

     agent_type = OntologyAnnotation(term='pertubation agent')
     self.f1 = StudyFactor(name='AGENT', factor_type=agent_type)

     intensity_type = OntologyAnnotation(term='intensity')
     self.f2 = StudyFactor(name='INTENSITY', factor_type=intensity_type)

     duration_type = OntologyAnnotation(term='time')
     self.f3 = StudyFactor(name='DURATION', factor_type=duration_type)
Пример #4
0
    def test_compute_full_factorial_design_empty_intensities(self):
        """An empty value set for intensity must yield an empty design."""
        agent, intensity, duration = (
            StudyFactor(name=spec['name'], factor_type=spec['type'])
            for spec in (BASE_FACTORS[0], BASE_FACTORS[1], BASE_FACTORS[2])
        )

        values_by_factor = (
            (agent, {'cocaine', 'crack', 'aether'}),
            (intensity, set()),  # the factor under test has no values
            (duration, {'short', 'long'}),
        )
        for factor, values in values_by_factor:
            self.factory.add_factor_value(factor, values)

        self.assertEqual(self.factory.compute_full_factorial_design(), set())
Пример #5
0
    def test_compute_full_factorial_design_empty_agents(self):
        """An empty value set for agent must yield an empty design."""
        agent, intensity, duration = (
            StudyFactor(name=spec['name'], factor_type=spec['type'])
            for spec in (BASE_FACTORS[0], BASE_FACTORS[1], BASE_FACTORS[2])
        )

        values_by_factor = (
            (agent, set()),  # the factor under test has no values
            (intensity, {'low', 'medium', 'high'}),
            (duration, {'short', 'long'}),
        )
        for factor, values in values_by_factor:
            self.factory.add_factor_value(factor, values)

        self.assertEqual(self.factory.compute_full_factorial_design(), set())
Пример #6
0
 def setUp(self):
     """Create an empty treatment sequence and one chemical treatment.

     ``maxDiff`` is set to None so unittest prints assertion diffs in full.
     The three base study factors are built from BASE_FACTORS and used for
     the factor values of ``test_treatment`` ('crack', 'low', 'short').
     """
     self.maxDiff = None
     self.sequence = TreatmentSequence()

     agent_spec = BASE_FACTORS[0]
     self.agent = StudyFactor(name=agent_spec['name'],
                              factor_type=agent_spec['type'])
     intensity_spec = BASE_FACTORS[1]
     self.intensity = StudyFactor(name=intensity_spec['name'],
                                  factor_type=intensity_spec['type'])
     duration_spec = BASE_FACTORS[2]
     self.duration = StudyFactor(name=duration_spec['name'],
                                 factor_type=duration_spec['type'])

     chemical = INTERVENTIONS['CHEMICAL']
     agent_value = FactorValue(factor_name=self.agent, value='crack')
     intensity_value = FactorValue(factor_name=self.intensity, value='low')
     duration_value = FactorValue(factor_name=self.duration, value='short')
     self.test_treatment = Treatment(
         treatment_type=chemical,
         factor_values=(agent_value, intensity_value, duration_value))
Пример #7
0
 def parse_experimental_factors(self, factors, factortypes, tsrs, tans):
     """Append one StudyFactor per named factor to the most recent study.

     The four sequences are read in lockstep; shorter ones are padded with
     empty strings. Entries whose factor name is the empty string are
     skipped. For every other entry the factor type becomes an
     OntologyAnnotation whose term source is looked up in ``self._ts_dict``
     by its reference (None when the reference is unknown).

     :param factors: factor names
     :param factortypes: factor type terms
     :param tsrs: term source references of the factor types
     :param tans: term accessions of the factor types
     """
     rows = zip_longest(factors, factortypes, tsrs, tans, fillvalue='')
     for name, type_term, source_ref, accession in rows:
         if name == '':
             # Nothing to add when the row carries no factor name.
             continue
         annotation = OntologyAnnotation(
             term=type_term,
             term_source=self._ts_dict.get(source_ref),
             term_accession=accession)
         self.ISA.studies[-1].factors.append(
             StudyFactor(name=name, factor_type=annotation))
Пример #8
0
def unserialize_study_factor(json_obj):
    """Build a StudyFactor from its JSON dictionary representation.

    The 'factorName', 'factorType' and 'comments' entries are each optional;
    an entry that is absent or None falls back to an empty string, an empty
    OntologyAnnotation and an empty list respectively.

    :param json_obj: mapping describing the study factor
    :return: the reconstructed StudyFactor
    """
    def has_value(key):
        # A key only counts when it is present and not explicitly null.
        return key in json_obj and json_obj[key] is not None

    name = json_obj['factorName'] if has_value('factorName') else ''

    if has_value('factorType'):
        factor_type = unserialize_ontology_annotation(json_obj['factorType'])
    else:
        factor_type = OntologyAnnotation()

    if has_value('comments'):
        comments = [unserialize_comment(item)
                    for item in json_obj['comments']]
    else:
        comments = []

    return StudyFactor(name=name, factor_type=factor_type, comments=comments)
Пример #9
0
def unserialize_factor_value(json_obj):
    """Build a FactorValue from its JSON dictionary representation.

    The 'factorName', 'value', 'unit' and 'comments' entries are each
    optional; an entry that is absent or None falls back to an empty
    StudyFactor, an empty OntologyAnnotation (for both value and unit) and
    an empty list respectively.

    :param json_obj: mapping describing the factor value
    :return: the reconstructed FactorValue
    """
    factor_name = StudyFactor()
    if 'factorName' in json_obj and json_obj['factorName'] is not None:
        factor_name = unserialize_study_factor(json_obj['factorName'])
    value = OntologyAnnotation()
    if 'value' in json_obj and json_obj['value'] is not None:
        value = unserialize_ontology_annotation(json_obj['value'])
    unit = OntologyAnnotation()
    if 'unit' in json_obj and json_obj['unit'] is not None:
        # The unit has to come from the 'unit' entry; reading 'value' here
        # would silently turn the unit into a copy of the value.
        unit = unserialize_ontology_annotation(json_obj['unit'])
    comments = []
    if 'comments' in json_obj and json_obj['comments'] is not None:
        comments = [unserialize_comment(comment)
                    for comment in json_obj['comments']]

    return FactorValue(factor_name=factor_name,
                       value=value,
                       unit=unit,
                       comments=comments)
Пример #10
0
def create_descriptor():
    """Return a simple but complete ISA-JSON 1.0 descriptor for illustration.

    Builds an Investigation holding one Study (one source, three samples, a
    sample collection process and one study factor) and one Assay (an
    extraction followed by a sequencing process for every sample), then
    serializes the investigation with ``ISAJSONEncoder``.

    Side effect: prints the name and value of the first comment attached to
    the study factor.

    Returns:
        str: the investigation as indented JSON with sorted keys.
    """

    # Create an empty Investigation object and set some values to the
    # instance variables.

    investigation = Investigation()
    investigation.identifier = "1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we " \
        "want to demonstrate how to use the object's instance variables " \
        "to set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation
    # by adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor " \
        "to set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # This shows that ISA Comments can be used to annotate ISA objects, here
    # the ISA Study.
    study.comments.append(Comment(name="Study Start Date", value="Sun"))

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors.
    # A design descriptor is an Ontology Annotation describing the kind of
    # study at hand. Ontology Annotations should typically reference an
    # Ontology Source. We demonstrate a mix of using the class constructors
    # and setting values with instance variables. Note that the
    # OntologyAnnotation object 'intervention_design' links its 'term_source'
    # directly to the 'obi' object instance. To ensure the OntologySource
    # is encapsulated in the descriptor, it is added to a list of
    # 'ontology_source_references' in the Investigation object. The
    # 'intervention_design' object is then added to the list of
    # 'design_descriptors' held by the Study object.

    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)

    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.
    # It is attached exactly once: the sample collection process below takes
    # every entry of 'study.sources' as an input, so a second append of the
    # same object would give the process a duplicated input.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo Sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))

    # Adding the description to the ISA Source Material (the source is
    # already part of 'study.sources', so it only needs to be annotated):
    source.characteristics.append(characteristic_organism)

    # Declaring a new ontology and adding it to the list of resources used.
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    # Preparing an ISA Characteristic object (~Material Property) to annotate
    # sample materials.
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
            "123245"))

    prototype_sample.characteristics.append(characteristic_organ)

    # Creates a batch of 3 samples.
    study.samples = batch_create_materials(prototype_sample, n=3)

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must be
    # declared before we describe Processes, as a processing event of some sort
    # must execute some defined protocol. In the case of the class model,
    # Protocols should therefore be declared before Processes in order for the
    # Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Adding dummy Comment[] entries to the ISA Protocol object (it is the
    # first, and so far only, protocol of the study).
    sample_collection_protocol.comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    sample_collection_protocol.comments.append(
        Comment(name="Study End Date", value="2017-08-11"))

    # Creation of an ISA Study Factor object.
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    # Testing serialization of Comments attached to ISA objects.
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    print(f.comments[0].name, "|", f.comments[0].value)

    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.

    study.process_sequence.append(sample_collection_process)

    # IMPORTANT: remember to populate the list of ontology categories used to
    # annotate ISA Materials in a Study. Both the source ("Organism") and the
    # samples ("OrganismPart") carry characteristics, so both categories are
    # registered.
    study.characteristic_categories.append(characteristic_organism.category)
    study.characteristic_categories.append(characteristic_organ.category)

    # Next, we build an Assay object and attach two protocols,
    # extraction and sequencing.

    assay = Assay(filename="a_assay.txt")
    # The measurement and technology types describe the assay as a whole, so
    # they are set once rather than once per sample.
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = OntologyAnnotation(
        term="nucleotide sequencing")

    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three
    # graphs are NOT interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol;
        # its input is the extract produced just above

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(material)

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked
        plink(sequencing_process, extraction_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    # attach the assay to the study
    study.assays.append(assay)

    import json
    from isatools.isajson import ISAJSONEncoder

    # To write JSON out, use the ISAJSONEncoder class with the json package
    # and use dump() or dumps(). Note that the extra parameters sort_keys,
    # indent and separators are to make the output more human-readable.

    return json.dumps(investigation,
                      cls=ISAJSONEncoder,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
Пример #11
0
        # c = Characteristic(category=OntologyAnnotation(term="germplasmDbId"),
        #                    value=OntologyAnnotation(term=str(ou['germplasmDbId']), term_source="",
        #                    term_accession=""))
        # characteristics.append(c)

    source = Source(name=ou['observationUnitDbId'],
                    characteristics=characteristics)
    # print(source)
    study.sources.append(source)
    sample = Sample(name=ou["observationUnitDbId"])

    if 'treatments' in ou.keys():
        for element in ou['treatments']:
            for key in element.keys():
                f = StudyFactor(name=key,
                                factor_type=OntologyAnnotation(term=key))
                if f not in study.factors:
                    study.factors.append(f)

                fv = FactorValue(factor_name=f,
                                 value=OntologyAnnotation(term=str(
                                     element[key]),
                                                          term_source="",
                                                          term_accession=""))
                sample.factor_values.append(fv)
    print(sample)

    if 'observations' in ou.keys():
        for ob in ou['observations']:
            phenotyping_process = Process(
                executes_protocol=phenotyping_protocol)
Пример #12
0
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                # print(study.samples[0].name)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])
        # for src in investigation.studies[0].materials:
        #
        # for sam in investigation.studies[0].materials:

        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    # plink(material_separation_process, protein_extraction_process)

                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    protein_extraction_process.inputs.append(material_in)

                # for material_in in study.samples:
                #     # print("OHO:", material_in.name)
                #     if material_in.name == sample_json['parentID']:
                #         # print("C:",sample_json['parentID'])
                #         #no need to create, just link to process
                #         protein_extraction_process.inputs.append(x)
                #     else:
                #         # print("D:", sample_json['parentID'])
                #         #create new material and link
                #         material_in = Sample(name=sample_json['parentID'])
                #         protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                # print(study.assays[0].technology_type.term)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            # else:
            #     material_in = Material(name=sample_json['parentID'])
            #     material_out = Material(name=sample_json['id'])
            #     material_type = Characteristic(
            #         category=OntologyAnnotation(term="material_type"),
            #         value=OntologyAnnotation(term=sample_json['type'],
            #                                  term_source=obi,
            #                                  term_accession="http://purl.org/obo/OBI_1"))
            #     material_out.characteristics.append(material_type)
            #     process = Process(executes_protocol=sample_json['protocol.id'])
            #     process.name = sample_json['id']
            #     process.inputs.append(material_in)
            #     process.outputs.append(material_out)
            #
            #     study.assays[0].materials['other_material'].append(material_in)
            #     study.assays[0].materials['other_material'].append(material_out)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    records = []
    for element in dcc_json['measurement']:
        # metabolite_name: -> compound
        # array['measurement'][element]['signal_intensity']
        record = '\t'.join((dcc_json['measurement'][element]['compound'],
                            dcc_json['measurement'][element]['assignment'],
                            dcc_json['measurement'][element]['raw_intensity'],
                            dcc_json['measurement'][element]['retention_time'],
                            dcc_json['measurement'][element]['corrected_mz'],
                            dcc_json['measurement'][element]['formula'],
                            dcc_json['measurement'][element]['adduct'],
                            dcc_json['measurement'][element]['isotopologue'],
                            dcc_json['measurement'][element]['sample.id']))
        # print(record)
        records.append(record)

    if not os.path.exists(output_path):
        os.makedirs(output_path)
        try:
            with open(
                    '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                    format(output_path=output_path,
                           study_id=studyid), 'w') as fh:
                print(
                    "'writing 'maf file document' to file from 'generate_maf_file' method:..."
                )
                fh.writelines(data_rec_header)
                fh.writelines('\n')
                for item in records:
                    fh.writelines(item)
                    fh.writelines('\n')

            print("writing 'investigation information' to file...")
            print(isatab.dumps(investigation))

            isatab.dump(investigation, output_path=output_path)
        except IOError:
            print("Error: in main() method can't open file or write data")
Пример #13
0
def create_study_sample_and_assay(client, brapi_study_id, isa_study, sample_collection_protocol, phenotyping_protocol):
    """Create ISA samples and sample-collection processes from BrAPI observation units.

    Every observation unit returned by
    ``client.get_study_observation_units(brapi_study_id)`` is converted when
    its ``germplasmName`` resolves to a source through
    ``isa_study.get_source`` and its ``observationUnitName`` has not been
    converted before. For each converted unit:

    * a ``Sample`` derived from the source is created and appended to
      ``isa_study.samples``;
    * mapped observation-unit attributes are attached to the sample as
      characteristics (null values are skipped) and, as a workaround, also
      appended to the comments of ``isa_study.assays[0]``;
    * every key of every entry in ``treatments`` becomes a ``StudyFactor``
      (added to ``isa_study.factors`` if absent) and a ``FactorValue`` on the
      sample;
    * a ``Process`` executing ``sample_collection_protocol`` links the source
      to the sample and is appended to ``isa_study.process_sequence``.

    After the loop ``create_data_file`` is called once with the last
    observation unit iterated and the most recently created sample and
    process. When nothing was converted (no observation units, or no unit
    with a known source) that call is skipped instead of failing on unbound
    names.

    Args:
        client: object exposing ``get_study_observation_units(study_id)``.
        brapi_study_id: identifier forwarded to the client.
        isa_study: study object providing ``get_source``, ``assays``,
            ``factors``, ``samples`` and ``process_sequence``.
        sample_collection_protocol: protocol executed by each
            sample-collection process.
        phenotyping_protocol: forwarded unchanged to ``create_data_file``.

    Returns:
        None.
    """
    # BrAPI observation-unit attribute -> ISA characteristic category.
    obsunit_to_isasample_mapping_dictionary = {
        "X": "X",
        "Y": "Y",
        "blockNumber": "Block Number",
        "plotNumber": "Plot Number",
        "plantNumber": "Plant Number",
        "observationLevel": "Observation unit type",
    }
    # The assay-level mapping currently has the same content.
    obsunit_to_isaassay_mapping_dictionary = dict(obsunit_to_isasample_mapping_dictionary)

    def _characteristic(category, raw_value):
        # Wrap an attribute as a free-text Characteristic (no ontology source).
        return Characteristic(
            category=OntologyAnnotation(term=category),
            value=OntologyAnnotation(term=str(raw_value),
                                     term_source="",
                                     term_accession=""))

    # Names already converted; allows handling multiyear observation units.
    converted_obs_unit_names = set()

    # Pre-bind everything the trailing create_data_file call reads so that an
    # empty or fully-skipped iteration cannot leave these names undefined.
    obs_unit = None
    this_assay = None
    this_isa_sample = None
    sample_collection_process = None

    for obs_unit in client.get_study_observation_units(brapi_study_id):
        # Getting the relevant germplasm used for that observation event.
        this_source = isa_study.get_source(obs_unit['germplasmName'])

        # TODO Assumed one assay by study. Will need to move to one assay by level/datalink
        this_assay = isa_study.assays[0]

        # Sample == Observation Unit. observationUnitName is the business ID
        # of the observation unit (i.e. plot number), not its primary key.
        unit_name = obs_unit['observationUnitName']
        if this_source is None or unit_name in converted_obs_unit_names:
            continue
        converted_obs_unit_names.add(unit_name)

        this_isa_sample = Sample(name=unit_name, derives_from=[this_source])

        # No default behaviour: an attribute is either mapped or ignored.
        for key, value in obs_unit.items():
            sample_category = obsunit_to_isasample_mapping_dictionary.get(key)
            # Test the raw value: str(value) is never None, so the previous
            # check could not filter out null attributes.
            if sample_category is not None and value is not None:
                this_isa_sample.characteristics.append(
                    _characteristic(sample_category, value))

            assay_category = obsunit_to_isaassay_mapping_dictionary.get(key)
            if assay_category is not None:
                # TODO: quick workaround used to store observation units characteristics
                this_assay.comments.append(_characteristic(assay_category, value))

        # Looking for treatments in BrAPI and mapping them to ISA factor values.
        # ``or ()`` also tolerates an explicit null "treatments" entry.
        for treatment in obs_unit.get('treatments') or ():
            for factor_name, level in treatment.items():
                f = StudyFactor(name=factor_name,
                                factor_type=OntologyAnnotation(term=factor_name))
                if f not in isa_study.factors:
                    isa_study.factors.append(f)

                fv = FactorValue(factor_name=f,
                                 value=OntologyAnnotation(term=str(level),
                                                          term_source="",
                                                          term_accession=""))
                this_isa_sample.factor_values.append(fv)
        isa_study.samples.append(this_isa_sample)

        # TODO: Add Comment[Factor Values] : iterate through BRAPI treatments to obtain all possible values for a given Factor

        # Creating the process that links the source to the new sample.
        sample_collection_process = Process(executes_protocol=sample_collection_protocol)
        sample_collection_process.performer = "n.a."
        sample_collection_process.date = datetime.datetime.today().isoformat()
        sample_collection_process.inputs.append(this_source)
        sample_collection_process.outputs.append(this_isa_sample)
        isa_study.process_sequence.append(sample_collection_process)

    if sample_collection_process is None:
        # Nothing was converted, so there is no sample/process to attach a
        # data file to.
        return

    # Creating the relevant ISA protocol application / Assay from BRAPI Observation Events.
    create_data_file(obs_unit, this_assay, sample_collection_process, this_isa_sample, phenotyping_protocol)
Пример #14
0
 def test_add_factor_value_number(self):
     """Assert that adding the number 1.05 yields the value set {1.05}."""
     factor_spec = BASE_FACTORS[1]
     factor = StudyFactor(name=factor_spec['name'],
                          factor_type=factor_spec['type'])
     self.factory.add_factor_value(factor, 1.05)
     stored_values = self.factory.factors.get(factor)
     self.assertEqual(stored_values, {1.05})
Пример #15
0
 def test_add_factor_value_set(self):
     """Assert that a set of values added for a factor is read back equal."""
     factor_spec = BASE_FACTORS[0]
     factor = StudyFactor(name=factor_spec['name'],
                          factor_type=factor_spec['type'])
     expected_values = {'agent_orange', 'crack, cocaine'}
     self.factory.add_factor_value(factor, expected_values)
     stored_values = self.factory.factors.get(factor)
     self.assertEqual(stored_values, expected_values)
Пример #16
0
 def test_add_factor_value_str(self):
     """Assert that adding one string yields a set holding just that string."""
     factor_spec = BASE_FACTORS[0]
     factor = StudyFactor(name=factor_spec['name'],
                          factor_type=factor_spec['type'])
     self.factory.add_factor_value(factor, 'agent_orange')
     stored_values = self.factory.factors.get(factor)
     self.assertEqual(stored_values, {'agent_orange'})
Пример #17
0
    def test_compute_full_factorial_design(self):
        """Assert the full factorial design holds every level combination.

        Three agents, three intensities and two durations are registered, so
        the expected set holds one CHEMICAL treatment per combination (18).
        """
        agent_spec, intensity_spec, duration_spec = BASE_FACTORS[:3]
        agent = StudyFactor(name=agent_spec['name'],
                            factor_type=agent_spec['type'])
        intensity = StudyFactor(name=intensity_spec['name'],
                                factor_type=intensity_spec['type'])
        duration = StudyFactor(name=duration_spec['name'],
                               factor_type=duration_spec['type'])

        self.factory.add_factor_value(agent, {'cocaine', 'crack', 'aether'})
        self.factory.add_factor_value(intensity, {'low', 'medium', 'high'})
        self.factory.add_factor_value(duration, {'short', 'long'})

        # One treatment per (agent, intensity, duration) combination.
        expected_treatments = {
            Treatment(treatment_type=INTERVENTIONS['CHEMICAL'],
                      factor_values=(FactorValue(factor_name=agent,
                                                 value=agent_level),
                                     FactorValue(factor_name=intensity,
                                                 value=intensity_level),
                                     FactorValue(factor_name=duration,
                                                 value=duration_level)))
            for agent_level in ('cocaine', 'crack', 'aether')
            for intensity_level in ('high', 'low', 'medium')
            for duration_level in ('long', 'short')
        }

        full_factorial = self.factory.compute_full_factorial_design()
        self.assertEqual(full_factorial, expected_treatments)