Example #1
 def test_add_sample_plan_record(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'),
         value=OntologyAnnotation(term='liver'))
     self.plan.add_sample_type(liver_sample_type)
     self.plan.add_sample_plan_record(liver_sample_type, 5)
     self.assertEqual(self.plan.sample_plan, {liver_sample_type: 5})
Example #2
 def parse_protocols(self, names, ptypes, tsrs, tans, descriptions,
                     parameterslists, hardwares, softwares, contacts):
     for name, ptype, tsr, tan, description, parameterslist, hardware, \
         software, contact in \
             zip_longest(names, ptypes, tsrs, tans, descriptions,
                         parameterslists, hardwares, softwares, contacts,
                         fillvalue=''):
         if name != '':  # only add if there's a name
             protocoltype_oa = OntologyAnnotation(
                 term=ptype, term_source=self._ts_dict.get(tsr),
                 term_accession=tan)
             protocol = Protocol(name=name, protocol_type=protocoltype_oa,
                                 description=description,
                                 parameters=list(map(
                                     lambda x: ProtocolParameter(
                                         parameter_name=OntologyAnnotation(
                                             term=x)),
                                     parameterslist.split(';')
                                     if parameterslist is not None
                                     else '')))
             protocol.comments = [
                 Comment(name="Protocol Hardware", value=hardware),
                 Comment(name="Protocol Software", value=software),
                 Comment(name="Protocol Contact", value=contact)]
             self.ISA.studies[-1].protocols.append(protocol)
Example #3
 def test_add_sample_plan_record_err(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'),
         value=OntologyAnnotation(term='liver'))
     self.plan.add_sample_type(liver_sample_type)
     self.assertRaises(TypeError, self.plan.add_sample_plan_record,
                       liver_sample_type, 'five')
Example #4
def create_descriptor():
    """Returns a ISA-Tab descriptor using a simple sample plan for
    illustration."""
    investigation = Investigation(identifier='I1')
    plan = SampleAssayPlan()
    plan.add_sample_type('liver')
    plan.add_sample_plan_record('liver', 5)
    plan.add_sample_type('blood')
    plan.add_sample_plan_record('blood', 3)
    plan.group_size = 2
    f1 = StudyFactor(name='AGENT',
                     factor_type=OntologyAnnotation(term='perturbation agent'))
    f2 = StudyFactor(name='INTENSITY',
                     factor_type=OntologyAnnotation(term='intensity'))
    f3 = StudyFactor(name='DURATION',
                     factor_type=OntologyAnnotation(term='time'))
    treatment_factory = TreatmentFactory(factors=[f1, f2, f3])
    treatment_factory.add_factor_value(f1, {'cocaine', 'crack', 'aether'})
    treatment_factory.add_factor_value(f2, {'low', 'medium', 'high'})
    treatment_factory.add_factor_value(f3, {'short', 'long'})
    ffactorial_design_treatments = treatment_factory\
        .compute_full_factorial_design()
    treatment_sequence = TreatmentSequence(
        ranked_treatments=ffactorial_design_treatments)
    # treatment_factory.add_factor_value('intensity', 1.05)
    study = IsaModelObjectFactory(plan, treatment_sequence)\
        .create_study_from_plan()
    study.filename = 's_study.txt'
    investigation.studies = [study]
    print(isatab.dumps(investigation))
Example #5
 def setUp(self):
     self.assay_type = AssayType(
         measurement_type=MEASUREMENT_TYPE_TRANSCRIPTION_PROFILING,
         technology_type=TECHNOLOGY_TYPE_DNA_MICROARRAY)
     self.assay_type_with_oa = AssayType(
         measurement_type=OntologyAnnotation(
             term=MEASUREMENT_TYPE_TRANSCRIPTION_PROFILING),
         technology_type=OntologyAnnotation(
             term=TECHNOLOGY_TYPE_DNA_MICROARRAY))
Example #6
 def test_add_sample_type_str(self):
     liver_sample_type = 'liver'
     self.plan.add_sample_type(liver_sample_type)
     self.assertEqual(
         self.plan.sample_types, {
             Characteristic(
                 category=OntologyAnnotation(term='organism part'),
                 value=OntologyAnnotation(term=liver_sample_type))
         })
Example #7
 def setUp(self):
     self.investigation = Investigation(identifier='I1')
     self.f1 = StudyFactor(
         name='AGENT',
          factor_type=OntologyAnnotation(term='perturbation agent'))
     self.f2 = StudyFactor(name='INTENSITY',
                           factor_type=OntologyAnnotation(term='intensity'))
     self.f3 = StudyFactor(name='DURATION',
                           factor_type=OntologyAnnotation(term='time'))
Example #8
 def test_add_assay_plan_record(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'),
         value=OntologyAnnotation(term='liver'))
     self.plan.add_sample_type(liver_sample_type)
     self.plan.add_sample_plan_record(liver_sample_type, 5)
     ngs_assay_type = AssayType(measurement_type='ngs')
     self.plan.add_assay_type(ngs_assay_type)
     self.plan.add_assay_plan_record(liver_sample_type, ngs_assay_type)
     self.assertEqual(self.plan.assay_plan,
                      {(liver_sample_type, ngs_assay_type)})
Example #9
    def infer_missing_metadata(self):
        S = self.ISA.studies[-1]

        defaultassay = None
        # first, let's try to infer the MT/TT from the study design
        # descriptors; only the first one is checked
        if len(S.design_descriptors) > 0:
            defaultassay = self._get_measurement_and_tech(
                S.design_descriptors[0].term)

        # next, go through the loaded comments to see what we can find
        for comment in S.comments:
            commentkey = get_squashed(comment.name)
            # ArrayExpress specific comments
            # (1) if there is no default assay yet, try to use AEExperimentType
            if commentkey == 'aeexperimenttype' and defaultassay is None:
                defaultassay = self._get_measurement_and_tech(comment.value)
            # (2) if there is no identifier set, try to use ArrayExpressAccession
            if commentkey == 'arrayexpressaccession':
                if self.ISA.identifier == '':
                    self.ISA.identifier = comment.value
                if S.identifier == '':
                    S.identifier = comment.value
            # (3) if there is no submission date set, try to use
            # ArrayExpressSubmissionDate
            if commentkey == 'arrayexpresssubmissiondate':
                if self.ISA.submission_date == '':
                    self.ISA.submission_date = comment.value
                if S.submission_date == '':
                    S.submission_date = comment.value

        # if there is STILL no defaultassay set, try to infer from the study title
        if defaultassay is None \
                and ('transcriptionprof' in get_squashed(S.title)
                     or 'geneexpressionprof' in get_squashed(S.title)):
            defaultassay = Assay(measurement_type=OntologyAnnotation(
                term='transcription profiling'),
                technology_type=OntologyAnnotation(
                term='DNA microarray'),
                technology_platform='GeneChip')

        if defaultassay is None:
            defaultassay = Assay()

        # set file names if identifiers are available
        self.ISA.filename = 'i_{0}investigation.txt'.format(
            self.ISA.identifier + '_'
            if self.ISA.identifier != '' else self.ISA.identifier)
        S.filename = 's_{0}study.txt'.format(
            S.identifier + '_' if S.identifier != '' else S.identifier)
        defaultassay.filename = 'a_{0}assay.txt'.format(
            S.identifier + '_' if S.identifier != '' else S.identifier)

        S.assays = [defaultassay]
Example #10
 def test_sample_types_property_from_set(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'), value='liver')
     blood_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'), value='blood')
     heart_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'), value='heart')
     test_sample_types = {
         liver_sample_type, blood_sample_type, heart_sample_type
     }
     self.plan.sample_types = test_sample_types
     self.assertEqual(self.plan.sample_types, test_sample_types)
Example #11
 def test_add_assay_plan_record_err(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'),
         value=OntologyAnnotation(term='liver'))
     self.plan.add_sample_type(liver_sample_type)
     self.plan.add_sample_plan_record(liver_sample_type, 5)
     ngs_assay_type = AssayType(measurement_type='ngs')
     self.plan.add_assay_type(ngs_assay_type)
     blood_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'),
         value=OntologyAnnotation(term='blood'))
     self.assertRaises(ValueError, self.plan.add_assay_plan_record,
                       blood_sample_type, ngs_assay_type)
Example #12
def create_isa_characteristic(category, value):
    if category is None or len(category) == 0:
        return None
    if value is None or len(value) == 0:
        return None
    # return Characteristic(category, str(value))
    this_characteristic = Characteristic(
        category=OntologyAnnotation(term=str(category)),
        value=OntologyAnnotation(term=str(value),
                                 term_source="",
                                 term_accession=""))
    # print("category: ", this_characteristic.category.term, "value: ", this_characteristic.value.term)
    return this_characteristic
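# Usage sketch (not part of the original example): both arguments are plain
# strings, which create_isa_characteristic wraps in OntologyAnnotation objects
# itself; the 'organism part'/'leaf' values below are hypothetical.
organism_part = create_isa_characteristic('organism part', 'leaf')
if organism_part is not None:
    print(organism_part.category.term, organism_part.value.term)
# Empty or missing inputs short-circuit to None.
assert create_isa_characteristic('', 'leaf') is None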
Example #13
def unserialize_study_publication(json_obj):
    pubmed_id = ''
    if 'pubMedID' in json_obj and json_obj['pubMedID'] is not None:
        pubmed_id = json_obj['pubMedID']
    doi = ''
    if 'doi' in json_obj and json_obj['doi'] is not None:
        doi = json_obj['doi']
    author_list = ''
    if 'authorList' in json_obj and json_obj['authorList'] is not None:
        author_list = json_obj['authorList']
    title = ''
    if 'title' in json_obj and json_obj['title'] is not None:
        title = json_obj['title']
    status = OntologyAnnotation()
    if 'status' in json_obj and json_obj['status'] is not None:
        status = unserialize_ontology_annotation(json_obj['status'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return Publication(pubmed_id=pubmed_id,
                       doi=doi,
                       author_list=author_list,
                       title=title,
                       status=status,
                       comments=comments)
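# Usage sketch (assumption: unserialize_ontology_annotation and
# unserialize_comment from this module are in scope); the JSON fragment below
# is made up for illustration.
publication_json = {
    'pubMedID': '12345678',
    'doi': '10.1000/xyz123',
    'authorList': 'A. Robertson, B. Robertson',
    'title': 'Experiments with Elephants',
    'status': {'annotationValue': 'published'},
    'comments': []
}
publication = unserialize_study_publication(publication_json)
print(publication.title, publication.status.term)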
Example #14
def search_ols(term, ontology_source):
    """Returns a list of OntologyAnnotation objects according to what's
    returned by OLS search"""
    url = OLS_API_BASE_URI + "/search"
    if isinstance(ontology_source, str):
        os_search = ontology_source
    elif isinstance(ontology_source, OntologySource):
        os_search = ontology_source.name
    else:
        os_search = None
    query = "{0}&queryFields=label&ontology={1}&exact=True".format(
        term, os_search)
    url += '?q={}'.format(query)
    log.debug(url)
    import requests
    req = requests.get(url)
    J = json.loads(req.text)
    ontology_annotations = []
    for search_result_json in J["response"]["docs"]:
        ontology_annotations.append(
            OntologyAnnotation(term=search_result_json["label"],
                               term_accession=search_result_json["iri"],
                               term_source=ontology_source if isinstance(
                                   ontology_source, OntologySource) else None))
    return ontology_annotations
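# Usage sketch (assumptions: OLS_API_BASE_URI is configured in this module and
# the OLS REST service is reachable; 'uberon' is just an example ontology name).
hits = search_ols('liver', 'uberon')
for annotation in hits:
    print(annotation.term, annotation.term_accession)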
Example #15
 def parse_experimental_designs(self, designs, tsrs, tans):
     for design, tsr, tan in zip_longest(designs, tsrs, tans, fillvalue=''):
         design_descriptor = OntologyAnnotation(
             term=design, term_source=self._ts_dict.get(tsr),
             term_accession=tan)
         if design_descriptor.term != '':  # only add if the DD has a term
             self.ISA.studies[-1].design_descriptors.append(
                 design_descriptor)
Example #16
def unserialize_protocol_parameter(json_obj):
    parameter_name = OntologyAnnotation()
    if 'parameter_name' in json_obj and json_obj['parameter_name'] is not None:
        parameter_name = unserialize_ontology_annotation(
            json_obj['parameter_name'])
    unit = OntologyAnnotation()
    if 'unit' in json_obj and json_obj['unit'] is not None:
        unit = unserialize_ontology_annotation(json_obj['unit'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return ProtocolParameter(
        parameter_name=parameter_name,
        # unit=unit,
        comments=comments)
Example #17
 def test_sample_types_property_from_list(self):
     liver_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'), value='liver')
     blood_sample_type = Characteristic(
         category=OntologyAnnotation(term='organism part'), value='blood')
     brain_sample_type = 'brain'
     test_sample_types = [
         liver_sample_type, blood_sample_type, liver_sample_type,
         brain_sample_type
     ]
     self.plan.sample_types = test_sample_types
     self.assertEqual(
         self.plan.sample_types, {
             blood_sample_type, liver_sample_type,
             Characteristic(
                 category=OntologyAnnotation(term='organism part'),
                 value=OntologyAnnotation(term=brain_sample_type))
         })
Example #18
def create_isa_study(brapi_study_id):
    """Returns an ISA study given a BrAPI endpoints and a BrAPI study identifier."""
    brapi_study = get_brapi_study(brapi_study_id)
    this_study = Study(filename="s_" + str(brapi_study_id) + ".txt")
    this_study.identifier = brapi_study['studyDbId']
    if 'name' in brapi_study:
        this_study.title = brapi_study['name']
    elif 'studyName' in brapi_study:
        this_study.title = brapi_study['studyName']

    this_study.comments.append(
        Comment(name="Study Start Date", value=brapi_study['startDate']))
    this_study.comments.append(
        Comment(name="Study End Date", value=brapi_study['endDate']))
    if brapi_study['location'] is not None and brapi_study['location'][
            'name'] is not None:
        this_study.comments.append(
            Comment(name="Study Geographical Location",
                    value=brapi_study['location']['name']))
    else:
        this_study.comments.append(
            Comment(name="Study Geographical Location", value=""))

    study_design = brapi_study['studyType']
    oa_st_design = OntologyAnnotation(term=study_design)
    this_study.design_descriptors = [oa_st_design]

    oref_tt = OntologySource(
        name="OBI", description="Ontology for Biomedical Investigation")
    oa_tt = OntologyAnnotation(term="phenotyping",
                               term_accession="",
                               term_source=oref_tt)
    oref_mt = OntologySource(
        name="OBI", description="Ontology for Biomedical Investigation")
    oa_mt = OntologyAnnotation(term="multi-technology",
                               term_accession="",
                               term_source=oref_mt)
    isa_assay_file = "a_" + str(brapi_study_id) + ".txt"
    this_assay = Assay(measurement_type=oa_tt,
                       technology_type=oa_mt,
                       filename=isa_assay_file)
    this_study.assays.append(this_assay)

    return this_study
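# Possible call site (assumption: get_brapi_study() resolves the identifier
# against a reachable BrAPI endpoint; 'BRAPI_STUDY_001' is a placeholder).
study = create_isa_study('BRAPI_STUDY_001')
print(study.identifier, study.filename)
for comment in study.comments:
    print(comment.name, comment.value)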
Example #19
def unserialize_factor_value(json_obj):
    factor_name = StudyFactor()
    if 'factorName' in json_obj and json_obj['factorName'] is not None:
        factor_name = unserialize_study_factor(json_obj['factorName'])
    value = OntologyAnnotation()
    if 'value' in json_obj and json_obj['value'] is not None:
        value = unserialize_ontology_annotation(json_obj['value'])
    unit = OntologyAnnotation()
    if 'unit' in json_obj and json_obj['unit'] is not None:
        unit = unserialize_ontology_annotation(json_obj['unit'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return FactorValue(factor_name=factor_name,
                       value=value,
                       unit=unit,
                       comments=comments)
Example #20
def unserialize_characteristic(json_obj):
    category = OntologyAnnotation()
    if 'category' in json_obj and json_obj['category'] is not None:
        category = unserialize_ontology_annotation(json_obj['category'])
    value = OntologyAnnotation()
    if 'value' in json_obj and json_obj['value'] is not None:
        value = unserialize_ontology_annotation(json_obj['value'])
    unit = OntologyAnnotation()
    if 'unit' in json_obj and json_obj['unit'] is not None:
        unit = unserialize_ontology_annotation(json_obj['unit'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return Characteristic(category=category,
                          value=value,
                          unit=unit,
                          comments=comments)
Example #21
 def parse_experimental_factors(self, factors, factortypes, tsrs, tans):
     for factor, factortype, tsr, tan in zip_longest(
             factors, factortypes, tsrs, tans, fillvalue=''):
         if factor != '':  # only add if there's a factor name
             factortype_oa = OntologyAnnotation(
                 term=factortype, term_source=self._ts_dict.get(tsr),
                 term_accession=tan)
             study_factor = StudyFactor(
                 name=factor, factor_type=factortype_oa)
             self.ISA.studies[-1].factors.append(study_factor)
Example #22
def unserialize_study_factor(json_obj):
    name = ''
    if 'factorName' in json_obj and json_obj['factorName'] is not None:
        name = json_obj['factorName']
    factor_type = OntologyAnnotation()
    if 'factorType' in json_obj and json_obj['factorType'] is not None:
        factor_type = unserialize_ontology_annotation(json_obj['factorType'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return StudyFactor(name=name, factor_type=factor_type, comments=comments)
Example #23
 def parse_people(self, lastnames, firstnames, midinitialss, emails,
                  phones, faxes, addresses, affiliations, roles,
                  roletans, roletrs):
     for lastname, firstname, midinitials, email, phone, fax, address, \
         affiliation, role, roletan, roletsr in \
             zip_longest(lastnames, firstnames, midinitialss, emails,
                         phones, faxes, addresses, affiliations, roles,
                         roletans, roletrs, fillvalue=''):
         rolesoa = OntologyAnnotation(
             term=role,
             term_source=self._ts_dict.get(roletsr),
             term_accession=roletan)
         person = Person(last_name=lastname, first_name=firstname,
                         mid_initials=midinitials, email=email,
                         phone=phone, fax=fax, address=address,
                         affiliation=affiliation, roles=[rolesoa])
         self.ISA.studies[-1].contacts.append(person)
Example #24
 def parse_publications(self, pubmedids, dois, authorlists,
                        titles, statuses, statustans, statustsrs):
     for pubmedid, doi, authorlist, title, status, statustsr, statustan in \
             zip_longest(pubmedids, dois, authorlists, titles, statuses,
                          statustsrs, statustans, fillvalue=''):
         # only add if there's a pubmed ID, DOI or title
         if pubmedid != '' or doi != '' or title != '':
             statusoa = OntologyAnnotation(
                 term=status,
                 term_source=self._ts_dict.get(statustsr),
                 term_accession=statustan)
             publication = Publication(
                 pubmed_id=pubmedid,
                 doi=doi,
                 author_list=authorlist,
                 title=title,
                 status=statusoa)
             self.ISA.studies[-1].publications.append(publication)
Example #25
def unserialize_ontology_annotation(json_obj):
    term = ''
    if 'annotationValue' in json_obj and json_obj[
            'annotationValue'] is not None:
        term = json_obj['annotationValue']
    term_source = None
    if 'termSource' in json_obj and json_obj['termSource'] is not None:
        term_source = unserialize_ontology_source(json_obj['termSource'])
    term_accession = ''
    if 'termAccession' in json_obj and json_obj['termAccession'] is not None:
        term_accession = json_obj['termAccession']
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return OntologyAnnotation(term=term,
                              term_source=term_source,
                              term_accession=term_accession,
                              comments=comments)
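# Minimal round-trip sketch showing the ISA-JSON keys the function above reads;
# the OBI accession below is only illustrative.
oa_json = {
    'annotationValue': 'intervention design',
    'termAccession': 'http://purl.obolibrary.org/obo/OBI_0000115',
    'comments': []
}
oa = unserialize_ontology_annotation(oa_json)
print(oa.term, oa.term_accession)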
Example #26
def unserialize_protocol(json_obj):
    name = ''
    if 'name' in json_obj and json_obj['name'] is not None:
        name = json_obj['name']
    protocol_type = OntologyAnnotation()
    if 'protocol_type' in json_obj and json_obj['protocol_type'] is not None:
        protocol_type = unserialize_ontology_annotation(
            json_obj['protocol_type'])
    description = ''
    if 'description' in json_obj and json_obj['description'] is not None:
        description = json_obj['description']
    uri = ''
    if 'uri' in json_obj and json_obj['uri'] is not None:
        uri = json_obj['uri']
    version = ''
    if 'version' in json_obj and json_obj['version'] is not None:
        version = json_obj['version']
    parameters = list()
    if 'parameters' in json_obj:
        for parameter in json_obj['parameters']:
            parameters.append(unserialize_protocol_parameter(parameter))
    components = list()
    if 'components' in json_obj and len(json_obj['components']) > 0:
        for comp in json_obj['components']:
            components.append(unserialize_ontology_annotation(comp))
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))

    return Protocol(name=name,
                    protocol_type=protocol_type,
                    description=description,
                    uri=uri,
                    version=version,
                    parameters=parameters,
                    components=components,
                    comments=comments)
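# Usage sketch (assumption: the other unserialize_* helpers in this module are
# in scope). Note that this deserializer reads snake_case keys such as
# 'protocol_type', unlike the camelCase keys used by the other helpers.
protocol_json = {
    'name': 'sample collection',
    'protocol_type': {'annotationValue': 'sample collection'},
    'description': '',
    'uri': '',
    'version': '',
    'parameters': [],
    'components': [],
    'comments': []
}
protocol = unserialize_protocol(protocol_json)
print(protocol.name, protocol.protocol_type.term)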
Example #27
#     # print(sample)
#     sample_collection_protocol = Protocol(name="sample collection",
#                                           protocol_type=OntologyAnnotation(term="sample collection"))
#     study.protocols.append(sample_collection_protocol)
#     sample_collection_process = Process(executes_protocol=sample_collection_protocol)
#
#     sample_collection_process.inputs.append(source)
#     sample_collection_process.outputs.append(sample)
#
#     study.process_sequence.append(sample_collection_process)

isa_sources = []
counter = 0

phenotyping_protocol = Protocol(
    name="phenotyping", protocol_type=OntologyAnnotation(term="phenotyping"))
study.protocols.append(phenotyping_protocol)
assay = study.assays[0]

for ou in obsunits:
    characteristics = []
    factors = []

    if 'blockNumber' in ou.keys():
        c = Characteristic(category=OntologyAnnotation(term="blockNumber"),
                           value=OntologyAnnotation(term=str(
                               ou['blockNumber']),
                                                    term_source="",
                                                    term_accession=""))
        characteristics.append(c)
Example #28
    def _exportISATAB(self, destinationPath, detailsDict):
        """
		Export the dataset's metadata to the directory *destinationPath* as ISATAB
		detailsDict should have the format:
		detailsDict = {
		    'investigation_identifier' : "i1",
		    'investigation_title' : "Give it a title",
		    'investigation_description' : "Add a description",
		    'investigation_submission_date' : "2016-11-03",
		    'investigation_public_release_date' : "2016-11-03",
		    'first_name' : "Noureddin",
		    'last_name' : "Sadawi",
		    'affiliation' : "University",
		    'study_filename' : "my_ms_study",
		    'study_material_type' : "Serum",
		    'study_identifier' : "s1",
		    'study_title' : "Give the study a title",
		    'study_description' : "Add study description",
		    'study_submission_date' : "2016-11-03",
		    'study_public_release_date' : "2016-11-03",
		    'assay_filename' : "my_ms_assay"
		}

		:param str destinationPath: Path to a directory in which the output will be saved
		:param dict detailsDict: Contains several key, value pairs required to for ISATAB
		:raises IOError: If writing one of the files fails
		"""

        from isatools.model import (
            Investigation, Study, Assay, OntologyAnnotation, OntologySource,
            Person, Publication, Protocol, Source, Comment, Sample,
            Characteristic, Process, Material, DataFile, ParameterValue,
            plink)
        from isatools import isatab
        import isaExplorer as ie

        investigation = Investigation()

        investigation.identifier = detailsDict['investigation_identifier']
        investigation.title = detailsDict['investigation_title']
        investigation.description = detailsDict['investigation_description']
        investigation.submission_date = detailsDict[
            'investigation_submission_date']  #use today if not specified
        investigation.public_release_date = detailsDict[
            'investigation_public_release_date']
        study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
        study.identifier = detailsDict['study_identifier']
        study.title = detailsDict['study_title']
        study.description = detailsDict['study_description']
        study.submission_date = detailsDict['study_submission_date']
        study.public_release_date = detailsDict['study_public_release_date']
        investigation.studies.append(study)
        obi = OntologySource(
            name='OBI', description="Ontology for Biomedical Investigations")
        investigation.ontology_source_references.append(obi)
        intervention_design = OntologyAnnotation(term_source=obi)
        intervention_design.term = "intervention design"
        intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
        study.design_descriptors.append(intervention_design)

        # Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
        # each with lists of corresponding Person and Publication objects.

        contact = Person(first_name=detailsDict['first_name'],
                         last_name=detailsDict['last_name'],
                         affiliation=detailsDict['affiliation'],
                         roles=[OntologyAnnotation(term='submitter')])
        study.contacts.append(contact)
        publication = Publication(title="Experiments with Data",
                                  author_list="Auther 1, Author 2")
        publication.pubmed_id = "12345678"
        publication.status = OntologyAnnotation(term="published")
        study.publications.append(publication)

        # To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
        # to create a process sequence. To do this we use the Process class and attach it to the Study object's
        # 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
        # a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
        # a Source material and as output a Sample material.

        sample_collection_protocol = Protocol(
            id_="sample collection",
            name="sample collection",
            protocol_type=OntologyAnnotation(term="sample collection"))
        aliquoting_protocol = Protocol(
            id_="aliquoting",
            name="aliquoting",
            protocol_type=OntologyAnnotation(term="aliquoting"))

        for index, row in self.sampleMetadata.iterrows():
            src_name = row['Sample File Name']
            source = Source(name=src_name)

            source.comments.append(
                Comment(name='Study Name', value=row['Study']))
            study.sources.append(source)

            sample_name = src_name
            sample = Sample(name=sample_name, derives_from=[source])
            # check if field exists first
            status = row[
                'Status'] if 'Status' in self.sampleMetadata.columns else 'N/A'
            characteristic_material_type = Characteristic(
                category=OntologyAnnotation(term="material type"),
                value=status)
            sample.characteristics.append(characteristic_material_type)

            #characteristic_material_role = Characteristic(category=OntologyAnnotation(term="material role"), value=row['AssayRole'])
            #sample.characteristics.append(characteristic_material_role)

            # check if field exists first
            age = row['Age'] if 'Age' in self.sampleMetadata.columns else 'N/A'
            characteristic_age = Characteristic(
                category=OntologyAnnotation(term="Age"),
                value=age,
                unit='Year')
            sample.characteristics.append(characteristic_age)
            # check if field exists first
            gender = row[
                'Gender'] if 'Gender' in self.sampleMetadata.columns else 'N/A'
            characteristic_gender = Characteristic(
                category=OntologyAnnotation(term="Gender"), value=gender)
            sample.characteristics.append(characteristic_gender)

            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term="H**o Sapiens",
                    term_source=ncbitaxon,
                    term_accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606"))
            sample.characteristics.append(characteristic_organism)

            study.samples.append(sample)

            # check if field exists first
            sampling_date = row['Sampling Date'] if not pandas.isnull(
                row['Sampling Date']) else None
            sample_collection_process = Process(
                id_='sam_coll_proc',
                executes_protocol=sample_collection_protocol,
                date_=sampling_date)
            aliquoting_process = Process(id_='aliquoting_proc',
                                         executes_protocol=aliquoting_protocol,
                                         date_=sampling_date)

            sample_collection_process.inputs = [source]
            aliquoting_process.outputs = [sample]

            # links processes
            plink(sample_collection_process, aliquoting_process)

            study.process_sequence.append(sample_collection_process)
            study.process_sequence.append(aliquoting_process)

        study.protocols.append(sample_collection_protocol)
        study.protocols.append(aliquoting_protocol)

        ### Add NMR Assay ###
        nmr_assay = Assay(
            filename='a_' + detailsDict['assay_filename'] + '.txt',
            measurement_type=OntologyAnnotation(term="metabolite profiling"),
            technology_type=OntologyAnnotation(term="NMR spectroscopy"))
        extraction_protocol = Protocol(
            name='extraction',
            protocol_type=OntologyAnnotation(term="material extraction"))

        study.protocols.append(extraction_protocol)
        nmr_protocol = Protocol(
            name='NMR spectroscopy',
            protocol_type=OntologyAnnotation(term="NMR Assay"))
        nmr_protocol.add_param('Run Order')
        #if 'Instrument' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Instrument')
        #if 'Sample Batch' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Sample Batch')
        nmr_protocol.add_param('Acquisition Batch')

        study.protocols.append(nmr_protocol)

        #for index, row in sampleMetadata.iterrows():
        for index, sample in enumerate(study.samples):
            row = self.sampleMetadata.loc[
                self.sampleMetadata['Sample File Name'].astype(
                    str) == sample.name]
            # create an extraction process that executes the extraction protocol
            extraction_process = Process(executes_protocol=extraction_protocol)

            # extraction process takes as input a sample, and produces an extract material as output
            sample_name = sample.name
            sample = Sample(name=sample_name, derives_from=[source])
            #print(row['Acquired Time'].values[0])

            extraction_process.inputs.append(sample)
            material = Material(name="extract-{}".format(index))
            material.type = "Extract Name"
            extraction_process.outputs.append(material)

            # create a ms process that executes the nmr protocol
            nmr_process = Process(executes_protocol=nmr_protocol,
                                  date_=datetime.isoformat(
                                      datetime.strptime(
                                          str(row['Acquired Time'].values[0]),
                                          '%Y-%m-%d %H:%M:%S')))

            nmr_process.name = "assay-name-{}".format(index)
            nmr_process.inputs.append(extraction_process.outputs[0])
            # nmr process usually has an output data file
            # check if field exists first
            assay_data_name = row['Assay data name'].values[
                0] if 'Assay data name' in self.sampleMetadata.columns else 'N/A'
            datafile = DataFile(filename=assay_data_name,
                                label="NMR Assay Name",
                                generated_from=[sample])
            nmr_process.outputs.append(datafile)

            #nmr_process.parameter_values.append(ParameterValue(category='Run Order',value=str(i)))
            nmr_process.parameter_values = [
                ParameterValue(category=nmr_protocol.get_param('Run Order'),
                               value=row['Run Order'].values[0])
            ]
            # check if field exists first
            instrument = row['Instrument'].values[
                0] if 'Instrument' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Instrument'),
                               value=instrument))
            # check if field exists first
            sbatch = row['Sample batch'].values[
                0] if 'Sample batch' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Sample Batch'),
                               value=sbatch))
            nmr_process.parameter_values.append(
                ParameterValue(
                    category=nmr_protocol.get_param('Acquisition Batch'),
                    value=row['Batch'].values[0]))

            # ensure Processes are linked forward and backward
            plink(extraction_process, nmr_process)
            # make sure the extract, data file, and the processes are attached to the assay
            nmr_assay.samples.append(sample)
            nmr_assay.data_files.append(datafile)
            nmr_assay.other_material.append(material)
            nmr_assay.process_sequence.append(extraction_process)
            nmr_assay.process_sequence.append(nmr_process)
            nmr_assay.measurement_type = OntologyAnnotation(
                term="metabolite profiling")
            nmr_assay.technology_type = OntologyAnnotation(
                term="NMR spectroscopy")

        # attach the assay to the study
        study.assays.append(nmr_assay)

        if os.path.exists(os.path.join(destinationPath,
                                       'i_Investigation.txt')):
            ie.appendStudytoISA(study, destinationPath)
        else:
            isatab.dump(isa_obj=investigation, output_path=destinationPath)
Example #29
def create_descriptor():
    """
    Returns a simple but complete ISA-JSON 1.0 descriptor for illustration.
    """

    # Create an empty Investigation object and set some values to the
    # instance variables.

    investigation = Investigation()
    investigation.identifier = "1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we " \
        "want to demonstrate how to use the object's instance variables " \
        "to set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation
    # by adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor " \
        "to set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # This is to show that ISA Comments can be used to annotate ISA objects,
    # here the ISA Study object
    study.comments.append(Comment(name="Study Start Date", value="Sun"))

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors.
    # A design descriptor is an Ontology Annotation describing the kind of
    # study at hand. Ontology Annotations should typically reference an
    # Ontology Source. We demonstrate a mix of using the class constructors
    # and setting values with instance variables. Note that the
    # OntologyAnnotation object 'intervention_design' links its 'term_source'
    # directly to the 'obi' object instance. To ensure the OntologySource
    # is encapsulated in the descriptor, it is added to a list of
    # 'ontology_source_references' in the Investigation object. The
    # 'intervention_design' object is then added to the list of
    # 'design_descriptors' held by the Study object.

    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)

    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo Sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="H**o Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))

    # Adding the description to the ISA Source Material (the source was
    # already attached to the study above):
    source.characteristics.append(characteristic_organism)

    #declaring a new ontology and adding it to the list of resources used
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    #preparing an ISA Characteristic object (~Material Property ) to annotate sample materials
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
            "123245"))

    prototype_sample.characteristics.append(characteristic_organ)

    study.samples = batch_create_materials(prototype_sample, n=3)
    # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must be
    # declared before we describe Processes, as a processing event of some sort
    # must execute some defined protocol. In the case of the class model,
    # Protocols should therefore be declared before Processes in order for the
    # Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # adding a dummy Comment[] to ISA.protocol object
    study.protocols[0].comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    study.protocols[0].comments.append(
        Comment(name="Study End Date", value="2017-08-11"))
    # checking that the ISA Protocol object has been modified
    # print(study.protocols[0])

    # Creation of an ISA Study Factor object
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    # testing serialization to ISA-TAB of Comments attached to ISA objects.
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    print(f.comments[0].name, "|", f.comments[0].value)

    # checking that the ISA Factor object has been modified
    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.

    study.process_sequence.append(sample_collection_process)

    # IMPORTANT: remember to populate the list of ontology categories
    # used to annotate ISA Materials in a Study:
    study.characteristic_categories.append(characteristic_organism.category)

    # Next, we build an Assay object and attach two protocols,
    # extraction and sequencing.

    assay = Assay(filename="a_assay.txt")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three
    # graphs are NOT interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked
        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)
        assay.measurement_type = OntologyAnnotation(term="gene sequencing")
        assay.technology_type = OntologyAnnotation(
            term="nucleotide sequencing")

    # attach the assay to the study
    study.assays.append(assay)

    import json
    from isatools.isajson import ISAJSONEncoder

    # To write JSON out, use the ISAJSONEncoder class with the json package
    # and use dump() or dumps(). Note that the extra parameters sort_keys,
    # indent and separators are to make the output more human-readable.

    return json.dumps(investigation,
                      cls=ISAJSONEncoder,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
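# One possible way to call the function above and persist its output; the file
# name 'isa_investigation.json' is arbitrary.
if __name__ == '__main__':
    isa_json = create_descriptor()
    with open('isa_investigation.json', 'w') as output_file:
        output_file.write(isa_json)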
Example #30
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

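            # any other subject type is still registered as a Source and
            # echoed to stdout for inspection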
            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])

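        # build assay-level materials, processes and data files from the
        # 'sample' section of the DCC JSON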
        for sample_json in dcc_json['sample'].values():

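            # cell fractions: a material separation step applied to the parent
            # sample (created on the fly if it has not been seen yet)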
            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # reuse the sample that is already registered in the study
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # link the sample collection step to this separation step
                    plink(sample_collection_process,
                          material_separation_process)

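            # protein extracts become 'Extract Name' materials produced by a
            # protein extraction step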
            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # reuse the sample that is already registered in the study
                    material_in = [x for x in study.samples
                                   if x.name == sample_json['parentID']][0]
                    protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                study.assays[0].samples.append(material_in)
                protein_extraction_process.outputs.append(material_out)
                study.assays[0].materials['other_material'].append(material_out)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

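            # polar fractions feed the data acquisition step, which emits one
            # raw data file per sample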
            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                data_acq_process.inputs.append(material_in)
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

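            # bulk tissue samples are processed directly from the sample
            # collection step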
            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # reuse the sample that is already registered in the study
                    material_in = [x for x in study.samples
                                   if x.name == sample_json['parentID']][0]
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

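    # assemble a tab-separated, MAF-style metabolite assignment table from the
    # 'measurement' section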
    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    records = []
    for measurement_json in dcc_json['measurement'].values():
        # map the DCC JSON measurement fields onto the MAF-style columns
        # ('compound' supplies the 'metabolite name' column)
        record = '\t'.join((measurement_json['compound'],
                            measurement_json['assignment'],
                            measurement_json['raw_intensity'],
                            measurement_json['retention_time'],
                            measurement_json['corrected_mz'],
                            measurement_json['formula'],
                            measurement_json['adduct'],
                            measurement_json['isotopologue'],
                            measurement_json['sample.id']))
        records.append(record)

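    # write the MAF table, then serialise the ISA-Tab investigation to
    # output_path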
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        maf_filename = os.path.join(
            output_path,
            '{study_id}-maf-data-nih-dcc-json.txt'.format(study_id=studyid))
        with open(maf_filename, 'w') as fh:
            print("writing 'maf file document' to file from "
                  "'generate_maf_file' method...")
            fh.write(data_rec_header + '\n')
            for item in records:
                fh.write(item + '\n')

        print("writing 'investigation information' to file...")
        print(isatab.dumps(investigation))

        isatab.dump(investigation, output_path=output_path)
    except IOError:
        print("Error: can't open file or write data")