def create_descriptor(): """Returns a ISA-Tab descriptor using a simple sample plan for illustration.""" investigation = Investigation(identifier='I1') plan = SampleAssayPlan() plan.add_sample_type('liver') plan.add_sample_plan_record('liver', 5) plan.add_sample_type('blood') plan.add_sample_plan_record('blood', 3) plan.group_size = 2 f1 = StudyFactor(name='AGENT', factor_type=OntologyAnnotation(term='pertubation agent')) f2 = StudyFactor(name='INTENSITY', factor_type=OntologyAnnotation(term='intensity')) f3 = StudyFactor(name='DURATION', factor_type=OntologyAnnotation(term='time')) treatment_factory = TreatmentFactory(factors=[f1, f2, f3]) treatment_factory.add_factor_value(f1, {'cocaine', 'crack', 'aether'}) treatment_factory.add_factor_value(f2, {'low', 'medium', 'high'}) treatment_factory.add_factor_value(f3, {'short', 'long'}) ffactorial_design_treatments = treatment_factory\ .compute_full_factorial_design() treatment_sequence = TreatmentSequence( ranked_treatments=ffactorial_design_treatments) # treatment_factory.add_factor_value('intensity', 1.05) study = IsaModelObjectFactory(plan, treatment_sequence)\ .create_study_from_plan() study.filename = 's_study.txt' investigation.studies = [study] print(isatab.dumps(investigation))
def setUp(self):
    """Build the fixture: a design, three base factors, two chemical
    treatments differing only in intensity, a two-step treatment
    sequence, and an (empty) sample/assay plan."""
    self.design = InterventionStudyDesign()
    specs = (BASE_FACTORS[0], BASE_FACTORS[1], BASE_FACTORS[2])
    self.agent, self.intensity, self.duration = (
        StudyFactor(name=spec['name'], factor_type=spec['type'])
        for spec in specs)

    def chemical_treatment(intensity_level):
        # Both fixture treatments share agent/duration; only intensity varies.
        return Treatment(
            treatment_type=INTERVENTIONS['CHEMICAL'],
            factor_values=(
                FactorValue(factor_name=self.agent, value='crack'),
                FactorValue(factor_name=self.intensity,
                            value=intensity_level),
                FactorValue(factor_name=self.duration, value='medium')))

    self.first_treatment = chemical_treatment('low')
    self.second_treatment = chemical_treatment('high')
    self.test_sequence = TreatmentSequence(
        ranked_treatments=[(self.first_treatment, 1),
                           (self.second_treatment, 2)])
    self.sample_plan = SampleAssayPlan(group_size=10, sample_plan={},
                                       assay_plan=None)
def setUp(self):
    """Create a fresh Investigation and the three StudyFactors the tests use."""
    self.investigation = Investigation(identifier='I1')
    factor_specs = (
        ('AGENT', 'pertubation agent'),
        ('INTENSITY', 'intensity'),
        ('DURATION', 'time'),
    )
    self.f1, self.f2, self.f3 = (
        StudyFactor(name=name, factor_type=OntologyAnnotation(term=term))
        for name, term in factor_specs)
def test_compute_full_factorial_design_empty_intensities(self):
    """An empty intensity level set must collapse the full factorial to
    the empty set (cartesian product with an empty set is empty)."""
    agent, intensity, duration = (
        StudyFactor(name=BASE_FACTORS[i]['name'],
                    factor_type=BASE_FACTORS[i]['type'])
        for i in range(3))
    self.factory.add_factor_value(agent, {'cocaine', 'crack', 'aether'})
    self.factory.add_factor_value(intensity, set())
    self.factory.add_factor_value(duration, {'short', 'long'})
    self.assertEqual(self.factory.compute_full_factorial_design(), set())
def test_compute_full_factorial_design_empty_agents(self):
    """An empty agent level set must collapse the full factorial to the
    empty set, regardless of the other factors' levels."""
    agent, intensity, duration = (
        StudyFactor(name=BASE_FACTORS[i]['name'],
                    factor_type=BASE_FACTORS[i]['type'])
        for i in range(3))
    self.factory.add_factor_value(agent, set())
    self.factory.add_factor_value(intensity, {'low', 'medium', 'high'})
    self.factory.add_factor_value(duration, {'short', 'long'})
    self.assertEqual(self.factory.compute_full_factorial_design(), set())
def setUp(self):
    """Fixture: an empty TreatmentSequence, the three base factors, and a
    single chemical treatment to rank in the tests."""
    self.maxDiff = None  # show full diffs on assertion failures
    self.sequence = TreatmentSequence()
    specs = (BASE_FACTORS[0], BASE_FACTORS[1], BASE_FACTORS[2])
    self.agent, self.intensity, self.duration = (
        StudyFactor(name=spec['name'], factor_type=spec['type'])
        for spec in specs)
    self.test_treatment = Treatment(
        treatment_type=INTERVENTIONS['CHEMICAL'],
        factor_values=(
            FactorValue(factor_name=self.agent, value='crack'),
            FactorValue(factor_name=self.intensity, value='low'),
            FactorValue(factor_name=self.duration, value='short')))
def parse_experimental_factors(self, factors, factortypes, tsrs, tans):
    """Append a StudyFactor to the current (last) study for each named
    factor, pairing factor names with their types, term-source refs and
    term accessions column-wise (shorter columns padded with '')."""
    rows = zip_longest(factors, factortypes, tsrs, tans, fillvalue='')
    for name, type_term, source_ref, accession in rows:
        if name == '':
            # A row without a factor name carries no factor declaration.
            continue
        annotation = OntologyAnnotation(
            term=type_term,
            term_source=self._ts_dict.get(source_ref),
            term_accession=accession)
        self.ISA.studies[-1].factors.append(
            StudyFactor(name=name, factor_type=annotation))
def unserialize_study_factor(json_obj):
    """Rebuild a StudyFactor from its ISA-JSON dict representation.

    Missing or null 'factorName'/'factorType'/'comments' entries fall
    back to '' / an empty OntologyAnnotation / no comments.
    """
    name = ''
    factor_type = OntologyAnnotation()
    comments = []

    raw_name = json_obj.get('factorName')
    if raw_name is not None:
        name = raw_name
    raw_type = json_obj.get('factorType')
    if raw_type is not None:
        factor_type = unserialize_ontology_annotation(raw_type)
    raw_comments = json_obj.get('comments')
    if raw_comments is not None:
        comments = [unserialize_comment(c) for c in raw_comments]

    return StudyFactor(name=name, factor_type=factor_type, comments=comments)
def unserialize_factor_value(json_obj):
    """Rebuild a FactorValue from its ISA-JSON dict representation.

    Missing or null entries fall back to empty model objects. Fixes a
    copy-paste bug where the unit was deserialized from json_obj['value']
    instead of json_obj['unit'], which made every unit a copy of the value.
    """
    factor_name = StudyFactor()
    if 'factorName' in json_obj and json_obj['factorName'] is not None:
        factor_name = unserialize_study_factor(json_obj['factorName'])
    value = OntologyAnnotation()
    if 'value' in json_obj and json_obj['value'] is not None:
        value = unserialize_ontology_annotation(json_obj['value'])
    unit = OntologyAnnotation()
    if 'unit' in json_obj and json_obj['unit'] is not None:
        # BUGFIX: was unserialize_ontology_annotation(json_obj['value'])
        unit = unserialize_ontology_annotation(json_obj['unit'])
    comments = list()
    if 'comments' in json_obj and json_obj['comments'] is not None:
        for comment in json_obj['comments']:
            comments.append(unserialize_comment(comment))
    return FactorValue(factor_name=factor_name, value=value, unit=unit,
                       comments=comments)
def create_descriptor(): """ Returns a simple but complete ISA-JSON 1.0 descriptor for illustration. """ # Create an empty Investigation object and set some values to the # instance variables. investigation = Investigation() investigation.identifier = "1" investigation.title = "My Simple ISA Investigation" investigation.description = \ "We could alternatively use the class constructor's parameters to " \ "set some default values at the time of creation, however we " \ "want to demonstrate how to use the object's instance variables " \ "to set values." investigation.submission_date = "2016-11-03" investigation.public_release_date = "2016-11-03" # Create an empty Study object and set some values. The Study must have a # filename, otherwise when we serialize it to ISA-Tab we would not know # where to write it. We must also attach the study to the investigation # by adding it to the 'investigation' object's list of studies. study = Study(filename="s_study.txt") study.identifier = "1" study.title = "My ISA Study" study.description = \ "Like with the Investigation, we could use the class constructor " \ "to set some default values, but have chosen to demonstrate in this " \ "example the use of instance variables to set initial values." study.submission_date = "2016-11-03" study.public_release_date = "2016-11-03" investigation.studies.append(study) # This is to show that ISA Comments can be used to annotate ISA objects, here ISA Study study.comments.append(Comment(name="Study Start Date", value="Sun")) # Some instance variables are typed with different objects and lists of # objects. For example, a Study can have a list of design descriptors. # A design descriptor is an Ontology Annotation describing the kind of # study at hand. Ontology Annotations should typically reference an # Ontology Source. We demonstrate a mix of using the class constructors # and setting values with instance variables. 
Note that the # OntologyAnnotation object 'intervention_design' links its 'term_source' # directly to the 'obi' object instance. To ensure the OntologySource # is encapsulated in the descriptor, it is added to a list of # 'ontology_source_references' in the Investigation object. The # 'intervention_design' object is then added to the list of # 'design_descriptors' held by the Study object. obi = OntologySource(name='OBI', description="Ontology for Biomedical Investigations") investigation.ontology_source_references.append(obi) intervention_design = OntologyAnnotation(term_source=obi) intervention_design.term = "intervention design" intervention_design.term_accession = \ "http://purl.obolibrary.org/obo/OBI_0000115" study.design_descriptors.append(intervention_design) # Other instance variables common to both Investigation and Study objects # include 'contacts' and 'publications', each with lists of corresponding # Person and Publication objects. contact = Person(first_name="Alice", last_name="Robertson", affiliation="University of Life", roles=[OntologyAnnotation(term='submitter')]) study.contacts.append(contact) publication = Publication(title="Experiments with Elephants", author_list="A. Robertson, B. Robertson") publication.pubmed_id = "12345678" publication.status = OntologyAnnotation(term="published") study.publications.append(publication) # To create the study graph that corresponds to the contents of the study # table file (the s_*.txt file), we need to create a process sequence. # To do this we use the Process class and attach it to the Study object's # 'process_sequence' list instance variable. Each process must be linked # with a Protocol object that is attached to a Study object's 'protocols' # list instance variable. The sample collection Process object usually has # as input a Source material and as output a Sample material. # Here we create one Source material object and attach it to our study. 
source = Source(name='source_material') study.sources.append(source) # Then we create three Sample objects, with organism as H**o Sapiens, and # attach them to the study. We use the utility function # batch_create_material() to clone a prototype material object. The # function automatiaclly appends an index to the material name. In this # case, three samples will be created, with the names 'sample_material-0', # 'sample_material-1' and 'sample_material-2'. prototype_sample = Sample(name='sample_material', derives_from=[source]) ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy") investigation.ontology_source_references.append(ncbitaxon) characteristic_organism = Characteristic( category=OntologyAnnotation(term="Organism"), value=OntologyAnnotation( term="H**o Sapiens", term_source=ncbitaxon, term_accession="http://purl.bioontology.org/ontology/NCBITAXON/" "9606")) # Adding the description to the ISA Source Material: source.characteristics.append(characteristic_organism) study.sources.append(source) #declaring a new ontology and adding it to the list of resources used uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology') investigation.ontology_source_references.append(uberon) #preparing an ISA Characteristic object (~Material Property ) to annotate sample materials characteristic_organ = Characteristic( category=OntologyAnnotation(term="OrganismPart"), value=OntologyAnnotation( term="liver", term_source=uberon, term_accession="http://purl.bioontology.org/ontology/UBERON/" "123245")) prototype_sample.characteristics.append(characteristic_organ) study.samples = batch_create_materials(prototype_sample, n=3) # creates a batch of 3 samples # Now we create a single Protocol object that represents our sample # collection protocol, and attach it to the study object. Protocols must be # declared before we describe Processes, as a processing event of some sort # must execute some defined protocol. 
In the case of the class model, # Protocols should therefore be declared before Processes in order for the # Process to be linked to one. sample_collection_protocol = Protocol( name="sample collection", protocol_type=OntologyAnnotation(term="sample collection")) study.protocols.append(sample_collection_protocol) sample_collection_process = Process( executes_protocol=sample_collection_protocol) # adding a dummy Comment[] to ISA.protocol object study.protocols[0].comments.append( Comment(name="Study Start Date", value="Uranus")) study.protocols[0].comments.append( Comment(name="Study End Date", value="2017-08-11")) # checking that the ISA Protocool object has been modified # print(study.protocols[0]) # Creation of an ISA Study Factor object f = StudyFactor( name="treatment['modality']", factor_type=OntologyAnnotation(term="treatment['modality']")) # testing serialization to ISA-TAB of Comments attached to ISA objects. f.comments.append(Comment(name="Study Start Date", value="Saturn")) f.comments.append(Comment(name="Study End Date", value="2039-12-12")) print(f.comments[0].name, "|", f.comments[0].value) # checking that the ISA Factor object has been modified study.factors.append(f) # Next, we link our materials to the Process. In this particular case, we # are describing a sample collection process that takes one source # material, and produces three different samples. # # (source_material)->(sample collection)-> # [(sample_material-0), (sample_material-1), (sample_material-2)] for src in study.sources: sample_collection_process.inputs.append(src) for sam in study.samples: sample_collection_process.outputs.append(sam) # Finally, attach the finished Process object to the study # process_sequence. This can be done many times to describe multiple # sample collection events. 
study.process_sequence.append(sample_collection_process) #IMPORTANT: remember to populate the list of ontology categories used to annotation ISA Material in a Study: study.characteristic_categories.append(characteristic_organism.category) # Next, we build n Assay object and attach two protocols, # extraction and sequencing. assay = Assay(filename="a_assay.txt") extraction_protocol = Protocol( name='extraction', protocol_type=OntologyAnnotation(term="material extraction")) study.protocols.append(extraction_protocol) sequencing_protocol = Protocol( name='sequencing', protocol_type=OntologyAnnotation(term="material sequencing")) study.protocols.append(sequencing_protocol) # To build out assay graphs, we enumereate the samples from the # study-level, and for each sample we create an extraction process and # a sequencing process. The extraction process takes as input a sample # material, and produces an extract material. The sequencing process # takes the extract material and produces a data file. This will # produce three graphs, from sample material through to data, as follows: # # (sample_material-0)->(extraction)->(extract-0)->(sequencing)-> # (sequenced-data-0) # (sample_material-1)->(extraction)->(extract-1)->(sequencing)-> # (sequenced-data-1) # (sample_material-2)->(extraction)->(extract-2)->(sequencing)-> # (sequenced-data-2) # # Note that the extraction processes and sequencing processes are # distinctly separate instances, where the three # graphs are NOT interconnected. 
for i, sample in enumerate(study.samples): # create an extraction process that executes the extraction protocol extraction_process = Process(executes_protocol=extraction_protocol) # extraction process takes as input a sample, and produces an extract # material as output extraction_process.inputs.append(sample) material = Material(name="extract-{}".format(i)) material.type = "Extract Name" extraction_process.outputs.append(material) # create a sequencing process that executes the sequencing protocol sequencing_process = Process(executes_protocol=sequencing_protocol) sequencing_process.name = "assay-name-{}".format(i) sequencing_process.inputs.append(extraction_process.outputs[0]) # Sequencing process usually has an output data file datafile = DataFile(filename="sequenced-data-{}".format(i), label="Raw Data File", generated_from=[sample]) sequencing_process.outputs.append(datafile) # ensure Processes are linked plink(sequencing_process, extraction_process) # make sure the extract, data file, and the processes are attached to # the assay assay.samples.append(sample) assay.data_files.append(datafile) assay.other_material.append(material) assay.process_sequence.append(extraction_process) assay.process_sequence.append(sequencing_process) assay.measurement_type = OntologyAnnotation(term="gene sequencing") assay.technology_type = OntologyAnnotation( term="nucleotide sequencing") # attach the assay to the study study.assays.append(assay) import json from isatools.isajson import ISAJSONEncoder # To write JSON out, use the ISAJSONEncoder class with the json package # and use dump() or dumps(). Note that the extra parameters sort_keys, # indent and separators are to make the output more human-readable. return json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': '))
# c = Characteristic(category=OntologyAnnotation(term="germplasmDbId"), # value=OntologyAnnotation(term=str(ou['germplasmDbId']), term_source="", # term_accession="")) # characteristics.append(c) source = Source(name=ou['observationUnitDbId'], characteristics=characteristics) # print(source) study.sources.append(source) sample = Sample(name=ou["observationUnitDbId"]) if 'treatments' in ou.keys(): for element in ou['treatments']: for key in element.keys(): f = StudyFactor(name=key, factor_type=OntologyAnnotation(term=key)) if f not in study.factors: study.factors.append(f) fv = FactorValue(factor_name=f, value=OntologyAnnotation(term=str( element[key]), term_source="", term_accession="")) sample.factor_values.append(fv) print(sample) if 'observations' in ou.keys(): for ob in ou['observations']: phenotyping_process = Process( executes_protocol=phenotyping_protocol)
def convert(json_path, output_path):
    """Convert an NIH-DCC metabolomics JSON document to ISA-Tab.

    Reads the DCC JSON at ``json_path``, builds an ISA Investigation/Study
    graph from its 'project', 'study', 'protocol', 'subject' and 'sample'
    sections, writes a tab-separated MAF file from the 'measurement'
    section, and dumps the ISA-Tab files into ``output_path``.

    Fix applied: the MAF DataFile name was built with
    ``'(unknown).txt'.format(filename=...)`` — a format string with no
    replacement field, so the computed name was silently discarded and
    every data file was named '(unknown).txt'. Restored to
    ``'{filename}.txt'``.

    Raises:
        IOError: if the input JSON contains no 'project' entry.
    """
    print(json_path)
    print(output_path)
    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # --- Investigation-level metadata from the (single) project entry ---
    project_set_json = dcc_json.get('project')
    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])
    obi = OntologySource(
        name='OBI', description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)
    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="", term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    # --- Study-level metadata (only the first study entry is used) ---
    study_set_json = dcc_json.get('study')
    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))
        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'], term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))
        investigation.studies = [study]
        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])
        study.contacts.append(study_person)

        # Each DCC factor id becomes a StudyFactor.
        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        # Protocols; MS/NMR protocols additionally spawn one Assay each.
        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p, term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))
            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                        technology_type=OntologyAnnotation(
                            term='mass spectrometry', term_source=obi,
                            term_accession="http://purl.org/obo/OBI_1"),
                        filename='a_assay_ms_{count}.txt'.format(count=i)))
            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis', term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                        technology_type=OntologyAnnotation(
                            term='nmr spectroscopy', term_source=obi,
                            term_accession="http://purl.org/obo/OBI_1"),
                        filename='a_assay_nmr.txt'))

        # Subjects become Sources; tissue slices also yield a Sample plus
        # a sample collection process.
        for subject_json in dcc_json['subject'].values():
            if "organism" in subject_json['type']:
                source = Source(name=subject_json['id'])
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
            elif 'tissue_slice' in subject_json['type']:
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                # NOTE(review): derives_from is given a raw parentID string
                # here, not a list of Source objects — confirm intended.
                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)
            else:
                source = Source(name=subject_json['id'])
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])

        # Samples: dispatch on substrings of the DCC sample 'type'.
        # NOTE(review): the branches below rely on variables possibly left
        # over from earlier iterations (material_in,
        # protein_extraction_process, sample_collection_process) and probe
        # them via try/except NameError — fragile, preserved as-is.
        for sample_json in dcc_json['sample'].values():
            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # Input: reuse the parent sample if known, else create it.
                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])
                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'], term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # NOTE(review): links to protein_extraction_process,
                    # which may be unbound at this point — confirm.
                    plink(sample_collection_process,
                          protein_extraction_process)
            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']
                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # NOTE(review): reuses material_in from a previous
                    # branch instead of looking up the parent sample.
                    protein_extraction_process.inputs.append(material_in)
                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'], term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)
                # NOTE(review): material_out is built but material_in is
                # what gets attached below — looks like a bug; confirm.
                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(
                    material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(
                        executes_protocol="")
                else:
                    plink(material_separation_process,
                          protein_extraction_process)
            if 'polar' in sample_json['type']:
                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(
                    material_in)
                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                # BUGFIX: the original format string had no replacement
                # field, so the computed name was discarded.
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid,
                         sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(
                        executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)
            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']
                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    bulk_process.inputs.append(material_in)
                plink(sample_collection_process, bulk_process)

        # --- MAF (measurement) file: one tab-separated record per entry ---
        data_rec_header = '\t'.join(
            ('metabolite name', 'assignment', 'signal intensity',
             'retention time', 'm/z', 'formula', 'adduct', 'isotopologue',
             'sample identifier'))
        records = []
        for element in dcc_json['measurement']:
            measurement = dcc_json['measurement'][element]
            record = '\t'.join((measurement['compound'],
                                measurement['assignment'],
                                measurement['raw_intensity'],
                                measurement['retention_time'],
                                measurement['corrected_mz'],
                                measurement['formula'],
                                measurement['adduct'],
                                measurement['isotopologue'],
                                measurement['sample.id']))
            records.append(record)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        try:
            with open(
                    '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                    format(output_path=output_path, study_id=studyid),
                    'w') as fh:
                print(
                    "'writing 'maf file document' to file from "
                    "'generate_maf_file' method:..."
                )
                fh.writelines(data_rec_header)
                fh.writelines('\n')
                for item in records:
                    fh.writelines(item)
                    fh.writelines('\n')
            print("writing 'investigation information' to file...")
            print(isatab.dumps(investigation))
            isatab.dump(investigation, output_path=output_path)
        except IOError:
            print("Error: in main() method can't open file or write data")
def create_study_sample_and_assay(client, brapi_study_id, isa_study, sample_collection_protocol, phenotyping_protocol):
    """Convert BrAPI observation units into ISA samples, processes and assay data.

    For each observation unit returned by ``client`` for ``brapi_study_id``:
    create an ISA ``Sample`` derived from the matching germplasm ``Source``
    already registered on ``isa_study``; map selected observation-unit fields
    to sample characteristics and (as a workaround) to assay comments; map
    BrAPI treatments to study factors / factor values; record a
    sample-collection ``Process``; and delegate data-file/assay creation to
    ``create_data_file``.

    Mutates ``isa_study`` (samples, factors, process_sequence, assay comments)
    in place; returns nothing.

    NOTE(review): assumes ``obs_unit`` dicts carry 'germplasmName' and
    'observationUnitName' keys — confirm against the BrAPI client.
    """
    # BrAPI observation-unit attribute name -> ISA sample Characteristic term.
    obsunit_to_isasample_mapping_dictionary = {
        "X": "X",
        "Y": "Y",
        "blockNumber": "Block Number",
        "plotNumber": "Plot Number",
        "plantNumber": "Plant Number",
        "observationLevel": "Observation unit type"
    }
    # Same attributes are additionally recorded at assay level (see the
    # Comment workaround below).
    obsunit_to_isaassay_mapping_dictionary = {
        "X": "X",
        "Y": "Y",
        "blockNumber": "Block Number",
        "plotNumber": "Plot Number",
        "plantNumber": "Plant Number",
        "observationLevel": "Observation unit type"
    }
    # Names of observation units already converted; allows handling of
    # multi-year observation units without creating duplicate samples.
    # (sic: original misspelled name kept — it is local to this function)
    allready_converted_obs_unit = []
    for obs_unit in client.get_study_observation_units(brapi_study_id):
        # Getting the relevant germplasm used for that observation event:
        # ---------------------------------------------------------------
        this_source = isa_study.get_source(obs_unit['germplasmName'])
        # logger.debug("testing for the source reference: ", str(this_source))
        # TODO Assumed one assay by study. Will need to move to one assay
        # by level/datalink
        this_assay = isa_study.assays[0]
        # Sample == Observation Unit
        if this_source is not None and obs_unit['observationUnitName'] not in allready_converted_obs_unit:
            # The observationUnitName is the business ID of the observation
            # unit (i.e. plot number) while the DbId is the primary key.
            this_isa_sample = Sample(
                name=obs_unit['observationUnitName'],
                # name=obs_unit['observationUnitDbId'] + "_" + obs_unit['observationUnitName'],
                derives_from=[this_source])
            allready_converted_obs_unit.append(obs_unit['observationUnitName'])
            for key in obs_unit.keys():
                if key in obsunit_to_isasample_mapping_dictionary.keys():
                    # NOTE(review): str(...) never returns None, so the second
                    # test is always true — probably meant obs_unit[key] is
                    # not None. Left as-is to preserve behavior.
                    if isinstance(obsunit_to_isasample_mapping_dictionary[key], str) and str(obs_unit[key]) is not None:
                        c = Characteristic(category=OntologyAnnotation(term=obsunit_to_isasample_mapping_dictionary[key]),
                                           value=OntologyAnnotation(term=str(obs_unit[key]),
                                                                    term_source="",
                                                                    term_accession=""))
                        this_isa_sample.characteristics.append(c)
                # else:  # no default behaviour: a key is either mapped or ignored
                #     c = Characteristic(category=OntologyAnnotation(term=key),
                #                        value=OntologyAnnotation(term=str(obs_unit[key]),
                #                                                 term_source="",
                #                                                 term_accession=""))
                #     this_isa_sample.characteristics.append(c)
                if key in obsunit_to_isaassay_mapping_dictionary.keys():
                    if isinstance(obsunit_to_isaassay_mapping_dictionary[key], str):
                        c = Characteristic(category=OntologyAnnotation(term=obsunit_to_isaassay_mapping_dictionary[key]),
                                           value=OntologyAnnotation(term=str(obs_unit[key]),
                                                                    term_source="",
                                                                    term_accession=""))
                        # TODO: quick workaround used to store observation
                        # unit characteristics at the assay level
                        this_assay.comments.append(c)
            # if 'observationLevel' in obs_unit.keys():
            #     # TODO: if absent, a warning should be logged as this is
            #     # a MIAPPE requirement

            # Looking for treatment in BrAPI and mapping to ISA Study
            # Factor Value
            # --------------------------------------------------------------
            if 'treatments' in obs_unit.keys():
                for element in obs_unit['treatments']:
                    for key in element.keys():
                        f = StudyFactor(name=key, factor_type=OntologyAnnotation(term=key))
                        # Register each factor on the study only once.
                        if f not in isa_study.factors:
                            isa_study.factors.append(f)
                        fv = FactorValue(factor_name=f,
                                         value=OntologyAnnotation(term=str(element[key]),
                                                                  term_source="",
                                                                  term_accession=""))
                        this_isa_sample.factor_values.append(fv)
            isa_study.samples.append(this_isa_sample)
            # print("counting observations: ", i, "before: ", this_source.name)
            # TODO: Add Comment[Factor Values]: iterate through BrAPI
            # treatments to obtain all possible values for a given Factor

            # Creating the corresponding ISA sample-collection process to
            # structure the document:
            # --------------------------------------------------------------
            sample_collection_process = Process(executes_protocol=sample_collection_protocol)
            sample_collection_process.performer = "n.a."
            sample_collection_process.date = datetime.datetime.today().isoformat()
            sample_collection_process.inputs.append(this_source)
            sample_collection_process.outputs.append(this_isa_sample)
            isa_study.process_sequence.append(sample_collection_process)
            # logger.debug(str(this_assay))

            # Creating the relevant ISA protocol application / Assay from
            # BrAPI Observation Events:
            # --------------------------------------------------------------
            create_data_file(obs_unit, this_assay, sample_collection_process, this_isa_sample, phenotyping_protocol)
def test_add_factor_value_number(self):
    """A single numeric value is stored as a one-element set for its factor."""
    intensity_factor = StudyFactor(
        name=BASE_FACTORS[1]['name'],
        factor_type=BASE_FACTORS[1]['type'],
    )
    self.factory.add_factor_value(intensity_factor, 1.05)
    stored_values = self.factory.factors.get(intensity_factor)
    self.assertEqual(stored_values, {1.05})
def test_add_factor_value_set(self):
    """Adding a set of values stores that same set for the factor."""
    agent_factor = StudyFactor(
        name=BASE_FACTORS[0]['name'],
        factor_type=BASE_FACTORS[0]['type'],
    )
    expected_values = {'agent_orange', 'crack, cocaine'}
    self.factory.add_factor_value(agent_factor, expected_values)
    self.assertEqual(self.factory.factors.get(agent_factor), expected_values)
def test_add_factor_value_str(self):
    """A single string value is wrapped into a one-element set."""
    agent_factor = StudyFactor(
        name=BASE_FACTORS[0]['name'],
        factor_type=BASE_FACTORS[0]['type'],
    )
    self.factory.add_factor_value(agent_factor, 'agent_orange')
    self.assertEqual(self.factory.factors.get(agent_factor), {'agent_orange'})
def test_compute_full_factorial_design(self):
    """The full factorial design is the cross product of all factor values.

    3 agents x 3 intensities x 2 durations -> 18 chemical-intervention
    treatments.
    """
    agent = StudyFactor(name=BASE_FACTORS[0]['name'],
                        factor_type=BASE_FACTORS[0]['type'])
    intensity = StudyFactor(name=BASE_FACTORS[1]['name'],
                            factor_type=BASE_FACTORS[1]['type'])
    duration = StudyFactor(name=BASE_FACTORS[2]['name'],
                           factor_type=BASE_FACTORS[2]['type'])
    self.factory.add_factor_value(agent, {'cocaine', 'crack', 'aether'})
    self.factory.add_factor_value(intensity, {'low', 'medium', 'high'})
    self.factory.add_factor_value(duration, {'short', 'long'})
    # Build the expected 18-treatment set as an explicit cross product
    # instead of enumerating every literal combination by hand.
    expected_treatments = {
        Treatment(treatment_type=INTERVENTIONS['CHEMICAL'],
                  factor_values=(
                      FactorValue(factor_name=agent, value=agent_value),
                      FactorValue(factor_name=intensity, value=intensity_value),
                      FactorValue(factor_name=duration, value=duration_value)))
        for agent_value in ('cocaine', 'crack', 'aether')
        for intensity_value in ('low', 'medium', 'high')
        for duration_value in ('short', 'long')
    }
    full_factorial = self.factory.compute_full_factorial_design()
    self.assertEqual(full_factorial, expected_treatments)