def create_descriptor(): """ Returns a simple but complete ISA-JSON 1.0 descriptor for illustration. """ # Create an empty Investigation object and set some values to the # instance variables. investigation = Investigation() investigation.identifier = "1" investigation.title = "My Simple ISA Investigation" investigation.description = \ "We could alternatively use the class constructor's parameters to " \ "set some default values at the time of creation, however we " \ "want to demonstrate how to use the object's instance variables " \ "to set values." investigation.submission_date = "2016-11-03" investigation.public_release_date = "2016-11-03" # Create an empty Study object and set some values. The Study must have a # filename, otherwise when we serialize it to ISA-Tab we would not know # where to write it. We must also attach the study to the investigation # by adding it to the 'investigation' object's list of studies. study = Study(filename="s_study.txt") study.identifier = "1" study.title = "My ISA Study" study.description = \ "Like with the Investigation, we could use the class constructor " \ "to set some default values, but have chosen to demonstrate in this " \ "example the use of instance variables to set initial values." study.submission_date = "2016-11-03" study.public_release_date = "2016-11-03" investigation.studies.append(study) # This is to show that ISA Comments can be used to annotate ISA objects, here ISA Study study.comments.append(Comment(name="Study Start Date", value="Sun")) # Some instance variables are typed with different objects and lists of # objects. For example, a Study can have a list of design descriptors. # A design descriptor is an Ontology Annotation describing the kind of # study at hand. Ontology Annotations should typically reference an # Ontology Source. We demonstrate a mix of using the class constructors # and setting values with instance variables. Note that the # OntologyAnnotation object 'intervention_design' links its 'term_source' # directly to the 'obi' object instance. To ensure the OntologySource # is encapsulated in the descriptor, it is added to a list of # 'ontology_source_references' in the Investigation object. The # 'intervention_design' object is then added to the list of # 'design_descriptors' held by the Study object. obi = OntologySource(name='OBI', description="Ontology for Biomedical Investigations") investigation.ontology_source_references.append(obi) intervention_design = OntologyAnnotation(term_source=obi) intervention_design.term = "intervention design" intervention_design.term_accession = \ "http://purl.obolibrary.org/obo/OBI_0000115" study.design_descriptors.append(intervention_design) # Other instance variables common to both Investigation and Study objects # include 'contacts' and 'publications', each with lists of corresponding # Person and Publication objects. contact = Person(first_name="Alice", last_name="Robertson", affiliation="University of Life", roles=[OntologyAnnotation(term='submitter')]) study.contacts.append(contact) publication = Publication(title="Experiments with Elephants", author_list="A. Robertson, B. Robertson") publication.pubmed_id = "12345678" publication.status = OntologyAnnotation(term="published") study.publications.append(publication) # To create the study graph that corresponds to the contents of the study # table file (the s_*.txt file), we need to create a process sequence. # To do this we use the Process class and attach it to the Study object's # 'process_sequence' list instance variable. 
    # Each process must be linked with a Protocol object that is attached to
    # a Study object's 'protocols' list instance variable. The sample
    # collection Process object usually has as input a Source material and
    # as output a Sample material.

    # Here we create one Source material object and attach it to our study.
    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.
    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
                           "9606"))

    # Adding the description to the ISA Source Material:
    source.characteristics.append(characteristic_organism)

    # Declaring a new ontology and adding it to the list of resources used.
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    # Preparing an ISA Characteristic object (~Material Property) to annotate
    # sample materials.
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
                           "123245"))
    prototype_sample.characteristics.append(characteristic_organ)

    study.samples = batch_create_materials(prototype_sample, n=3)  # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must
    # be declared before we describe Processes, as a processing event of some
    # sort must execute some defined protocol. In the case of the class model,
    # Protocols should therefore be declared before Processes in order for the
    # Process to be linked to one.
    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Adding a dummy Comment[] to the ISA Protocol object.
    study.protocols[0].comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    study.protocols[0].comments.append(
        Comment(name="Study End Date", value="2017-08-11"))
    # Checking that the ISA Protocol object has been modified:
    # print(study.protocols[0])

    # Creation of an ISA Study Factor object.
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    # Testing serialization to ISA-Tab of Comments attached to ISA objects.
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    # Checking that the ISA Factor object has been modified:
    print(f.comments[0].name, "|", f.comments[0].value)
    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]
    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.
    study.process_sequence.append(sample_collection_process)

    # IMPORTANT: remember to populate the list of ontology categories used to
    # annotate ISA Materials in a Study:
    study.characteristic_categories.append(characteristic_organism.category)

    # Next, we build an Assay object and attach two protocols, extraction and
    # sequencing.
    assay = Assay(filename="a_assay.txt")

    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three graphs are NOT
    # interconnected.
for i, sample in enumerate(study.samples): # create an extraction process that executes the extraction protocol extraction_process = Process(executes_protocol=extraction_protocol) # extraction process takes as input a sample, and produces an extract # material as output extraction_process.inputs.append(sample) material = Material(name="extract-{}".format(i)) material.type = "Extract Name" extraction_process.outputs.append(material) # create a sequencing process that executes the sequencing protocol sequencing_process = Process(executes_protocol=sequencing_protocol) sequencing_process.name = "assay-name-{}".format(i) sequencing_process.inputs.append(extraction_process.outputs[0]) # Sequencing process usually has an output data file datafile = DataFile(filename="sequenced-data-{}".format(i), label="Raw Data File", generated_from=[sample]) sequencing_process.outputs.append(datafile) # ensure Processes are linked plink(sequencing_process, extraction_process) # make sure the extract, data file, and the processes are attached to # the assay assay.samples.append(sample) assay.data_files.append(datafile) assay.other_material.append(material) assay.process_sequence.append(extraction_process) assay.process_sequence.append(sequencing_process) assay.measurement_type = OntologyAnnotation(term="gene sequencing") assay.technology_type = OntologyAnnotation( term="nucleotide sequencing") # attach the assay to the study study.assays.append(assay) import json from isatools.isajson import ISAJSONEncoder # To write JSON out, use the ISAJSONEncoder class with the json package # and use dump() or dumps(). Note that the extra parameters sort_keys, # indent and separators are to make the output more human-readable. return json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': '))
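# --- Usage sketch (added for illustration; not part of the original example) ---
# create_descriptor() above returns the ISA-JSON serialization as a string, so
# persisting it is a one-liner. The output filename below is an assumption.
def write_isa_json_sketch(path="isa_investigation.json"):
    isa_json = create_descriptor()  # JSON string produced by ISAJSONEncoder
    with open(path, "w") as out_fp:
        out_fp.write(isa_json)
    return path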
def _exportISATAB(self, destinationPath, detailsDict):
    """
    Export the dataset's metadata to the directory *destinationPath* as ISA-Tab.

    detailsDict should have the format:
    detailsDict = {
        'investigation_identifier': "i1",
        'investigation_title': "Give it a title",
        'investigation_description': "Add a description",
        'investigation_submission_date': "2016-11-03",
        'investigation_public_release_date': "2016-11-03",
        'first_name': "Noureddin",
        'last_name': "Sadawi",
        'affiliation': "University",
        'study_filename': "my_ms_study",
        'study_material_type': "Serum",
        'study_identifier': "s1",
        'study_title': "Give the study a title",
        'study_description': "Add study description",
        'study_submission_date': "2016-11-03",
        'study_public_release_date': "2016-11-03",
        'assay_filename': "my_ms_assay"
    }

    :param str destinationPath: Path to a directory in which the output will be saved
    :param dict detailsDict: Contains several key, value pairs required for ISA-Tab
    :raises IOError: If writing one of the files fails
    """
    from isatools.model import Investigation, Study, Assay, OntologyAnnotation, OntologySource, Person, Publication, Protocol, Source
    from isatools.model import Comment, Sample, Characteristic, Process, Material, DataFile, ParameterValue, plink
    from isatools import isatab
    import isaExplorer as ie

    investigation = Investigation()
    investigation.identifier = detailsDict['investigation_identifier']
    investigation.title = detailsDict['investigation_title']
    investigation.description = detailsDict['investigation_description']
    # use today if not specified
    investigation.submission_date = detailsDict['investigation_submission_date']
    investigation.public_release_date = detailsDict['investigation_public_release_date']

    study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
    study.identifier = detailsDict['study_identifier']
    study.title = detailsDict['study_title']
    study.description = detailsDict['study_description']
    study.submission_date = detailsDict['study_submission_date']
    study.public_release_date = detailsDict['study_public_release_date']
    investigation.studies.append(study)

    obi = OntologySource(
        name='OBI', description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)
    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.
    contact = Person(first_name=detailsDict['first_name'],
                     last_name=detailsDict['last_name'],
                     affiliation=detailsDict['affiliation'],
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Data",
                              author_list="Author 1, Author 2")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.
    sample_collection_protocol = Protocol(
        id_="sample collection",
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    aliquoting_protocol = Protocol(
        id_="aliquoting",
        name="aliquoting",
        protocol_type=OntologyAnnotation(term="aliquoting"))

    for index, row in self.sampleMetadata.iterrows():
        src_name = row['Sample File Name']
        source = Source(name=src_name)
        source.comments.append(Comment(name='Study Name', value=row['Study']))
        study.sources.append(source)

        sample_name = src_name
        sample = Sample(name=sample_name, derives_from=[source])

        # check if field exists first
        status = row['Status'] if 'Status' in self.sampleMetadata.columns else 'N/A'
        characteristic_material_type = Characteristic(
            category=OntologyAnnotation(term="material type"), value=status)
        sample.characteristics.append(characteristic_material_type)

        # characteristic_material_role = Characteristic(category=OntologyAnnotation(term="material role"), value=row['AssayRole'])
        # sample.characteristics.append(characteristic_material_role)

        # check if field exists first
        age = row['Age'] if 'Age' in self.sampleMetadata.columns else 'N/A'
        characteristic_age = Characteristic(
            category=OntologyAnnotation(term="Age"), value=age, unit='Year')
        sample.characteristics.append(characteristic_age)

        # check if field exists first
        gender = row['Gender'] if 'Gender' in self.sampleMetadata.columns else 'N/A'
        characteristic_gender = Characteristic(
            category=OntologyAnnotation(term="Gender"), value=gender)
        sample.characteristics.append(characteristic_gender)

        ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
        characteristic_organism = Characteristic(
            category=OntologyAnnotation(term="Organism"),
            value=OntologyAnnotation(
                term="Homo sapiens",
                term_source=ncbitaxon,
                term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606"))
        sample.characteristics.append(characteristic_organism)
        study.samples.append(sample)

        # check if field exists first
        sampling_date = row['Sampling Date'] if not pandas.isnull(
            row['Sampling Date']) else None

        sample_collection_process = Process(
            id_='sam_coll_proc',
            executes_protocol=sample_collection_protocol,
            date_=sampling_date)
        aliquoting_process = Process(id_='sam_coll_proc',
                                     executes_protocol=aliquoting_protocol,
                                     date_=sampling_date)

        sample_collection_process.inputs = [source]
        aliquoting_process.outputs = [sample]

        # link processes
        plink(sample_collection_process, aliquoting_process)

        study.process_sequence.append(sample_collection_process)
        study.process_sequence.append(aliquoting_process)

    study.protocols.append(sample_collection_protocol)
    study.protocols.append(aliquoting_protocol)

    ### Add NMR Assay ###
    nmr_assay = Assay(
        filename='a_' + detailsDict['assay_filename'] + '.txt',
        measurement_type=OntologyAnnotation(term="metabolite profiling"),
        technology_type=OntologyAnnotation(term="NMR spectroscopy"))

    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)

    nmr_protocol = Protocol(
        name='NMR spectroscopy',
        protocol_type=OntologyAnnotation(term="NMR Assay"))
    nmr_protocol.add_param('Run Order')
    # if 'Instrument' in self.sampleMetadata.columns:
    nmr_protocol.add_param('Instrument')
    # if 'Sample Batch' in self.sampleMetadata.columns:
    nmr_protocol.add_param('Sample Batch')
    nmr_protocol.add_param('Acquisition Batch')
    study.protocols.append(nmr_protocol)

    #for index, row in
sampleMetadata.iterrows(): for index, sample in enumerate(study.samples): row = self.sampleMetadata.loc[ self.sampleMetadata['Sample File Name'].astype( str) == sample.name] # create an extraction process that executes the extraction protocol extraction_process = Process(executes_protocol=extraction_protocol) # extraction process takes as input a sample, and produces an extract material as output sample_name = sample.name sample = Sample(name=sample_name, derives_from=[source]) #print(row['Acquired Time'].values[0]) extraction_process.inputs.append(sample) material = Material(name="extract-{}".format(index)) material.type = "Extract Name" extraction_process.outputs.append(material) # create a ms process that executes the nmr protocol nmr_process = Process(executes_protocol=nmr_protocol, date_=datetime.isoformat( datetime.strptime( str(row['Acquired Time'].values[0]), '%Y-%m-%d %H:%M:%S'))) nmr_process.name = "assay-name-{}".format(index) nmr_process.inputs.append(extraction_process.outputs[0]) # nmr process usually has an output data file # check if field exists first assay_data_name = row['Assay data name'].values[ 0] if 'Assay data name' in self.sampleMetadata.columns else 'N/A' datafile = DataFile(filename=assay_data_name, label="NMR Assay Name", generated_from=[sample]) nmr_process.outputs.append(datafile) #nmr_process.parameter_values.append(ParameterValue(category='Run Order',value=str(i))) nmr_process.parameter_values = [ ParameterValue(category=nmr_protocol.get_param('Run Order'), value=row['Run Order'].values[0]) ] # check if field exists first instrument = row['Instrument'].values[ 0] if 'Instrument' in self.sampleMetadata.columns else 'N/A' nmr_process.parameter_values.append( ParameterValue(category=nmr_protocol.get_param('Instrument'), value=instrument)) # check if field exists first sbatch = row['Sample batch'].values[ 0] if 'Sample batch' in self.sampleMetadata.columns else 'N/A' nmr_process.parameter_values.append( ParameterValue(category=nmr_protocol.get_param('Sample Batch'), value=sbatch)) nmr_process.parameter_values.append( ParameterValue( category=nmr_protocol.get_param('Acquisition Batch'), value=row['Batch'].values[0])) # ensure Processes are linked forward and backward plink(extraction_process, nmr_process) # make sure the extract, data file, and the processes are attached to the assay nmr_assay.samples.append(sample) nmr_assay.data_files.append(datafile) nmr_assay.other_material.append(material) nmr_assay.process_sequence.append(extraction_process) nmr_assay.process_sequence.append(nmr_process) nmr_assay.measurement_type = OntologyAnnotation( term="metabolite profiling") nmr_assay.technology_type = OntologyAnnotation( term="NMR spectroscopy") # attach the assay to the study study.assays.append(nmr_assay) if os.path.exists(os.path.join(destinationPath, 'i_Investigation.txt')): ie.appendStudytoISA(study, destinationPath) else: isatab.dump(isa_obj=investigation, output_path=destinationPath)
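# --- Usage sketch (added for illustration; not part of the original code) ---
# _exportISATAB() is an instance method, so it needs an object of the class
# that defines it (called `dataset` here, an assumed name). The dictionary
# mirrors the format documented in the method's docstring.
def export_dataset_sketch(dataset, destination_path):
    details = {
        'investigation_identifier': "i1",
        'investigation_title': "Give it a title",
        'investigation_description': "Add a description",
        'investigation_submission_date': "2016-11-03",
        'investigation_public_release_date': "2016-11-03",
        'first_name': "Noureddin",
        'last_name': "Sadawi",
        'affiliation': "University",
        'study_filename': "my_ms_study",
        'study_material_type': "Serum",
        'study_identifier': "s1",
        'study_title': "Give the study a title",
        'study_description': "Add study description",
        'study_submission_date': "2016-11-03",
        'study_public_release_date': "2016-11-03",
        'assay_filename': "my_ms_assay",
    }
    dataset._exportISATAB(destination_path, details)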
def create_isa_files(investigation_id): # Create investigation dj_i = Investigation.objects.get(pk=investigation_id) itm_i = itm.Investigation(filename="i_investigation.txt") itm_i.identifier = "i1" itm_i.title = dj_i.name itm_i.description = dj_i.description ################################################################################################################ # STUDIES ################################################################################################################ itm_sample_d = {} # to traceback from django samples for i, dj_s in enumerate(dj_i.study_set.all()): itm_s = itm.Study(filename="s_study.txt") itm_s.identifier = "s" + str(i) itm_s.title = dj_s.name itm_s.description = dj_s.description itm_s.grant_number = dj_s.grant_number itm_s.public_release_date = dj_s.public_release_date.strftime( "%Y-%m-%d") if dj_s.public_release_date else '' itm_s.submission_date = dj_s.submission_date.strftime( "%Y-%m-%d") if dj_s.submission_date else '' itm_i.studies.append(itm_s) itm_i, itm_s = add_lcms_untargeted_meta(itm_i, itm_s, msms_performed=True) # Add study samples # loop through the study samples ################################################################################################################ # STUDY SAMPLES ################################################################################################################ for j, dj_ss in enumerate(dj_s.studysample_set.all()): # We are saying that each sample is derived from a different source material, this might not be true for # for all cases but should be fine for the resulting ISA-Tab structure for MetaboLights source = itm.Source(name='{} source'.format(dj_ss.sample_name)) itm_s.sources.append(source) # Sample material from the source itm_sample = itm.Sample(name=dj_ss.sample_name, derives_from=source) #===================== # Add organism for sample #===================== if dj_ss.organism: dj_org_ont = dj_ss.organism.ontologyterm source = check_ontology_source(itm_i, dj_org_ont.ontology_name) if not source: source = itm.OntologySource( name=dj_org_ont.ontology_prefix, description=dj_org_ont.ontology_name) itm_i.ontology_source_references.append(source) val = itm.OntologyAnnotation(term=dj_org_ont.name, term_source=source, term_accession=dj_org_ont.iri) else: val = itm.OntologyAnnotation(term='', term_source='', term_accession='') char = itm.Characteristic(category=itm.OntologyAnnotation( term="Organism", term_source="NCIT", term_accession="http://purl.obolibrary.org/obo/NCIT_C14250"), value=val) itm_sample.characteristics.append(char) # ===================== # Add organism part # ===================== if dj_ss.organism_part: dj_org_ont = dj_ss.organism_part.ontologyterm source = check_ontology_source(itm_i, dj_org_ont.ontology_name) if not source: source = itm.OntologySource( name=dj_org_ont.ontology_prefix, description=dj_org_ont.ontology_name) itm_i.ontology_source_references.append(source) val = itm.OntologyAnnotation(term=dj_org_ont.name, term_source=source, term_accession=dj_org_ont.iri) else: val = itm.OntologyAnnotation(term='', term_source='', term_accession='') char = itm.Characteristic(category=itm.OntologyAnnotation( term="Organism part", term_source="NCIT", term_accession="http://purl.obolibrary.org/obo/NCIT_C103199"), value=val) # Add organism part for sample (e.g. foot, eye, leg, whole organism...) 
            itm_sample.characteristics.append(char)

            # Potential to add technical replicates (repeat extractions of the
            # same material) but too confusing to use for DMA because we have
            # extractions at different points (Liquid Extraction, SPE and
            # Fractionation).
            itm_s.samples.append(itm_sample)
            itm_sample_d[dj_ss.id] = itm_sample

        # Create sample collection protocol (we just use 1 for all samples for
        # the time being) but it should technically be divided into groups (if
        # the resulting ISA-Tab for MetaboLights is the same then we can just
        # leave it as is).
        sample_collection_protocol = itm.Protocol(
            name="sample collection",
            protocol_type=itm.OntologyAnnotation(term="sample collection"))
        itm_s.protocols.append(sample_collection_protocol)
        sample_collection_process = itm.Process(
            executes_protocol=sample_collection_protocol)

        # Next, we link our materials to the Process. In this particular case,
        # we are describing a sample collection process that takes one source
        # material, and produces three different samples.
        #
        # (daphnia magna source) -> (sample collection) -> [(daphnia_material0-0), (daphnia_material0-1), (daphnia_material0-2)]
        # (solvent blank source) -> (sample collection) -> [(blank_material1-0), (blank_material1-1), (blank_material1-2)]
        for src in itm_s.sources:
            sample_collection_process.inputs.append(src)
        for sam in itm_s.samples:
            sample_collection_process.outputs.append(sam)

        # Finally, attach the finished Process object to the study
        # process_sequence. This can be done many times to describe multiple
        # sample collection events.
        itm_s.process_sequence.append(sample_collection_process)

        ################################################################################################################
        # ASSAYS
        ################################################################################################################
        # get dictionary of Django protocols
        dj_p = {'ex': {}, 'spe': {}, 'chr': {}, 'meas': {}}
        for dj_a in dj_s.assay_set.all():
            for dj_ad in dj_a.assaydetail_set.all():
                ex = dj_ad.extractionprocess.extractionprotocol
                dj_p['ex'][ex.id] = ex
                spe = dj_ad.speprocess.speprotocol
                dj_p['spe'][spe.id] = spe
                chrom = dj_ad.chromatographyprocess.chromatographyprotocol
                dj_p['chr'][chrom.id] = chrom
                meas = dj_ad.measurementprocess.measurementprotocol
                dj_p['meas'][meas.id] = meas

        # Create ISA-Tab protocols
        itm_p = {'ex': {}, 'spe': {}, 'chr': {}, 'meas': {}}
        # sequencing_protocol = itm.Protocol(name='sequencing', protocol_type=itm.OntologyAnnotation(term="material sequencing"))
        # itm_s.protocols.append(sequencing_protocol)

        for k, dj_ex in six.iteritems(dj_p['ex']):
            if dj_ex.name:
                nm = dj_ex.name
            else:
                nm = dj_ex.extractiontype.type
            #===========================================
            # Get extraction protocols
            #===========================================
            source = check_ontology_source(itm_i, 'CHMO')
            extraction_protocol = itm.Protocol(
                name='Extraction {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(term="Extraction"),
            )
            param = itm.ProtocolParameter(parameter_name=itm.OntologyAnnotation(
                term="Derivatization",
                term_source=source,
                term_accession='http://purl.obolibrary.org/obo/CHMO_0001485'))
            extraction_protocol.parameters.append(param)
            itm_s.protocols.append(extraction_protocol)
            itm_p['ex'][k] = extraction_protocol

        for k, dj_spe in six.iteritems(dj_p['spe']):
            if dj_spe.name:
                nm = dj_spe.name
            else:
                nm = dj_spe.spetype.type
            #===========================================
            # Get SPE (solid phase extraction) protocols
            #===========================================
            spe_protocol = itm.Protocol(
                name='Solid Phase Extraction {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(
                    term="Solid Phase Extraction"),
                components=itm.OntologyAnnotation(term=nm),
                description=dj_spe.description)
            itm_s.protocols.append(spe_protocol)
            itm_p['spe'][k] = spe_protocol

        for k, dj_chr in six.iteritems(dj_p['chr']):
            #===========================================
            # Get chromatography protocols
            #===========================================
            chromatography_protocol = itm.Protocol(
                name='Chromatography {}'.format(dj_chr.name),
                protocol_type=itm.OntologyAnnotation(term="Chromatography"))
            itm_s.protocols.append(chromatography_protocol)
            itm_p['chr'][k] = chromatography_protocol

        for k, dj_meas in six.iteritems(dj_p['meas']):
            #===========================================
            # Get measurement protocols (just mass spec for now)
            #===========================================
            if dj_meas.name:
                nm = dj_meas.name
            else:
                nm = dj_meas.measurementtechnique.type
            mass_spec_protocol = itm.Protocol(
                name='Mass spectrometry {}'.format(nm),
                protocol_type=itm.OntologyAnnotation(term="Mass spectrometry"))
            itm_s.protocols.append(mass_spec_protocol)
            itm_p['meas'][k] = mass_spec_protocol

        for dj_a in dj_s.assay_set.all():
            itm_a = itm.Assay(filename="a_assay_{}.txt".format(dj_a.name))
            # go through each detail (which is linked to all the relevant processes)
            for dj_ad in dj_a.assaydetail_set.all():
                ####################################
                # Get extraction
                ####################################
                itm_ex_prot = itm_p['ex'][
                    dj_ad.extractionprocess.extractionprotocol.id]
                extraction_process = itm.Process(executes_protocol=itm_ex_prot)
                extraction_process.name = "extract-process-{}".format(
                    dj_ad.code_field)
                material = itm.Material(
                    name="extract-{}".format(dj_ad.code_field))
                material.type = "Extract Name"
                extraction_process.outputs.append(material)

                ############################################################
                ##### IMPORTANT: we add the sample input here!
############# itm_samplei = itm_sample_d[dj_ad.studysample_id] extraction_process.inputs.append(itm_samplei) #################################### # Get SPE #################################### if dj_ad.speprocess: itm_spe_prot = itm_p['spe'][ dj_ad.speprocess.speprotocol.id] spe_process = itm.Process(executes_protocol=itm_spe_prot) spe_process.name = "spe-process-{}".format( dj_ad.code_field) spe_process.inputs.append(extraction_process.outputs[0]) material = itm.Material( name="SPE-Eluent-{}".format(dj_ad.code_field)) material.type = "Extract Name" spe_process.outputs.append(material) #################################### # Get chromatography #################################### itm_chr_prot = itm_p['chr'][ dj_ad.chromatographyprocess.chromatographyprotocol.id] chr_process = itm.Process(executes_protocol=itm_chr_prot) chr_process.name = "chr-process-{}".format(dj_ad.code_field) if dj_ad.speprocess: chr_process.inputs.append(spe_process.outputs[0]) else: chr_process.inputs.append(extraction_process.outputs[0]) material = itm.Material( name="Chromatography-Eluent-{}".format(dj_ad.code_field)) material.type = "Extract Name" chr_process.outputs.append(material) #################################### # Get measurements (mass spec only) #################################### itm_meas_prot = itm_p['meas'][ dj_ad.measurementprocess.measurementprotocol.id] meas_process = itm.Process(executes_protocol=itm_meas_prot) meas_process.name = "meas-process-{}".format(dj_ad.code_field) meas_process.inputs.append(chr_process.outputs[0]) # get output file for file_details in dj_ad.assayrun_set.all().values( 'run__mfile', 'run__mfile__original_filename'): datafile = itm.DataFile( filename=file_details['run__mfile__original_filename'], label="Raw Data File") meas_process.outputs.append(datafile) itm_a.data_files.append(datafile) if dj_ad.speprocess: itm.plink(extraction_process, spe_process) itm.plink(spe_process, chr_process) else: itm.plink(extraction_process, chr_process) itm.plink(chr_process, meas_process) itm_a.samples.append(itm_samplei) itm_a.other_material.append(material) itm_a.process_sequence.append(extraction_process) if dj_ad.speprocess: itm_a.process_sequence.append(spe_process) itm_a.process_sequence.append(chr_process) itm_a.process_sequence.append(meas_process) itm_a.measurement_type = itm.OntologyAnnotation( term="gene sequencing") itm_a.technology_type = itm.OntologyAnnotation( term="nucleotide sequencing") itm_s.assays.append(itm_a) # Note we haven't added factors yet return itm_i, json.dumps(itm_i, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': '))
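# --- Usage sketch (added for illustration; not part of the original code) ---
# create_isa_files() returns both the isatools Investigation object and its
# ISA-JSON serialization. The output directory and file name below are
# assumptions; isatab.dump() is the same call used elsewhere in this module.
def export_investigation_sketch(investigation_id, output_dir="isa_out"):
    import os
    from isatools import isatab

    itm_i, isa_json = create_isa_files(investigation_id)
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, "investigation.json"), "w") as fp:
        fp.write(isa_json)
    # Serialize the same object tree to ISA-Tab (i_/s_/a_ files).
    isatab.dump(isa_obj=itm_i, output_path=output_dir)
    return output_dir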
def convert(json_path, output_path): print(json_path) print(output_path) with open(json_path, 'r') as f: dcc_json = json.load(f) # print(array['protocol']) # for element in array['protocol']: # array['protocol'][element]['id'] # array['protocol'][element]['description'] # array['protocol'][element]['type'] # array['protocol'][element]['filename'] # for element in array['measurement']: # print(array['measurement'][element]['corrected_mz']) # for element in array['subject']: # print(array['subject'][element]['species']) # Building the Investigation Object and its elements: project_set_json = dcc_json.get('project') if len(project_set_json) == 0: raise IOError('No project found in input JSON') # print(next(iter(project_set_json))) project_json = next(iter(project_set_json.values())) investigation = Investigation(identifier=project_json['id']) obi = OntologySource(name='OBI', description='Ontology for Biomedical Investigations') investigation.ontology_source_references.append(obi) inv_person = Person( first_name=project_json['PI_first_name'], last_name=project_json['PI_last_name'], email=project_json['PI_email'], address=project_json['address'], affiliation=(', '.join( [project_json['department'], project_json['institution']])), roles=[ OntologyAnnotation(term="", term_source=obi, term_accession="http://purl.org/obo/OBI_1") ]) investigation.contacts.append(inv_person) study_set_json = dcc_json.get('study') if len(study_set_json) > 0: study_json = next(iter(study_set_json.values())) study = Study( identifier=study_json['id'], title=study_json['title'], description=study_json['description'], design_descriptors=[ OntologyAnnotation(term=study_json['type'], term_source=obi, term_accession="http://purl.org/obo/OBI_1") ], filename='s_{study_id}.txt'.format(study_id=study_json['id'])) investigation.studies = [study] studyid = study_json['id'] print(studyid) study_person = Person( first_name=study_json['PI_first_name'], last_name=study_json['PI_last_name'], email=study_json['PI_email'], address=study_json['address'], affiliation=(', '.join( [study_json['department'], study_json['institution']])), roles=[ OntologyAnnotation(term='principal investigator', term_source=obi, term_accession="http://purl.org/obo/OBI_1") ]) study.contacts.append(study_person) for factor_json in dcc_json['factor'].values(): factor = StudyFactor(name=factor_json['id']) study.factors.append(factor) for i, protocol_json in enumerate(dcc_json['protocol'].values()): oat_p = protocol_json['type'] oa_protocol_type = OntologyAnnotation( term=oat_p, term_source=obi, term_accession="http://purl.org/obo/OBI_1") study.protocols.append( Protocol(name=protocol_json['id'], protocol_type=oa_protocol_type, description=protocol_json['description'], uri=protocol_json['filename'])) if 'MS' in protocol_json['type']: study.assays.append( Assay(measurement_type=OntologyAnnotation( term='mass isotopologue distribution analysis', term_source=obi, term_accession="http://purl.org/obo/OBI_112"), technology_type=OntologyAnnotation( term='mass spectrometry', term_source=obi, term_accession="http://purl.org/obo/OBI_1"), filename='a_assay_ms_{count}.txt'.format(count=i))) if 'NMR' in protocol_json['type']: study.assays.append( Assay(measurement_type=OntologyAnnotation( term='isotopomer analysis', term_source=obi, term_accession="http://purl.org/obo/OBI_111"), technology_type=OntologyAnnotation( term='nmr spectroscopy', term_source=obi, term_accession="http://purl.org/obo/OBI_1"), filename='a_assay_nmr.txt')) for subject_json in dcc_json['subject'].values(): 
# print(array['subject'][element]) if "organism" in subject_json['type']: source = Source(name=subject_json['id']) ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy") characteristic_organism = Characteristic( category=OntologyAnnotation(term="Organism"), value=OntologyAnnotation( term=subject_json['species'], term_source=ncbitaxon, term_accession= 'http://purl.bioontology.org/ontology/NCBITAXON/9606')) source.characteristics.append(characteristic_organism) study.sources.append(source) elif 'tissue_slice' in subject_json['type']: # print(array['subject'][element]['type']) source = Source(name=subject_json['id']) study.sources.append(source) ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy") characteristic_organism = Characteristic( category=OntologyAnnotation(term="Organism"), value=OntologyAnnotation( term=subject_json['species'], term_source=ncbitaxon, term_accession= 'http://purl.bioontology.org/ontology/NCBITAXON/9606')) source.characteristics.append(characteristic_organism) sample = Sample(name=subject_json['id'], derives_from=subject_json['parentID']) characteristic_organismpart = Characteristic( category=OntologyAnnotation(term='organism_part'), value=OntologyAnnotation( term=subject_json['tissue_type'], term_source=obi, term_accession="http://purl.org/obo/OBI_1")) sample.characteristics.append(characteristic_organismpart) study.samples.append(sample) # print(study.samples[0].name) sample_collection_process = Process( executes_protocol=study.get_prot( subject_json['protocol.id'])) sample_collection_process.inputs.append(source) sample_collection_process.outputs.append(sample) study.process_sequence.append(sample_collection_process) else: source = Source(name=subject_json['id']) ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy") characteristic_organism = Characteristic( category=OntologyAnnotation(term="Organism"), value=OntologyAnnotation( term=subject_json['species'], term_source=ncbitaxon, term_accession= 'http://purl.bioontology.org/ontology/NCBITAXON/9606')) source.characteristics.append(characteristic_organism) study.sources.append(source) print(subject_json['id']) print(subject_json['species']) print(subject_json['type']) # for src in investigation.studies[0].materials: # # for sam in investigation.studies[0].materials: for sample_json in dcc_json['sample'].values(): if 'cells' in sample_json['type']: material_separation_process = Process( executes_protocol=study.get_prot( sample_json['protocol.id'])) material_separation_process.name = sample_json['id'] # dealing with input material, check that the parent material is already among known samples or sources if len([ x for x in study.samples if x.name == sample_json['parentID'] ]) == 0: material_in = Sample(name=sample_json['parentID']) material_separation_process.inputs.append(material_in) study.assays[0].samples.append(material_in) else: print([ x for x in study.samples if x.name == sample_json['parentID'] ]) material_separation_process.inputs.append([ x for x in study.samples if x.name == sample_json['parentID'] ][0]) material_out = Sample(name=sample_json['id']) material_type = Characteristic( category=OntologyAnnotation(term='material_type'), value=OntologyAnnotation( term=sample_json['type'], term_source=obi, term_accession="http://purl.org/obo/OBI_xxxxxxx")) material_out.characteristics.append(material_type) material_separation_process.outputs.append(material_out) study.assays[0].samples.append(material_out) try: sample_collection_process except NameError: 
sample_collection_process = None if sample_collection_process is None: sample_collection_process = Process(executes_protocol="") else: # plink(protein_extraction_process, data_acq_process) # plink(material_separation_process, protein_extraction_process) plink(sample_collection_process, protein_extraction_process) if 'protein_extract' in sample_json['type']: protein_extraction_process = Process( executes_protocol=study.get_prot( sample_json['protocol.id'])) protein_extraction_process.name = sample_json['id'] if len([ x for x in study.samples if x.name == sample_json['parentID'] ]) == 0: material_in = Sample(name=sample_json['parentID']) protein_extraction_process.inputs.append(material_in) study.assays[0].samples.append(material_in) else: # print([x for x in study.samples if x.name == sample_json['parentID']]) protein_extraction_process.inputs.append(material_in) # for material_in in study.samples: # # print("OHO:", material_in.name) # if material_in.name == sample_json['parentID']: # # print("C:",sample_json['parentID']) # #no need to create, just link to process # protein_extraction_process.inputs.append(x) # else: # # print("D:", sample_json['parentID']) # #create new material and link # material_in = Sample(name=sample_json['parentID']) # protein_extraction_process.inputs.append(material_in) material_out = Material(name=sample_json['id']) material_out.type = "Extract Name" material_type = Characteristic( category=OntologyAnnotation(term='material_type'), value=OntologyAnnotation( term=sample_json['type'], term_source=obi, term_accession="http://purl.org/obo/OBI_1")) material_out.characteristics.append(material_type) study.assays[0].samples.append(material_in) study.assays[0].materials['other_material'].append(material_in) try: material_separation_process except NameError: material_separation_process = None if material_separation_process is None: material_separation_process = Process(executes_protocol="") else: # plink(protein_extraction_process, data_acq_process) plink(material_separation_process, protein_extraction_process) if 'polar' in sample_json['type']: material_in = Material(name=sample_json['parentID']) material_type = Characteristic( category=OntologyAnnotation(term='material_type', term_source=obi), value=OntologyAnnotation(term=sample_json['type'], term_source=obi)) material_in.characteristics.append(material_type) study.assays[0].materials['other_material'].append(material_in) data_acq_process = Process(executes_protocol=study.get_prot( sample_json['protocol.id'])) data_acq_process.name = sample_json['id'] datafile = DataFile( filename='{filename}.txt'.format(filename='_'.join( ['mass_isotopomer-data', studyid, sample_json['id']])), label='Raw Data File') data_acq_process.outputs.append(datafile) # print(study.assays[0].technology_type.term) study.assays[0].data_files.append(datafile) try: protein_extraction_process except NameError: protein_extraction_process = None if protein_extraction_process is None: protein_extraction_process = Process(executes_protocol="") else: plink(protein_extraction_process, data_acq_process) # else: # material_in = Material(name=sample_json['parentID']) # material_out = Material(name=sample_json['id']) # material_type = Characteristic( # category=OntologyAnnotation(term="material_type"), # value=OntologyAnnotation(term=sample_json['type'], # term_source=obi, # term_accession="http://purl.org/obo/OBI_1")) # material_out.characteristics.append(material_type) # process = Process(executes_protocol=sample_json['protocol.id']) # process.name = 
sample_json['id'] # process.inputs.append(material_in) # process.outputs.append(material_out) # # study.assays[0].materials['other_material'].append(material_in) # study.assays[0].materials['other_material'].append(material_out) if 'bulk_tissue' in sample_json['type']: bulk_process = Process(executes_protocol=study.get_prot( sample_json['protocol.id'])) bulk_process.name = sample_json['id'] if len([ x for x in study.samples if x.name == sample_json['parentID'] ]) == 0: material_in = Sample(name=sample_json['parentID']) bulk_process.inputs.append(material_in) study.assays[0].samples.append(material_in) else: # print([x for x in study.samples if x.name == sample_json['parentID']]) bulk_process.inputs.append(material_in) plink(sample_collection_process, bulk_process) data_rec_header = '\t'.join( ('metabolite name', 'assignment', 'signal intensity', 'retention time', 'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier')) records = [] for element in dcc_json['measurement']: # metabolite_name: -> compound # array['measurement'][element]['signal_intensity'] record = '\t'.join((dcc_json['measurement'][element]['compound'], dcc_json['measurement'][element]['assignment'], dcc_json['measurement'][element]['raw_intensity'], dcc_json['measurement'][element]['retention_time'], dcc_json['measurement'][element]['corrected_mz'], dcc_json['measurement'][element]['formula'], dcc_json['measurement'][element]['adduct'], dcc_json['measurement'][element]['isotopologue'], dcc_json['measurement'][element]['sample.id'])) # print(record) records.append(record) if not os.path.exists(output_path): os.makedirs(output_path) try: with open( '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'. format(output_path=output_path, study_id=studyid), 'w') as fh: print( "'writing 'maf file document' to file from 'generate_maf_file' method:..." ) fh.writelines(data_rec_header) fh.writelines('\n') for item in records: fh.writelines(item) fh.writelines('\n') print("writing 'investigation information' to file...") print(isatab.dumps(investigation)) isatab.dump(investigation, output_path=output_path) except IOError: print("Error: in main() method can't open file or write data")
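# --- Usage sketch (added for illustration; not part of the original code) ---
# convert() reads a DCC-style JSON document and writes a MAF table plus the
# ISA-Tab files into output_path. Both paths below are placeholders.
def convert_example_sketch():
    convert('dcc_study.json', 'isa_output')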
def create_data_file(obs_unit, this_assay, sample_collection_process,
                     this_isa_sample, phenotyping_protocol):
    # TODO: reactivate data file generation, one by assay

    # Creating the relevant ISA protocol application / Assay from BRAPI Observation Events:
    # -------------------------------------------------------------------------------------
    # Getting the ISA assay table generated by the 'create_isa_study' method by default
    # DEPRECATED: We are aiming to have one assay by level and one file by level/assay.
    # This file is the direct output of the phenotyping-search/table call.
    for j in range(len(obs_unit['observations'])):
        # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
        phenotyping_process = Process(executes_protocol=phenotyping_protocol)
        phenotyping_process.name = "assay-name_(" + obs_unit["observationUnitName"] + ")_" + \
            str(j)
        # print("assay name: ", j, "|", phenotyping_process.name)
        phenotyping_process.inputs.append(this_isa_sample)

        # Creating relevant protocol parameter values associated with the protocol application:
        # -------------------------------------------------------------------------------------
        if 'season' in obs_unit['observations'][j].keys():
            pv = ParameterValue(
                category=ProtocolParameter(parameter_name=OntologyAnnotation(term="season")),
                value=OntologyAnnotation(term=str(obs_unit['observations'][j]['season']),
                                         term_source="",
                                         term_accession=""))
        else:
            pv = ParameterValue(
                category=ProtocolParameter(parameter_name=OntologyAnnotation(term="season")),
                value=OntologyAnnotation(term="none reported",
                                         term_source="",
                                         term_accession=""))
        phenotyping_process.parameter_values.append(pv)

        # Getting and setting values for performer and date of the protocol application:
        # -------------------------------------------------------------------------------------
        # if obs_unit['observations'][j]['observationTimeStamp'] is not None:
        #     phenotyping_process.date = str(obs_unit['observations'][j]['observationTimeStamp'])
        # else:
        #     # TODO: implement testing and use of datetime.datetime.today().isoformat()
        #     phenotyping_process.date = "not available"
        # if obs_unit['observations'][j]['collector'] is not None:
        #     phenotyping_process.performer = str(obs_unit['observations'][j]['collector'])
        # else:
        #     phenotyping_process.performer = "none reported"

        # Creating the ISA Raw Data Files associated with each ISA phenotyping assay:
        # -----------------------------------------------------------------------
        # datafile = DataFile(filename="phenotyping-data.txt",
        #                     label="Raw Data File",
        #                     generated_from=[this_isa_sample])
        # phenotyping_process.outputs.append(datafile)

        # Creating processes and linking
        this_assay.samples.append(this_isa_sample)
        # this_assay.process_sequence.append(sample_collection_process)
        this_assay.process_sequence.append(phenotyping_process)
        # this_assay.data_files.append(datafile)
        plink(sample_collection_process, phenotyping_process)
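# --- Usage sketch (added for illustration; not part of the original code) ---
# create_data_file() mutates `this_assay` in place: it appends one phenotyping
# Process per observation and links each one to the sample collection Process
# with plink(). Everything below (protocol name, assay filename, the BRAPI-style
# observation unit stub) is made up for the sake of the example.
def build_phenotyping_assay_sketch(isa_sample, sample_collection_process):
    phenotyping_protocol = Protocol(
        name="phenotyping",
        protocol_type=OntologyAnnotation(term="phenotyping"))
    assay = Assay(filename="a_phenotyping.txt")
    obs_unit = {
        "observationUnitName": "plot-001",
        "observations": [{"season": "spring 2016"}],
    }
    create_data_file(obs_unit, assay, sample_collection_process,
                     isa_sample, phenotyping_protocol)
    return assay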
def create_descriptor(): """ Returns a simple but complete ISA-Tab 1.0 descriptor for illustration. """ # Create an empty Investigation object and set some values to the instance # variables. investigation = Investigation() investigation.identifier = "i1" investigation.title = "My Simple ISA Investigation" investigation.description = \ "We could alternatively use the class constructor's parameters to " \ "set some default values at the time of creation, however we want " \ "to demonstrate how to use the object's instance variables to " \ "set values." investigation.submission_date = "2016-11-03" investigation.public_release_date = "2016-11-03" # Create an empty Study object and set some values. The Study must have a # filename, otherwise when we serialize it to ISA-Tab we would not know # where to write it. We must also attach the study to the investigation by # adding it to the 'investigation' object's list of studies. study = Study(filename="s_study.txt") study.identifier = "s1" study.title = "My ISA Study" study.description = \ "Like with the Investigation, we could use the class constructor to " \ "set some default values, but have chosen to demonstrate in this " \ "example the use of instance variables to set initial values." study.submission_date = "2016-11-03" study.public_release_date = "2016-11-03" investigation.studies.append(study) # Some instance variables are typed with different objects and lists of # objects. For example, a Study can have a list of design descriptors. A # design descriptor is an Ontology Annotation describing the kind of study # at hand. Ontology Annotations should typically reference an Ontology # Source. We demonstrate a mix of using the class constructors and setting # values with instance variables. Note that the OntologyAnnotation object # 'intervention_design' links its 'term_source' directly to the 'obi' # object instance. To ensure the OntologySource is encapsulated in the # descriptor, it is added to a list of 'ontology_source_references' in # the Investigation object. The 'intervention_design' object is then # added to the list of 'design_descriptors' held by the Study object. obi = OntologySource( name='OBI', description="Ontology for Biomedical Investigations") investigation.ontology_source_references.append(obi) intervention_design = OntologyAnnotation(term_source=obi) intervention_design.term = "intervention design" intervention_design.term_accession = \ "http://purl.obolibrary.org/obo/OBI_0000115" study.design_descriptors.append(intervention_design) # Other instance variables common to both Investigation and Study objects # include 'contacts' and 'publications', each with lists of corresponding # Person and Publication objects. contact = Person( first_name="Alice", last_name="Robertson", affiliation="University of Life", roles=[ OntologyAnnotation( term='submitter')]) study.contacts.append(contact) publication = Publication( title="Experiments with Elephants", author_list="A. Robertson, B. Robertson") publication.pubmed_id = "12345678" publication.status = OntologyAnnotation(term="published") study.publications.append(publication) # To create the study graph that corresponds to the contents of the study # table file (the s_*.txt file), we need to create a process sequence. # To do this we use the Process class and attach it to the Study object's # 'process_sequence' list instance variable. Each process must be linked # with a Protocol object that is attached to a Study object's 'protocols' # list instance variable. 
    # The sample collection Process object usually has as input a Source
    # material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.
    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.
    prototype_sample = Sample(name='sample_material', derives_from=[source])
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
                           "9606"))
    prototype_sample.characteristics.append(characteristic_organism)
    study.samples = batch_create_materials(
        prototype_sample, n=3)  # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our
    # sample collection protocol, and attach it to the study object. Protocols
    # must be declared before we describe Processes, as a processing event of
    # some sort must execute some defined protocol. In the case of the class
    # model, Protocols should therefore be declared before Processes in order
    # for the Process to be linked to one.
    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Next, we link our materials to the Process. In this particular case,
    # we are describing a sample collection process that takes one
    # source material, and produces three different samples.
    #
    # (source_material)->(sample collection)->[(sample_material-0),
    # (sample_material-1), (sample_material-2)]
    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to
    # describe multiple sample collection events.
    study.process_sequence.append(sample_collection_process)

    # Next, we build an Assay object and attach two protocols, extraction and
    # sequencing.
    assay = Assay(filename="a_assay.txt")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(
            term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(
            term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and a
    # sequencing process. The extraction process takes as input a
    # sample material, and produces an extract material. The sequencing
    # process takes the extract material and produces a data file.
This will # produce three graphs, from sample material through to data, as follows: # # (sample_material-0)->(extraction)->(extract-0)->(sequencing)-> # (sequenced-data-0) # (sample_material-1)->(extraction)->(extract-1)->(sequencing)-> # (sequenced-data-1) # (sample_material-2)->(extraction)->(extract-2)->(sequencing)-> # (sequenced-data-2) # # Note that the extraction processes and sequencing processes are # distinctly separate instances, where the three graphs are NOT # interconnected. for i, sample in enumerate(study.samples): # create an extraction process that executes the extraction protocol extraction_process = Process(executes_protocol=extraction_protocol) # extraction process takes as input a sample, and produces an extract # material as output extraction_process.inputs.append(sample) material = Material(name="extract-{}".format(i)) material.type = "Extract Name" extraction_process.outputs.append(material) # create a sequencing process that executes the sequencing protocol sequencing_process = Process(executes_protocol=sequencing_protocol) sequencing_process.name = "assay-name-{}".format(i) sequencing_process.inputs.append(extraction_process.outputs[0]) # Sequencing process usually has an output data file datafile = DataFile( filename="sequenced-data-{}".format(i), label="Raw Data File", generated_from=[sample]) sequencing_process.outputs.append(datafile) # ensure Processes are linked forward and backward plink(extraction_process, sequencing_process) # make sure the extract, data file, and the processes are attached to # the assay assay.samples.append(sample) assay.data_files.append(datafile) assay.other_material.append(material) assay.process_sequence.append(extraction_process) assay.process_sequence.append(sequencing_process) assay.measurement_type = OntologyAnnotation(term="gene sequencing") assay.technology_type = OntologyAnnotation( term="nucleotide sequencing") # attach the assay to the study study.assays.append(assay) from isatools import isatab # dumps() writes out the ISA as a string representation of the ISA-Tab return isatab.dumps(investigation)
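# --- Usage sketch (added for illustration; not part of the original example) ---
# This ISA-Tab variant of create_descriptor() returns the serialized ISA-Tab
# as a single string (isatab.dumps). Writing it to one file is only meant to
# show the call; the filename is an assumption, and isatab.dump() could be
# used instead to write the i_/s_/a_ files into a directory.
def write_isatab_sketch(path="isa_tab_dump.txt"):
    with open(path, "w") as out_fp:
        out_fp.write(create_descriptor())
    return path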