def execute_annotateMutations_lineage(self, experiment_id, sample_names_I=None,
        ref_genome_I='data/U00096.2.gb', ref_I='genbank',
        biologicalmaterial_id_I='MG1655'):
    '''Annotate the lineage mutations of an experiment by genome position

    Every row returned for a sample is annotated in place with the genes,
    locations and products found at row['mutation_data']['position'], plus
    one ecogene link per non-empty locus tag that has an ecogene accession
    number.  All annotated rows are handed to
    update_dataStage01ResequencingLineage in a single call at the end.

    INPUT:
    experiment_id = string
    sample_names_I = [] of strings; when empty or None, the sample names
                     are queried for the experiment
    ref_genome_I = string, passed to genome_annotations as annotation_I
    ref_I = string, passed to genome_annotations as annotation_ref_I
    biologicalmaterial_id_I = string, used to look up ecogene accession numbers
    '''
    genomeannotation = genome_annotations(annotation_I=ref_genome_I, annotation_ref_I=ref_I)

    print('Executing annotateMutations_lineage...')
    # fall back to every sample of the experiment when none were requested
    sample_names = sample_names_I or \
        self.get_sampleNames_experimentID_dataStage01ResequencingLineage(experiment_id)

    data_O = []
    for sn in sample_names:
        print('annotating mutation for sample_name ' + sn)
        # query rows that match the sample name
        rows = self.get_row_experimentIDAndSampleName_dataStage01ResequencingLineage(experiment_id, sn)
        for row in rows:
            # annotate each mutation based on the position
            annotation = genomeannotation._find_genesFromMutationPosition(
                row['mutation_data']['position'])
            row['mutation_genes'] = annotation['gene']
            row['mutation_locations'] = annotation['location']
            row['mutation_annotations'] = annotation['product']
            # generate a link to ecogene for the genes
            row['mutation_links'] = []
            for bnumber in annotation['locus_tag']:
                if not bnumber:
                    # empty locus tags cannot be looked up
                    continue
                ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(
                    biologicalmaterial_id_I, bnumber)
                if ecogenes:
                    # only the first match is used for the link
                    ecogene_link = genomeannotation._generate_httplink2gene_ecogene(
                        ecogenes[0]['ecogene_accession_number'])
                    row['mutation_links'].append(ecogene_link)
                else:
                    print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
            data_O.append(row)
    # update rows in the database
    self.update_dataStage01ResequencingLineage(data_O)
def execute_mutateFilteredMutations(self, experiment_id, sample_names_I=None,
        annotation_I='data/U00096.2.gb',
        annotation_ref_I='genbank',
        sequence_I='data/U00096.2.fas',
        sequence_ref_I='fasta',
        codonUsageTable_I='data/ecoli_codonUsageTable.csv',
        IS_sequences_I='data/ecoli_IS_sequences.fasta',
        IS_sequences_ref_I='fasta',
        translation_table_I='Bacterial',
        ):
    '''Mutate filtered mutations to determine the change in dna, rna, and peptide sequences

    Mutations for which the annotation reports no gene are skipped.  For each
    sample, SNP mutations whose mutation_class contains 'synonymous' are
    written (with additional codon information) to
    data_stage01_resequencing_mutationsCodonChanges; all other mutations are
    passed to add_dataStage01ResequencingMutationsSeqChanges.

    INPUT:
    experiment_id = string
    sample_names_I = [] of strings; when empty or None, the sample names
                     are queried for the experiment
    annotation_I = string, reference file for the sequencing annotation
    annotation_ref_I = string, reference file data base source
    sequence_I = string, reference file for the sequence
    sequence_ref_I = string, reference file format
    codonUsageTable_I = string, reference file for the codon usage table
    IS_sequences_I = string, reference file for the insertion element sequences
    IS_sequences_ref_I = string, reference file format
    translation_table_I = string, translation table to use when converting from rna to peptide sequence
    '''
    genomeannotation = genome_annotations(
        annotation_I=annotation_I, annotation_ref_I=annotation_ref_I,
        sequence_I=sequence_I, sequence_ref_I=sequence_ref_I,
        IS_sequences_I=IS_sequences_I, IS_sequences_ref_I=IS_sequences_ref_I,
        codonUsageTable_I=codonUsageTable_I)

    print('Executing annotation of filtered mutations...')
    # fall back to every sample of the experiment when none were requested
    sample_names = sample_names_I or \
        self.get_sampleNames_experimentID_dataStage01ResequencingMutationsFiltered(experiment_id)

    # annotation entries copied to the output row under the same key
    sequence_keys = (
        'mutation_data',
        'dna_sequence_ref', 'dna_sequence_new',
        'rna_sequence_ref', 'rna_sequence_new',
        'peptide_sequence_ref', 'peptide_sequence_new',
        'mutation_class',
        'dna_feature_position', 'dna_feature_ref', 'dna_feature_new',
        'rna_feature_position', 'rna_feature_ref', 'rna_feature_new',
        'peptide_feature_position', 'peptide_feature_ref', 'peptide_feature_new',
    )
    # additional annotation entries copied only for synonymous SNPs
    codon_keys = (
        'codon_triplet_ref', 'codon_triplet_new', 'codon_triplet_position',
        'codon_fraction_ref', 'codon_fraction_new',
        'codon_frequency_ref', 'codon_frequency_new', 'codon_frequency_units',
    )

    for sn in sample_names:
        print('analyzing sample_name ' + sn)
        data_O = []
        data_codon_O = []
        # query mutation data:
        mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsFiltered(
            experiment_id, sn)
        for mutation in mutations:
            print('analyzing mutations')
            mutation_data = mutation['mutation_data']
            # annotate each mutation based on the position
            annotation = genomeannotation._mutate_peptideFromMutationData(
                mutation_data, translation_table_I=translation_table_I)
            if not annotation['gene']:
                # nothing to record when the annotation reports no gene
                continue
            data_tmp = {
                'mutation_genes': annotation['gene'],
                'mutation_locations': annotation['location'],
            }
            for key in sequence_keys:
                data_tmp[key] = annotation[key]
            data_tmp['experiment_id'] = mutation['experiment_id']
            data_tmp['sample_name'] = mutation['sample_name']
            data_tmp['dna_features_region'] = None
            data_tmp['rna_features_region'] = None
            data_tmp['peptide_features_region'] = None
            # a mutation without an explicit frequency is recorded as 1.0
            if 'frequency' in mutation_data:
                data_tmp['mutation_frequency'] = mutation_data['frequency']
            else:
                data_tmp['mutation_frequency'] = 1.0
            data_tmp['mutation_position'] = mutation_data['position']
            data_tmp['mutation_type'] = mutation_data['type']
            data_tmp['used_'] = True
            data_tmp['comment_'] = None
            # split into different tables depending on whether the peptide sequence changed
            # NOTE(review): if mutation_class is a str rather than a list, a value
            # such as 'nonsynonymous' would also satisfy this test -- confirm
            if mutation_data['type'] == 'SNP' and 'synonymous' in annotation['mutation_class']:
                for key in codon_keys:
                    data_tmp[key] = annotation[key]
                data_codon_O.append(data_tmp)
            else:
                data_O.append(data_tmp)
        # upload the data to the database (each sample)
        if data_O:
            self.add_dataStage01ResequencingMutationsSeqChanges(data_O)
        if data_codon_O:
            self.add_rows_table('data_stage01_resequencing_mutationsCodonChanges', data_codon_O)
def execute_annotateFilteredMutations(self, experiment_id, sample_names_I=None,
        annotation_I='data/U00096.2.gb',
        annotation_ref_I='genbank',
        biologicalmaterial_id_I='MG1655',
        ):
    '''Annotate filtered mutations using a reference annotation

    For every filtered mutation of each sample, the genes, locations and
    products found at the mutation position are looked up, ecogene links are
    generated for the non-empty locus tags, and a
    data_stage01_resequencing_mutationsAnnotated row is added to
    self.session.  A mutation without a 'frequency' entry is recorded with a
    frequency of 1.0.

    INPUT:
    experiment_id = string
    sample_names_I = [] of strings; when empty or None, the sample names
                     are queried for the experiment
    annotation_I = string, reference file for the sequencing annotation
    annotation_ref_I = string, reference file data base source
    biologicalmaterial_id_I = string, used to look up ecogene accession numbers
    '''
    genomeannotation = genome_annotations(annotation_I=annotation_I, annotation_ref_I=annotation_ref_I)

    print('Executing annotation of filtered mutations...')
    # fall back to every sample of the experiment when none were requested
    sample_names = sample_names_I or \
        self.get_sampleNames_experimentID_dataStage01ResequencingMutationsFiltered(experiment_id)

    for sn in sample_names:
        print('analyzing sample_name ' + sn)
        # query mutation data:
        mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsFiltered(
            experiment_id, sn)
        for mutation in mutations:
            print('analyzing mutations')
            mutation_data = mutation['mutation_data']
            # annotate each mutation based on the position
            annotation = genomeannotation._find_genesFromMutationPosition(mutation_data['position'])
            # generate a link to ecogene for the genes
            mutation_links = []
            for bnumber in annotation['locus_tag']:
                if not bnumber:
                    # empty locus tags cannot be looked up
                    continue
                ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(
                    biologicalmaterial_id_I, bnumber)
                if ecogenes:
                    # only the first match is used for the link
                    ecogene_link = genomeannotation._generate_httplink2gene_ecogene(
                        ecogenes[0]['ecogene_accession_number'])
                    mutation_links.append(ecogene_link)
                else:
                    print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
            # a mutation without an explicit frequency is recorded as 1.0
            if 'frequency' in mutation_data:
                frequency = mutation_data['frequency']
            else:
                frequency = 1.0
            # add data to the database; the last two positional arguments
            # are passed as True and None exactly as before
            row = data_stage01_resequencing_mutationsAnnotated(
                mutation['experiment_id'],
                mutation['sample_name'],
                frequency,
                mutation_data['type'],
                mutation_data['position'],
                mutation_data,
                annotation['product'],
                annotation['gene'],
                annotation['location'],
                mutation_links,
                True,
                None)
            self.session.add(row)
    # NOTE(review): the source formatting did not show whether the commit was
    # per row, per sample, or once at the end; a single commit after all
    # samples is assumed here -- confirm
    self.session.commit()
def execute_annotateAmplifications(self, experiment_id_I, sample_names_I=None,
        ref_genome_I='data/U00096.2.gb', ref_I='genbank',
        biologicalmaterial_id_I='MG1655'):
    '''Annotate the amplification regions of an experiment

    For every sample, chromosome and strand (strands whose name contains
    'mean' are skipped), each amplification region is annotated with the
    features found between its start and stop.  One record is produced per
    feature, including ecogene links for the non-empty locus tags, and all
    records are passed to add_dataStage01ResequencingAmplificationAnnotations
    in a single call at the end.

    INPUT:
    experiment_id_I = string
    sample_names_I = [] of strings; when empty or None, the sample names
                     are queried for the experiment
    ref_genome_I = string, passed to genome_annotations as annotation_I
    ref_I = string, passed to genome_annotations as annotation_ref_I
    biologicalmaterial_id_I = string, used to look up ecogene accession numbers
    '''
    genomeannotation = genome_annotations(annotation_I=ref_genome_I, annotation_ref_I=ref_I)

    print('Executing annotateAmplifications...')
    experiment_id = experiment_id_I
    # fall back to every sample of the experiment when none were requested
    sample_names = sample_names_I or \
        self.get_sampleNames_experimentID_dataStage01ResequencingAmplifications(experiment_id)

    data_O = []
    for sn in sample_names:
        print('annotating amplifications for sample_name ' + sn)
        # get chromosomes
        chromosomes = self.get_chromosomes_experimentIDAndSampleName_dataStage01ResequencingAmplifications(
            experiment_id, sn)
        for chromosome in chromosomes:
            # get strands
            strands = self.get_strands_experimentIDAndSampleNameAndChromosome_dataStage01ResequencingAmplifications(
                experiment_id, sn, chromosome)
            # remove visualization regions
            strands = [s for s in strands if 'mean' not in s]
            for strand in strands:
                # get the start and stop of the indices
                genomic_starts, genomic_stops = self.get_startAndStops_experimentIDAndSampleNameAndChromosomeAndStrand_dataStage01ResequencingAmplifications(
                    experiment_id, sn, chromosome, strand)
                # get the start and stop regions
                starts, stops = self.get_amplificationRegions_experimentIDAndSampleNameAndChromosomeAndStrand_dataStage01ResequencingAmplifications(
                    experiment_id, sn, chromosome, strand)
                for start_cnt, start in enumerate(starts):
                    stop = stops[start_cnt]
                    # annotate each region based on its start and stop
                    annotations = genomeannotation._find_genesInRegion(start, stop)
                    for annotation in annotations:
                        # record the data; the first genomic start/stop is
                        # used for every region of the strand
                        tmp = {
                            'experiment_id': experiment_id,
                            'sample_name': sn,
                            'genome_chromosome': chromosome,
                            'genome_strand': strand,
                            'strand_start': genomic_starts[0],
                            'strand_stop': genomic_stops[0],
                            'amplification_start': start,
                            'amplification_stop': stop,
                            'used_': True,
                            'comment_': None,
                            'feature_genes': annotation['gene'],
                            'feature_locations': annotation['location'],
                            'feature_annotations': annotation['product'],
                            'feature_start': annotation['start'],
                            'feature_stop': annotation['stop'],
                            'feature_types': annotation['type'],
                            'feature_links': [],
                        }
                        # generate a link to ecogene for the genes
                        for bnumber in annotation['locus_tag']:
                            if not bnumber:
                                # empty locus tags cannot be looked up
                                continue
                            ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(
                                biologicalmaterial_id_I, bnumber)
                            if ecogenes:
                                # only the first match is used for the link
                                ecogene_link = genomeannotation._generate_httplink2gene_ecogene(
                                    ecogenes[0]['ecogene_accession_number'])
                                tmp['feature_links'].append(ecogene_link)
                            else:
                                print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
                        data_O.append(tmp)
    # add the annotated rows to the database
    self.add_dataStage01ResequencingAmplificationAnnotations(data_O)