Exemplos de genome_annotations em Python, exemplos de sequencing_analysis.genome_annotations.genome_annotations em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: stage01_resequencing_lineage_execute.py Projeto: dmccloskey/SBaaS_resequencing

    def execute_annotateMutations_lineage(self,experiment_id,sample_names_I=None,
                                                 ref_genome_I='data/U00096.2.gb',
                                                 ref_I = 'genbank',biologicalmaterial_id_I='MG1655'):
        '''Annotate the mutations stored in data_stage01_resequencing_lineage

        Every lineage row of the selected samples is annotated in place from
        the position of its mutation; all annotated rows are then written back
        in a single call to update_dataStage01ResequencingLineage.

        INPUT:
        experiment_id = experiment whose lineage rows are annotated
        sample_names_I = sample names to process; when empty or None the
                         sample names are queried for the experiment
        ref_genome_I = reference annotation handed to genome_annotations
        ref_I = source/format of the reference annotation
        biologicalmaterial_id_I = biological material used to look up the
                                  ecogene accession number of each locus tag
        '''

        genomeannotation = genome_annotations(annotation_I=ref_genome_I,annotation_ref_I=ref_I)

        print('Executing annotateMutations_lineage...')
        # None (the default) and an empty list both mean "query the sample names"
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingLineage(experiment_id)
        data_O = []
        for sn in sample_names:
            print('annotating mutation for sample_name ' + sn)
            # query rows that match the sample name
            rows = self.get_row_experimentIDAndSampleName_dataStage01ResequencingLineage(experiment_id,sn)
            for row in rows:
                # annotate each mutation based on the position
                annotation = genomeannotation._find_genesFromMutationPosition(row['mutation_data']['position'])
                row['mutation_genes'] = annotation['gene']
                row['mutation_locations'] = annotation['location']
                row['mutation_annotations'] = annotation['product']
                # generate a link to ecogene for the genes
                links = []
                row['mutation_links'] = links
                for bnumber in annotation['locus_tag']:
                    if not bnumber:
                        continue
                    ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(biologicalmaterial_id_I,bnumber)
                    if not ecogenes:
                        print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
                        continue
                    # only the first matching gene reference is used for the link
                    links.append(genomeannotation._generate_httplink2gene_ecogene(ecogenes[0]['ecogene_accession_number']))
                data_O.append(row)
        # update rows in the database
        self.update_dataStage01ResequencingLineage(data_O)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: stage01_resequencing_mutations_execute.py Projeto: dmccloskey/SBaaS_resequencing

    def execute_mutateFilteredMutations(self,experiment_id,sample_names_I=None,
                                                 annotation_I='data/U00096.2.gb',
                                                 annotation_ref_I = 'genbank',
                                                 sequence_I='data/U00096.2.fas',
                                                 sequence_ref_I = 'fasta',
                                                 codonUsageTable_I='data/ecoli_codonUsageTable.csv',
                                                IS_sequences_I='data/ecoli_IS_sequences.fasta',
                                                IS_sequences_ref_I = 'fasta',
                                                 translation_table_I='Bacterial',
                                                 ):
        '''Mutate filtered mutations to determine the change in dna, rna, and peptide sequences

        Rows are uploaded once per sample. Synonymous SNPs are extended with
        the codon information and added to
        data_stage01_resequencing_mutationsCodonChanges; every other annotated
        mutation is added through add_dataStage01ResequencingMutationsSeqChanges.
        Mutations for which the annotation returns no gene are skipped.

        INPUT:
        experiment_id = string
        sample_names_I = [] of strings; when empty or None the sample names
                         are queried for the experiment
        annotation_I = string, reference file for the sequencing annotation
        annotation_ref_I = string, reference file data base source
        sequence_I = string, reference file for the sequence
        sequence_ref_I = string, reference file format
        codonUsageTable_I = string, reference file for the codon usage table
        IS_sequences_I = string, reference file for the insertion element sequences
        IS_sequences_ref_I = string, reference file format
        translation_table_I = string, translation table to use when converting from rna to peptide sequence
        '''

        genomeannotation = genome_annotations(annotation_I=annotation_I,annotation_ref_I=annotation_ref_I,
                                              sequence_I=sequence_I,sequence_ref_I=sequence_ref_I,
                                              IS_sequences_I=IS_sequences_I,IS_sequences_ref_I=IS_sequences_ref_I,
                                              codonUsageTable_I=codonUsageTable_I)

        print('Executing annotation of filtered mutations...')
        # annotation entries copied under the same key into every output row
        copied_keys = (
            'dna_sequence_ref','dna_sequence_new',
            'rna_sequence_ref','rna_sequence_new',
            'peptide_sequence_ref','peptide_sequence_new',
            'mutation_class',
            'dna_feature_position','dna_feature_ref','dna_feature_new',
            'rna_feature_position','rna_feature_ref','rna_feature_new',
            'peptide_feature_position','peptide_feature_ref','peptide_feature_new',
            )
        # additional annotation entries copied only for synonymous SNPs
        codon_keys = (
            'codon_triplet_ref','codon_triplet_new','codon_triplet_position',
            'codon_fraction_ref','codon_fraction_new',
            'codon_frequency_ref','codon_frequency_new','codon_frequency_units',
            )
        # query sample names
        # None (the default) and an empty list both mean "query the sample names"
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingMutationsFiltered(experiment_id)
        for sn in sample_names:
            print('analyzing sample_name ' + sn)
            # the output lists are rebuilt for every sample because the upload is per sample
            data_O = []
            data_codon_O = []
            # query mutation data:
            mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsFiltered(experiment_id,sn)
            for mutation in mutations:
                print('analyzing mutations')
                mutation_data = mutation['mutation_data']
                # annotate each mutation based on the position
                annotation = genomeannotation._mutate_peptideFromMutationData(mutation_data,translation_table_I=translation_table_I)
                # skip mutations for which no gene was returned
                if not annotation['gene']:
                    continue
                # 'mutation_data' is stored as returned by the annotation,
                # not as queried from the filtered mutations table
                data_tmp = {
                    'mutation_genes':annotation['gene'],
                    'mutation_locations':annotation['location'],
                    'mutation_data':annotation['mutation_data'],
                    }
                for key in copied_keys:
                    data_tmp[key] = annotation[key]
                data_tmp['experiment_id'] = mutation['experiment_id']
                data_tmp['sample_name'] = mutation['sample_name']
                data_tmp['dna_features_region'] = None
                data_tmp['rna_features_region'] = None
                data_tmp['peptide_features_region'] = None
                # a mutation without an explicit frequency is recorded with frequency 1.0
                data_tmp['mutation_frequency'] = mutation_data.get('frequency',1.0)
                data_tmp['mutation_position'] = mutation_data['position']
                data_tmp['mutation_type'] = mutation_data['type']
                data_tmp['used_'] = True
                data_tmp['comment_'] = None
                #split into different tables depending on whether the peptide sequence changed
                if mutation_data['type']=='SNP' and 'synonymous' in annotation['mutation_class']:
                    for key in codon_keys:
                        data_tmp[key] = annotation[key]
                    data_codon_O.append(data_tmp)
                else:
                    data_O.append(data_tmp)
            #upload the data to the database (each sample)
            if data_O:
                self.add_dataStage01ResequencingMutationsSeqChanges(data_O)
            if data_codon_O:
                self.add_rows_table('data_stage01_resequencing_mutationsCodonChanges',data_codon_O)
Exemplo n.º 3

0

Exibir arquivo

Arquivo: stage01_resequencing_mutations_execute.py Projeto: dmccloskey/SBaaS_resequencing

    def execute_annotateFilteredMutations(self,experiment_id,sample_names_I=None,
                                                 annotation_I='data/U00096.2.gb',
                                                 annotation_ref_I = 'genbank',
                                                 biologicalmaterial_id_I='MG1655',
                                                 ):
        '''Annotate filtered mutations using a reference annotation

        One data_stage01_resequencing_mutationsAnnotated row is added to the
        session for every filtered mutation of the selected samples; the
        session is committed once after all samples have been processed.

        INPUT:
        experiment_id = string
        sample_names_I = [] of strings; when empty or None the sample names
                         are queried for the experiment
        annotation_I = string, reference file for the sequencing annotation
        annotation_ref_I = string, reference file data base source
        biologicalmaterial_id_I = string, biological material used to look up
                                  the ecogene accession number of each locus tag
        '''

        genomeannotation = genome_annotations(annotation_I=annotation_I,annotation_ref_I=annotation_ref_I)

        print('Executing annotation of filtered mutations...')
        # query sample names
        # None (the default) and an empty list both mean "query the sample names"
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingMutationsFiltered(experiment_id)
        for sn in sample_names:
            print('analyzing sample_name ' + sn)
            # query mutation data:
            mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsFiltered(experiment_id,sn)
            for mutation in mutations:
                print('analyzing mutations')
                mutation_data = mutation['mutation_data']
                # annotate each mutation based on the position
                annotation = genomeannotation._find_genesFromMutationPosition(mutation_data['position'])
                mutation_genes = annotation['gene']
                mutation_locations = annotation['location']
                mutation_annotations = annotation['product']
                # generate a link to ecogene for the genes
                mutation_links = []
                for bnumber in annotation['locus_tag']:
                    if not bnumber:
                        continue
                    ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(biologicalmaterial_id_I,bnumber)
                    if not ecogenes:
                        print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
                        continue
                    # only the first matching gene reference is used for the link
                    mutation_links.append(genomeannotation._generate_httplink2gene_ecogene(ecogenes[0]['ecogene_accession_number']))
                # add data to the database
                # NOTE(review): the trailing True/None are presumably the used_ flag
                # and comment_ of the row -- confirm against the model definition
                row = data_stage01_resequencing_mutationsAnnotated(mutation['experiment_id'],
                        mutation['sample_name'],
                        # a mutation without an explicit frequency is recorded with frequency 1.0
                        mutation_data.get('frequency',1.0),
                        mutation_data['type'],
                        mutation_data['position'],
                        mutation_data,
                        mutation_annotations,
                        mutation_genes,
                        mutation_locations,
                        mutation_links,
                        True,
                        None)
                self.session.add(row)
        self.session.commit()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: stage01_resequencing_coverage_execute.py Projeto: dmccloskey/SBaaS_resequencing

    def execute_annotateAmplifications(self,experiment_id_I,sample_names_I=None,ref_genome_I='data/U00096.2.gb',ref_I = 'genbank',biologicalmaterial_id_I='MG1655'):
        '''Annotate the amplification regions of data_stage01_resequencing_amplifications

        For every sample, chromosome and strand, the features found inside
        each amplification region are recorded as one row per feature; all
        rows are added in a single call to
        add_dataStage01ResequencingAmplificationAnnotations.

        INPUT:
        experiment_id_I = experiment whose amplifications are annotated
        sample_names_I = sample names to process; when empty or None the
                         sample names are queried for the experiment
        ref_genome_I = reference annotation handed to genome_annotations
        ref_I = source/format of the reference annotation
        biologicalmaterial_id_I = biological material used to look up the
                                  ecogene accession number of each locus tag
        '''

        genomeannotation = genome_annotations(annotation_I=ref_genome_I,annotation_ref_I=ref_I)

        print('Executing annotateAmplifications...')
        # None (the default) and an empty list both mean "query the sample names"
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingAmplifications(experiment_id_I)
        data_O = []
        for sn in sample_names:
            print('annotating amplifications for sample_name ' + sn)
            # get chromosomes
            chromosomes = self.get_chromosomes_experimentIDAndSampleName_dataStage01ResequencingAmplifications(experiment_id_I,sn)
            for chromosome in chromosomes:
                # get strands
                strands = self.get_strands_experimentIDAndSampleNameAndChromosome_dataStage01ResequencingAmplifications(experiment_id_I,sn,chromosome)
                # remove visualization regions
                strands = [s for s in strands if 'mean' not in s]
                for strand in strands:
                    # get the start and stop of the indices
                    genomic_starts,genomic_stops = self.get_startAndStops_experimentIDAndSampleNameAndChromosomeAndStrand_dataStage01ResequencingAmplifications(experiment_id_I,sn,chromosome,strand)
                    # get the start and stop regions
                    starts,stops = self.get_amplificationRegions_experimentIDAndSampleNameAndChromosomeAndStrand_dataStage01ResequencingAmplifications(experiment_id_I,sn,chromosome,strand)
                    for start_cnt,start in enumerate(starts):
                        # starts and stops are parallel lists: the same index gives the region end
                        stop = stops[start_cnt]
                        # annotate each feature found inside the amplified region
                        annotations = genomeannotation._find_genesInRegion(start,stop)
                        for annotation in annotations:
                            feature_links = []
                            # record the data
                            tmp = {
                                'experiment_id':experiment_id_I,
                                'sample_name':sn,
                                'genome_chromosome':chromosome,
                                'genome_strand':strand,
                                # only the first queried start/stop is recorded for the strand
                                'strand_start':genomic_starts[0],
                                'strand_stop':genomic_stops[0],
                                'amplification_start':start,
                                'amplification_stop':stop,
                                'used_':True,
                                'comment_':None,
                                'feature_genes':annotation['gene'],
                                'feature_locations':annotation['location'],
                                'feature_annotations':annotation['product'],
                                'feature_start':annotation['start'],
                                'feature_stop':annotation['stop'],
                                'feature_types':annotation['type'],
                                'feature_links':feature_links,
                                }
                            # generate a link to ecogene for the genes
                            for bnumber in annotation['locus_tag']:
                                if not bnumber:
                                    continue
                                ecogenes = self.get_ecogeneAccessionNumber_biologicalmaterialIDAndOrderedLocusName_biologicalMaterialGeneReferences(biologicalmaterial_id_I,bnumber)
                                if not ecogenes:
                                    print('no ecogene_accession_number found for ordered_locus_location ' + bnumber)
                                    continue
                                # only the first matching gene reference is used for the link
                                feature_links.append(genomeannotation._generate_httplink2gene_ecogene(ecogenes[0]['ecogene_accession_number']))
                            data_O.append(tmp)
        # add the annotated rows to the database
        self.add_dataStage01ResequencingAmplificationAnnotations(data_O)