def _render_other_transcripts(self, txs, transcriptIndicesToSkip, variant_type,
                              ref_allele, alt_allele, start, end,
                              is_longer_field=False):
    """Render the transcripts that were not chosen as one '|'-delimited string.

    Each rendered entry has the form
        <gene>_<transcript_id>_<variant_classification>_<protein_change>
    and, when is_longer_field is set and a non-empty transcript change is
    available, a trailing _<transcript_change>.  Leading and trailing
    underscores are stripped from every entry.

    Note:  Other areas of Oncotator (e.g. Generic_GeneProteinPositionDatasource)
        depend on this format.  Changing it here may introduce bugs in other
        pieces of code.

    Any transcript whose variant classification is IGR is left out.

    txs -- the list of transcripts to consider.
    transcriptIndicesToSkip -- indices into txs of the transcripts already in
        use (i.e. not an "other transcript").  Usually the canonical transcript
        or whichever one was chosen by tx_mode.
    variant_type, ref_allele, alt_allele, start, end -- passed through
        unchanged to the variant classifier.
    is_longer_field -- when True, append the transcript change to each entry.

    Returns the rendered entries joined with '|' (an empty string when no
    transcript is rendered).
    """
    classifier = VariantClassifier()
    rendered = []
    for idx, tx in enumerate(txs):
        # Transcripts already in use are not "other" transcripts.
        if idx in transcriptIndicesToSkip:
            continue

        classification = classifier.variant_classify(
            tx=tx, variant_type=variant_type, ref_allele=ref_allele,
            alt_allele=alt_allele, start=start, end=end)
        if classification.get_vc() == VariantClassification.IGR:
            continue

        fields = [
            tx.get_gene(),
            tx.get_transcript_id(),
            classification.get_vc(),
            classifier.generate_protein_change_from_vc(classification),
        ]
        if is_longer_field:
            tx_change = classifier.generate_transcript_change_from_tx(
                tx, variant_type, classification, start, end,
                ref_allele, alt_allele)
            if tx_change is not None and tx_change != "":
                fields.append(tx_change)

        # An empty field at either end would leave a dangling separator.
        rendered.append('_'.join(fields).strip('_'))

    return '|'.join(rendered)
def annotate_mutation(self, mutation):
    """Add transcript-derived annotations to a mutation and return it.

    Transcripts are fetched with get_transcripts_by_pos() for the mutation's
    chr / start / end.  If none come back, the mutation is annotated as IGR
    and 'other_transcripts' reports the nearest genes (upstream and
    downstream).  Otherwise one transcript is picked by _choose_transcript()
    using the current tx_mode, the transcript-level annotations are filled in
    from it, and the remaining transcripts are summarised in
    'other_transcripts'.

    In both cases the HGVS annotations are added afterwards.

    mutation -- object providing chr, start, end, ref_allele, alt_allele and
        addAnnotations().  start and end must be convertible with int().

    Returns the mutation that was passed in, after annotations were added.
    """
    # NOTE(review): a second definition of annotate_mutation follows this one
    # in the file; if both sit in the same class, the later one (which also
    # sets a 'strand' annotation) replaces this one -- confirm which is live.
    contig = mutation.chr
    start = int(mutation.start)
    end = int(mutation.end)
    txs = self.get_transcripts_by_pos(contig, start, end)

    annotations = self._create_blank_set_of_annotations()
    make = self._create_basic_annotation

    ref_allele = mutation.ref_allele
    alt_allele = mutation.alt_allele
    annotations['variant_type'] = Annotation(
        value=TranscriptProviderUtils.infer_variant_type(ref_allele, alt_allele),
        datasourceName=self.title)
    variant_type = annotations['variant_type'].value

    chosen_tx = None
    if len(txs) == 0:
        # No transcripts means IGR.  Most annotations keep their blank values.
        annotations['variant_classification'] = make(VariantClassification.IGR)
        nearest = self._get_nearest_genes(contig, start, end)
        upstream = nearest[0]
        downstream = nearest[1]
        annotations['other_transcripts'] = make(
            value='%s (%s upstream) : %s (%s downstream)' % (
                upstream[0], upstream[1], downstream[0], downstream[1]))
        annotations['gene'] = make('Unknown')
        annotations['gene_id'] = make('0')
        annotations['genome_change'] = make(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                ref_allele, alt_allele, variant_type))
    else:
        # Pick the transcript to report, according to the current tx_mode.
        chosen_tx = self._choose_transcript(
            txs, self.get_tx_mode(), variant_type,
            ref_allele, alt_allele, start, end)
        classifier = VariantClassifier()

        annotations['annotation_transcript'] = make(chosen_tx.get_transcript_id())
        annotations['genome_change'] = make(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                ref_allele, alt_allele, variant_type))
        annotations['transcript_position'] = make(
            TranscriptProviderUtils.render_transcript_position(
                start, end, chosen_tx))
        annotations['transcript_id'] = make(chosen_tx.get_transcript_id())

        # NOTE(review): the classifier is given mutation.start / mutation.end
        # as-is rather than the int-converted start / end used elsewhere in
        # this method -- confirm that is intended.
        classification = classifier.variant_classify(
            tx=chosen_tx, variant_type=variant_type,
            ref_allele=ref_allele, alt_allele=alt_allele,
            start=mutation.start, end=mutation.end)

        # The reported exon number is the classifier's exon index plus one.
        annotations['transcript_exon'] = make(
            str(classification.get_exon_i() + 1))
        annotations['variant_classification'] = make(classification.get_vc())
        annotations['secondary_variant_classification'] = make(
            classification.get_secondary_vc())
        annotations['protein_change'] = make(
            classifier.generate_protein_change_from_vc(classification))
        annotations['codon_change'] = make(
            classifier.generate_codon_change_from_vc(
                chosen_tx, start, end, classification))
        annotations['transcript_change'] = make(
            classifier.generate_transcript_change_from_tx(
                chosen_tx, variant_type, classification, start, end,
                ref_allele, alt_allele))
        annotations['transcript_strand'] = make(chosen_tx.get_strand())
        annotations['gene'] = make(chosen_tx.get_gene())
        annotations['gene_type'] = make(chosen_tx.get_gene_type())

        # Annotation name -> key handed to _retrieve_gencode_tag_value().
        for annotation_key, tag_key in (
                ('gencode_transcript_tags', 'tag'),
                ('gencode_transcript_status', 'transcript_status'),
                ('havana_transcript', 'havana_transcript'),
                ('ccds_id', 'ccdsid'),
                ('gencode_transcript_type', 'transcript_type'),
                ('gencode_transcript_name', 'transcript_name')):
            annotations[annotation_key] = make(
                self._retrieve_gencode_tag_value(chosen_tx, tag_key))

        # Every transcript except the chosen one goes into other_transcripts.
        annotations['other_transcripts'] = make(
            self._render_other_transcripts(
                txs, [txs.index(chosen_tx)], variant_type,
                ref_allele, alt_allele, mutation.start, mutation.end))
        # 'gene_id' is not assigned in this branch.

    mutation.addAnnotations(annotations)

    # HGVS annotations are added in both cases; chosen_tx is None for IGR.
    hgvs_annotations = self._create_hgvs_annotation_dict(mutation, chosen_tx)
    mutation.addAnnotations(hgvs_annotations)

    return mutation
def annotate_mutation(self, mutation):
    """Add transcript-derived annotations to a mutation and return it.

    Transcripts are fetched with get_transcripts_by_pos() for the mutation's
    chr / start / end.  If none come back, the mutation is annotated as IGR
    and 'other_transcripts' reports the nearest genes (upstream and
    downstream).  Otherwise one transcript is picked by _choose_transcript()
    using the current tx_mode, the transcript-level annotations (including
    'strand') are filled in from it, and the remaining transcripts are
    summarised in 'other_transcripts'.

    In both cases the HGVS annotations are added afterwards.

    mutation -- object providing chr, start, end, ref_allele, alt_allele and
        addAnnotations().  start and end must be convertible with int().

    Returns the mutation that was passed in, after annotations were added.
    """
    # NOTE(review): an earlier definition of annotate_mutation precedes this
    # one in the file; if both sit in the same class, this later one wins.
    # This version additionally sets the 'strand' annotation.
    contig = mutation.chr
    start = int(mutation.start)
    end = int(mutation.end)
    txs = self.get_transcripts_by_pos(contig, start, end)

    annotations = self._create_blank_set_of_annotations()
    make = self._create_basic_annotation

    ref_allele = mutation.ref_allele
    alt_allele = mutation.alt_allele
    annotations['variant_type'] = Annotation(
        value=TranscriptProviderUtils.infer_variant_type(ref_allele, alt_allele),
        datasourceName=self.title)
    variant_type = annotations['variant_type'].value

    chosen_tx = None
    if len(txs) == 0:
        # No transcripts means IGR.  Most annotations keep their blank values.
        annotations['variant_classification'] = make(VariantClassification.IGR)
        nearest = self._get_nearest_genes(contig, start, end)
        upstream = nearest[0]
        downstream = nearest[1]
        annotations['other_transcripts'] = make(
            value='%s (%s upstream) : %s (%s downstream)' % (
                upstream[0], upstream[1], downstream[0], downstream[1]))
        annotations['gene'] = make('Unknown')
        annotations['gene_id'] = make('0')
        annotations['genome_change'] = make(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                ref_allele, alt_allele, variant_type))
    else:
        # Pick the transcript to report, according to the current tx_mode.
        chosen_tx = self._choose_transcript(
            txs, self.get_tx_mode(), variant_type,
            ref_allele, alt_allele, start, end)
        classifier = VariantClassifier()

        annotations['annotation_transcript'] = make(chosen_tx.get_transcript_id())
        annotations['genome_change'] = make(
            TranscriptProviderUtils.determine_genome_change(
                mutation.chr, mutation.start, mutation.end,
                ref_allele, alt_allele, variant_type))
        # 'strand' and 'transcript_strand' (below) both carry the chosen
        # transcript's strand.
        annotations['strand'] = make(chosen_tx.get_strand())
        annotations['transcript_position'] = make(
            TranscriptProviderUtils.render_transcript_position(
                start, end, chosen_tx))
        annotations['transcript_id'] = make(chosen_tx.get_transcript_id())

        # NOTE(review): the classifier is given mutation.start / mutation.end
        # as-is rather than the int-converted start / end used elsewhere in
        # this method -- confirm that is intended.
        classification = classifier.variant_classify(
            tx=chosen_tx, variant_type=variant_type,
            ref_allele=ref_allele, alt_allele=alt_allele,
            start=mutation.start, end=mutation.end)

        # The reported exon number is the classifier's exon index plus one.
        annotations['transcript_exon'] = make(
            str(classification.get_exon_i() + 1))
        annotations['variant_classification'] = make(classification.get_vc())
        annotations['secondary_variant_classification'] = make(
            classification.get_secondary_vc())
        annotations['protein_change'] = make(
            classifier.generate_protein_change_from_vc(classification))
        annotations['codon_change'] = make(
            classifier.generate_codon_change_from_vc(
                chosen_tx, start, end, classification))
        annotations['transcript_change'] = make(
            classifier.generate_transcript_change_from_tx(
                chosen_tx, variant_type, classification, start, end,
                ref_allele, alt_allele))
        annotations['transcript_strand'] = make(chosen_tx.get_strand())
        annotations['gene'] = make(chosen_tx.get_gene())
        annotations['gene_type'] = make(chosen_tx.get_gene_type())

        # Annotation name -> key handed to _retrieve_gencode_tag_value().
        for annotation_key, tag_key in (
                ('gencode_transcript_tags', 'tag'),
                ('gencode_transcript_status', 'transcript_status'),
                ('havana_transcript', 'havana_transcript'),
                ('ccds_id', 'ccdsid'),
                ('gencode_transcript_type', 'transcript_type'),
                ('gencode_transcript_name', 'transcript_name')):
            annotations[annotation_key] = make(
                self._retrieve_gencode_tag_value(chosen_tx, tag_key))

        # Every transcript except the chosen one goes into other_transcripts.
        annotations['other_transcripts'] = make(
            self._render_other_transcripts(
                txs, [txs.index(chosen_tx)], variant_type,
                ref_allele, alt_allele, mutation.start, mutation.end))
        # 'gene_id' is not assigned in this branch.

    mutation.addAnnotations(annotations)

    # HGVS annotations are added in both cases; chosen_tx is None for IGR.
    hgvs_annotations = self._create_hgvs_annotation_dict(mutation, chosen_tx)
    mutation.addAnnotations(hgvs_annotations)

    return mutation