示例#1
0
def parse_hgvs_muts_file(hgvs_muts_file, raise_exception=True):
    """Parse a text file that holds one HGVS name per line.

    Args:
        hgvs_muts_file: Path of the file to read. Only the trailing newline
            of each line is removed before the text is handed to
            ``hgvs.HGVSName``.
        raise_exception: When true (the default), any error raised while
            parsing a line propagates to the caller. When false, lines
            rejected with ``hgvs.InvalidHGVSName`` are reported on stderr
            and skipped.

    Returns:
        A list of ``hgvs.HGVSName`` objects, in file order.
    """
    hgvs_muts = []
    # A context manager guarantees the handle is closed, including when a
    # parse error propagates out of the loop.
    with open(hgvs_muts_file) as handle:
        for line in handle:
            m = line.rstrip('\n')
            if raise_exception:
                hgvs_muts.append(hgvs.HGVSName(m))
                continue
            try:
                hgvs_muts.append(hgvs.HGVSName(m))
            except hgvs.InvalidHGVSName:
                sys.stderr.write("Invalid HGVS found: {}\n".format(m))
    return hgvs_muts
示例#2
0
def convertGenomicPosToTranscriptPos(genomicPos, chrom, genome, transcript):
    """
    Translate a genomic position into the matching transcript position.

    genomicPos: position on the genome
    chrom: chromosome name in the format "chrN"
    genome: SequenceFileDB for the genome
    transcript: pyhgvs transcript object

    Returns the transcript position at genomicPos as a string.
    """
    # The transcript position does not depend on the alleles, so "T" and "A"
    # are passed purely as placeholder ref/alt values.
    formatted = pyhgvs.format_hgvs_name(
        chrom, genomicPos, "T", "A", genome, transcript)
    # Re-parse the formatted name to pull out just the cDNA start coordinate.
    parsed = pyhgvs.HGVSName(str(formatted))
    return str(parsed.cdna_start)
示例#3
0
 def test_apply_hgvs(self):
     """Apply c.3908dupT to the BRCA1 transcript and check residue 1303."""
     # p.Leu1303Phefs == c.3908dupT
     transcript_id = "ENST00000357654"
     records = SeqIO.parse("tests/test_data/BRCA1_transcripts.fa", "fasta")
     by_id = SeqIO.to_dict(records)
     dup_variant = hgvs.HGVSName("ENST00000357654:c.3908dupT")

     # Before the edit, protein index 1302 (0-based) is leucine.
     reference_protein = by_id[transcript_id].seq.translate()
     assert_equals("L", reference_protein[1302])

     # After the edit, the duplicated T sits at cDNA indices 3907-3908.
     edited_cdna = verify.apply_hgvs(by_id[transcript_id].seq, dup_variant)
     assert_equals("TT", edited_cdna[3907:3909])

     # The same protein index now reads phenylalanine.
     edited_protein = edited_cdna.translate()
     assert_equals("F", edited_protein[1302])
示例#4
0
def parseVar(variantHGVS):
    '''
    Parse a variant's HGVS name and summarise it as a dictionary.

    The result has the keys "typeHGVS" (HGVS kind), "varRef" (ref allele),
    "varAlt" (alt allele) and "varType" (mutation type).
    '''
    parsed = hgvs.HGVSName(str(variantHGVS))

    summary = {}
    summary["typeHGVS"] = parsed.kind
    summary["varRef"] = parsed.ref_allele
    summary["varAlt"] = parsed.alt_allele
    summary["varType"] = parsed.mutation_type
    return summary
示例#5
0
 def get_hgvs_mutations(self, transcript_id, ignore_introns=True):
     """Collect the parsed HGVS cDNA changes that belong to one transcript.

     Args:
         transcript_id: Transcript identifier to match. A parsed name
             matches when its transcript equals this value either as-is
             or after its last ``.``-separated component is dropped.
         ignore_introns: When true (the default), rows whose
             ``Variant_Classification`` is ``"Intron"`` are excluded.

     Returns:
         A list of ``hgvs.HGVSName`` objects for the matching rows of
         ``self.df.HGVS_coding_DNA_change``. Rows holding the literal
         ``"Exception_encountered"`` are skipped; rows that fail to parse
         or compare are reported on stderr and skipped.
     """
     df = self.df
     if ignore_introns:
         df = df[df.Variant_Classification != "Intron"]

     hgvs_muts = []
     for r in df.HGVS_coding_DNA_change:
         if r == "Exception_encountered":
             continue
         try:
             h = hgvs.HGVSName(r)
             # remove .version postfix
             unversioned = ".".join(h.transcript.split(".")[:-1])
             if h.transcript == transcript_id or unversioned == transcript_id:
                 hgvs_muts.append(h)
         except Exception:
             # Best-effort: report the bad row and keep going. Catching
             # Exception (not a bare except) lets KeyboardInterrupt and
             # SystemExit propagate.
             sys.stderr.write("Invalid HGVS found: {}\n".format(r))
     return hgvs_muts
def fix_mutalyzer(row, coord_hash):
    """Return a genomic variant string for a row, with a manual fallback.

    Args:
        row: Mapping-like record (indexed with the keys
            ``'Chromosomal Variant'`` and ``'Input Variant'``).
        coord_hash: Nested lookup indexed as ``coord_hash[transcript][pos]``
            that yields a 4-item ``(chrom, g_coord, c_nuc, strand)`` entry.

    Returns:
        * ``<Chromosomal Variant>___?`` when the row already carries a
          non-null chromosomal variant.
        * ``XXX:g.<g_coord><ref>><alt>___<chrom>`` when the input variant is
          a single cDNA position with zero offset and a positive coordinate
          that can be found in ``coord_hash``.
        * ``'___'`` in every other case (unparsable name, explicitly
          excluded variants, unsupported shape, or failed lookup).
    """
    if not pandas.isnull(row['Chromosomal Variant']):
        return row['Chromosomal Variant'] + '___?'

    # else mutalyzer cannot handle
    try:
        hgvs_name = hgvs.HGVSName(row['Input Variant'])
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return '___'

    nm = hgvs_name.transcript
    if not nm:
        # Fall back to the text before the first ':' of the raw input.
        nm = row['Input Variant'].split(':')[0]
    start = hgvs_name.cdna_start.coord
    start_offset = hgvs_name.cdna_start.offset
    end = hgvs_name.cdna_end.coord

    # Variants that are explicitly excluded from the manual conversion.
    if row['Input Variant'] in ('NM_004985.4:c.*1638A>G',
                                'NM_001363.4:c.*6G>A',
                                'NM_004985.4:c.*2591A>G',
                                'NM_004985.4:c.*2888A>G',
                                'NM_004985.4:c.*3377C>T'):
        return '___'

    # Only single-position variants with no offset and a positive
    # coordinate are converted by hand.
    if not (start == end and start_offset == 0 and start > 0):
        return '___'

    try:
        chrom, g_coord, _, strand = coord_hash[nm][start]
        if strand == '+':
            ref, alt = hgvs_name.ref_allele, hgvs_name.alt_allele
        else:
            # Any strand other than '+' has its alleles mapped through `comp`.
            ref, alt = comp[hgvs_name.ref_allele], comp[
                hgvs_name.alt_allele]
        return 'XXX:g.%s%s>%s' % (g_coord, ref, alt) + '___' + chrom
    except Exception:
        # Missing transcript/position or an allele `comp` cannot map.
        return '___'
示例#7
0
# Example 1: parse an HGVS name into genomic coordinates and alleles.
# `genome` and `get_transcript` must already be defined by the caller.
chrom, offset, ref, alt = hgvs.parse_hgvs_name('NM_000352.3:c.215A>G',
                                               genome,
                                               get_transcript=get_transcript)
print(chrom, offset, ref, alt)
# Expected result, in VCF style: ('chr11', 17496508, 'T', 'C')
# Note that because the transcript is on the negative strand, the alleles
# are reverse complemented during conversion (A>G becomes T>C).

# Example 2: format an HGVS name from a VCF-style variant.
chrom, offset, ref, alt = ('chr11', 17496508, 'T', 'C')
transcript = get_transcript('NM_000352.3')
hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
print(hgvs_name)
# Expected result: 'NM_000352.3(ABCC8):c.215A>G'

# Example 3: parse a name into an HGVSName object and inspect its fields.
hgvs_name = hgvs.HGVSName('NM_000352.3:c.215-10A>G')
# The fields of the HGVS name are available as attributes; for this input
# they are expected to be:
#
# hgvs_name.transcript = 'NM_000352.3'
# hgvs_name.kind = 'c'
# hgvs_name.mutation_type = '>'
# hgvs_name.cdna_start = hgvs.CDNACoord(215, -10)
# hgvs_name.cdna_end = hgvs.CDNACoord(215, -10)
# hgvs_name.ref_allele = 'A'
# hgvs_name.alt_allele = 'G'

# Print all of the fields listed above as a single tuple.
print((hgvs_name.transcript, hgvs_name.kind, hgvs_name.mutation_type,
       hgvs_name.cdna_start, hgvs_name.cdna_end, hgvs_name.ref_allele,
       hgvs_name.alt_allele))
示例#8
0
def alter_coords_hgvs_sequential(h1, h2):
    """Return h2 with its cDNA coordinates adjusted for h1 applied first.

    The result is always a fresh ``hgvs.HGVSName`` parsed from ``h2.name``.
    Its coordinates are left alone when h1 is a substitution (">") or lies
    entirely after h2; when h1 lies entirely before h2 both ``cdna_start``
    and ``cdna_end`` are shifted by the length change h1 introduces.

    Raises ``Exception`` when either name is not of kind "c", when h1 has an
    unsupported mutation type, or when h1 and h2 overlap.
    """
    if not (h1.kind == "c" and h2.kind == "c"):
        raise Exception("Only cDNA mutations have been implemented")

    edit = h1.mutation_type
    if edit not in (">", "del", "ins", "dup", "delins"):
        raise Exception("Unexpected mutation_type {}".format(edit))

    # A substitution does not move anything; neither does an edit that
    # starts after h2 ends.
    if edit == ">" or h1.cdna_start.coord > h2.cdna_end.coord:
        return hgvs.HGVSName(h2.name)

    # Anything that is not strictly before h2 overlaps it.
    if not h1.cdna_end.coord < h2.cdna_start.coord:
        if edit == "del":
            raise Exception(
                "Overlapping del not implemented.\nhgvs1: {}\nhgvs2: {}".
                format(h1, h2))
        overlap_messages = {
            "ins": "Overlapping ins not implemented",
            "dup": "Overlapping dup not implemented",
            "delins": "Overlapping delins not implemented",
        }
        raise Exception(overlap_messages[edit])

    # h1 lies entirely before h2: shift both ends by the net length change.
    shifted = hgvs.HGVSName(h2.name)
    if edit == "del":
        delta = -len(h1.ref_allele)
    elif edit == "ins":
        delta = len(h1.alt_allele)
    else:
        # "dup" and "delins" both shift by len(alt) - len(ref).
        delta = len(h1.alt_allele) - len(h1.ref_allele)

    # NOTE(review): only `coord` is passed on, so every other CDNACoord
    # field of the shifted coordinates takes the constructor default.
    shifted.cdna_start = hgvs.CDNACoord(coord=shifted.cdna_start.coord + delta)
    shifted.cdna_end = hgvs.CDNACoord(coord=shifted.cdna_end.coord + delta)
    return shifted
示例#9
0
# Open the reference genome given on the command line (args.ref).
genome = SequenceFileDB(args.ref)

# Read the transcripts file given on the command line (args.transcript)
# into `transcripts`, which is later queried by transcript name.
with open(args.transcript) as infile:
    transcripts = hgvs.utils.read_transcripts(infile)

# Provide a callback for fetching a transcript by its name.
def get_transcript(name):
    """Look up *name* in the module-level ``transcripts`` collection."""
    match = transcripts.get(name)
    return match

# Convert every HGVS name in args.infile (one per line) into a tab-separated
# row in args.outfile. Lines that cannot be converted are written as
# "Error:" rows and echoed to stdout so that one bad name does not abort
# the whole run.
with open(args.infile) as rd, open(args.outfile, 'w') as wr:
    wr.write('\t'.join(['#Chr', 'Start', 'End', 'Ref', 'Alt', 'Group']) + '\n')
    for line in rd:
        hgvs_text = line.strip()
        try:
            a = hgvs.HGVSName(hgvs_text)
            # Every variant is assigned to chromosome X before conversion.
            a.chrom = 'X'
            outlist = [str(x) for x in hgvs.get_vcf_allele(a, genome)]
            outlist.append('Cases')
            wr.write('\t'.join(outlist) + '\n')
        except Exception:
            # Catch Exception (not a bare except) so Ctrl-C still stops the
            # script; sys.exc_info()[0] is the exception class, as before.
            error_type = sys.exc_info()[0]
            wr.write('Error:%s %s \n' % (hgvs_text, error_type))
            # print() with a single argument behaves the same on Python 2
            # and Python 3, unlike the former print statements.
            print(hgvs_text)
            print(error_type)