示例#1
0
    def __init__(self, line, percent_slop=0, fixed_slop=0):
        '''
        Build the left and right breakpoint intervals for one variant line.

        The line is split with l_bp.split_v.  Each side becomes a
        BreakpointInterval whose probabilities are read from the PRPOS
        (left) and PREND (right) tags through self.floats_from_tag.  A
        RuntimeError raised while building either side is re-raised as
        MissingProbabilitiesException carrying the same message.

        When percent_slop or fixed_slop is positive, both intervals are
        padded by calling pad_slop(percent_slop, fixed_slop) on them.
        '''
        self.l = line

        fields = l_bp.split_v(line)
        (self.sv_type, chr_l, chr_r, self.strands,
         start_l, end_l, start_r, end_r, info) = fields

        try:
            left_probs = self.floats_from_tag(info, 'PRPOS')
            self.left = BreakpointInterval(chr_l, start_l, end_l, left_probs)
            right_probs = self.floats_from_tag(info, 'PREND')
            self.right = BreakpointInterval(chr_r, start_r, end_r, right_probs)
        except RuntimeError as err:
            raise MissingProbabilitiesException(str(err))

        # Padding is requested as soon as either slop setting is positive.
        wants_padding = percent_slop > 0 or fixed_slop > 0
        if wants_padding:
            for interval in (self.left, self.right):
                interval.pad_slop(percent_slop, fixed_slop)
示例#2
0
    def __init__(self, line, percent_slop=0, fixed_slop=0):
        '''
        Parse a variant line into a pair of breakpoint intervals.

        l_bp.split_v supplies the SV type, strands, both chromosomes, the
        start/end of each side and the tag mapping.  The PRPOS tag feeds the
        left BreakpointInterval and the PREND tag feeds the right one, both
        converted by self.floats_from_tag.  Any RuntimeError raised in that
        step is converted to MissingProbabilitiesException with the same
        text.  If either slop argument is positive, pad_slop is applied to
        the left and then the right interval.
        '''
        self.l = line

        parsed = l_bp.split_v(line)
        (self.sv_type, left_chrom, right_chrom, self.strands,
         left_start, left_end, right_start, right_end, tags) = parsed

        try:
            self.left = BreakpointInterval(
                left_chrom, left_start, left_end,
                self.floats_from_tag(tags, 'PRPOS'))
            self.right = BreakpointInterval(
                right_chrom, right_start, right_end,
                self.floats_from_tag(tags, 'PREND'))
        except RuntimeError as problem:
            raise MissingProbabilitiesException(str(problem))

        if not (percent_slop > 0 or fixed_slop > 0):
            # No slop requested: keep the intervals exactly as parsed.
            return

        self.left.pad_slop(percent_slop, fixed_slop)
        self.right.pad_slop(percent_slop, fixed_slop)
示例#3
0
def print_var_line(l):
    '''
    Print a variant line, expanding non-DEL/DUP/INV records into two mates.

    Lines whose ALT column is <DEL>, <DUP> or <INV> are printed exactly as
    received.  Any other line is treated as a breakend and printed as two
    records: the original (first eight columns) with '_1' appended to its ID
    and a MATEID tag appended to INFO, followed by a '_2' record located at
    the right chromosome / END position and flagged SECONDARY.  In the '_2'
    record the position-flavoured and end-flavoured tags (CIPOS/CIEND,
    CIPOS95/CIEND95, PRPOS/PREND) are exchanged.

    Output uses single-argument print(...) calls, which behave the same
    under Python 2 and Python 3.

    Args:
        l: A tab-separated variant line.
    '''
    A = l.rstrip().split('\t')
    if A[4] in ['<DEL>', '<DUP>', '<INV>']:
        print(l)
        return

    # Unpacking also checks that split_v returned the expected eight fields.
    [sv_type, chr_l, chr_r, start_l, end_l, start_r, end_r,
     m] = l_bp.split_v(l)

    POS = m['END']

    # NOTE(review): only two mate orientations are produced here, chosen by
    # whether ']' occurs anywhere in ALT -- confirm this matches the ALT
    # forms emitted by whatever produces the input.
    if ']' in A[4]:
        ALT = '[' + chr_l + ':' + A[1] + '[N'
    else:
        ALT = 'N]' + chr_l + ':' + A[1] + ']'

    INFO = ';'.join([
        'SVTYPE=BND',
        'STRANDS=' + str(m['STRANDS']),
        'SVLEN=0',
        # The mate sits at the other end, so POS- and END-flavoured tags
        # trade places.
        'CIPOS=' + str(m['CIEND']),
        'CIEND=' + str(m['CIPOS']),
        'CIPOS95=' + str(m['CIEND95']),
        'CIEND95=' + str(m['CIPOS95']),
        'IMPRECISE',
        'SECONDARY',
        'SU=' + str(m['SU']),
        'PE=' + str(m['PE']),
        'SR=' + str(m['SR']),
        'PRPOS=' + str(m['PREND']),
        'PREND=' + str(m['PRPOS']),
        'SNAME=' + str(m['SNAME']),
        'EVENT=' + str(m['EVENT']),
        'MATEID=' + A[2] + '_1',
    ])

    O = [chr_r, POS, A[2] + '_2', 'N', ALT, A[5], '.', INFO]

    # Link the primary record to its mate before renaming it.
    A[7] += ';MATEID=' + A[2] + '_2'
    A[2] += '_1'
    print('\t'.join(A[:8]))
    print('\t'.join([str(o) for o in O]))
示例#4
0
    def __init__(self, line, percent_slop=0, fixed_slop=0):
        '''
        Parse a variant line and optionally widen both breakpoint intervals.

        The fields returned by l_bp.split_v are stored on the instance and
        the comma-separated PRPOS / PREND values are converted to lists of
        floats (self.p_l / self.p_r).

        When percent_slop or fixed_slop is positive, each interval is
        extended on both sides by
        int(max(percent_slop * (end - start + 1), fixed_slop)) positions
        carrying a tiny filler probability, trimmed so that it does not
        start below 0, and rescaled so its probabilities sum to 1.
        '''
        self.l = line

        parsed = l_bp.split_v(line)
        (self.sv_type, self.chr_l, self.chr_r, self.strands,
         self.start_l, self.end_l, self.start_r, self.end_r, info) = parsed

        # TODO: report missing PRPOS / PREND tags with a clearer message.
        self.p_l = [float(p) for p in info['PRPOS'].split(',')]
        self.p_r = [float(p) for p in info['PREND'].split(',')]

        if not ((percent_slop > 0) or (fixed_slop > 0)):
            return

        filler = 1e-100  # probability given to every padded position

        def widen(start, end, probs):
            # Pad one interval, clip it at 0 and renormalise its curve.
            pad = int(max(percent_slop * (end - start + 1), fixed_slop))
            start = start - pad
            end = end + pad
            padded = [filler] * pad + probs + [filler] * pad
            if start < 0:
                # Drop the positions that fell before coordinate 0.
                padded = padded[-start:]
                start = 0
            total = sum(padded)
            return start, end, [float(p) / total for p in padded]

        self.start_l, self.end_l, self.p_l = widen(
            self.start_l, self.end_l, self.p_l)
        self.start_r, self.end_r, self.p_r = widen(
            self.start_r, self.end_r, self.p_r)
示例#5
0
    def __init__(self, line, percent_slop=0, fixed_slop=0):
        '''
        Read one variant line and apply optional slop to its breakpoints.

        l_bp.split_v provides the SV type, chromosomes, strands and the
        start/end of the left and right intervals, all kept as attributes.
        PRPOS and PREND are split on commas and stored as float lists in
        self.p_l and self.p_r.

        If either slop argument is positive, both intervals grow by
        int(max(percent_slop * (end - start + 1), fixed_slop)) on each
        side.  The new positions get a tiny filler probability, anything
        that would lie before position 0 is cut off, and each curve is
        divided by its sum.
        '''
        self.l = line

        (self.sv_type, self.chr_l, self.chr_r, self.strands, self.start_l,
         self.end_l, self.start_r, self.end_r, tags) = l_bp.split_v(line)

        # TODO: give an informative error when PRPOS or PREND is absent.
        self.p_l = [float(value) for value in tags['PRPOS'].split(',')]
        self.p_r = [float(value) for value in tags['PREND'].split(',')]

        slop_requested = (percent_slop > 0) or (fixed_slop > 0)
        if slop_requested:
            tiny = 1e-100  # filler probability for the added positions
            sides = ((self.start_l, self.end_l, self.p_l),
                     (self.start_r, self.end_r, self.p_r))

            adjusted = []
            for begin, finish, curve in sides:
                extra = int(
                    max(percent_slop * (finish - begin + 1), fixed_slop))
                begin = begin - extra
                finish = finish + extra
                curve = [tiny] * extra + curve + [tiny] * extra

                # Trim whatever now hangs over the start of the chromosome.
                if begin < 0:
                    curve = curve[-begin:]
                    begin = 0

                # Rescale so the curve sums to 1.
                curve_sum = sum(curve)
                adjusted.append(
                    (begin, finish, [float(p) / curve_sum for p in curve]))

            ((self.start_l, self.end_l, self.p_l),
             (self.start_r, self.end_r, self.p_r)) = adjusted
示例#6
0
def print_var_line(l):
    '''
    Print one variant line, writing breakends as a pair of mate records.

    The line is split on tabs and handled in up to two steps:

    1. An <INV> record whose INFO column contains '--:0' or '++:0' is first
       rewritten as a breakend: zero-count entries are dropped from STRANDS,
       ALT becomes a bracketed reference to the left chromosome and END, and
       INFO is rebuilt (with CIPOS/CIEND, CIPOS95/CIEND95 and PRPOS/PREND
       exchanged and EVENT set to the record ID).
    2. Any record whose ALT is not <DEL>, <DUP> or <INV> -- including one
       rewritten in step 1 -- is printed as two lines: the record itself
       with '_1' appended to its ID and a MATEID tag appended to INFO, then
       a '_2' record at the right chromosome / END flagged SECONDARY.

    Every other record is printed unchanged, truncated to eight columns.

    Output uses single-argument print(...) calls, which behave the same
    under Python 2 and Python 3.

    Args:
        l: A tab-separated variant line.

    Raises:
        ValueError: If an <INV> record selected for rewriting has neither
            '++' nor '--' as its first remaining STRANDS entry.
    '''
    A = l.rstrip().split('\t')

    if A[4] == '<INV>' and ('--:0' in A[7] or '++:0' in A[7]):
        [sv_type,chr_l,chr_r,strands,start_l,end_l,start_r,end_r,m] = \
                l_bp.split_v(l)

        # Filter while iterating instead of deleting from the dict being
        # iterated, which raises RuntimeError on Python 3.
        strand_counts = dict(x.split(':') for x in m['STRANDS'].split(','))
        STRANDS = ','.join(['%s:%s' % (o, n)
                            for o, n in strand_counts.items() if n != '0'])

        if STRANDS[:2] == '++':
            ALT = 'N]' + chr_l + ':' + m['END'] + ']'
        elif STRANDS[:2] == '--':
            ALT = '[' + chr_l + ':' + m['END'] + '[N'
        else:
            # Previously ALT was left unbound here and the function failed
            # later with an opaque UnboundLocalError.
            raise ValueError(
                'cannot rewrite <INV> as BND: first remaining orientation '
                'is neither ++ nor -- (STRANDS=%s)' % m['STRANDS'])

        # NOTE(review): the POS/END-flavoured tags are exchanged here and
        # again for the mate below -- confirm this is intended for the
        # primary record.
        info_fields = [
            'SVTYPE=BND',
            'STRANDS=' + str(STRANDS),
            'CIPOS=' + str(m['CIEND']),
            'CIEND=' + str(m['CIPOS']),
            'CIPOS95=' + str(m['CIEND95']),
            'CIEND95=' + str(m['CIPOS95']),
            'IMPRECISE',
            'SU=' + str(m['SU']),
            'PE=' + str(m['PE']),
            'SR=' + str(m['SR']),
            'PRPOS=' + str(m['PREND']),
            'PREND=' + str(m['PRPOS']),
            'SNAME=' + str(m['SNAME']),
            'EVENT=' + str(A[2]),
        ]

        A[4] = ALT
        A[7] = ';'.join(info_fields)

        # reconstruct the line so it is re-parsed as a breakend below
        l = '\t'.join(A)

    if A[4] in ['<DEL>', '<DUP>', '<INV>']:
        print('\t'.join(A[:8]))
        return

    [sv_type,chr_l,chr_r,strands,start_l,end_l,start_r,end_r,m] = \
            l_bp.split_v(l)

    POS = m['END']

    # Choose the mate's ALT from the bracket at the start or end of ALT.
    ALT = ''
    if A[4][0] == '[':
        ALT = '[' + chr_l + ':' + A[1] + '[N'
    elif A[4][0] == ']':
        ALT = 'N[' + chr_l + ':' + A[1] + '['
    elif A[4][-1] == '[':
        ALT = ']' + chr_l + ':' + A[1] + ']N'
    elif A[4][-1] == ']':
        ALT = 'N]' + chr_l + ':' + A[1] + ']'

    INFO = ';'.join([
        'SVTYPE=BND',
        'STRANDS=' + str(m['STRANDS']),
        # The mate sits at the other end, so POS- and END-flavoured tags
        # trade places.
        'CIPOS=' + str(m['CIEND']),
        'CIEND=' + str(m['CIPOS']),
        'CIPOS95=' + str(m['CIEND95']),
        'CIEND95=' + str(m['CIPOS95']),
        'IMPRECISE',
        'SECONDARY',
        'SU=' + str(m['SU']),
        'PE=' + str(m['PE']),
        'SR=' + str(m['SR']),
        'PRPOS=' + str(m['PREND']),
        'PREND=' + str(m['PRPOS']),
        'SNAME=' + str(m['SNAME']),
        'EVENT=' + str(A[2]),
        'MATEID=' + A[2] + '_1',
    ])

    O = [chr_r, POS, A[2] + '_2', 'N', ALT, A[5], '.', INFO]

    # Link the primary record to its mate before renaming it.
    A[7] += ';MATEID=' + A[2] + '_2'
    A[2] += '_1'
    print('\t'.join(A[:8]))
    print('\t'.join([str(o) for o in O]))
示例#7
0
def print_var_line(l):
    '''
    Print a variant line, splitting breakend records into two mate lines.

    A line whose ALT column is <DEL>, <DUP> or <INV> is printed as given.
    Any other line is printed twice: first its own first eight columns with
    '_1' appended to the ID and ';MATEID=<id>_2' appended to INFO, then a
    '_2' record placed at the right chromosome and END position, flagged
    SECONDARY, with CIPOS/CIEND, CIPOS95/CIEND95 and PRPOS/PREND exchanged.

    Output uses single-argument print(...) calls, which behave the same
    under Python 2 and Python 3.

    Args:
        l: A tab-separated variant line.
    '''
    A = l.rstrip().split('\t')
    if A[4] in ['<DEL>', '<DUP>', '<INV>']:
        print(l)
        return

    # Unpacking also checks that split_v returned the expected eight fields.
    [sv_type,chr_l,chr_r,start_l,end_l,start_r,end_r,m] = l_bp.split_v(l)

    POS = m['END']

    # NOTE(review): only two mate orientations are produced here, chosen by
    # whether ']' occurs anywhere in ALT -- confirm this matches the ALT
    # forms emitted by whatever produces the input.
    if ']' in A[4]:
        ALT = '[' + chr_l + ':' + A[1] + '[N'
    else:
        ALT = 'N]' + chr_l + ':' + A[1] + ']'

    INFO = ';'.join(['SVTYPE=BND',
                     'STRANDS='  + str(m['STRANDS']),
                     'SVLEN=0',
                     # POS- and END-flavoured tags trade places for the mate.
                     'CIPOS='    + str(m['CIEND']),
                     'CIEND='    + str(m['CIPOS']),
                     'CIPOS95='  + str(m['CIEND95']),
                     'CIEND95='  + str(m['CIPOS95']),
                     'IMPRECISE',
                     'SECONDARY',
                     'SU='       + str(m['SU']),
                     'PE='       + str(m['PE']),
                     'SR='       + str(m['SR']),
                     'PRPOS='    + str(m['PREND']),
                     'PREND='    + str(m['PRPOS']),
                     'SNAME='    + str(m['SNAME']),
                     'EVENT='    + str(m['EVENT']),
                     'MATEID='   + A[2] + '_1'])

    O = [chr_r, POS, A[2] + '_2', 'N', ALT, A[5], '.', INFO]

    # Link the primary record to its mate before renaming it.
    A[7] += ';MATEID=' + A[2] + '_2'
    A[2] += '_1'
    print('\t'.join(A[:8]))
    print('\t'.join([str(o) for o in O]))
示例#8
0
def print_var_line(l):
    """
    Print one variant line, writing breakends as a pair of mate records.

    The line is split on tabs and handled in up to two steps:

    1. An <INV> record whose INFO column contains "--:0" or "++:0" is first
       rewritten as a breakend: zero-count entries are dropped from STRANDS,
       ALT becomes a bracketed reference to the left chromosome and END, and
       INFO is rebuilt (with CIPOS/CIEND, CIPOS95/CIEND95 and PRPOS/PREND
       exchanged and EVENT set to the record ID).
    2. Any record whose ALT is not <DEL>, <DUP> or <INV> -- including one
       rewritten in step 1 -- is printed as two lines: the record itself
       with "_1" appended to its ID and a MATEID tag appended to INFO, then
       a "_2" record at the right chromosome / END flagged SECONDARY.

    Every other record is printed unchanged, truncated to eight columns.

    Output uses single-argument print(...) calls, which behave the same
    under Python 2 and Python 3.

    Args:
        l: A tab-separated variant line.

    Raises:
        ValueError: If an <INV> record selected for rewriting has neither
            "++" nor "--" as its first remaining STRANDS entry.
    """
    A = l.rstrip().split("\t")

    if A[4] == "<INV>" and ("--:0" in A[7] or "++:0" in A[7]):
        [sv_type, chr_l, chr_r, strands, start_l, end_l, start_r, end_r, m] = l_bp.split_v(l)

        # Filter while iterating instead of deleting from the dict being
        # iterated, which raises RuntimeError on Python 3.
        strand_counts = dict(x.split(":") for x in m["STRANDS"].split(","))
        STRANDS = ",".join(
            ["%s:%s" % (o, n) for o, n in strand_counts.items() if n != "0"]
        )

        if STRANDS[:2] == "++":
            ALT = "N]" + chr_l + ":" + m["END"] + "]"
        elif STRANDS[:2] == "--":
            ALT = "[" + chr_l + ":" + m["END"] + "[N"
        else:
            # Previously ALT was left unbound here and the function failed
            # later with an opaque UnboundLocalError.
            raise ValueError(
                "cannot rewrite <INV> as BND: first remaining orientation "
                "is neither ++ nor -- (STRANDS=%s)" % m["STRANDS"]
            )

        # NOTE(review): the POS/END-flavoured tags are exchanged here and
        # again for the mate below -- confirm this is intended for the
        # primary record.
        info_fields = [
            "SVTYPE=BND",
            "STRANDS=" + str(STRANDS),
            "CIPOS=" + str(m["CIEND"]),
            "CIEND=" + str(m["CIPOS"]),
            "CIPOS95=" + str(m["CIEND95"]),
            "CIEND95=" + str(m["CIPOS95"]),
            "IMPRECISE",
            "SU=" + str(m["SU"]),
            "PE=" + str(m["PE"]),
            "SR=" + str(m["SR"]),
            "PRPOS=" + str(m["PREND"]),
            "PREND=" + str(m["PRPOS"]),
            "SNAME=" + str(m["SNAME"]),
            "EVENT=" + str(A[2]),
        ]

        A[4] = ALT
        A[7] = ";".join(info_fields)

        # reconstruct the line so it is re-parsed as a breakend below
        l = "\t".join(A)

    if A[4] in ["<DEL>", "<DUP>", "<INV>"]:
        print("\t".join(A[:8]))
        return

    [sv_type, chr_l, chr_r, strands, start_l, end_l, start_r, end_r, m] = l_bp.split_v(l)

    POS = m["END"]

    # Choose the mate's ALT from the bracket at the start or end of ALT.
    ALT = ""
    if A[4][0] == "[":
        ALT = "[" + chr_l + ":" + A[1] + "[N"
    elif A[4][0] == "]":
        ALT = "N[" + chr_l + ":" + A[1] + "["
    elif A[4][-1] == "[":
        ALT = "]" + chr_l + ":" + A[1] + "]N"
    elif A[4][-1] == "]":
        ALT = "N]" + chr_l + ":" + A[1] + "]"

    INFO = ";".join(
        [
            "SVTYPE=BND",
            "STRANDS=" + str(m["STRANDS"]),
            # The mate sits at the other end, so POS- and END-flavoured
            # tags trade places.
            "CIPOS=" + str(m["CIEND"]),
            "CIEND=" + str(m["CIPOS"]),
            "CIPOS95=" + str(m["CIEND95"]),
            "CIEND95=" + str(m["CIPOS95"]),
            "IMPRECISE",
            "SECONDARY",
            "SU=" + str(m["SU"]),
            "PE=" + str(m["PE"]),
            "SR=" + str(m["SR"]),
            "PRPOS=" + str(m["PREND"]),
            "PREND=" + str(m["PRPOS"]),
            "SNAME=" + str(m["SNAME"]),
            "EVENT=" + str(A[2]),
            "MATEID=" + A[2] + "_1",
        ]
    )

    O = [chr_r, POS, A[2] + "_2", "N", ALT, A[5], ".", INFO]

    # Link the primary record to its mate before renaming it.
    A[7] += ";MATEID=" + A[2] + "_2"
    A[2] += "_1"
    print("\t".join(A[:8]))
    print("\t".join([str(o) for o in O]))