Пример #1
0
def hashAlignment():
    """Print one tab-separated summary line per alignment block.

    Reads the alignment file named by the module-level ``ifile``. A blank
    line closes the current block and opens a new one. Inside a block the
    first line starting with ``'s '`` is stored as the reference alignment,
    every later ``'s '`` line as a target alignment, and any other line is
    passed to ``addData``.

    Columns of each printed line: reference chromosome, reference start,
    reference end, ``<target chrom>_<target start>_<target end>_<target
    strand>``, identity, reference strand.

    The last block object is always skipped, because a fresh (empty) block
    is opened after every blank line.
    NOTE(review): this assumes the file ends with a blank line; otherwise
    the final real block is not reported -- confirm against the inputs.
    """
    current = classMAF.MAF(1)
    blocks = [current]
    expect_ref = True

    # The context manager closes the input even if a MAF method raises.
    with open(ifile, 'r') as handle:
        for raw in handle:
            line = raw.strip()
            if line == '':
                # Blocks are numbered 1, 2, 3, ... in file order.
                current = classMAF.MAF(len(blocks) + 1)
                blocks.append(current)
                expect_ref = True
            elif not line.startswith('s '):
                current.addData(line)
            elif expect_ref:
                current.addRefAlign(line)
                expect_ref = False
            else:
                current.addtargetAlign(line)

    for ob in blocks[:-1]:
        fields = [
            str(ob.RefChro()),
            str(ob.RefStart()),
            str(ob.RefEnd()),
            '_'.join([
                str(ob.TargetChro()),
                str(ob.TargetStart()),
                str(ob.TargetEnd()),
                str(ob.TargetStrand()),
            ]),
            str(ob.identity()),
            str(ob.RefStrand()),
        ]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(fields))
Пример #2
0
def hashAlignment(chrom, cds_coords, start):
    """Print the query/target alignment columns that fall inside a CDS.

    Reads the alignment file named by the module-level ``align`` and groups
    its lines into ``classMAF.MAF`` blocks (blank line = block separator,
    first ``'s '`` line = reference, later ``'s '`` lines = targets, other
    lines go to ``addData``). The last block object is always skipped
    because a fresh empty block is opened after every blank line.

    For every block whose reference chromosome equals ``chrom``, column
    ``i`` of ``AlignHash()`` is kept when ``RefStart + i`` is in
    ``cds_coords``; element ``[0]`` of the column is appended to the query
    string and element ``[1]`` to the target string. Both strings are
    prefixed with the module-level ``candidate`` name (``_Lj`` / ``_Mt``).

    The two strings are printed only if at least one 99-dash separator was
    inserted.

    Args:
        chrom: reference chromosome name to select blocks by.
        cds_coords: container of reference positions; only ``in`` is used.
        start: unused; kept so existing callers keep working.

    Exits via ``sys.exit`` when a selected block has a reference strand
    other than ``'+'``.
    """
    current = classMAF.MAF(1)
    objs_list = [current]
    expect_ref = True

    # The context manager closes the input even if a MAF method raises.
    with open(align, 'r') as handle:
        for raw in handle:
            line = raw.strip()
            if line == '':
                current = classMAF.MAF(len(objs_list) + 1)
                objs_list.append(current)
                expect_ref = True
            elif not line.startswith('s '):
                current.addData(line)
            elif expect_ref:
                current.addRefAlign(line)
                expect_ref = False
            else:
                current.addtargetAlign(line)

    # Collect pieces and join once instead of growing strings with +=.
    query_parts = [candidate + '_Lj\t']
    target_parts = [candidate + '_Mt\t']
    seen_blocks = {}
    last_block_len = 0
    print_flag = False

    for ob in objs_list[:-1]:
        if ob.RefChro() != chrom:
            continue
        temp = ob.AlignHash()
        if ob.RefStrand() != '+':
            print('Error at line')
            # Report the offending block; the original printed the last
            # parsed (placeholder) block instead.
            print(str(ob))
            sys.exit('Refernce strand is - ')

        ref_start = int(ob.RefStart())
        for i in range(len(temp)):
            if (ref_start + i) not in cds_coords:
                continue
            # The separator is emitted on the kept column that follows the
            # first kept column of the second and each later block.
            # NOTE(review): that places the dashes one column into the new
            # block -- confirm this is the intended position.
            if len(seen_blocks) > 1 and last_block_len != len(seen_blocks):
                query_parts.append('-' * 99)
                target_parts.append('-' * 99)
                print_flag = True
            query_parts.append(temp[i][0])
            target_parts.append(temp[i][1])
            last_block_len = len(seen_blocks)
            seen_blocks[ob] = ''

    if print_flag:
        print(''.join(query_parts))
        print(''.join(target_parts))
Пример #3
0
def hashAlignment(chrom, cds_coords, o2):
    """Map reference positions to alignment columns and log mismatches.

    Reads the alignment file named by the module-level ``align`` and groups
    its lines into ``classMAF.MAF`` blocks (blank line = block separator,
    first ``'s '`` line = reference, later ``'s '`` lines = targets, other
    lines go to ``addData``). The last block object is always skipped
    because a fresh empty block is opened after every blank line.

    For every block whose reference chromosome equals ``chrom``, the column
    stored under key ``i + 1`` of ``AlignHash()`` is recorded at position
    ``RefStart + i + 1``. When the two elements of a column differ, a line
    ``<chrom>\\t<position>\\t<col[0]>\\t<col[1]>\\t<True|False>`` is written
    to ``o2``; the last field says whether the position is in
    ``cds_coords``.

    Args:
        chrom: reference chromosome name to select blocks by.
        cds_coords: container of reference positions; only ``in`` is used.
        o2: writable object; only ``write`` is called on it.

    Returns:
        dict mapping reference position (int) to the alignment column.

    Exits via ``sys.exit`` when a selected block has a reference strand
    other than ``'+'``.
    """
    current = classMAF.MAF(1)
    objs_list = [current]
    expect_ref = True

    # The context manager closes the input even if a MAF method raises.
    with open(align, 'r') as handle:
        for raw in handle:
            line = raw.strip()
            if line == '':
                current = classMAF.MAF(len(objs_list) + 1)
                objs_list.append(current)
                expect_ref = True
            elif not line.startswith('s '):
                current.addData(line)
            elif expect_ref:
                current.addRefAlign(line)
                expect_ref = False
            else:
                current.addtargetAlign(line)

    align_coords = {}
    for ob in objs_list[:-1]:
        if ob.RefChro() != chrom:
            continue
        temp = ob.AlignHash()
        if ob.RefStrand() != '+':
            print('Error at line')
            # Report the offending block; the original printed the last
            # parsed (placeholder) block instead.
            print(str(ob))
            sys.exit('Refernce strand is - ')

        ref_start = int(ob.RefStart())
        for i in range(len(temp)):
            # Columns are looked up with keys 1..len(temp).
            pos = ref_start + i + 1
            column = temp[i + 1]
            align_coords[pos] = column

            if column[0] != column[1]:
                in_cds = 'True' if pos in cds_coords else 'False'
                o2.write('\t'.join([ob.RefChro(), str(pos), column[0],
                                    column[1], in_cds]) + '\n')

    return align_coords
Пример #4
0
def hashAlignment(chrom):
    """Return the reference positions on ``chrom`` covered by an alignment.

    Reads the alignment file named by the module-level ``align`` and groups
    its lines into ``classMAF.MAF`` blocks (blank line = block separator,
    first ``'s '`` line = reference, later ``'s '`` lines = targets, other
    lines go to ``addData``). The last block object is always skipped
    because a fresh empty block is opened after every blank line.

    Args:
        chrom: reference chromosome name to select blocks by.

    Returns:
        dict whose keys are ``RefStart + i`` for every ``i`` in
        ``range(len(AlignHash()))`` of each selected block; all values are
        the empty string (the dict is used as a set).

    Exits via ``sys.exit`` when a selected block has a reference strand
    other than ``'+'``.
    """
    current = classMAF.MAF(1)
    objs_list = [current]
    expect_ref = True

    # The context manager closes the input even if a MAF method raises.
    with open(align, 'r') as handle:
        for raw in handle:
            line = raw.strip()
            if line == '':
                current = classMAF.MAF(len(objs_list) + 1)
                objs_list.append(current)
                expect_ref = True
            elif not line.startswith('s '):
                current.addData(line)
            elif expect_ref:
                current.addRefAlign(line)
                expect_ref = False
            else:
                current.addtargetAlign(line)

    align_hash = {}
    for ob in objs_list[:-1]:
        if ob.RefChro() != chrom:
            continue
        temp = ob.AlignHash()
        if ob.RefStrand() != '+':
            print('Error at line')
            # Report the offending block; the original printed the last
            # parsed (placeholder) block instead.
            print(str(ob))
            sys.exit('Refernce strand is - ')

        ref_start = int(ob.RefStart())
        for i in range(len(temp)):
            align_hash[ref_start + i] = ''

    return align_hash
Пример #5
0
def parseMAF():
    """Write a non-redundant subset of the alignment blocks per chromosome.

    Module-level inputs: ``ifile`` (input path), ``chro`` (iterable of
    reference chromosome names) and ``bed`` (when ``bed == True`` a BED-like
    file is written as well).

    Outputs: ``ifile + '.nonRed'`` receives ``Data()`` of every kept block,
    one per line; ``ifile + '.bed'`` (optional) receives reference
    chromosome, start, end, ``<target chrom>_<target start>_<target end>``,
    identity and reference strand, tab-separated. Progress lines are
    printed for every block examined.

    Selection, per chromosome: blocks are visited in ascending reference
    start (blocks sharing a start collapse to the last one read). Each block
    walks its reference positions ``RefStart..RefEnd`` inclusive. At a
    position already claimed by a block with a strictly larger ``RefSize()``
    the current block is dropped and its walk stops; otherwise the current
    block claims the position and the previous claimant is dropped.

    The input is parsed once, before the chromosome loop. Re-reading it per
    chromosome (as this function used to) appended duplicate blocks and fed
    the first block's lines into a stale object, because the selection loop
    reused the parser's variable name.

    The last parsed block object is always skipped because a fresh empty
    block is opened after every blank line.
    """
    # Keep the `== True` comparison: `bed` is a module-level value of unknown
    # type, so plain truthiness could change when the BED file is produced.
    write_bed = (bed == True)

    with open(ifile + '.nonRed', 'w') as out:
        bed_out = open(ifile + '.bed', 'w') if write_bed else None
        try:
            current = classMAF.MAF(1)
            blocks = [current]
            expect_ref = True
            with open(ifile, 'r') as handle:
                for raw in handle:
                    line = raw.strip()
                    if line == '':
                        current = classMAF.MAF(len(blocks) + 1)
                        blocks.append(current)
                        expect_ref = True
                    elif not line.startswith('s '):
                        current.addData(line)
                    elif expect_ref:
                        current.addRefAlign(line)
                        expect_ref = False
                    else:
                        current.addtargetAlign(line)

            for chrom in chro:
                by_start = {}
                for ob in blocks[:-1]:
                    if ob.RefChro() == chrom:
                        by_start[int(ob.RefStart())] = ob

                owner_at = {}  # reference position -> block claiming it
                kept = {}      # blocks currently selected (dict used as set)
                for ref_start in sorted(by_start):
                    block = by_start[ref_start]
                    for pos in range(ref_start, int(block.RefEnd()) + 1):
                        owner = owner_at.get(pos)
                        if owner is not None:
                            if owner.RefSize() > block.RefSize():
                                # A larger block already covers this
                                # position. Positions claimed before this
                                # point stay recorded in owner_at.
                                kept.pop(block, None)
                                break
                            kept.pop(owner, None)
                        kept[block] = ''
                        owner_at[pos] = block

                    # Progress output; `pos` is the last position visited.
                    print(len(kept))
                    print('%s %s' % (chrom, '{:9,.0f}'.format(pos)))
                    print('%s %s' % (block.TargetChro(),
                                     block.TargetStart()))

                for ob in kept:
                    out.write(ob.Data() + '\n')

                    if bed_out is not None:
                        target_id = '_'.join([ob.TargetChro(),
                                              str(ob.TargetStart()),
                                              str(ob.TargetEnd())])
                        bed_out.write('\t'.join([ob.RefChro(),
                                                 str(ob.RefStart()),
                                                 str(ob.RefEnd()),
                                                 target_id,
                                                 str(ob.identity()),
                                                 str(ob.RefStrand())]) + '\n')
        finally:
            if bed_out is not None:
                bed_out.close()