Exemplo n.º 1
0
def main(argv):
    """Write 5' and 3' flank records for every CDS parent of a GFF file.

    Reads the tab-separated file named by ``argv[0]``, gathers the start and
    end coordinates (columns 4 and 5) of every record whose type column is
    ``CDS``, grouped by the first entry of its ``Parent`` attribute, and
    writes two tab-separated lines per parent to standard output:

    * ``five_prime_flank`` from ``max(1, lowest - flanksize)`` to ``lowest``
    * ``three_prime_flank`` from ``highest`` to ``highest + flanksize``

    where ``lowest``/``highest`` are the smallest/largest CDS coordinates
    collected for that parent. Parents are emitted in sorted order.

    Args:
        argv: Argument sequence. ``argv[0]`` is the path of the GFF file;
            the optional ``argv[1]`` is the flank size. When it is missing
            or is not an integer, the flank size falls back to 300.

    Raises:
        KeyError: If a CDS record has no ``Parent`` attribute.
        ValueError: If a CDS record has non-integer coordinates.
    """
    out = sys.stdout

    # Only "argument absent" and "argument not an integer" select the
    # default; anything else (e.g. KeyboardInterrupt) must propagate.
    try:
        flanksize = int(argv[1])
    except (IndexError, ValueError):
        flanksize = 300

    cds_d = {}
    with open(argv[0]) as handle:
        for raw in handle:
            raw = raw.strip()
            # Blank lines and '#' directive/comment lines carry no record.
            if not raw or raw.startswith('#'):
                continue
            fields = raw.split('\t')
            # The attribute column is index 8; shorter lines are not usable.
            if len(fields) < 9 or fields[2] != 'CDS':
                continue
            # NOTE(review): get_attributes is defined elsewhere; it is
            # indexed by attribute name here, so presumably it returns a
            # mapping of the column-9 key/value pairs -- confirm.
            attributes = get_attributes(fields[8])
            id_ = attributes['Parent'].split(',')[0]
            # Extend in place rather than rebuilding the list per record.
            cds_d.setdefault(id_, []).extend(
                (int(fields[3]), int(fields[4])))

    for id_, coordinates in sorted(cds_d.items()):
        p5, p3 = min(coordinates), max(coordinates)
        # The contig name is built from the third character of the parent
        # ID; assumes IDs shaped like 'AT1G01010.1' -- TODO confirm.
        chrom = 'Chr%c' % id_[2]
        parent = 'Parent=%s' % id_

        # NOTE(review): the strand column is never read, so "five"/"three"
        # prime here mean low/high coordinate, not transcript orientation.
        five_prime = [chrom, 'TAIR10', 'five_prime_flank',
                      str(max(1, p5 - flanksize)), str(p5),
                      '.', '.', '.', parent]
        out.write('%s\n' % '\t'.join(five_prime))

        three_prime = [chrom, 'TAIR10', 'three_prime_flank',
                       str(p3), str(p3 + flanksize),
                       '.', '.', '.', parent]
        out.write('%s\n' % '\t'.join(three_prime))
def process_intersected_gff(open_fn, transcript_info, out=sys.stdout):
    """Convert intersected feature lines into a comma-separated table.

    A header row is written first. Then, for each tab-separated line of
    ``open_fn``, one row is written to ``out``. Fields are read by position:
    0, 3-4, 6 and 8 describe the first feature (reported in the ``*_tRNA``
    columns), 11, 12-13, 15 and 17 describe the second feature, and 18 is
    the overlap length.
    NOTE(review): this layout looks like two 9-column GFF records followed
    by an overlap count (e.g. ``bedtools intersect -wo``) -- confirm.

    Args:
        open_fn: Iterable of text lines (for example an open file).
        transcript_info: Mapping looked up with the second feature's
            ``Parent`` attribute minus its last two characters; a truthy
            value is reported as mobile, a falsy one as not mobile.
        out: Object with a ``write`` method that receives the table.

    Returns:
        None.
    """
    column_names = (
        'Contig',
        'AGI', 'tRNA_located_in', 'Start_feature', 'End_feature', 'Strand',
        'AGI_tRNA', 'Start_tRNA', 'End_tRNA', 'Strand_tRNA', 'Length_tRNA',
        'Overlap[nt]', '%covered_tRNA', 'Transcript_found',
        'Transcript_isMobile', 'Same_strand',
    )
    out.write('%s\n' % ','.join(column_names))

    for record in open_fn:
        cols = record.strip().split('\t')

        # First feature of the pair.
        contig = cols[0]
        start, end = [int(value) for value in cols[3:5]]
        strand = cols[6]
        trna_attrs = get_attributes(cols[8])

        # Second feature of the pair, then the overlap length.
        region_type = cols[11]
        region_start, region_end = [int(value) for value in cols[12:14]]
        region_strand = cols[15]
        region_attrs = get_attributes(cols[17])
        overlap = int(cols[18])

        # Coordinates are treated as inclusive on both ends.
        trna_length = end - start + 1

        agi = region_attrs['Parent']
        # Drop the last two characters before the lookup
        # (presumably an isoform suffix such as '.1' -- confirm).
        lookup_key = agi[:-2]
        if lookup_key in transcript_info:
            transcript_found = 'yes'
            transcript_is_mobile = 'yes' if transcript_info[lookup_key] else 'no'
        else:
            transcript_found, transcript_is_mobile = 'no', 'unknown'

        same_strand = 'yes' if strand == region_strand else 'no'

        trna_id = trna_attrs['ID']
        # Percentage of the first feature covered, rounded half up.
        percent_covered = int(float(overlap) / trna_length * 100 + 0.5)

        values = [
            contig,
            agi, region_type, region_start, region_end, region_strand,
            trna_id, start, end, strand, trna_length,
            overlap, percent_covered,
            transcript_found, transcript_is_mobile, same_strand,
        ]
        out.write('%s\n' % ','.join([str(value) for value in values]))

    return None