def fix_fusion(ref_f, genome_fa, out_file, no_fix, secondary_flag=0, denovo_flag=0):
    """
    Realign fusion junctions and write the annotated circular RNA records.

    The fusions are obtained from fix_bed(), which is pointed at the
    temporary file 'annotated_fusion.txt.tmp'. One tab-separated record is
    written per fusion, and the temporary file is removed once all records
    have been written.

    Arguments:
        ref_f: annotation file, handed to parse_ref().
        genome_fa: genome FASTA file, handed to check_fasta().
        out_file: path of the main output file.
        no_fix: forwarded unchanged to fix_bed().
        secondary_flag: when true, fusions whose name starts with
            'secondary' are written to 'low_conf_<out_file>'; when false
            such fusions are skipped.
        denovo_flag: forwarded to fix_bed(); when true, a record whose
            coordinates and block layout were already written is skipped.
    """
    print('Start to fix fusion junctions...')
    fa = check_fasta(genome_fa)
    ref = parse_ref(ref_f, 2)
    annotated_fusion_f = 'annotated_fusion.txt.tmp'
    fusions, fusion_names, fixed_flag = fix_bed(annotated_fusion_f, ref, fa,
                                                no_fix, denovo_flag)
    total = 0
    annotations = set()
    fixed_fusion_f = out_file
    secondary_f = None
    if secondary_flag:
        secondary_f = open('low_conf_%s' % out_file, 'w')
    # try/finally guarantees the low-confidence file is closed even when
    # opening the main output or processing a record raises.
    try:
        with open(fixed_fusion_f, 'w') as outf:
            for fus in fusion_names:
                reads = str(fusions[fus])
                fixed = fixed_flag[fus]
                if fixed > 0:
                    total += 1
                fixed = str(fixed)
                name = 'circular_RNA/' + reads
                if fus.startswith('secondary'):
                    # Without a low-confidence output file there is nowhere
                    # to put this record, so it is dropped instead of
                    # failing on an unbound file handle.
                    if secondary_f is not None:
                        _, loc, strand, left_info, right_info = fus.split('|')
                        secondary_f.write('\t'.join([loc, name, fixed, strand,
                                                     left_info, right_info]))
                        secondary_f.write('\n')
                    continue
                gene, iso, chrom, strand, index = fus.split()
                starts, ends = ref['\t'.join([gene, iso, chrom, strand])]
                exon_num = len(starts)
                intron_num = exon_num - 1
                if ',' in index:  # back spliced exons
                    s, e = [int(x) for x in index.split(',')]
                    # Exon numbers are 1-based; on the minus strand they are
                    # counted from the opposite end of the exon list.
                    if strand == '+':
                        index_info = ','.join(str(x + 1)
                                              for x in range(s, e + 1))
                    else:
                        index_info = ','.join(str(exon_num - x)
                                              for x in range(s, e + 1))
                    start = str(starts[s])
                    end = str(ends[e])
                    length = str(e - s + 1)
                    sizes, offsets = generate_bed(int(start),
                                                  starts[s:(e + 1)],
                                                  ends[s:(e + 1)])
                    annotation_info = '\t'.join([chrom, start, end, sizes,
                                                 offsets])
                    # remove circular RNA info duplications in denovo mode
                    if denovo_flag and annotation_info in annotations:
                        continue
                    # Flanking introns; 'None' when the circle starts at the
                    # first exon or ends at the last one.
                    if s == 0:
                        left_intron = 'None'
                    else:
                        left_intron = '%s:%d-%d' % (chrom, ends[s - 1],
                                                    starts[s])
                    if e == len(ends) - 1:
                        right_intron = 'None'
                    else:
                        right_intron = '%s:%d-%d' % (chrom, ends[e],
                                                     starts[e + 1])
                    intron = '|'.join([left_intron, right_intron])
                    bed = '\t'.join([chrom, start, end, name, fixed, strand,
                                     start, start, '0,0,0', length, sizes,
                                     offsets, reads, 'circRNA', gene, iso,
                                     index_info, intron])
                else:  # ciRNAs
                    index, start, end = index.split('|')
                    size = str(int(end) - int(start))
                    annotation_info = '\t'.join([chrom, start, end, size,
                                                 '0'])
                    # remove circular RNA info duplications in denovo mode
                    if denovo_flag and annotation_info in annotations:
                        continue
                    index = int(index)
                    if strand == '+':
                        index_info = str(index + 1)
                    else:
                        index_info = str(intron_num - index)
                    intron = '%s:%d-%d' % (chrom, ends[index],
                                           starts[index + 1])
                    bed = '\t'.join([chrom, start, end, name, fixed, strand,
                                     start, start, '0,0,0', '1', size, '0',
                                     reads, 'ciRNA', gene, iso, index_info,
                                     intron])
                if denovo_flag:  # in denovo mode
                    annotations.add(annotation_info)
                outf.write(bed + '\n')
    finally:
        if secondary_f is not None:
            secondary_f.close()
    # Reuse the path variable so the removed file is always the one handed
    # to fix_bed().
    os.remove(annotated_fusion_f)
    print('Fixed %d fusion junctions!' % total)
def fix_fusion(ref_f, genome_fa, out_dir, no_fix, denovo_flag=0):
    """
    Realign fusion junctions and write the annotated circular RNA records.

    The fusions are obtained from fix_bed(), which is pointed at
    '<out_dir>/annotated_fusion.txt'. One tab-separated record per fusion
    is written to '<out_dir>/circ_fusion.txt'.

    Arguments:
        ref_f: annotation file, handed to parse_ref().
        genome_fa: genome FASTA file, handed to check_fasta().
        out_dir: directory holding the input and output files named above.
        no_fix: forwarded unchanged to fix_bed().
        denovo_flag: forwarded to fix_bed(); when true, a record whose
            coordinates and block layout were already written is skipped.
    """
    print('Start to fix fusion junctions...')
    fa = check_fasta(genome_fa)
    ref = parse_ref(ref_f, 2)
    annotated_fusion_f = '%s/annotated_fusion.txt' % out_dir
    fusions, fusion_names, fixed_flag = fix_bed(annotated_fusion_f, ref, fa,
                                                no_fix, denovo_flag)
    total = 0
    annotations = set()
    fixed_fusion_f = '%s/circ_fusion.txt' % out_dir
    with open(fixed_fusion_f, 'w') as outf:
        for fus in fusion_names:
            reads = str(fusions[fus])
            fixed = fixed_flag[fus]
            if fixed > 0:
                total += 1
            fixed = str(fixed)
            name = 'circular_RNA/' + reads
            gene, iso, chrom, strand, index = fus.split()
            starts, ends = ref['\t'.join([gene, iso, chrom, strand])]
            exon_num = len(starts)
            intron_num = exon_num - 1
            if ',' in index:  # back spliced exons
                s, e = [int(x) for x in index.split(',')]
                # range() instead of xrange(): xrange does not exist on
                # Python 3, and the spans here are only a few exons long.
                # Exon numbers are 1-based; on the minus strand they are
                # counted from the opposite end of the exon list.
                if strand == '+':
                    index_info = ','.join(str(x + 1)
                                          for x in range(s, e + 1))
                else:
                    index_info = ','.join(str(exon_num - x)
                                          for x in range(s, e + 1))
                start = str(starts[s])
                end = str(ends[e])
                length = str(e - s + 1)
                sizes, offsets = generate_bed(int(start),
                                              starts[s:(e + 1)],
                                              ends[s:(e + 1)])
                annotation_info = '\t'.join([chrom, start, end, sizes,
                                             offsets])
                # remove circular RNA info duplications in denovo mode
                if denovo_flag and annotation_info in annotations:
                    continue
                # Flanking introns; 'None' when the circle starts at the
                # first exon or ends at the last one.
                if s == 0:
                    left_intron = 'None'
                else:
                    left_intron = '%s:%d-%d' % (chrom, ends[s - 1],
                                                starts[s])
                if e == len(ends) - 1:
                    right_intron = 'None'
                else:
                    right_intron = '%s:%d-%d' % (chrom, ends[e],
                                                 starts[e + 1])
                intron = '|'.join([left_intron, right_intron])
                bed = '\t'.join([chrom, start, end, name, fixed, strand,
                                 start, start, '0,0,0', length, sizes,
                                 offsets, reads, 'circRNA', gene, iso,
                                 index_info, intron])
            else:  # ciRNAs
                index, start, end = index.split('|')
                size = str(int(end) - int(start))
                annotation_info = '\t'.join([chrom, start, end, size, '0'])
                # remove circular RNA info duplications in denovo mode
                if denovo_flag and annotation_info in annotations:
                    continue
                index = int(index)
                if strand == '+':
                    index_info = str(index + 1)
                else:
                    index_info = str(intron_num - index)
                intron = '%s:%d-%d' % (chrom, ends[index], starts[index + 1])
                bed = '\t'.join([chrom, start, end, name, fixed, strand,
                                 start, start, '0,0,0', '1', size, '0',
                                 reads, 'ciRNA', gene, iso, index_info,
                                 intron])
            if denovo_flag:  # in denovo mode
                annotations.add(annotation_info)
            outf.write(bed + '\n')
    print('Fixed %d fusion junctions!' % total)