def main(argv=None):
    """Write one BED-like line per distinct terminal exon of the input genes.

    The first positional argument is the path to a gene file, which is
    parsed with ``reader_gene(..., 'bed')``.  For every record the exon at
    index ``-1`` is taken on the "+" strand and the exon at index ``0``
    otherwise (presumably the 3'-most exon, assuming ``get_exons()`` is
    ordered by genomic coordinate -- TODO confirm against the gene class).
    Identical ``(chrom, start, end, strand)`` tuples are reported once.

    Output is tab-separated with the columns: chrom, start, end, 1-based
    running index in sorted order, exon length (``end - start``), strand.
    It goes to ``options.outfile`` when set, otherwise to ``sys.stdout``.

    Args:
        argv: Argument list forwarded to ``process_command_line``.

    Raises:
        AssertionError: If the gene file does not exist.
    """
    options, args = process_command_line(argv)
    f_gene = args[0]
    # NOTE: under ``python -O`` this assert is stripped and a missing file
    # surfaces from open() below instead.
    assert os.path.exists(f_gene)

    outhandle = sys.stdout
    if options.outfile:
        outhandle = open(options.outfile, "w")

    try:
        # A set collapses terminal exons shared by several transcripts.
        llexons = set()
        with open(f_gene) as genehandle:
            for gr in reader_gene(genehandle, 'bed'):
                chrom = gr.get_chrom()
                exons = gr.get_exons()
                strand = gr.get_strand()
                exon = exons[-1] if strand == "+" else exons[0]
                llexons.add((chrom, exon[0], exon[1], strand))

        # Sorting makes both the output order and the running index
        # deterministic.
        for i, (chrom, s, e, strand) in enumerate(sorted(llexons), 1):
            outhandle.write("\t".join(map(str, [
                chrom, s, e, i, e - s, strand])) + "\n")
    finally:
        # Only close handles this function opened; never close stdout.
        if options.outfile:
            outhandle.close()
def read_gene(fh):
    """Load every gene from *fh* into a dict keyed by gene name.

    Records are produced by ``reader_gene(fh)`` and indexed by
    ``get_name()``.  Names are expected to be unique; if a name repeats,
    the record read last replaces the earlier one.
    """
    return {record.get_name(): record for record in reader_gene(fh)}
def main(argv=None):
    """Write a window around the transcript end of every input gene.

    The first positional argument is the path to a gene file, which is
    parsed with ``reader_gene(..., 'bed')``.  For each record the anchor
    position ``tes`` is ``get_txEnd()`` on the "+" strand and
    ``get_txStart()`` otherwise.  The window extends ``options.len_up``
    upstream and ``options.len_dn`` downstream of the anchor in transcript
    orientation, i.e. the two lengths swap sides on the non-"+" strand.

    The window start is clamped to 0.  When ``options.genome`` names a
    tab-separated file of ``chrom<TAB>size`` lines, the window end is also
    clamped to the size of the chromosome, if that chromosome is listed.
    Windows that end up empty (``start >= end``) are skipped.

    Output is tab-separated with the columns: chrom, start, end, score,
    anchor position, strand.  It goes to ``options.outfile`` when set,
    otherwise to ``sys.stdout``.

    Args:
        argv: Argument list forwarded to ``process_command_line``.

    Raises:
        AssertionError: If the gene file does not exist.
    """
    options, args = process_command_line(argv)
    f_gene = args[0]
    # NOTE: under ``python -O`` this assert is stripped and a missing file
    # surfaces from open() below instead.
    assert os.path.exists(f_gene)

    outhandle = sys.stdout
    if options.outfile:
        outhandle = open(options.outfile, "w")

    try:
        # Chromosome name -> size; stays empty when no genome file is given,
        # in which case no upper clamping happens.
        cinfo = {}
        if options.genome:
            with open(options.genome) as genomehandle:
                for line in genomehandle:
                    line = line.strip()
                    if not line:
                        # A blank (e.g. trailing) line would otherwise
                        # break the two-field unpacking below.
                        continue
                    chrom, size = line.split("\t")[:2]
                    cinfo[chrom] = int(size)

        with open(f_gene) as genehandle:
            for gr in reader_gene(genehandle, 'bed'):
                chrom = gr.get_chrom()
                strand = gr.get_strand()
                if strand == "+":
                    tes = gr.get_txEnd()
                    s = tes - options.len_up
                    e = tes + options.len_dn
                else:
                    # Upstream lies at higher coordinates here, so the
                    # two lengths swap sides.
                    tes = gr.get_txStart()
                    s = tes - options.len_dn
                    e = tes + options.len_up

                if chrom in cinfo:
                    e = min(e, cinfo[chrom])
                s = max(s, 0)
                if s >= e:
                    # Clamping left nothing to report.
                    continue

                outhandle.write("\t".join(map(str, [
                    chrom, s, e, gr.get_score(), tes, strand])) + "\n")
    finally:
        # Only close handles this function opened; never close stdout.
        if options.outfile:
            outhandle.close()