# Start collecting `junctions`, a set of coordinate tuples derived from the
# rows of the GTF file `gtffn`.
#
# Names read from the enclosing scope: feature_coords_fn, fastafn, gtffn,
# get_transcripts, get_introns, offset, use_chromnames.
# Names bound here: feature_coords, f, junctions.
#
# NOTE(review): the indentation of this fragment was reconstructed; confirm
# that the get_introns test belongs at loop level rather than inside the
# feature_coords_fn branch.

# The loop below treats feature_coords_fn as optional, so the table is only
# loaded when a file name was actually supplied.
if feature_coords_fn:
    # NOTE(review): `load` is presumably pickle.load -- only use trusted files.
    # The handle is closed as soon as the table has been read.
    with open(feature_coords_fn) as feature_coords_fh:
        feature_coords = load(feature_coords_fh)
print >> stderr, "Note: using FASTA file at %s" % fastafn

# NOTE(review): `f` is not closed within this fragment; make sure it is
# closed once the loop has finished.
f = open(gtffn)
junctions = set()
for row in f:  # for each row in the input GTF
    # Strip and split once per row instead of once per field access.
    gtf_line = row.strip()
    gtf_fields = gtf_line.split('\t')
    # rows of these feature types are skipped
    if gtf_fields[2] in ['start_codon', 'stop_codon', 'CDS']:
        continue
    if feature_coords_fn:
        # we have the resource to check whether this is a terminal exon,
        # so first and last exons can be dropped
        strand = gtf_fields[6]
        feature, blank = parse_lines([gtf_line], strand, get_transcripts)
        is_terminal_feature, up_exon, down_exon = terminal_exon(
            list(feature)[0], feature_coords)
        if is_terminal_feature:
            # terminal exons are ignored
            continue
        # not a terminal exon, so there must be neighbouring exons
        up_exon_gtf = region_to_GTF(
            up_exon, feature_coords[up_exon], get_transcripts)
        down_exon_gtf = region_to_GTF(
            down_exon, feature_coords[down_exon], get_transcripts)
    if get_introns:
        # NOTE(review): up_exon_gtf / down_exon_gtf are only assigned when
        # feature_coords_fn is set -- confirm callers never request introns
        # without it.
        up_exon_five, up_exon_three = GTFrow_to_5p3pcoords(
            up_exon_gtf, offset, use_chromnames)
        five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
        down_exon_five, down_exon_three = GTFrow_to_5p3pcoords(
            down_exon_gtf, offset, use_chromnames)
        junctions.add((up_exon_five, three, five, down_exon_three))
o = open(ofn, 'w') o_before = open(ofn + ".before", 'w') o_after = open(ofn + ".after", 'w') print >> stderr, "Extending exonic regions by %s bases. Specify your own with option '-e' (see help for details)" % extend exons = list() for row in open(exonfn): h1 = row.strip().split('\t')[1] for h in h1.split(','): exons.append(h) gex_coords = load(open(gexons_pic)) tabixfile = pysam.Tabixfile(tabixfn) for exon in exons: is_terminal_exon, prev_exon, next_exon = terminal_exon(exon, gex_coords) if is_terminal_exon: continue # get the coordinates coords = augment_region(gex_coords[exon], extend) coords_before = augment_region(gex_coords[prev_exon], extend) coords_after = augment_region(gex_coords[next_exon], extend) # whether to use 'chr' or not if not use_chrom_names: coords = coords[3:] coords_before = coords_before[3:] coords_after = coords_after[3:] # get the GTF rows that satisfy this rows = [
# Open the three output files, read the exon identifiers listed in `exonfn`,
# and for every non-terminal exon fetch the "exon" rows of the tabix-indexed
# GTF that fall inside its (extended) region.
#
# Names read from the enclosing scope: ofn, extend, exonfn, gexons_pic,
# tabixfn, use_chrom_names.
# Names bound here: o, o_before, o_after, exons, gex_coords, tabixfile and,
# per loop iteration, coords, coords_before, coords_after, rows.
#
# NOTE(review): the indentation of this fragment was reconstructed, and the
# loop body most likely continues past the last statement shown here.

# NOTE(review): these handles are not closed within this fragment; make sure
# they are closed once writing is finished.
o = open(ofn, 'w')
o_before = open(ofn + ".before", 'w')
o_after = open(ofn + ".after", 'w')
print >> stderr, "Extending exonic regions by %s bases. Specify your own with option '-e' (see help for details)" % extend

# Second tab-separated column of each row holds a comma-separated list of
# exon identifiers; collect them all in file order.
exons = []
with open(exonfn) as exon_fh:
    for row in exon_fh:
        exons.extend(row.strip().split('\t')[1].split(','))

# NOTE(review): `load` is presumably pickle.load -- only use trusted files.
# The handle is closed as soon as the table has been read.
with open(gexons_pic) as gex_fh:
    gex_coords = load(gex_fh)
tabixfile = pysam.Tabixfile(tabixfn)

for exon in exons:
    is_terminal_exon, prev_exon, next_exon = terminal_exon(exon, gex_coords)
    if is_terminal_exon:
        continue
    # get the coordinates of the exon and of its two neighbours
    coords = augment_region(gex_coords[exon], extend)
    coords_before = augment_region(gex_coords[prev_exon], extend)
    coords_after = augment_region(gex_coords[next_exon], extend)
    # whether to use 'chr' or not: drop the first three characters
    # (presumably a leading 'chr' -- verify against augment_region's output)
    if not use_chrom_names:
        coords = coords[3:]
        coords_before = coords_before[3:]
        coords_after = coords_after[3:]
    # get the GTF rows in this region whose feature type is "exon"
    rows = [row for row in tabixfile.fetch(region=coords)
            if row.strip().split('\t')[2] == "exon"]
# Build `junctions`, a set of coordinate tuples derived from the rows of the
# GTF file `gtffn`.
#
# Names read from the enclosing scope: feature_coords_fn, fastafn, gtffn,
# get_transcripts, get_introns, offset, use_chromnames.
# Names bound here: feature_coords (only when feature_coords_fn is truthy),
# junctions, and f (left bound to the closed GTF handle, as before).
#
# NOTE(review): the indentation of this fragment was reconstructed; confirm
# that the get_introns test belongs at loop level rather than inside the
# feature_coords_fn branch.
if feature_coords_fn:
    # NOTE(review): `load` is presumably pickle.load -- only use trusted files.
    # The handle is closed as soon as the table has been read.
    with open(feature_coords_fn) as feature_coords_fh:
        feature_coords = load(feature_coords_fh)
print >> stderr, "Note: using FASTA file at %s" % fastafn

junctions = set()
# `with` guarantees the GTF handle is released even if a helper raises.
with open(gtffn) as f:
    for row in f:  # for each row in the input GTF
        # Strip and split once per row instead of once per field access.
        gtf_line = row.strip()
        gtf_fields = gtf_line.split('\t')
        # rows of these feature types are skipped
        if gtf_fields[2] in ['start_codon', 'stop_codon', 'CDS']:
            continue
        if feature_coords_fn:
            # we have the resource to check whether this is a terminal exon,
            # so first and last exons can be dropped
            strand = gtf_fields[6]
            feature, blank = parse_lines([gtf_line], strand, get_transcripts)
            is_terminal_feature, up_exon, down_exon = terminal_exon(
                list(feature)[0], feature_coords)
            if is_terminal_feature:
                # terminal exons are ignored
                continue
            # not a terminal exon, so there must be neighbouring exons
            up_exon_gtf = region_to_GTF(
                up_exon, feature_coords[up_exon], get_transcripts)
            down_exon_gtf = region_to_GTF(
                down_exon, feature_coords[down_exon], get_transcripts)
        if get_introns:
            # NOTE(review): up_exon_gtf / down_exon_gtf are only assigned when
            # feature_coords_fn is set -- confirm callers never request
            # introns without it.
            up_exon_five, up_exon_three = GTFrow_to_5p3pcoords(
                up_exon_gtf, offset, use_chromnames)
            five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
            down_exon_five, down_exon_three = GTFrow_to_5p3pcoords(
                down_exon_gtf, offset, use_chromnames)
            junctions.add((up_exon_five, three, five, down_exon_three))
        else:
            five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
            junctions.add((five, three))