tog2 = open(gtffn + ".downstream_introns", 'w') else: fivesfn = gtffn + ".fives.%s_%s.fasta" % (offset[0], offset[1]) threesfn = gtffn + ".threes.%s_%s.fasta" % (offset[2], offset[3]) f5 = open(fivesfn, 'w') f3 = open(threesfn, 'w') for pieces in junctions: if get_introns: seqs = list() for p in pieces: if p[:3] == "chr": q = p[3:-2] else: q = p[:-2] seq = fastafile.fetch(region=q) if p[-1] == '-': seq = complementDNA(seq[::-1]) seqs += [seq] else: five, three = pieces A = fastafile.fetch(region=five) B = fastafile.fetch(region=three) if five[-1] == '-': A = complementDNA(A[::-1]) # reverse the string if three[-1] == '-': B = complementDNA(B[::-1]) if get_introns: u5, u3, d5, d3 = pieces print >> tog1, "\t".join( [u5, u3] + [seqs[0][:offset[0]] + seqs[0][offset[0]:].lower()] + [seqs[1][:offset[2]].lower() + seqs[1][offset[2]:]])
except IndexError: print >> sys.stderr, "Usage: script.py <exon_fn> <gene_fn> <tabix_fn>" sys.exit(0) fastafile = pysam.Fastafile(fasta_fn) keys = dict() f = open(exon_fn) for row in f: keys[tuple(row.strip().split(':'))] = [] f.close() f = open(gene_fn) c = 0 for row in f: if c > 5: break l = row.strip().split('\t') sd = l[5] if (l[0], l[1]) in keys: coord = l[2] + ":" + l[3] + "-" + l[4] results = fastafile.fetch(region=coord) print ">%s" % coord + ":" + sd if sd == '+': print results elif sd == '-': print complementDNA(results)[::-1] c += 0 f.close() fastafile.close()
#!/home/paulk/software/bin/python
"""Print the terminal bases of "U12-U2" rows from a tab-separated details file.

For every row of ``u12_introns_all_norm_ps_details.txt`` whose sixth-from-last
field equals "U12-U2", one line is written to standard output: field 4
(0-based), a space, then two sequences joined by "-".  The sequences are
fetched from the indexed FASTA reference for the regions

    reg1 = <chrom>:<field 11>-<field 11 + 1>
    reg2 = <chrom>:<field 12 - 1>-<field 12>

where <chrom> is field 8 with its first three characters removed
(presumably a "chr" prefix -- TODO confirm against the input file).

When field 14 is '-', each sequence is passed through ``complementDNA`` and
reversed, and reg2 is printed before reg1; otherwise reg1 is printed before
reg2 unchanged.

NOTE(review): the source this was restored from had been collapsed onto a
single line, so its indentation was lost.  This layout assumes the strand
test was nested inside the "U12-U2" test, because the trailing-comma
``print l[4],`` only makes sense when another print follows on the same
output line.  Confirm against the original file.
"""
from __future__ import division
from __future__ import print_function
from sys import argv,exit,stderr
import pysam
from key_functions import complementDNA

FASTA_PATH = "resources/refs/hg19/Homo_sapiens.GRCh37.66.dna.fa"
DETAILS_PATH = "u12_introns_all_norm_ps_details.txt"


def _flip(seq):
    """Return ``complementDNA(seq)`` reversed (used for '-' rows)."""
    return complementDNA(seq)[::-1]


fastafile = pysam.Fastafile(FASTA_PATH)
try:
    # ``with`` guarantees the details file is closed even if a row is
    # malformed and raises part-way through.
    with open(DETAILS_PATH) as f:
        for row in f:
            # Lines starting with 'i' are skipped (presumably the header row
            # -- TODO confirm).  Blank lines are skipped too; previously they
            # crashed with IndexError when field 8 was read.
            if row[0] == 'i' or not row.strip():
                continue

            fields = row.strip().split('\t')
            chrom = fields[8][3:]
            first = int(fields[11])
            last = int(fields[12])

            # Region strings in "name:start-end" form, as passed to fetch().
            reg1 = "%s:%d-%d" % (chrom, first, first + 1)
            reg2 = "%s:%d-%d" % (chrom, last - 1, last)

            # Coordinates are parsed before this filter on purpose, so that
            # malformed rows still fail loudly exactly as they did before.
            if fields[-6] != "U12-U2":
                continue

            if fields[14] == '-':
                ends = (_flip(fastafile.fetch(region=reg2)) + "-" +
                        _flip(fastafile.fetch(region=reg1)))
            else:
                ends = (fastafile.fetch(region=reg1) + "-" +
                        fastafile.fetch(region=reg2))

            # Same output as the Python 2 ``print l[4],`` followed by
            # ``print <ends>``: the two items separated by a single space.
            print(fields[4], ends)
finally:
    # Release the FASTA handle even when the loop aborts.
    fastafile.close()
tog2 = open(gtffn+".downstream_introns",'w') else: fivesfn = gtffn+".fives.%s_%s.fasta"%(offset[0],offset[1]) threesfn = gtffn+".threes.%s_%s.fasta"%(offset[2],offset[3]) f5 = open(fivesfn,'w') f3 = open(threesfn,'w') for pieces in junctions: if get_introns: seqs = list() for p in pieces: if p[:3] == "chr": q = p[3:-2] else: q = p[:-2] seq = fastafile.fetch(region=q) if p[-1] == '-': seq = complementDNA(seq[::-1]) seqs += [seq] else: five,three = pieces A = fastafile.fetch(region=five) B = fastafile.fetch(region=three) if five[-1] == '-': A = complementDNA(A[::-1]) # reverse the string if three[-1] == '-': B = complementDNA(B[::-1]) if get_introns: u5,u3,d5,d3 = pieces print >> tog1,"\t".join([u5,u3]+[seqs[0][:offset[0]]+seqs[0][offset[0]:].lower()]+[seqs[1][:offset[2]].lower()+seqs[1][offset[2]:]]) print >> tog2,"\t".join([d5,d3]+[seqs[2][:offset[0]]+seqs[2][offset[0]:].lower()]+[seqs[3][:offset[2]].lower()+seqs[3][offset[2]:]]) else:
#!/home/paulk/software/bin/python
"""Print the terminal bases of "U12-U2" rows from a tab-separated details file.

For every row of ``u12_introns_all_norm_ps_details.txt`` whose sixth-from-last
field equals "U12-U2", one line is written to standard output: field 4
(0-based), a space, then two sequences joined by "-".  The sequences are
fetched from the indexed FASTA reference for the regions

    reg1 = <chrom>:<field 11>-<field 11 + 1>
    reg2 = <chrom>:<field 12 - 1>-<field 12>

where <chrom> is field 8 with its first three characters removed
(presumably a "chr" prefix -- TODO confirm against the input file).

When field 14 is '-', each sequence is passed through ``complementDNA`` and
reversed, and reg2 is printed before reg1; otherwise reg1 is printed before
reg2 unchanged.

NOTE(review): the source this was restored from had been collapsed onto a
single line, so its indentation was lost.  This layout assumes the strand
test was nested inside the "U12-U2" test, because the trailing-comma
``print l[4],`` only makes sense when another print follows on the same
output line.  Confirm against the original file.
"""
from __future__ import division
from __future__ import print_function
from sys import argv, exit, stderr
import pysam
from key_functions import complementDNA

FASTA_PATH = "resources/refs/hg19/Homo_sapiens.GRCh37.66.dna.fa"
DETAILS_PATH = "u12_introns_all_norm_ps_details.txt"


def _flip(seq):
    """Return ``complementDNA(seq)`` reversed (used for '-' rows)."""
    return complementDNA(seq)[::-1]


fastafile = pysam.Fastafile(FASTA_PATH)
try:
    # ``with`` guarantees the details file is closed even if a row is
    # malformed and raises part-way through.
    with open(DETAILS_PATH) as f:
        for row in f:
            # Lines starting with 'i' are skipped (presumably the header row
            # -- TODO confirm).  Blank lines are skipped too; previously they
            # crashed with IndexError when field 8 was read.
            if row[0] == 'i' or not row.strip():
                continue

            fields = row.strip().split('\t')
            chrom = fields[8][3:]
            first = int(fields[11])
            last = int(fields[12])

            # Region strings in "name:start-end" form, as passed to fetch().
            reg1 = "%s:%d-%d" % (chrom, first, first + 1)
            reg2 = "%s:%d-%d" % (chrom, last - 1, last)

            # Coordinates are parsed before this filter on purpose, so that
            # malformed rows still fail loudly exactly as they did before.
            if fields[-6] != "U12-U2":
                continue

            if fields[14] == '-':
                ends = (_flip(fastafile.fetch(region=reg2)) + "-" +
                        _flip(fastafile.fetch(region=reg1)))
            else:
                ends = (fastafile.fetch(region=reg1) + "-" +
                        fastafile.fetch(region=reg2))

            # Same output as the Python 2 ``print l[4],`` followed by
            # ``print <ends>``: the two items separated by a single space.
            print(fields[4], ends)
finally:
    # Release the FASTA handle even when the loop aborts.
    fastafile.close()
except IndexError: print >> sys.stderr,"Usage: script.py <exon_fn> <gene_fn> <tabix_fn>" sys.exit(0) fastafile = pysam.Fastafile(fasta_fn) keys = dict() f = open(exon_fn) for row in f: keys[tuple(row.strip().split(':'))] = [] f.close() f = open(gene_fn) c = 0 for row in f: if c > 5: break l = row.strip().split('\t') sd = l[5] if (l[0],l[1]) in keys: coord = l[2]+":"+l[3]+"-"+l[4] results = fastafile.fetch(region=coord) print ">%s" % coord+":"+sd if sd == '+': print results elif sd == '-': print complementDNA(results)[::-1] c += 0 f.close() fastafile.close()