def testReader(self):
    """Exercise pwm.Reader on both input formats and check sequence scoring.

    The "basic" and "transfac" fixtures must each yield exactly one matrix.
    The transfac matrix is then used to score a plain sequence, the same
    sequence expressed as per-position {nucleotide: 1.0} dicts, and a
    separate fixture sequence; formatted scores are compared with the
    expected strings.
    """

    def four_decimals(*values):
        # Space-separated rendering of the scores with four decimal places.
        return " ".join("%.4f" % value for value in values)

    # test basic format: i.e. for jaspar
    basic_matrices = list(
        pwm.Reader(StringIO(basicPwm), format="basic",
                   background=background, score_correction=False))
    assert len(basic_matrices) == 1

    # test transfac format
    transfac_matrices = list(
        pwm.Reader(StringIO(transfacPwm), format="transfac",
                   background=background, score_correction=False))
    assert len(transfac_matrices) == 1
    matrix = transfac_matrices[0]

    # Plain sequence: two score rows are expected.
    plain_scores = matrix.score_seq(dSeq)
    assert len(plain_scores) == 2
    assert four_decimals(plain_scores[0][0], plain_scores[0][1],
                         plain_scores[1][0], plain_scores[1][1]) == dScoresExpected

    # Same sequence with each position given as {nucleotide: 1.0}; the
    # scores must match the plain-sequence expectation.
    one_hot_seq = [{nt: 1.0} for nt in dSeq]
    one_hot_scores = matrix.score_seq(one_hot_seq)
    assert len(one_hot_scores) == 2
    assert four_decimals(one_hot_scores[0][0], one_hot_scores[0][1],
                         one_hot_scores[1][0], one_hot_scores[1][1]) == dScoresExpected

    # Separate fixture sequence: a single score row is expected.
    fixture_scores = matrix.score_seq(qSeq)
    assert len(fixture_scores) == 1
    assert four_decimals(fixture_scores[0][0], fixture_scores[0][1]) == qScoresExpected
def main():
    """Score a MAF alignment with every matrix in a PWM file.

    Command line: ``pwm_file spec1,spec2,... [inmaf]``.

    The matrices are read from ``pwm_file`` in "basic" format and keyed by
    their ``id``. The alignment is read from ``inmaf`` when exactly three
    arguments are given, otherwise from standard input. For every result
    yielded by ``MafScorer`` each score matrix is appended to a file named
    ``<key>.mx``: one text line per matrix row, each value formatted with
    two decimals and followed by a single space.

    Exits with status 2 and a usage message when the two required
    arguments are missing.
    """
    if len(sys.argv) < 3:
        print("usage: %s pwm_file spec1,spec2,... [inmaf]" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)

    pwm_file = sys.argv[1]
    # read alignment species
    species = sys.argv[2].split(',')

    # Only an exact argument count of four selects the file; anything else
    # reads the alignment from stdin.
    maf_from_file = len(sys.argv) == 4
    inmaf = open(sys.argv[3]) if maf_from_file else sys.stdin

    fbunch = {}  # output file name -> open handle, created on first use
    try:
        # read weight matrices
        with open(pwm_file) as pwm_handle:
            pwm = {wm.id: wm for wm in pwmx.Reader(pwm_handle, format='basic')}

        for scoremax, _index, _headers in MafScorer(pwm, species, inmaf):
            for key, matrix in scoremax.items():
                fname = key + '.mx'
                out = fbunch.get(fname)
                if out is None:
                    out = fbunch[fname] = open(fname, 'w')
                    print("Writing", fname, file=sys.stderr)
                for row in matrix:
                    for value in row:
                        print("%.2f" % value, end=' ', file=out)
                    print(file=out)
    finally:
        # Release every handle even when scoring fails part-way through.
        for handle in fbunch.values():
            handle.close()
        if maf_from_file:
            # stdin is deliberately left open.
            inmaf.close()
def main():
    """Report alignment positions where any species scores above a threshold.

    Command line: ``transfac|basic pwmfile inmaf threshold spec1,spec2,...``.

    Every matrix in ``pwmfile`` is loaded and keyed by its ``id``. For each
    MAF block, ``MafBlockScorer`` yields per-matrix score tables indexed as
    ``table[species_index][offset]``. Whenever at least one species exceeds
    ``threshold`` at an offset, one line is printed with: the chromosome
    (second dot-separated field of the first header's source), the start
    and end in reference coordinates, the matrix id with spaces replaced by
    underscores, and the scores of all species at that offset.

    Prints the usage string and exits with status 0 when fewer than five
    arguments are supplied (status kept as in the original script).
    """
    if len(sys.argv) < 6:
        print("%s transfac|basic pwmfile inmaf threshold spec1,spec2,... " % sys.argv[0], file=sys.stderr)
        sys.exit(0)

    pwm_format = sys.argv[1]
    with open(sys.argv[2]) as pwm_handle:
        pwm = {wm.id: wm for wm in pwmx.Reader(pwm_handle, format=pwm_format)}

    threshold = float(sys.argv[4])
    species = sys.argv[5].split(',')
    species_indices = range(len(species))

    with open(sys.argv[3]) as inmaf:
        for maf in align_maf.Reader(inmaf):
            # Aligned text of the first component; its gaps are needed to
            # translate block offsets into reference coordinates.
            reftext = maf.components[0].text

            # maf block scores for each matrix
            for scoremax, width, headers in MafBlockScorer(pwm, species, maf):
                mafsrc, mafstart, _mafend = headers[0]
                mafchrom = mafsrc.split('.')[1]

                # lists of scores for each position in scoremax
                for motif_id, mx in scoremax.items():
                    for offset in range(width):
                        # scan all species with threshold; one hit is enough
                        if not any(mx[i][offset] > threshold
                                   for i in species_indices):
                            continue
                        # Gap characters before the offset do not consume
                        # reference positions.
                        refstart = mafstart + offset - reftext.count('-', 0, offset)
                        refend = refstart + len(pwm[motif_id])
                        data = " ".join("%.2f" % mx[i][offset]
                                        for i in species_indices)
                        # underscore spaces in the name
                        print(mafchrom, refstart, refend,
                              motif_id.replace(' ', '_'), data)
def main():
    """Print motif matches selected from a MAF alignment by a single PWM.

    Command line: ``transfac|basic pwmfile inmaf threshold [motif]``.

    Only the first matrix in ``pwmfile`` is used. For every MAF block,
    each ``(mafmotif, pwm_score, motif_score)`` triple yielded by
    ``MafMotifSelect`` is printed on one line, followed by a separator line
    of 'z' characters.

    Exits with status 2 and a usage message when fewer than four arguments
    are supplied. Raises StopIteration when the PWM file has no matrix.
    """
    if len(sys.argv) < 5:
        # sys.stderr.write behaves the same on Python 2 and Python 3,
        # unlike the ``print >>`` statement it replaces.
        sys.stderr.write(
            "%s transfac|basic pwmfile inmaf threshold [motif]\n" % sys.argv[0])
        sys.exit(2)

    with open(sys.argv[2]) as pwm_handle:
        # next(iter(...)) replaces the Python 2-only ``.next()`` method.
        pwm = next(iter(pwmx.Reader(pwm_handle, format=sys.argv[1])))

    threshold = float(sys.argv[4])
    motif = sys.argv[5] if len(sys.argv) > 5 else None

    with open(sys.argv[3]) as inmaf:
        for maf in align_maf.Reader(inmaf):
            for mafmotif, pwm_score, motif_score in MafMotifSelect(
                    maf, pwm, motif, threshold):
                # Single pre-formatted argument: same text as the old
                # ``print a, b, c`` statement on either Python version.
                print("%s %s %s" % (mafmotif, pwm_score, motif_score))
                # Separator after each match; nesting inferred from the
                # statement order — TODO confirm against the original layout.
                print('zzzzzzzzzzzzzzzzzzzzzzzzzzzzz')
# Module-level setup for FOX motif scoring, followed by the start of alnToPWM.
#   fm         - text of a single matrix entry with header ">FOX6", parsed below
#                with format="basic".
#   background - values for A/C/G/T passed to pwm.Reader; the author notes these
#                are genome, not transcriptome, frequencies.
#   FOXwm      - first matrix produced by pwm.Reader over fm, read with
#                score_correction=True.
#   hg19Phylo  - result of hg19Phylogeny().
#   fasta      - pyfasta.Fasta opened on a hard-coded absolute path with
#                flatten_inplace=True.
# NOTE(review): the line breaks of this fragment (including those inside the
# triple-quoted fm text) are not visible here, and the body of alnToPWM
# continues beyond this point — confirm against the full file before editing.
#FOX 6-mer motif fm = """>FOX6 0 0 0 100 0 0 100 0 0 100 0 0 100 0 0 0 0 0 0 100 0 0 100 0 """ background = { 'A':.28,'C':.21, 'G':.24, 'T':.27 } #genome background. not transcriptome :( FOXwm = [w for w in pwm.Reader(StringIO.StringIO(fm),format="basic", background=background,score_correction=True)][0] hg19Phylo = hg19Phylogeny() fasta = pyfasta.Fasta("/home/lovci/data/Genome/hg19/all.fa", flatten_inplace=True) def alnToPWM(aln, id = "id", background=background, nSpecies = 46.0): #import StringIO #s = StringIO.StringIO() #s.write(">" + id + "\n") sz = aln.components[0].size ar = np.ndarray( shape=(sz,4)) for rowN, i in enumerate(aln.column_iter()):