def toTAMOmotifs(motifList,seqData=None):
    """Return a list of TAMO motif objects built from native motif objects.

    Parameters:
        motifList -- iterable of native motif objects.  Each one must support
                     len(), expose a ``pwm`` mapping with 'A', 'C', 'G' and 'T'
                     entries indexable by position, and carry an ``id``
                     attribute.  A ``sigvalue`` attribute is copied when present.
        seqData   -- selects the background nucleotide frequencies handed to
                     TAMO:
                       * dict: used directly as nucFreqs (seqData['A']==.26);
                         must contain the keys 'A', 'C', 'G' and 'T'.
                       * str:  path to a FastA file; it is parsed with
                         ParseFastA, summarised with seqStats, and the
                         'aCnt'/'cCnt'/'gCnt'/'tCnt' counts are divided by
                         'nonNs' to get frequencies.
                       * None (default) or any other type: a uniform
                         background of 0.25 per nucleotide.

    Returns:
        A list of TAMO motifs (one per entry of motifList, same order).

    Raises:
        InvalidOptionError -- seqData is a dict lacking one of 'A','C','G','T'.
    """
    from TAMO.MotifTools import Motif_from_counts

    # Dispatch on the *type* of seqData.  The previous guard was
    # ``if not seqData:``, which only let falsy values reach the dict/str
    # branches, so supplied frequencies or sequence files were never used.
    if isinstance(seqData, dict):
        try:
            aFreq = seqData['A']
            cFreq = seqData['C']
            gFreq = seqData['G']
            tFreq = seqData['T']
        except KeyError:
            raise InvalidOptionError("toTAMOmotifs: Unrecognized key in seqData as dict.")
    elif isinstance(seqData, str):
        seqCounts = seqStats(ParseFastA(seqData).toDict())
        tot = float(seqCounts['nonNs'])
        aFreq = seqCounts['aCnt']/tot
        cFreq = seqCounts['cCnt']/tot
        gFreq = seqCounts['gCnt']/tot
        tFreq = seqCounts['tCnt']/tot
    else:
        # No usable background information: assume equiprobable nucleotides.
        aFreq = 0.25
        cFreq = 0.25
        gFreq = 0.25
        tFreq = 0.25

    tList = []
    for m in motifList:
        # One {'A','C','G','T'} dict per motif position, as Motif_from_counts
        # is called with here.
        counts = [{'A':m.pwm['A'][i],
                   'C':m.pwm['C'][i],
                   'G':m.pwm['G'][i],
                   'T':m.pwm['T'][i]} for i in range(len(m))]
        # A fresh bg dict per motif so the TAMO objects never share one.
        t = Motif_from_counts(counts,beta=0.01,bg={'A':aFreq,'C':cFreq,'G':gFreq,'T':tFreq})
        t.id = m.id
        # sigvalue is optional on native motifs; copy it only when it exists.
        try:
            t.sigvalue = m.sigvalue
        except AttributeError:
            pass
        tList.append(t)
    return tList
# Build one TAMO motif per matrix file in jasparFiles, then save the whole
# collection both as a pickle (pklFile) and via save_motifs (tmoFile).
tamoMotifs = []
for jMat in jasparFiles:
    # Read the matrix as rows of whitespace-separated text tokens.  The handle
    # is closed as soon as the rows are read instead of being left to the
    # garbage collector.
    with open(jMat, "rU") as jasparHandle:
        tempMat = [jasparLine.strip().split() for jasparLine in jasparHandle]

    # transpose matrix: afterwards every row holds the four values of one
    # position, in the order A, C, G, T (i.e. the file's rows 0-3).
    tempMat = transpose(mat(tempMat)).tolist()

    # eval() intelligently converts text numbers to int or float!
    # NOTE(security): eval() executes whatever expression the file contains;
    # only feed this script matrix files from a trusted source.
    for i, posTokens in enumerate(tempMat):
        tempMat[i] = {
            "A": eval(posTokens[0]),
            "C": eval(posTokens[1]),
            "G": eval(posTokens[2]),
            "T": eval(posTokens[3]),
        }

    jasparTAMO_Motif = Motif_from_counts(tempMat, bg=nucBack)
    # Remember which file the motif came from (name only, directory dropped).
    jasparTAMO_Motif.sourceFile = jMat.split("/")[-1]
    tamoMotifs.append(jasparTAMO_Motif)

# print to file
# The pickle handle is closed explicitly so the data is flushed to disk
# before the script continues.
with open(pklFile, "w") as pklHandle:
    cPickle.dump(tamoMotifs, pklHandle)
save_motifs(tamoMotifs, tmoFile, kmer_count=60)
# Parenthesised form prints the same text under the Python 2 print statement.
print("Done.")