def SearchCARE(filename, dbname, bgfile):
    """Search the sequences in *filename* for cis-acting regulatory element motifs.

    Every motif returned by ``loadMotifs`` is scanned with ``ScanMotif`` and
    the result is stored through ``CARE.addScanned``.  Motif sequences that
    have been handled are appended to ``<basename>.log``; sequences already
    listed by ``checkScannedLog`` for that log are skipped, so an interrupted
    run can be resumed.

    Parameters:
        filename: path of the sequence file to scan.  The text before its
            first "." is used as the base name of the log file and of the
            ``<basename>/Scan`` directory.
        dbname:   path of the CARE database.  When it does not exist yet,
            motifs are imported from "CARE.txt" after it is opened.
        bgfile:   passed through unchanged to ``ScanMotif``.
    """
    basename = filename.split(".")[0]

    # The existence test has to happen before CAREdb(dbname) is constructed;
    # presumably opening the database creates the file (confirm in CAREdb).
    is_new_db = not os.path.exists(dbname)
    CARE = CAREdb(dbname)
    if is_new_db:
        CARE.importMotifs("CARE.txt")

    Motif_lst = loadMotifs(CARE)

    log_path = "%s.log" % basename
    scanned = checkScannedLog(log_path)

    # The `with` block guarantees the log is closed (and its buffer written)
    # even when a scan raises part-way through the motif list.
    with open(log_path, "a") as scanlog:
        scanned_path = "%s/Scan" % basename
        if not os.path.exists(scanned_path):
            os.makedirs(scanned_path)

        for motif_name, Sequence in Motif_lst:
            # Formatted explicitly so the output is identical under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (motif_name, Sequence))
            if Sequence in scanned:
                print("skip")
                continue
            CARE.addScanned(ScanMotif(Sequence, filename, bgfile))
            scanned.append(Sequence)
            scanlog.write(Sequence + "\n")
            # Push each entry out immediately so progress recorded so far is
            # not left sitting in the buffer if the process is terminated.
            scanlog.flush()
def dumpCoMotif(co_Occur,Merged_REF_SeqName_dist,output,zip=False):
    """Write sequence-name lists for motif pairs and single motifs.

    Files are written under ``<output>/gene_list``:

    * ``<motifa>&<motifb>.lst`` for every key of *co_Occur*, listing the
      names looked up for the keys of ``co_Occur[(motifa, motifb)]``;
    * ``<motif>.lst`` for every motif that appears in any key of *co_Occur*,
      listing the names looked up for the keys of
      ``Merged_REF_SeqName_dist[motif]``.

    Names are joined with newlines, with no trailing newline.

    Parameters:
        co_Occur: mapping keyed by ``(motifa, motifb)`` pairs; each value is
            a mapping whose keys are looked up in the name table.
        Merged_REF_SeqName_dist: mapping keyed by motif; each value is a
            mapping whose keys are looked up in the name table.
        output: directory that receives the ``gene_list`` sub-directory.
        zip: accepted for compatibility; not used by this function.
    """
    # NOTE(review): `dbname` is not a parameter here; it is resolved from the
    # enclosing module scope -- confirm it is defined before this is called.
    care = CAREdb(dbname)
    # Name table used for every lookup below; presumably maps the sequence
    # keys to sequence names from the 'fa_file' table -- verify in CAREdb.
    Seqnames = care.loadDB2dict('fa_file', 0, 1)

    # makedirs creates `output` as well when it is missing.
    gene_list_dir = "%s/gene_list" % output
    if not os.path.exists(gene_list_dir):
        os.makedirs(gene_list_dir)

    # Co-occurring motif pairs.
    for motifa, motifb in co_Occur:
        # Formatted explicitly so the output is identical under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (motifa, motifb))
        filename = "%s&%s.lst" % (motifa, motifb)
        names = [Seqnames[k] for k in co_Occur[(motifa, motifb)].keys()]
        # `with` closes the file even if the write fails.
        with open("%s/%s" % (gene_list_dir, filename), 'w') as outfile:
            outfile.write("\n".join(names))

    # Single motifs: every distinct motif taking part in any pair.
    motifs = set()
    for pair in co_Occur:
        motifs.update(pair)

    for motif in motifs:
        filename = "%s.lst" % motif
        names = [Seqnames[k] for k in Merged_REF_SeqName_dist[motif].keys()]
        with open("%s/%s" % (gene_list_dir, filename), 'w') as outfile:
            outfile.write("\n".join(names))
""" select count(distinct REF_SeqName) from cache.Instance,cache.Scanned where cache.Scanned.REF_MotifSeq=cache.Instance.REF_MotifSeq and REF_Motif=1 """ ] SQL3=[ """ SELECT REF_SeqName,REF_Motif,REF_Organism,Description,start,stop,strand,pValue FROM cache.Instance,cache.Scanned WHERE cache.Instance.REF_MotifSeq=cache.Scanned.REF_MotifSeq AND REF_SeqName in ('40747','10669','1620','5828','30256','10402') """, ] benchmark=CAREdb('RAP_3kbp.db') benchmark.cache('Instance') benchmark.cache('Scanned',['REF_SeqName','REF_MotifSeq','start','stop','strand','pValue'],SeqName=['40747','10669','1620','5828','30256','10402']) for sql in SQL3: print sql timestamp=time.time() benchmark.cur.execute(sql) print [rs[0] for rs in benchmark.cur.fetchall()] print time.time()-timestamp ''' print "------------" tables=['Scanned','cache.Scanned'] for tname in tables: timestamp=time.time() #benchmark.select(tname)