def crawl(self, list_domains):
    """Crawl the comma-separated domains in *list_domains* concurrently.

    A ``Record`` thread is started first to persist results (sharing a lock
    with the crawlers); one ``CrawlerThread`` is launched per domain.  Every
    5 launches each live non-main thread is given a 2-second join window so
    the number of concurrent workers stays bounded.  All launched crawler
    threads are joined before returning.

    Args:
        list_domains: comma-separated domain names, e.g. ``"a.com,b.com"``.
    """
    main_thread = threading.current_thread()  # currentThread() is deprecated
    domains = list_domains.split(',')
    threadpool = []
    lock = threading.Lock()
    rec = Record(self.db_value, lock)
    rec.start()
    for i, domain in enumerate(domains, start=1):
        cw = CrawlerThread(domain, self.db, lock)
        # BUG FIX: the original called cw.run(), which executes the crawl
        # synchronously on the main thread; start() runs it concurrently.
        cw.start()
        # BUG FIX: the pool was never populated, so the final wait loop
        # below was dead code and the method returned before workers ended.
        threadpool.append(cw)
        if i % 5 == 0:
            # Throttle: briefly wait on every live worker before launching more.
            for t in threading.enumerate():
                if t is not main_thread:
                    t.join(2)
    # BUG FIX: the original busy-waited on an always-empty list, removed
    # items while iterating it, and called a non-existent Thread.IsActive().
    # Joining each tracked worker is the correct, non-spinning equivalent.
    for t in threadpool:
        t.join()
def crawl(self, list_domains):
    """Crawl each domain named in *list_domains* (comma-separated) with threads.

    Starts a ``Record`` writer thread sharing a lock with the crawlers, then
    spawns one ``CrawlerThread`` per domain.  After every fifth launch the
    live non-main threads are each joined for up to 2 seconds to keep the
    amount of concurrency in check; finally every spawned crawler is joined.

    Args:
        list_domains: comma-separated domain names, e.g. ``"a.com,b.com"``.
    """
    main_thread = threading.current_thread()  # modern spelling of currentThread()
    domains = list_domains.split(',')
    threadpool = []
    lock = threading.Lock()
    rec = Record(self.db_value, lock)
    rec.start()
    for i, domain in enumerate(domains, start=1):
        cw = CrawlerThread(domain, self.db, lock)
        # BUG FIX: cw.run() ran the crawl synchronously in this thread;
        # start() is required to actually execute it in the worker thread.
        cw.start()
        # BUG FIX: workers were never recorded, so nothing was ever waited on.
        threadpool.append(cw)
        if i % 5 == 0:
            # Give each live worker a short join window before spawning more.
            for t in threading.enumerate():
                if t is not main_thread:
                    t.join(2)
    # BUG FIX: replaced the broken spin loop (empty pool, list mutated while
    # iterated, non-existent Thread.IsActive()) with a plain join of each worker.
    for t in threadpool:
        t.join()
def crawl(self):
    """Crawl every distinct domain in the ``new_domaines`` collection.

    Starts a ``Record`` writer thread, then launches one ``CrawlerThread``
    per domain read from ``self.db.new_domaines``.  Every 30 launches each
    live non-main thread gets a 2-second join window to bound concurrency;
    all spawned crawler threads are joined before returning.
    """
    main_thread = threading.current_thread()  # currentThread() is deprecated
    domaines = self.db.new_domaines.distinct('domaine')
    threadpool = []
    rec = Record(self.db_value)
    rec.start()
    for i, domaine in enumerate(domaines, start=1):
        cw = CrawlerThread(domaine, self.db)
        cw.start()
        # BUG FIX: the pool was never populated, so the wait loop below was
        # dead code and the method could return while crawlers still ran.
        threadpool.append(cw)
        if i % 30 == 0:
            # Throttle: briefly wait on every live worker before spawning more.
            for t in threading.enumerate():
                if t is not main_thread:
                    t.join(2)
    # BUG FIX: the original spun on an always-empty list, removed entries
    # while iterating, and called a non-existent Thread.IsActive();
    # joining each tracked worker is the correct equivalent.
    for t in threadpool:
        t.join()