Пример #1
0
 def crawl(self, list_domains):
     """Crawl every domain of a comma-separated list, one worker per domain.

     A ``Record`` object is started first, then one ``CrawlerThread`` is
     started per domain. After every 5 workers the method pauses briefly so
     the workers already running can make progress, and before returning it
     blocks until every worker started here has finished.

     The ``Record`` object is started but not waited on or stopped here.

     Args:
         list_domains: Comma-separated domain names, e.g. ``"a.com,b.org"``.
             Whitespace around each name is stripped and empty entries are
             skipped.
     """
     domains = [d.strip() for d in list_domains.split(',') if d.strip()]
     threadpool = []
     # A single lock is handed to the recorder and to every crawler.
     lock = threading.Lock()
     rec = Record(self.db_value, lock)
     rec.start()
     for i, domain in enumerate(domains, 1):
         cw = CrawlerThread(domain, self.db, lock)
         # start(), not run(): calling run() directly executes the crawler
         # synchronously in the calling thread, so nothing runs in parallel.
         # NOTE(review): assumes CrawlerThread subclasses threading.Thread
         # -- confirm against its definition.
         cw.start()
         threadpool.append(cw)

         if i % 5 == 0:
             # Throttle: give each tracked worker up to 2 seconds to finish
             # before more are launched, then forget the finished ones.
             for t in threadpool:
                 t.join(2)
             threadpool = [t for t in threadpool if t.is_alive()]

     # Block until the remaining workers are done. The previous polling loop
     # never terminated because nothing was ever added to the pool, and it
     # called a non-existent IsActive() method.
     for t in threadpool:
         t.join()
Пример #2
0
    def crawl(self, list_domains):
        """Crawl every domain of a comma-separated list, one worker per domain.

        A ``Record`` object is started first, then one ``CrawlerThread`` is
        started per domain. After every 5 workers the method pauses briefly
        so the workers already running can make progress, and before
        returning it blocks until every worker started here has finished.

        The ``Record`` object is started but not waited on or stopped here.

        Args:
            list_domains: Comma-separated domain names, e.g.
                ``"a.com,b.org"``. Whitespace around each name is stripped
                and empty entries are skipped.
        """
        domains = [d.strip() for d in list_domains.split(',') if d.strip()]
        threadpool = []
        # A single lock is handed to the recorder and to every crawler.
        lock = threading.Lock()
        rec = Record(self.db_value, lock)
        rec.start()
        for i, domain in enumerate(domains, 1):
            cw = CrawlerThread(domain, self.db, lock)
            # start(), not run(): calling run() directly executes the
            # crawler synchronously in the calling thread, so nothing runs
            # in parallel.
            # NOTE(review): assumes CrawlerThread subclasses
            # threading.Thread -- confirm against its definition.
            cw.start()
            threadpool.append(cw)

            if i % 5 == 0:
                # Throttle: give each tracked worker up to 2 seconds to
                # finish before more are launched, then forget the finished
                # ones.
                for t in threadpool:
                    t.join(2)
                threadpool = [t for t in threadpool if t.is_alive()]

        # Block until the remaining workers are done. The previous polling
        # loop never terminated because nothing was ever added to the pool,
        # and it called a non-existent IsActive() method.
        for t in threadpool:
            t.join()
Пример #3
0
 def crawl(self):
     """Crawl every distinct domain stored in ``self.db.new_domaines``.

     A ``Record`` object is started first, then one ``CrawlerThread`` is
     started per distinct ``'domaine'`` value. After every 30 workers the
     method pauses briefly so the workers already running can make progress,
     and before returning it blocks until every worker started here has
     finished.

     The ``Record`` object is started but not waited on or stopped here.
     """
     domaines = self.db.new_domaines.distinct('domaine')
     threadpool = []
     rec = Record(self.db_value)
     rec.start()
     for i, domaine in enumerate(domaines, 1):
         cw = CrawlerThread(domaine, self.db)
         cw.start()
         # Track the worker so it can be waited on below; previously the
         # pool stayed empty and the final wait loop spun forever.
         threadpool.append(cw)

         if i % 30 == 0:
             # Throttle: give each tracked worker up to 2 seconds to finish
             # before more are launched, then forget the finished ones.
             for t in threadpool:
                 t.join(2)
             threadpool = [t for t in threadpool if t.is_alive()]

     # Block until the remaining workers are done (replaces a busy-wait that
     # called a non-existent IsActive() method and mutated the list while
     # iterating over it).
     for t in threadpool:
         t.join()