def __process_items(self, to_process):
    """Process every item through a parallel pipeline.

    Fills a fresh browser pool with one browser per parallel slot, pushes
    one call per item into an ``ItemProcesorPipeline``, waits for the
    pipeline to finish and logs the elapsed time.

    Args:
        to_process: Iterable of item-manager items. It is materialized
            into a list before its length is taken, so generators and
            other unsized iterables are accepted as well as sequences.
    """
    # Read the parallelism level once so the log line, the pool size and
    # the pipeline size all agree.
    paralel_num = self.get_paralel_num()
    self.log("Comenzando procesado. Usando %d browsers" % paralel_num)

    # Build the browser pool: one browser per parallel slot.
    self.__browser_pool = _BrowserPool()
    for _ in range(paralel_num):
        self.__browser_pool.add_browser(
            self.__get_browser(self.get_process_item_browser_type())
        )

    # The reported duration includes the time spent materializing the
    # items, as it did before.
    start = time()

    # Materialize the items BEFORE asking for their count: calling len()
    # on a generator raises TypeError, and every item must be available
    # before it is added to the pipeline.
    to_process = list(to_process)
    total = len(to_process)

    progress = eta.Progress(total, unit="pag")
    pipeline = ItemProcesorPipeline(paralel_num)

    # Queue one call per item.
    for item_manager_item in to_process:
        pipeline.push(
            self.__process_item_call_sequence,
            item_manager_item,
            total,
            progress,
            pipeline,
        )

    pipeline.wait_end()  # wait until processing has finished
    processing_time_str = eta.time_string(time() - start)
    self.log("Procesamiento terminado en %s" % processing_time_str)
def __process_pages(self, page_manager):
    """Process the pending pages of ``page_manager`` through a pipeline.

    Logs how many browsers are used, subscribes an ``_AddPageListener``
    to the page manager, pushes one call per pending page into an
    ``ItemProcesorPipeline`` until the manager reports no more pending
    pages, waits for the pipeline to finish and logs the elapsed time.

    Args:
        page_manager: Object providing ``on_add_page_suscribe(listener)``
            and ``get_pending_page()``; the latter must return ``None``
            when no page is pending.
    """
    paralel_num = self.get_paralel_num()
    if paralel_num == 1:
        self.log("Comenzando procesado. Usando un browser")
    else:
        self.log("Comenzando procesado. Usando %d browsers" % paralel_num)

    pipeline = ItemProcesorPipeline(paralel_num)
    start = time()
    # NOTE(review): this uses a bare ``Progress`` while the elapsed time
    # below goes through ``eta.time_string`` -- confirm ``Progress`` is
    # imported by name in this module.
    progress = Progress(0, "pagina")

    # Presumably the listener pushes pages that are added while processing
    # is already running -- confirm against _AddPageListener.
    page_manager.on_add_page_suscribe(
        _AddPageListener(
            pipeline,
            self.__process_page_call_sequence,
            progress,
        )
    )

    # Drain the pages that are already pending.
    while True:
        pending_page = page_manager.get_pending_page()
        # Identity check: ``None`` is a singleton, and ``is`` cannot be
        # fooled by a page object that overrides ``__eq__``.
        if pending_page is None:
            break
        pipeline.push(
            self.__process_page_call_sequence,
            page_manager,
            pending_page,
            pipeline,
            progress,
        )

    pipeline.wait_end()  # wait until processing has finished
    processing_time_str = eta.time_string(time() - start)
    self.log("Procesamiento terminado en %s" % processing_time_str)
@author: iavas ''' from sdf import ItemProcesorPipeline from time import time, sleep if __name__ == '__main__': def func(k): if k % 2 != 0: #raise ValueError, "%d not pair" % k print("k = %d" % k) sleep(0.1) else: print("k = %d" % k) start = time() pipeline = ItemProcesorPipeline(1) for i in range(200): pipeline.push(func, i) pipeline.wait_end() print((time() - start)) start = time() for i in range(200): func(i) print((time() - start)) print("COCOA!")