def clone_with_timeout(
    src: str, dest: str, clone_func: Callable[[], None], timeout: float
) -> None:
    """Run a clone in a child process and enforce a time limit on it.

    Args:
        src: clone source (reported in the raised exceptions)
        dest: clone destination (reported in ``CloneFailure``)
        clone_func: callable that does the actual cloning
        timeout: timeout in seconds

    Raises:
        CloneTimeout: the child was still alive after ``timeout`` seconds;
            its last argument tells whether SIGKILL had to be sent.
        CloneFailure: the child ended in time but left an item on the
            error queue.
    """
    error_queue: Queue = Queue()
    worker = Process(target=_clone_task, args=(clone_func, error_queue))
    worker.start()
    worker.join(timeout)

    if not worker.is_alive():
        # The child ended within the time limit: only report queued errors.
        if not error_queue.empty():
            raise CloneFailure(src, dest, error_queue.get())
        return

    worker.terminate()
    # Grace period of one second, checked every 0.1 second, before killing.
    # `worker.join(1)` is avoided on purpose, billiard appears to be bugged:
    # https://github.com/celery/billiard/issues/270
    polls_left = 10
    while polls_left:
        time.sleep(0.1)
        if not worker.is_alive():
            break
        polls_left -= 1

    # All polls used up means the child ignored the terminate request.
    killed = polls_left == 0
    if killed:
        os.kill(worker.pid, signal.SIGKILL)
    raise CloneTimeout(src, timeout, killed)
def _reflash(self, path):
    """
    Run the ``sysupgrade`` command from a separate process.

    The SSH connection may hang indefinitely while the device reflashes,
    which would block the program; setting a timeout on ``exec_command``
    doesn't seem to take effect, so the command runs in a child process
    that is joined with ``UPGRADE_TIMEOUT`` and terminated afterwards
    if it is still alive.
    """
    def run_upgrade(connector, image_path, exec_timeout):
        connector.connect()
        connector.exec_command(
            'sysupgrade -v -c {0}'.format(image_path), timeout=exec_timeout
        )
        connector.close()

    worker = Process(
        target=run_upgrade,
        args=[self, path, self.UPGRADE_TIMEOUT],
    )
    worker.start()
    self.log('Upgrade operation in progress...')
    worker.join(timeout=self.UPGRADE_TIMEOUT)

    notice = ('SSH connection closed, will wait {0} seconds before '
              'attempting to reconnect...')
    self.log(notice.format(self.SLEEP_TIME))
    sleep(self.SLEEP_TIME)

    # nothing left to do unless the child has hung
    if not worker.is_alive():
        return
    worker.terminate()
    worker.join()
def _reflash(self, path):
    """
    Run the upgrade command from a separate process.

    The SSH connection may hang indefinitely while the device reflashes,
    which would block the program; setting a timeout on ``exec_command``
    doesn't seem to take effect on some OpenWRT versions, so the command
    runs in a child process that is joined with ``UPGRADE_TIMEOUT`` and
    terminated afterwards if it is still alive.
    """
    # the current connection is dropped first; the child opens its own
    self.disconnect()
    command = self.get_upgrade_command(path)

    def run_upgrade(connector, _image_path, exec_timeout):
        # `command` comes from the enclosing scope; the path argument is
        # accepted only because it is part of the argument list below
        connector.connect()
        connector.exec_command(command, timeout=exec_timeout)
        connector.disconnect()

    worker = Process(
        target=run_upgrade,
        args=[self, path, self.UPGRADE_TIMEOUT],
    )
    worker.start()
    self.log('Upgrade operation in progress...')
    worker.join(timeout=self.UPGRADE_TIMEOUT)

    notice = (
        f'SSH connection closed, will wait {self.RECONNECT_DELAY} seconds before '
        'attempting to reconnect...'
    )
    self.log(notice)
    sleep(self.RECONNECT_DELAY)

    # nothing left to do unless the child has hung
    if not worker.is_alive():
        return
    worker.terminate()
    worker.join()
def screenshot_endpoint(
        self,
        ip_address=None,
        port=None,
        hostname=None,
        use_ssl=False,
        use_sni=False,
        path="/",
        in_separate_process=False,
):
    """
    Screenshot the given endpoint into a local temporary file.

    :param ip_address: The IP address where the web service resides.
    :param port: The port where the web service resides.
    :param hostname: The hostname to request.
    :param use_ssl: Whether or not to use SSL to request the endpoint.
    :param use_sni: Whether or not the endpoint uses SNI.
    :param path: The path of the resource to screenshot.
    :param in_separate_process: Whether or not to take the screenshot in a
        separate process. This is to address the incredibly long time that
        the Selenium webdriver can take when it hangs.
    :return: A tuple containing (1) the local file path where the screenshot
        was saved and (2) whether or not that file exists once the attempt
        is over.
    """
    transport = "using SSL" if use_ssl else "plain HTTP"
    logger.debug(
        "Now attempting to take a screenshot of the web service at %s:%s (%s). Hostname is %s, SNI support is %s."
        % (ip_address, port, transport, hostname, use_sni)
    )
    self.__set_endpoint(
        ip_address=ip_address,
        port=port,
        hostname=hostname,
        use_ssl=use_ssl,
        use_sni=use_sni,
        path=path,
    )
    self._output_file_path = self.get_temporary_file_path()

    if not in_separate_process:
        self.__take_screenshot()
    else:
        worker = Process(target=self.__take_screenshot)
        try:
            worker.start()
            wait_seconds = (
                config.selenium_screenshot_delay
                + config.inspection_screenshot_join_timeout
            )
            worker.join(wait_seconds)
        except IOError as e:
            # Only an interrupted system call is tolerated here.
            if e.errno != errno.EINTR:
                raise e
            logger.warning("Interrupted system call error received.")
        finally:
            # A child that outlived the join is asked to terminate.
            if worker.is_alive():
                print("PROCESS IS ALIVE - PID IS %s" % (worker.pid, ))
                os.kill(worker.pid, signal.SIGTERM)

    return (
        self.output_file_path,
        FilesystemHelper.does_file_exist(self.output_file_path),
    )
class LongCalculation(QProgressDialog):
    """
    Multiprocessing based worker for mesh and eigenvalue calculations.

    This is necessary to make sure GUI is not blocked while mesh is built,
    or when eigenvalue calculations are performed. Transformations do not
    need as much time, unless there is one implemented without numpy
    vectorized coordinate calculations.

    The job ``(fun, args, postprocess)`` is put on the ``input`` queue for
    the ``worker`` child process, and the ``output`` queue is polled from a
    timer. The final result, if any, is stored in ``res``.
    """

    res = None
    # Class-level default so that cleanUp() is safe at any point of the
    # dialog's life, including after update() has released the process.
    proc = None

    # Type(s) treated as progress text coming from the worker.
    try:
        _text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        _text_types = str  # Python 3

    def __init__(self, fun, args, postprocess, job):
        """
        Build multiprocessing queues and start worker.

        ``job`` is used as the dialog label; the remaining arguments are
        forwarded to the worker through the input queue.
        """
        super(LongCalculation, self).__init__(job, "Cancel", 0, 0)
        self.setModal(True)
        self.input = Queue()
        self.output = Queue()
        self.input.put((fun, args, postprocess))
        self.proc = Process(target=worker, args=(self.input, self.output))
        self.proc.start()
        self.timer = QTimer()
        self.timer.timeout.connect(self.update)
        self.timer.start(10)

    def update(self):
        """
        Check if worker is done, and close dialog.

        Called on every timer tick. A text item updates the label, ``None``
        closes the dialog with result code 0, and anything else is stored
        in ``res`` before the dialog is closed with result code 1.
        """
        try:
            out = self.output.get(block=False)
        except Exception:
            # Nothing to read yet (queue empty) or the queue could not be
            # read right now: stay quiet and poll again on the next tick.
            # Only the read is guarded, so errors in the handling below are
            # no longer hidden.
            return
        if isinstance(out, self._text_types):
            self.setLabelText(out)
            return
        if out is None:
            self.done(0)
            return
        self.res = out
        self.timer.stop()
        self.proc.join()
        # Keep the attribute (set to None) instead of deleting it, so a
        # later cleanUp() does not fail with AttributeError.
        self.proc = None
        self.done(1)

    def cleanUp(self):
        """
        Kill the running processes if cancelled/failed.

        Safe to call more than once and after a successful run.
        """
        proc = self.proc
        if proc is not None:
            if proc.is_alive():
                proc.terminate()
            # Wait for the child to exit instead of re-sending terminate()
            # in a busy loop.
            proc.join()
            self.proc = None
        self.timer.stop()
class MessageConsole(textconsole.SimpleConsole):
    '''
    MAVProxy message console whose GUI runs in a child process
    '''
    def __init__(self, title='MAVProxy: console'):
        if platform.system() == 'Darwin':
            forking_enable(False)
        textconsole.SimpleConsole.__init__(self)
        self.title = title
        self.menu_callback = None

        # two one-way pipes: child -> parent and parent -> child
        to_parent = Pipe(duplex=False)
        self.parent_pipe_recv, self.child_pipe_send = to_parent
        to_child = Pipe(duplex=False)
        self.child_pipe_recv, self.parent_pipe_send = to_child

        self.close_event = Event()
        self.close_event.clear()

        self.child = Process(target=self.child_task)
        self.child.start()

        # the parent does not use the child's ends of the pipes
        for pipe_end in (self.child_pipe_send, self.child_pipe_recv):
            pipe_end.close()

        watcher = threading.Thread(target=self.watch_thread)
        watcher.daemon = True
        watcher.start()

    def child_task(self):
        '''body of the child process - builds and runs the wx GUI'''
        # the child does not use the parent's ends of the pipes
        for pipe_end in (self.parent_pipe_send, self.parent_pipe_recv):
            pipe_end.close()

        import wx_processguard
        from wx_loader import wx
        from wxconsole_ui import ConsoleFrame

        app = wx.App(False)
        frame = ConsoleFrame(state=self, title=self.title)
        app.frame = frame
        frame.SetDoubleBuffered(True)
        frame.Show()
        app.MainLoop()

    def watch_thread(self):
        '''forward messages received from the child to the menu callback'''
        from mp_settings import MPSetting
        try:
            while True:
                event = self.parent_pipe_recv.recv()
                if self.menu_callback is not None:
                    self.menu_callback(event)
                time.sleep(0.1)
        except EOFError:
            # the pipe was closed: the thread simply ends
            pass

    def write(self, text, fg='black', bg='white'):
        '''send text to the console, ignoring any failure'''
        try:
            message = Text(text, fg, bg)
            self.parent_pipe_send.send(message)
        except Exception:
            pass

    def set_status(self, name, text='', row=0, fg='black', bg='white'):
        '''send a status value to the console if the child is running'''
        if not self.is_alive():
            return
        self.parent_pipe_send.send(Value(name, text, row, fg, bg))

    def set_menu(self, menu, callback):
        '''send a menu to the console and remember its callback'''
        if not self.is_alive():
            return
        self.parent_pipe_send.send(menu)
        self.menu_callback = callback

    def close(self):
        '''signal the close event and wait up to 2 seconds for the child'''
        self.close_event.set()
        if not self.is_alive():
            return
        self.child.join(2)

    def is_alive(self):
        '''tell whether the child process is still running'''
        return self.child.is_alive()
class CrawlProcess():
    """
    Singleton that runs the crawl in a child ``Process`` and tracks it
    through a ``Task`` record.

    The parent side keeps the process handle, the current ``Task``, a queue
    (``q``) on which the child publishes the number of scraped items, and a
    pipe whose child end is handed to the crawl target.
    """

    _model = Task
    # The single instance, set by __init__ (name-mangled, class private).
    __instance = None
    # Item counter: incremented by the `scraped` handler inside `crawl`, and
    # overwritten by get_scraped_items_number() with the value read from `q`.
    # NOTE(review): `crawl` runs as a Process target, so the value modified
    # there is presumably not the one seen by the parent - confirm.
    count = 0

    @staticmethod
    def get_instance():
        """
        Return the singleton, creating it on first use.

        Also prints the current instance (debug output).
        """
        print(CrawlProcess.__instance)
        if CrawlProcess.__instance == None:
            CrawlProcess()
        return CrawlProcess.__instance

    def __init__(self):
        """
        Register this object as the singleton and create the queue/pipe.

        :raises Exception: if an instance has already been created.
        """
        if CrawlProcess.__instance != None:
            raise Exception("This class is a singleton!")
        else:
            CrawlProcess.__instance = self
        #self.process = Process(target=CrawlProcess.crawl)
        self.process = None
        self.crawler_process = None
        self.task = None
        self.q = Queue()
        self.parent_conn, self.child_conn = Pipe()

    @classmethod
    def crawl(cls, q, conn):
        """
        Target of the child process: configure and run the crawl.

        Creates a ``CrawlerProcess`` with the project settings, tries to send
        it through ``conn``, schedules ``InfoempleoSpider`` twice (once
        directly and once through a ``Crawler`` with the signal handlers
        below connected), then starts it and joins it. Progress is traced
        with ``print`` and ``write_in_a_file`` calls.

        :param q: queue on which the `scraped` handler publishes the count
        :param conn: connection used once to send the ``CrawlerProcess``
        """
        print()
        print()
        print('***************************************************************************************')
        print('crawl')

        def close(spider, reason):
            """spider_closed handler: store the final count on the latest crawler task."""
            print(f'{multiprocessing.current_process().name}: *!!CLOSE')
            write_in_a_file('CrawlerProcess.signal.close', {'reason': reason}, 'task.txt')
            t = Task.objects.get_latest_crawler_task()
            d = datetime.today()
            t.description = f'spider closed with count: {CrawlProcess.count} at {str(d)}'
            t.result = CrawlProcess.count
            t.save()

        def open(spider):
            """spider_opened handler: reset the counter and update the latest crawler task."""
            print(f'{multiprocessing.current_process().name}: *!!OPEN')
            try:
                name = spider.name
            except:
                name = str(spider)
            write_in_a_file('CrawlerProcess.signal.open', {'spider': name}, 'task.txt')
            CrawlProcess.count = 0
            try:
                t = Task.objects.get_latest_crawler_task()
                # NOTE(review): `process` is the CrawlerProcess created
                # further down in `crawl`; confirm it really exposes `pid`.
                t.name = str(process.pid)
                t.save()
            except Exception as e:
                # NOTE(review): if the lookup above is what failed, `t` is
                # not bound here; the name is also set to the exception
                # object itself - confirm this is intended.
                t.name = e
                t.save()
            #q.put_nowait()
            print()

        def scraped(item, response, spider):
            """item_scraped handler: increment the counter and publish it on ``q``."""
            print(f'{multiprocessing.current_process().name}: *!!SCRAPED')
            print()
            CrawlProcess.count = CrawlProcess.count + 1
            n = CrawlProcess.count
            write_in_a_file('CrawlerProcess.signal.scraped_item', {'response': response, 'count': n}, 'task.txt')
            # Remove one pending value, if any, before putting the new one;
            # any failure falls back to just putting the new value.
            try:
                q.get_nowait()
                q.put_nowait(n)
            except:
                q.put_nowait(n)

        def stopped(*args, **kwargs):
            """engine_stopped handler: trace the call arguments."""
            write_in_a_file('CrawlerProcess.signal.stopped', {'args': args, 'kwargs': kwargs}, 'task.txt')

        def error(*args, **kwargs):
            """spider_error handler: trace the call arguments."""
            write_in_a_file('CrawlerProcess.signal.error', {'args': args, 'kwargs': kwargs}, 'task.txt')

        def send_by_pipe(item):
            """Send ``item`` through ``conn``; a failure is only traced, not raised."""
            try:
                conn.send(item)
                #conn.close()
            except Exception as e:
                write_in_a_file('CrawlProcess._crawl: error conn.send', {'conn error': e}, 'debug.txt')

        process = CrawlerProcess(get_project_settings())
        write_in_a_file('CrawlProcess.crawl: first', {'crawler_process': str(process), 'dir process': dir(process)}, 'debug.txt')
        # NOTE(review): _start_process() blocks on parent_conn.recv() waiting
        # for this object, and no other send on the pipe is visible in this
        # class; if the send fails the parent would apparently keep waiting.
        send_by_pipe(process)
        write_in_a_file('CrawlProcess.crawl: second', {'crawler_process': str(process), 'dir process': dir(process)},'debug.txt')
        # NOTE(review): the spider is scheduled here and again below through
        # `crawler`; only the second one has the handlers connected -
        # confirm that crawling twice is intended.
        process.crawl(InfoempleoSpider())
        write_in_a_file('CrawlProcess.crawl: third', {'crawler_process': str(process), 'dir process': dir(process)},'debug.txt')
        crawler = Crawler(InfoempleoSpider())
        crawler.signals.connect(open, signal=signals.spider_opened)
        crawler.signals.connect(scraped, signal=signals.item_scraped)
        crawler.signals.connect(close, signal=signals.spider_closed)
        crawler.signals.connect(stopped, signal=signals.engine_stopped)
        crawler.signals.connect(error, signal=signals.spider_error)
        write_in_a_file('CrawlProcess.crawl: before', {'crawler_process': str(process),'dir process': dir(process)},'debug.txt')
        process.crawl(crawler)
        write_in_a_file('CrawlProcess.crawl: after', {'crawler_process': str(process), 'dir process': dir(process)}, 'debug.txt')
        process.start()
        write_in_a_file('CrawlProcess._crawl: process started', {'crawler_process': str(process), 'dir process': dir(process)}, 'debug.txt')
        print('***************************************************************************************')
        print(f'CrawlerProcess: {process}')
        print(dir(process))
        print('***************************************************************************************')
        print()
        print()
        write_in_a_file('CrawlProcess.crawl', {'CrawlerProcess': str(process), 'dir(CrawlerProcess)': dir(process)}, 'task.txt')
        process.join()
        write_in_a_file('CrawlProcess.crawl: process.join', {}, 'task.txt')
        write_in_a_file('CrawlProcess.crawl: process.join', {}, 'spider.txt')
        print('Crawler Process has Finished!!!!!')

    @classmethod
    def crawl2(cls, q):
        """
        Dummy target: count up to 15, publishing each value on ``q``.

        Sleeps 5 seconds every fifth count. Not referenced elsewhere in this
        class.
        """
        while CrawlProcess.count < 15:
            # print(f'doing something: {CrawlProcess.count}')
            CrawlProcess.count = CrawlProcess.count + 1
            n = CrawlProcess.count
            # Drop one pending value, if any, before putting the new one.
            try:
                q.get_nowait()
            except:
                pass
            q.put(n)
            if CrawlProcess.count % 5 == 0:
                # print(f'qsize: {q.qsize()}')
                time.sleep(5)

    def _clear_queue(self):
        """Discard every value currently reported as pending on ``q``."""
        while not self.q.empty():
            self.q.get_nowait()

    def _init_process(self, user):
        """
        Prepare, without starting, a new child process and a pending task.

        Puts an initial 0 on the queue, builds the ``Process`` targeting
        ``crawl`` and creates an unsaved crawler ``Task`` for ``user``.
        """
        print(f'CrawlerProcess.init_process')
        self.q.put_nowait(0)
        self.process = Process(target=CrawlProcess.crawl, args=(self.q, self.child_conn,))
        self.task = Task(user=user, state=Task.STATE_PENDING, type=Task.TYPE_CRAWLER)

    def _start_process(self):
        """
        Start the child process, mark the task as running and save it.

        Then waits on the parent end of the pipe for the object sent by
        ``crawl`` and keeps it in ``crawler_process``.
        """
        print(f'CrawlerProcess._start_process')
        self.init_datetime = timezone.now() # Taken before the task is saved
        self.process.start()
        self.task.pid = self.process.pid
        write_in_a_file('CrawlProcess._start_process: process started', {'pid': self.process.pid}, 'debug.txt')
        self.task.state = Task.STATE_RUNNING
        self.task.save()
        # Blocking call: returns only once the child has sent something.
        self.crawler_process = self.parent_conn.recv()
        write_in_a_file('CrawlProcess._start_process: conn.recv', {'crawler_process':str(self.crawler_process), 'dir crawler_process':dir(self.crawler_process)}, 'debug.txt')
        write_in_a_file('CrawlProcess._start_process', {'CrawlerProcess': str(self.crawler_process), 'dir(CrawlerProcess)': dir(self.crawler_process)},'task.txt')

    def _reset_process(self, state=Task.STATE_FINISHED):
        """
        Terminate the child process and close the current task.

        The task gets ``CrawlProcess.count`` as result and ``state`` as
        state. Every error raised while doing so is silently ignored. The
        last queued value, if any, is stored in ``self.result`` and the
        queue is then emptied.

        :param state: final state stored on the task
        """
        print(f'CrawlerProcess._reset_process({state})')
        try:
            self.process.terminate()
            write_in_a_file('_reset_process terminated (from stop)', {'is_running': self.process.is_alive()}, 'debug.txt')
            self.task.result = CrawlProcess.count
            self.task.state = state
            self.task.save()
            self.process.join() # ! IMPORTANT after .terminate -> .join
            write_in_a_file('_reset_process joinned (from stop)', {'is_running': self.process.is_alive()}, 'debug.txt')
        except:
            # NOTE(review): bare except - failures here (including a missing
            # process or task) leave no trace.
            pass
        try:
            self.result = self.q.get_nowait()
        except Exception as e:
            pass
        self._clear_queue()

    def _update_process(self):
        """Reset the process/task if a process exists but is no longer alive."""
        print('CrawlerProcess._update_process')
        print(f'process is alive: {self.process and self.process.is_alive()}')
        if self.process and not self.process.is_alive():
            self._reset_process()

    def start(self, user=None, **kwargs):
        """
        If the process is not alive, it either has not been started yet or
        has already finished, so the stored data is saved and the process is
        run.
        If the process is alive nothing is done.

        :param user: The user that makes the request
        :param kwargs: not used
        :return: None
        """
        print(f'self.q.empty(): {self.q.empty()}')
        print(f'self.q.qsize(): {self.q.qsize()}')
        if not self.is_scrapping():
            # A task still marked as running belongs to a process that is
            # no longer alive: close it before starting a new one.
            if self.task and (self.task.state == Task.STATE_RUNNING):
                self._reset_process()
            self._init_process(user)
            self._start_process()

    def stop(self):
        """Terminate the crawl and mark the current task as incomplete."""
        print(f'CrawleProcess.stop')
        self._reset_process(Task.STATE_INCOMPLETE)
        # self.crawler_process.stop()
        #self.crawler_process.join()

    def join(self):
        """Wait for the child process to end."""
        self.process.join()

    def get_actual_task(self):
        """Return the current task after refreshing its state."""
        self._update_process()
        return self.task

    def get_latest_task(self):
        """
        Return the latest crawler task stored in the database.

        If that task is marked as running but is not the task held by this
        object, it is saved as incomplete first.
        """
        last_task = Task.objects.get_latest_crawler_task()
        # If the latest task from the db has state STATE_RUNNING and is not
        # the current task, it is an incomplete task and its state has to be
        # updated.
        is_an_incomplete_task = (
            last_task and
            last_task.state == Task.STATE_RUNNING and
            (not self.task or self.task.pk != last_task.pk)
        )
        if is_an_incomplete_task:
            last_task.state = Task.STATE_INCOMPLETE
            last_task.save()
        return last_task

    def is_scrapping(self):
        """Return True when a child process exists and is alive."""
        # Debug output: prints the function object itself.
        print(CrawlProcess.is_scrapping)
        if self.process:
            return self.process.is_alive()
        else:
            return False

    def _get_scraped_jobs(self):
        """
        Return the jobs created or updated since the latest crawler task
        was created.
        """
        # NOTE(review): get_latest_task() treats this lookup's result as
        # possibly empty; here it is used without such a check - confirm.
        latest_task = Task.objects.get_latest_crawler_task()
        return Job.objects.filter(Q(created_at__gte=latest_task.created_at) | Q(updated_at__gte=latest_task.created_at))

    def get_scraped_items_number(self):
        """
        Return the number of scraped items.

        Waits up to 5 seconds for a value on the queue and stores it in
        ``CrawlProcess.count``; if none arrives (or any other error occurs)
        the previous ``CrawlProcess.count`` is returned instead. The value
        read is removed from the queue.
        """
        print()
        print('!!!! CrawlProcess.get_scraped_items_number');print();
        count = CrawlProcess.count
        try:
            print(self.q)
            #print(f'CrawlProcess.count: {CrawlProcess.count}')
            #print(f'qsize: {self.q.qsize()}')
            count = self.q.get(block=True, timeout=5)
            CrawlProcess.count = count
            print(f'q.count: {count}')
        except Exception as e:
            print(f'get_scraped_items_number')
            # save_error(e, {'count': count})
        return count

    def get_scraped_items_percentage(self):
        """
        Return the crawl progress as a ratio rounded to two decimals.

        The ratio is the number of jobs from ``_get_scraped_jobs()`` divided
        by the larger of the current count and the result of the latest
        finished crawler task (20000 when there is none or it has no
        result). While the process is alive the value is capped at 0.95.
        """
        # Compute the total from the items scraped by the previous task
        count = self.get_scraped_items_number()
        task = Task.objects.get_latest_finished_crawler_task()
        if task:
            old_result = task.result or 20000
        else:
            old_result = 20000
        if count < old_result:
            total = old_result
        else:
            total = count
        db_count = self._get_scraped_jobs().count()
        try:
            percentage = round(db_count/total, 2)
        except:
            # Any failure of the division/rounding is reported as 0.
            percentage = 0
        if percentage >= 0.95 and self.is_scrapping():
            percentage = 0.95
        return percentage