def testMQ(self):
    """Exercise the raw message queue and the mq client: bulk put/get,
    plain string payloads, Url objects, and routing across priorities."""
    queue = self.mq0
    items = [str(random.randint(10000, 50000)) for _ in range(20)]
    timer = Clock()

    # Bulk put/get through the raw queue: everything pushed in must come
    # back out (arrival order is not asserted, only the multiset).
    queue.put(items, flush=True)
    received = []
    while True:
        item = queue.get()
        if item is None:
            break
        received.append(item)
    self.assertEqual(sorted(items), sorted(received))
    print(timer.clock())

    # test mq client
    payload = str(random.randint(10000, 50000))
    self.client.put(payload)
    self.assertEqual(payload, self.client.get())

    self.client.put(Url('http://qinxuye.me', priority=1))
    fetched = self.client.get(priority=1)
    self.assertEqual(fetched.url, 'http://qinxuye.me')

    # test put into different priorities
    self.client.put(Url('http://qinxuye.me', priority=0))
    self.client.put(Url('http://qinxuye.me/about', priority=1))
    self.client.put(u'三星')
    self.assertEqual(
        self.client.get(priority=1).url, 'http://qinxuye.me/about')
    self.assertEqual(self.client.get(priority=0).url, 'http://qinxuye.me')
    print(timer.clock())
def _run_local_job(self, job_path, overwrite=False, rpc_server=None, settings=None):
    """Import a job description and run the job locally, blocking until it
    finishes, goes idle too long, or is interrupted by SIGINT/SIGTERM.

    :param job_path: filesystem path of the job to import and run
    :param overwrite: forwarded to ``_get_name_and_dir`` (reuse of working dir)
    :param rpc_server: optional RPC server, shut down together with the job
    :param settings: optional settings merged into the imported job description
    """
    job_desc = import_job_desc(job_path)
    if settings is not None:
        job_desc.update_settings(settings)
    base_name = job_desc.uniq_name
    self.env['job_desc'][base_name] = job_desc
    # Derive a filesystem-safe directory name from this worker's address
    # ('.' and ':' are replaced since they are awkward in paths).
    addr_dirname = self.addr.replace('.', '_').replace(':', '_')
    working_dir = os.path.join(self.working_dir, 'worker', addr_dirname)
    clear = job_desc.settings.job.clear
    job_name, working_dir = self._get_name_and_dir(
        working_dir, base_name, overwrite=overwrite, clear=clear)

    clock = Clock()
    job = Job(self, job_path, job_name, job_desc=job_desc,
              working_dir=working_dir, rpc_server=rpc_server,
              manager=self.manager)
    # Run the job in a worker thread so the main thread stays free to
    # receive signals and poll job status.
    t = threading.Thread(target=job.run, args=(True, ))
    t.start()

    stopped = multiprocessing.Event()

    def stop(signum, frame):
        # Only the main process reacts to the signal; child processes
        # inherit the handler but must not trigger shutdown themselves.
        if 'main' not in multiprocessing.current_process().name.lower():
            return
        # Guard against repeated signals triggering shutdown twice.
        if stopped.is_set():
            return
        else:
            stopped.set()
        self.logger.debug("Catch interrupt signal, start to stop")
        job.shutdown()
        if rpc_server:
            rpc_server.shutdown()

    signal.signal(signal.SIGINT, stop)
    signal.signal(signal.SIGTERM, stop)

    # Poll the job every 5 seconds; count consecutive IDLE observations and
    # give up once MAX_IDLE_TIMES is exceeded.
    idle_times = 0
    while t.is_alive():
        if job.get_status() == FINISHED:
            break
        if job.get_status() == IDLE:
            idle_times += 1
            if idle_times > MAX_IDLE_TIMES:
                break
        else:
            idle_times = 0
        try:
            t.join(5)
        except IOError:
            # join can be interrupted by a signal on some platforms.
            break

    # Decide whether we (rather than the signal handler) must shut down.
    need_shutdown = False
    if not job.stopped.is_set() and job.get_status() == FINISHED:
        self.logger.debug('All objects have been fetched, try to finish job')
        need_shutdown = True
    elif not stopped.is_set() and not t.is_alive():
        # Worker thread died without a signal-triggered stop.
        need_shutdown = True
    elif not job.stopped.is_set() and job.get_status() == IDLE:
        self.logger.debug('No bundle or url to perform, try to finish job')
        need_shutdown = True

    if need_shutdown is True:
        job.shutdown()
        if rpc_server:
            rpc_server.shutdown()

    self.logger.debug('Job id:%s finished, spend %.2f seconds for running' % (
        job_name, clock.clock()))
def _run_local_job(self, job_path, overwrite=False, rpc_server=None, settings=None):
    """Import a job description and run the job locally, blocking until it
    finishes, goes idle too long, or is interrupted by SIGINT/SIGTERM.

    :param job_path: filesystem path of the job to import and run
    :param overwrite: forwarded to ``_get_name_and_dir`` (reuse of working dir)
    :param rpc_server: optional RPC server, shut down together with the job
    :param settings: optional settings merged into the imported job description
    """
    job_desc = import_job_desc(job_path)
    if settings is not None:
        job_desc.update_settings(settings)
    base_name = job_desc.uniq_name
    self.env['job_desc'][base_name] = job_desc
    working_dir = os.path.join(self.working_dir, 'worker')
    clear = job_desc.settings.job.clear
    job_name, working_dir = self._get_name_and_dir(working_dir, base_name,
                                                   overwrite=overwrite,
                                                   clear=clear)

    clock = Clock()
    job = Job(self, job_path, job_name, job_desc=job_desc,
              working_dir=working_dir, rpc_server=rpc_server,
              manager=self.manager)
    # Run the job in a worker thread so the main thread stays free to
    # receive signals and poll job status.
    t = threading.Thread(target=job.run, args=(True, ))
    t.start()

    stopped = multiprocessing.Event()

    def stop(signum, frame):
        # Only the main process reacts to the signal; child processes
        # inherit the handler but must not trigger shutdown themselves.
        if 'main' not in multiprocessing.current_process().name.lower():
            return
        # Guard against repeated signals triggering shutdown twice.
        if stopped.is_set():
            return
        else:
            stopped.set()
        self.logger.debug("Catch interrupt signal, start to stop")
        job.shutdown()
        if rpc_server:
            rpc_server.shutdown()

    signal.signal(signal.SIGINT, stop)
    signal.signal(signal.SIGTERM, stop)

    # Poll the job every 5 seconds; count consecutive IDLE observations and
    # give up once MAX_IDLE_TIMES is exceeded.
    idle_times = 0
    while t.is_alive():
        if job.get_status() == FINISHED:
            break
        if job.get_status() == IDLE:
            idle_times += 1
            if idle_times > MAX_IDLE_TIMES:
                break
        else:
            idle_times = 0
        try:
            t.join(5)
        except IOError:
            # join can be interrupted by a signal on some platforms.
            break

    # Decide whether we (rather than the signal handler) must shut down.
    need_shutdown = False
    if not job.stopped.is_set() and job.get_status() == FINISHED:
        self.logger.debug(
            'All objects have been fetched, try to finish job')
        need_shutdown = True
    elif not stopped.is_set() and not t.is_alive():
        # Worker thread died without a signal-triggered stop.
        need_shutdown = True
    elif not job.stopped.is_set() and job.get_status() == IDLE:
        self.logger.debug('No bundle or url to perform, try to finish job')
        need_shutdown = True

    if need_shutdown is True:
        job.shutdown()
        if rpc_server:
            rpc_server.shutdown()

    self.logger.debug(
        'Job id:%s finished, spend %.2f seconds for running' % (job_name,
                                                                clock.clock()))
def run(self):
    """Round-robin scheduling loop over all priority slots.

    The slot equal to ``self.n_priorities`` is the incremental ('inc') slot;
    the others are ordinary priorities.  Each visited slot gets at most
    ``self.priorities_secs[slot]`` seconds to pull units and execute them.
    A slot that yields no units (or cannot get budget) is disabled via
    ``priority_deals`` until it produces units again.  On exit, counters are
    synced and state is saved.
    """
    try:
        curr_priority = 0
        # Whether each slot should be processed on the next pass.
        priority_deals = [True for _ in range(self.full_priorities)]
        while not self.stopped.is_set():
            priority_name = 'inc' if curr_priority == self.n_priorities \
                else curr_priority
            is_inc = priority_name == 'inc'

            # Block while suspended, re-checking every 5 seconds.
            while not self.nonsuspend.wait(5):
                continue
            if self.stopped.is_set():
                break

            if priority_deals[curr_priority] is True:
                self.logger.debug('start to process priority: %s' % priority_name)
                last = self.priorities_secs[curr_priority]
                clock = Clock()
                self.runnings = []
                try:
                    no_budgets_times = 0
                    while not self.stopped.is_set():
                        # Time budget for this slot exhausted.
                        if clock.clock() >= last:
                            break
                        if not is_inc:
                            if self._has_not_finished(curr_priority):
                                no_budgets_times = 0
                                self._get_unit(curr_priority, self.runnings)
                            else:
                                if self.settings.job.size == 'auto':
                                    self._get_unit(curr_priority, self.runnings)
                                    # if get unit success, then apply budget,
                                    # in case budget are wasted
                                    if len(self.runnings) > 0:
                                        status = self._apply(no_budgets_times)
                                        if status == CANNOT_APPLY:
                                            priority_deals[curr_priority] = False
                                            break
                                        elif status == APPLY_FAIL:
                                            no_budgets_times += 1
                                            if len(self.runnings) == 0:
                                                continue
                                        else:
                                            no_budgets_times = 0
                                # keep compability
                                else:
                                    status = self._apply(no_budgets_times)
                                    if status == CANNOT_APPLY:
                                        priority_deals[curr_priority] = False
                                        break
                                    elif status == APPLY_FAIL:
                                        no_budgets_times += 1
                                        if len(self.runnings) == 0:
                                            continue
                                    else:
                                        no_budgets_times = 0
                                    self._get_unit(curr_priority, self.runnings)
                        else:
                            self._get_unit(curr_priority, self.runnings)

                        if len(self.runnings) == 0:
                            # Nothing runnable: disable this slot until
                            # units show up again.
                            priority_deals[curr_priority] = False
                            break
                        else:
                            priority_deals[curr_priority] = True

                        if self.is_bundle:
                            self.logger.debug(
                                'process bundle from priority %s' % priority_name)
                            rest = min(last - clock.clock(),
                                       MAX_BUNDLE_RUNNING_SECONDS)
                            if rest <= 0:
                                break
                            self.running = self.runnings.pop()
                            obj = self.executor.execute(self.running, rest,
                                                        is_inc=is_inc)
                        else:
                            self.running = self.runnings.pop()
                            obj = self.executor.execute(self.running,
                                                        is_inc=is_inc)
                        self.running = None

                        if obj is not None:
                            # Requeue the partially-processed object and
                            # de-duplicate while preserving order.
                            self.runnings.insert(0, obj)
                            # FIX: materialize to a list — on Python 3,
                            # ``OrderedDict.fromkeys(...).keys()`` is a view
                            # and the later ``pop()``/``extend`` would fail.
                            self.runnings = list(
                                OrderedDict.fromkeys(self.runnings))
                finally:
                    # Park unprocessed units so they are not lost.
                    self.priorities_objs[curr_priority].extend(self.runnings)

            curr_priority = (curr_priority + 1) % self.full_priorities
    finally:
        self.counter_client.sync()
        self.save()
def run(self):
    """Round-robin scheduling loop over all priority slots.

    The slot equal to ``self.n_priorities`` is the incremental ('inc') slot;
    the others are ordinary priorities.  Each visited slot gets at most
    ``self.priorities_secs[slot]`` seconds to pull units and execute them.
    A slot that yields no units (or cannot get budget) is disabled via
    ``priority_deals`` until it produces units again.  On exit, counters are
    synced and state is saved.
    """
    try:
        curr_priority = 0
        # Whether each slot should be processed on the next pass.
        priority_deals = [True for _ in range(self.full_priorities)]
        while not self.stopped.is_set():
            priority_name = 'inc' if curr_priority == self.n_priorities \
                else curr_priority
            is_inc = priority_name == 'inc'

            # Block while suspended, re-checking every 5 seconds.
            while not self.nonsuspend.wait(5):
                continue
            if self.stopped.is_set():
                break

            if priority_deals[curr_priority] is True:
                self.logger.debug('start to process priority: %s' % priority_name)
                last = self.priorities_secs[curr_priority]
                clock = Clock()
                self.runnings = []
                try:
                    no_budgets_times = 0
                    while not self.stopped.is_set():
                        # Time budget for this slot exhausted.
                        if clock.clock() >= last:
                            break
                        if not is_inc:
                            if self._has_not_finished(curr_priority):
                                no_budgets_times = 0
                                self._get_unit(curr_priority, self.runnings)
                            else:
                                status = self._apply(no_budgets_times)
                                if status == CANNOT_APPLY:
                                    priority_deals[curr_priority] = False
                                    break
                                elif status == APPLY_FAIL:
                                    no_budgets_times += 1
                                    if len(self.runnings) == 0:
                                        continue
                                else:
                                    no_budgets_times = 0
                                self._get_unit(curr_priority, self.runnings)
                        else:
                            self._get_unit(curr_priority, self.runnings)

                        if len(self.runnings) == 0:
                            # Nothing runnable: disable this slot until
                            # units show up again.
                            priority_deals[curr_priority] = False
                            break
                        else:
                            priority_deals[curr_priority] = True

                        if self.is_bundle:
                            self.logger.debug(
                                'process bundle from priority %s' % priority_name)
                            rest = min(last - clock.clock(),
                                       MAX_BUNDLE_RUNNING_SECONDS)
                            if rest <= 0:
                                break
                            self.running = self.runnings.pop()
                            obj = self.executor.execute(self.running, rest,
                                                        is_inc=is_inc)
                        else:
                            self.running = self.runnings.pop()
                            obj = self.executor.execute(self.running,
                                                        is_inc=is_inc)
                        self.running = None

                        if obj is not None:
                            # Requeue the partially-processed object and
                            # de-duplicate while preserving order.
                            self.runnings.insert(0, obj)
                            # FIX: materialize to a list — on Python 3,
                            # ``OrderedDict.fromkeys(...).keys()`` is a view
                            # and the later ``pop()``/``extend`` would fail.
                            self.runnings = list(
                                OrderedDict.fromkeys(self.runnings))
                finally:
                    # Park unprocessed units so they are not lost.
                    self.priorities_objs[curr_priority].extend(self.runnings)

            curr_priority = (curr_priority + 1) % self.full_priorities
    finally:
        self.counter_client.sync()
        self.save()