def terminate_job_task(msg):
    """Coroutine: terminate a running job and report it as terminated.

    `msg` is the serialized job to cancel. The request is ignored when it
    cannot be unserialized, when its computation is unknown, or when it does
    not come from the computation's scheduler address. If the job is no
    longer in `self.job_infos` it is treated as already completed.

    Otherwise the job's process is asked to terminate and polled for up to
    20 intervals of 0.1s; a `subprocess.Popen` job is additionally killed
    after the 10th interval. Once the process has exited, a reply with status
    `DispyJob.Terminated` is sent with `self._send_job_reply`.

    `self`, `addr` and `coro` are taken from the enclosing scope.
    """
    assert coro is not None
    yield self.lock.acquire()
    try:
        # NOTE(review): `msg` comes from the network; if `unserialize` is
        # pickle-based it must only be fed trusted input - confirm.
        _job = unserialize(msg)
        compute = self.computations[_job.compute_id]
        # Explicit check instead of `assert`: assertions are stripped under
        # `python -O`, which would let any host cancel jobs.
        if addr[0] != compute.scheduler_ip_addr:
            raise ValueError('terminate request from unexpected address')
        job_info = self.job_infos.pop(_job.uid, None)
    except Exception:
        logger.debug('Ignoring job request from %s', addr[0])
        return
    finally:
        self.lock.release()

    if job_info is None:
        logger.debug('Job %s completed; ignoring cancel request from %s',
                     _job.uid, addr[0])
        return

    logger.debug('Terminating job %s', _job.uid)
    job_info.proc.terminate()
    # NOTE(review): when the process cannot be stopped below, the job has
    # already been removed from self.job_infos and no reply is sent - confirm
    # that this is the intended outcome.
    if isinstance(job_info.proc, multiprocessing.Process):
        for x in xrange(20):
            if job_info.proc.is_alive():
                yield coro.sleep(0.1)
            else:
                logger.debug('Process "%s" for job %s terminated',
                             compute.name, _job.uid)
                break
        else:
            logger.warning('Could not kill process %s', compute.name)
            return
    else:
        assert isinstance(job_info.proc, subprocess.Popen)
        for x in xrange(20):
            rc = job_info.proc.poll()
            if rc is not None:
                # Only report termination once the program has really exited.
                logger.debug('Program "%s" for job %s terminated with %s',
                             compute.name, _job.uid, rc)
                break
            if x == 10:
                # terminate() has not worked after ~1s; escalate to kill().
                logger.debug('Killing job %s', _job.uid)
                job_info.proc.kill()
            yield coro.sleep(0.1)
        else:
            logger.warning('Could not kill process %s', compute.name)
            return

    # Build a fresh job info carrying the Terminated status for the requester.
    reply_addr = (addr[0], compute.job_result_port)
    reply = _JobReply(_job, self.ext_ip_addr)
    job_info = _DispyJobInfo(reply, reply_addr, compute)
    reply.status = DispyJob.Terminated
    yield self._send_job_reply(job_info, resending=False, coro=coro)
def job_request_task(msg):
    """Coroutine: accept or reject a new job request received on `conn`.

    `msg` is the serialized job. A NAK reply is sent when no cpus are
    available, when the job's computation is unknown or was registered by a
    different scheduler address, or when the computation type is not
    recognised.

    Otherwise the files shipped with the job are saved under the
    computation's destination path, an ACK is sent, the job is recorded in
    `self.job_infos` and it is started: as a `multiprocessing.Process`
    running `_dispy_job_func` for function computations, or in a thread
    running `self.__job_program` for program computations.

    `self`, `conn`, `addr` and `coro` are taken from the enclosing scope.
    """
    assert coro is not None
    try:
        # NOTE(review): `msg` comes from the network; if `unserialize` is
        # pickle-based it must only be fed trusted input - confirm.
        _job = unserialize(msg)
    except Exception:
        logger.debug('Ignoring job request from %s', addr[0])
        logger.debug(traceback.format_exc())
        return

    yield self.lock.acquire()
    compute = self.computations.get(_job.compute_id, None)
    if compute is not None:
        # Only the scheduler this node is serving may submit jobs.
        if compute.scheduler_ip_addr != self.scheduler_ip_addr:
            compute = None
    yield self.lock.release()

    # NOTE(review): avail_cpus is read after the lock is released and only
    # decremented later, so concurrent requests could both pass this check -
    # confirm whether that is acceptable.
    if self.avail_cpus == 0:
        logger.warning('All cpus busy')
        try:
            yield conn.send_msg('NAK (all cpus busy)')
        except Exception:
            # Best effort: the job is rejected whether or not the NAK arrives.
            logger.debug('Failed to send response for new job to %s', str(addr))
        return
    elif compute is None:
        logger.warning('Invalid computation %s', _job.compute_id)
        try:
            yield conn.send_msg('NAK (invalid computation %s)' % _job.compute_id)
        except Exception:
            # Best effort: the job is rejected whether or not the NAK arrives.
            logger.debug('Failed to send response for new job to %s', str(addr))
        return

    reply_addr = (compute.scheduler_ip_addr, compute.job_result_port)
    logger.debug('New job id %s from %s', _job.uid, addr[0])

    # Save the files shipped with the job; files that cannot be written are
    # skipped and left out of the job's file list.
    files = []
    for f in _job.files:
        # basename() keeps the file inside the computation's directory.
        tgt = os.path.join(compute.dest_path, os.path.basename(f['name']))
        try:
            # `with` closes the handle even when write() fails.
            with open(tgt, 'wb') as fd:
                fd.write(f['data'])
        except Exception:
            logger.warning('Could not save file "%s"', tgt)
            continue
        try:
            os.utime(tgt, (f['stat'].st_atime, f['stat'].st_mtime))
            os.chmod(tgt, stat.S_IMODE(f['stat'].st_mode))
        except Exception:
            logger.debug('Could not set modes for "%s"', tgt)
        files.append(tgt)
    _job.files = files

    if compute.type == _Compute.func_type:
        reply = _JobReply(_job, self.ext_ip_addr)
        job_info = _DispyJobInfo(reply, reply_addr, compute)
        args = (job_info, self.certfile, self.keyfile, _job.args, _job.kwargs,
                self.reply_Q, compute.name, compute.code, compute.dest_path,
                _job.files)
        try:
            yield conn.send_msg('ACK')
        except Exception:
            logger.warning('Failed to send response for new job to %s', str(addr))
            return
        job_info.job_reply.status = DispyJob.Running
        job_info.proc = multiprocessing.Process(target=_dispy_job_func, args=args)
        yield self.lock.acquire()
        self.avail_cpus -= 1
        compute.pending_jobs += 1
        self.job_infos[_job.uid] = job_info
        self.lock.release()
        job_info.proc.start()
        return
    elif compute.type == _Compute.prog_type:
        try:
            yield conn.send_msg('ACK')
        except Exception:
            logger.warning('Failed to send response for new job to %s', str(addr))
            return
        reply = _JobReply(_job, self.ext_ip_addr)
        job_info = _DispyJobInfo(reply, reply_addr, compute)
        job_info.job_reply.status = DispyJob.Running
        yield self.lock.acquire()
        self.job_infos[_job.uid] = job_info
        self.avail_cpus -= 1
        compute.pending_jobs += 1
        yield self.lock.release()
        prog_thread = threading.Thread(target=self.__job_program,
                                       args=(_job, job_info))
        prog_thread.start()
        return
    else:
        try:
            yield conn.send_msg('NAK (invalid computation type "%s")' % compute.type)
        except Exception:
            logger.warning('Failed to send response for new job to %s', str(addr))