Example #1
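terminate_job_task is a dispy node coroutine that handles a job cancel request from the scheduler: it removes the job from the node's bookkeeping under the node lock, stops the worker (a multiprocessing.Process for function computations, a subprocess.Popen for program computations), and sends the job back to the scheduler with status DispyJob.Terminated.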
def terminate_job_task(msg):
    # asyncoro-style coroutine: 'yield' drives lock acquisition and sleeps,
    # and 'raise StopIteration' ends the task early
    assert coro is not None
    yield self.lock.acquire()
    try:
        # look up and remove the job while holding the node lock
        _job = unserialize(msg)
        compute = self.computations[_job.compute_id]
        assert addr[0] == compute.scheduler_ip_addr
        job_info = self.job_infos.pop(_job.uid, None)
    except:
        logger.debug('Ignoring invalid job cancel request from %s', addr[0])
        raise StopIteration
    finally:
        self.lock.release()
    if job_info is None:
        logger.debug('Job %s completed; ignoring cancel request from %s',
                     _job.uid, addr[0])
        raise StopIteration
    logger.debug('Terminating job %s', _job.uid)
    # ask the worker to stop, then poll until it exits:
    # multiprocessing.Process is checked with is_alive(), subprocess.Popen with poll()
    job_info.proc.terminate()
    if isinstance(job_info.proc, multiprocessing.Process):
        for x in xrange(20):
            if job_info.proc.is_alive():
                yield coro.sleep(0.1)
            else:
                logger.debug('Process "%s" for job %s terminated', compute.name, _job.uid)
                break
        else:
            logger.warning('Could not kill process %s', compute.name)
            raise StopIteration
    else:
        assert isinstance(job_info.proc, subprocess.Popen)
        for x in xrange(20):
            rc = job_info.proc.poll()
            if rc is not None:
                logger.debug('Program "%s" for job %s terminated with %s',
                             compute.name, _job.uid, rc)
                break
            if x == 10:
                # still running halfway through polling: escalate from terminate() to kill()
                logger.debug('Killing job %s', _job.uid)
                job_info.proc.kill()
            yield coro.sleep(0.1)
        else:
            logger.warning('Could not kill process %s', compute.name)
            raise StopIteration
    # report the cancelled job back to the scheduler with Terminated status
    reply_addr = (addr[0], compute.job_result_port)
    reply = _JobReply(_job, self.ext_ip_addr)
    job_info = _DispyJobInfo(reply, reply_addr, compute)
    reply.status = DispyJob.Terminated
    yield self._send_job_reply(job_info, resending=False, coro=coro)
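The terminate-then-kill escalation above can be exercised on its own, outside dispy. Below is a minimal sketch using only the standard library; the helper name stop_process and its parameters are illustrative, and the demo assumes a POSIX 'sleep' command.

import subprocess
import time

def stop_process(proc, attempts=20, poll_interval=0.1, kill_at=10):
    # Ask the process to exit, then poll; escalate to kill() partway through,
    # mirroring the polling loop in terminate_job_task above.
    proc.terminate()
    for attempt in range(attempts):
        rc = proc.poll()           # None while the process is still running
        if rc is not None:
            return rc              # exited; return its status code
        if attempt == kill_at:
            proc.kill()            # force termination (SIGKILL on POSIX)
        time.sleep(poll_interval)
    return None                    # still running after all attempts

if __name__ == '__main__':
    p = subprocess.Popen(['sleep', '60'])
    print('exit status: %s' % stop_process(p))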
Example #2
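job_request_task handles a new job submission on a dispy node: it validates the computation and checks for a free CPU (replying with a NAK otherwise), saves any files sent with the job into the computation's dest_path, acknowledges the request with an ACK, and then runs the job, in a separate process for function computations or via a helper thread for external programs.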
        def job_request_task(msg):
            assert coro is not None
            try:
                _job = unserialize(msg)
            except:
                logger.debug('Ignoring job request from %s', addr[0])
                logger.debug(traceback.format_exc())
                raise StopIteration
            yield self.lock.acquire()
            compute = self.computations.get(_job.compute_id, None)
            if compute is not None:
                if compute.scheduler_ip_addr != self.scheduler_ip_addr:
                    compute = None
            yield self.lock.release()
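            # reject the request with a NAK if no CPU is free or the computation is
            # unknown (or belongs to a different scheduler)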
            if self.avail_cpus == 0:
                logger.warning('All cpus busy')
                try:
                    yield conn.send_msg('NAK (all cpus busy)')
                except:
                    pass
                raise StopIteration
            elif compute is None:
                logger.warning('Invalid computation %s', _job.compute_id)
                try:
                    yield conn.send_msg('NAK (invalid computation %s)' % _job.compute_id)
                except:
                    pass
                raise StopIteration

            reply_addr = (compute.scheduler_ip_addr, compute.job_result_port)
            logger.debug('New job id %s from %s', _job.uid, addr[0])
            # save files sent with the job into the computation's dest_path,
            # preserving each file's timestamps and permission bits
            files = []
            for f in _job.files:
                tgt = os.path.join(compute.dest_path, os.path.basename(f['name']))
                try:
                    with open(tgt, 'wb') as fd:
                        fd.write(f['data'])
                except:
                    logger.warning('Could not save file "%s"', tgt)
                    continue
                try:
                    os.utime(tgt, (f['stat'].st_atime, f['stat'].st_mtime))
                    os.chmod(tgt, stat.S_IMODE(f['stat'].st_mode))
                except:
                    logger.debug('Could not set modes for "%s"', tgt)
                files.append(tgt)
            _job.files = files

            if compute.type == _Compute.func_type:
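                # function computation: run it in a child process via _dispy_job_func;
                # reply_Q carries the result back to this node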
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                args = (job_info, self.certfile, self.keyfile,
                        _job.args, _job.kwargs, self.reply_Q,
                        compute.name, compute.code, compute.dest_path, _job.files)
                try:
                    yield conn.send_msg('ACK')
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                job_info.job_reply.status = DispyJob.Running
                job_info.proc = multiprocessing.Process(target=_dispy_job_func, args=args)
                yield self.lock.acquire()
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                self.job_infos[_job.uid] = job_info
                self.lock.release()
                job_info.proc.start()
                raise StopIteration
            elif compute.type == _Compute.prog_type:
                try:
                    yield conn.send_msg('ACK')
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                job_info.job_reply.status = DispyJob.Running
                yield self.lock.acquire()
                self.job_infos[_job.uid] = job_info
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                yield self.lock.release()
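                # external programs are started from a helper thread (__job_program)
                # so this coroutine is not blocked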
                prog_thread = threading.Thread(target=self.__job_program, args=(_job, job_info))
                prog_thread.start()
                raise StopIteration
            else:
                try:
                    yield conn.send_msg('NAK (invalid computation type "%s")' % compute.type)
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))
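The file handling in job_request_task (write the transferred payload, then restore the original timestamps and permission bits) also works as a standalone helper. A minimal sketch; save_transferred_file and its arguments are illustrative names, not dispy API.

import os
import stat

def save_transferred_file(dest_dir, name, data, st):
    # 'st' is an os.stat_result captured on the sending side, as in _job.files above
    tgt = os.path.join(dest_dir, os.path.basename(name))
    with open(tgt, 'wb') as fd:
        fd.write(data)
    os.utime(tgt, (st.st_atime, st.st_mtime))     # restore access/modification times
    os.chmod(tgt, stat.S_IMODE(st.st_mode))       # restore the permission bits only
    return tgt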