Пример #1
0
def _dispy_job_func(__dispy_job_info, __dispy_job_certfile, __dispy_job_keyfile,
                    __dispy_job_args, __dispy_job_kwargs, __dispy_reply_Q,
                    __dispy_job_name, __dispy_job_code,
                    __dispy_path, __dispy_job_files=[]):
    """Internal use only.

    Execute one job and put its reply on ``__dispy_reply_Q``.

    The marshalled code in ``__dispy_job_code`` is executed, the callable
    named ``__dispy_job_name`` is looked up in the module globals and called
    with the unserialized positional and keyword arguments. The result (or
    the formatted traceback if anything raises), the final status and the
    captured stdout/stderr text are stored on ``__dispy_job_info.job_reply``,
    which is then put on the reply queue.

    ``__dispy_job_certfile`` and ``__dispy_job_keyfile`` are accepted but not
    used in this function. Files listed in ``__dispy_job_files`` are removed
    after the job has run; the default list is only iterated, never mutated.
    """
    # Run from the job's directory; note this changes the cwd of the process.
    os.chdir(__dispy_path)
    # Capture everything the job prints so it can be returned in the reply.
    sys.stdout = io.StringIO()
    sys.stderr = io.StringIO()
    __dispy_job_reply = __dispy_job_info.job_reply
    # Make modules in the job directory importable first.
    sys.path = [__dispy_path] + sys.path
    try:
        exec(marshal.loads(__dispy_job_code))
        # Copies every local name (including this function's own parameters)
        # into module globals; presumably this is what makes the names defined
        # by the executed code reachable through globals() below -- the local
        # variable names are therefore part of the behaviour; do not rename.
        globals().update(locals())
        __dispy_job_args = unserialize(__dispy_job_args)
        __dispy_job_kwargs = unserialize(__dispy_job_kwargs)
        __func = globals()[__dispy_job_name]
        __dispy_job_reply.result = __func(*__dispy_job_args, **__dispy_job_kwargs)
        __dispy_job_reply.status = DispyJob.Finished
    except:
        # Bare except: any failure, whether in loading the code or in the job
        # itself, is reported through the reply rather than propagated.
        __dispy_job_reply.exception = traceback.format_exc()
        __dispy_job_reply.status = DispyJob.Terminated
    # Best-effort cleanup of the files associated with this job.
    for f in __dispy_job_files:
        if os.path.isfile(f):
            try:
                os.remove(f)
            except:
                logger.debug('Could not remove "%s"', f)
    __dispy_job_reply.stdout = sys.stdout.getvalue()
    __dispy_job_reply.stderr = sys.stderr.getvalue()
    # Ignore SIGTERM from here on so that delivering the reply is not
    # interrupted by a termination request.
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
    __dispy_reply_Q.put(__dispy_job_reply)
Пример #2
0
 def sched_udp_proc(self, coro=None):
     """Daemon coroutine that answers UDP pings received on the scheduler port.

     Every datagram is expected to start with ``PING:`` followed by a
     serialized dict. A ping is answered only if this relay knows a scheduler
     (``self.scheduler_ip_addrs`` and ``self.scheduler_port`` are set), the
     ping's ``version`` equals ``__version__`` and the ping is not itself a
     relayed message (no truthy ``relay`` entry). The answer is a ``PING:``
     message with the scheduler addresses, port, version and ``relay='y'``,
     sent to the ``ip_addr``/``port`` given in the ping.

     Malformed or mismatched pings are logged and dropped; they never
     terminate the coroutine.

     :param coro: the coroutine object supplied by asyncoro.
     """
     coro.set_daemon()
     ping_prefix = 'PING:'.encode()
     sched_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     sched_sock.bind(('', self.scheduler_port))
     while 1:
         msg, addr = yield sched_sock.recvfrom(1024)
         if (not msg.startswith(ping_prefix) or
             not self.scheduler_ip_addrs or not self.scheduler_port):
             logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
             continue
         try:
             info = asyncoro.unserialize(msg[len(ping_prefix):])
             logger.debug('sched_sock: %s', info)
             # Explicit check instead of assert: asserts vanish under -O.
             if info['version'] != __version__:
                 raise ValueError('version mismatch: %s / %s' %
                                  (info['version'], __version__))
             # assert isinstance(info['cpus'], int)
             if info.get('relay', None):
                 logger.debug('Ignoring ping back from %s: %s', addr[0], info)
                 continue
             # Resolve the reply address here so that a ping without these
             # keys is rejected instead of raising further down.
             reply_addr = (info['ip_addr'], info['port'])
         except Exception:
             # Without this 'continue' a bad packet would fall through and use
             # an unbound (or stale) 'info'.
             logger.debug(traceback.format_exc())
             continue
         msg = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                'version': __version__, 'relay': 'y'}
         relay_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
         try:
             yield relay_sock.sendto(ping_prefix + asyncoro.serialize(msg), reply_addr)
         except Exception:
             # An unusable address in one ping must not stop the daemon.
             logger.debug('Could not reply to ping from %s', addr[0])
             logger.debug(traceback.format_exc())
         finally:
             relay_sock.close()
Пример #3
0
 def sched_udp_proc(self, coro=None):
     """Daemon coroutine that answers UDP pings received on the scheduler port.

     Datagrams must start with ``PING:`` followed by a serialized dict. When
     this relay has a scheduler configured (``self.scheduler_ip_addrs`` and
     ``self.scheduler_port``), the ping's ``version`` matches ``__version__``
     and the ping carries no truthy ``relay`` entry, a ``PING:`` reply with
     the scheduler addresses, port, version and ``relay='y'`` is sent to the
     ``ip_addr``/``port`` named in the ping.

     Invalid pings are logged and skipped so that a single bad packet cannot
     end the coroutine.

     :param coro: the coroutine object supplied by asyncoro.
     """
     coro.set_daemon()
     ping_prefix = 'PING:'.encode()
     sched_sock = asyncoro.AsyncSocket(
         socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     sched_sock.bind(('', self.scheduler_port))
     while 1:
         msg, addr = yield sched_sock.recvfrom(1024)
         if (not msg.startswith(ping_prefix)
                 or not self.scheduler_ip_addrs or not self.scheduler_port):
             logger.debug('Ignoring ping message from %s (%s)', addr[0],
                          addr[1])
             continue
         try:
             info = asyncoro.unserialize(msg[len(ping_prefix):])
             logger.debug('sched_sock: %s', info)
             # Raise explicitly; an assert would be removed under -O.
             if info['version'] != __version__:
                 raise ValueError('version mismatch: %s / %s' %
                                  (info['version'], __version__))
             # assert isinstance(info['cpus'], int)
             if info.get('relay', None):
                 logger.debug('Ignoring ping back from %s: %s', addr[0],
                              info)
                 continue
             # Looked up inside the try so missing keys are handled here.
             reply_addr = (info['ip_addr'], info['port'])
         except Exception:
             # Skip the packet; falling through would use an unbound or
             # stale 'info'.
             logger.debug(traceback.format_exc())
             continue
         msg = {
             'ip_addrs': self.scheduler_ip_addrs,
             'port': self.scheduler_port,
             'version': __version__,
             'relay': 'y',
         }
         relay_sock = asyncoro.AsyncSocket(
             socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
         try:
             yield relay_sock.sendto(
                 ping_prefix + asyncoro.serialize(msg), reply_addr)
         except Exception:
             # A reply that cannot be delivered must not stop the daemon.
             logger.debug('Could not reply to ping from %s', addr[0])
             logger.debug(traceback.format_exc())
         finally:
             relay_sock.close()
Пример #4
0
    def listen_udp_proc(self, coro=None):
        """Daemon coroutine that relays scheduler pings as UDP broadcasts.

        On start, if a scheduler address and port are already known, one
        ``PING:`` message describing that scheduler is broadcast to
        ``self.node_port``. The coroutine then listens on
        ``self.listen_port``; every valid ping with a matching version updates
        ``self.scheduler_ip_addrs`` / ``self.scheduler_port`` and, unless it
        is marked as relayed, is re-broadcast to ``self.node_port``.

        :param coro: the coroutine object supplied by asyncoro.
        """
        coro.set_daemon()
        ping_tag = 'PING:'.encode()

        announce_sock = asyncoro.AsyncSocket(
            socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        announce_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        if self.scheduler_ip_addrs and self.scheduler_port:
            announcement = {
                'ip_addrs': self.scheduler_ip_addrs,
                'port': self.scheduler_port,
                'version': __version__,
                'sign': None
            }
            # NOTE(review): unlike the send in the loop below, this call is
            # not yielded -- confirm against asyncoro that the initial
            # broadcast is actually transmitted.
            announce_sock.sendto(ping_tag + asyncoro.serialize(announcement),
                                 ('<broadcast>', self.node_port))
        announce_sock.close()

        listen_sock = asyncoro.AsyncSocket(
            socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_sock.bind(('', self.listen_port))

        while True:
            packet, sender = yield listen_sock.recvfrom(1024)
            if not packet.startswith(ping_tag):
                logger.debug('Ignoring message "%s" from %s',
                             packet[:5], sender[0])
                continue
            logger.debug('Ping message from %s (%s)', sender[0], sender[1])
            try:
                info = asyncoro.unserialize(packet[len(ping_tag):])
                if info['version'] != __version__:
                    logger.warning(
                        'Ignoring %s due to version mismatch: %s / %s',
                        info['ip_addrs'], info['version'], __version__)
                    continue
                # The sender's own address is appended to the advertised list.
                self.scheduler_ip_addrs = info['ip_addrs'] + [sender[0]]
                self.scheduler_port = info['port']
            except:
                logger.debug('Ignoring ping message from %s (%s)', sender[0],
                             sender[1])
                logger.debug(traceback.format_exc())
                continue
            if info.get('relay', None):
                logger.debug('Ignoring ping back (from %s)', sender[0])
                continue
            logger.debug('relaying ping from %s / %s' %
                         (info['ip_addrs'], sender[0]))
            if self.node_port == self.listen_port:
                # Mark the message so our own broadcast is recognised if it
                # loops back to this socket.
                info['relay'] = 'y'
            relay_sock = asyncoro.AsyncSocket(
                socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            relay_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield relay_sock.sendto(ping_tag + asyncoro.serialize(info),
                                    ('<broadcast>', self.node_port))
            relay_sock.close()
Пример #5
0
 def xfer_file_task(msg):
     """Coroutine body that receives one file for a computation.

     ``msg`` is a serialized transfer request providing ``compute_id``,
     ``name`` and ``stat_buf``. The file is stored under the computation's
     ``dest_path`` using the base name of ``name``. If an identical file
     (according to ``_same_file``) is already there, only its use count in
     ``self.file_uses`` is incremented. Otherwise ``stat_buf.st_size`` bytes
     are read from ``conn`` and written to the target, the access/modify
     times and mode are restored, and 'ACK', 'NAK (...)' or 'NACK' is sent
     back on ``conn``.

     Uses ``self``, ``conn``, ``addr`` and ``coro`` from the enclosing scope.
     """
     assert coro is not None
     try:
         xf = unserialize(msg)
     except Exception:
         logger.debug('Ignoring file trasnfer request from %s', addr[0])
         # Plain return: 'raise StopIteration' inside a generator becomes
         # RuntimeError under PEP 479.
         return
     resp = ''
     if xf.compute_id not in self.computations:
         logger.error('computation "%s" is invalid', xf.compute_id)
         return
     tgt = os.path.join(self.computations[xf.compute_id].dest_path,
                        os.path.basename(xf.name))
     if os.path.isfile(tgt):
         if _same_file(tgt, xf):
             yield self.lock.acquire()
             if tgt in self.file_uses:
                 self.file_uses[tgt] += 1
             else:
                 self.file_uses[tgt] = 1
             yield self.lock.release()
             resp = 'ACK'
         else:
             logger.warning('File "%s" already exists with different status as "%s"',
                            xf.name, tgt)
     # NOTE(review): when the file already exists and is identical, 'ACK' is
     # set above but nothing is sent, because the reply is only sent inside
     # the branch below -- confirm the client does not wait for a reply.
     if not resp:
         logger.debug('Copying file %s to %s (%s)', xf.name, tgt, xf.stat_buf.st_size)
         try:
             n = 0
             # 'with' closes the file even if receiving or writing fails.
             with open(tgt, 'wb') as fd:
                 while n < xf.stat_buf.st_size:
                     data = yield conn.recvall(min(xf.stat_buf.st_size - n, 10240000))
                     if not data:
                         break
                     fd.write(data)
                     n += len(data)
                     if self.max_file_size and n > self.max_file_size:
                         logger.warning('File "%s" is too big (%s); it is truncated', tgt, n)
                         break
             if n < xf.stat_buf.st_size:
                 resp = 'NAK (read only %s bytes)' % n
             else:
                 resp = 'ACK'
                 logger.debug('Copied file %s, %s', tgt, resp)
                 os.utime(tgt, (xf.stat_buf.st_atime, xf.stat_buf.st_mtime))
                 os.chmod(tgt, stat.S_IMODE(xf.stat_buf.st_mode))
                 self.file_uses[tgt] = 1
         except Exception:
             # 'except Exception' lets GeneratorExit propagate when the
             # coroutine is closed while waiting for data.
             logger.warning('Copying file "%s" failed with "%s"',
                            xf.name, traceback.format_exc())
             resp = 'NACK'
         try:
             yield conn.send_msg(resp)
         except Exception:
             logger.debug('Could not send reply for "%s"', xf.name)
Пример #6
0
 def terminate_job_task(msg):
     """Coroutine body that terminates a running job on request.

     ``msg`` is a serialized job. The request is honoured only if the job's
     computation is known and the sender (``addr[0]``) is that computation's
     scheduler. The job's process is asked to terminate and then polled for
     up to 20 intervals of 0.1s; a ``subprocess.Popen`` program is killed
     after 10 unsuccessful polls. When the process has ended, a reply with
     status ``DispyJob.Terminated`` is sent to the scheduler.

     Uses ``self``, ``addr`` and ``coro`` from the enclosing scope.
     """
     assert coro is not None
     yield self.lock.acquire()
     try:
         _job = unserialize(msg)
         compute = self.computations[_job.compute_id]
         # Explicit check: an assert would be stripped under -O and let any
         # host terminate jobs.
         if addr[0] != compute.scheduler_ip_addr:
             raise ValueError('terminate request not from the scheduler')
         job_info = self.job_infos.pop(_job.uid, None)
     except Exception:
         logger.debug('Ignoring job request from %s', addr[0])
         # Plain return: 'raise StopIteration' in a generator is an error
         # under PEP 479. The lock is still released by 'finally'.
         return
     finally:
         self.lock.release()
     if job_info is None:
         logger.debug('Job %s completed; ignoring cancel request from %s',
                      _job.uid, addr[0])
         return
     logger.debug('Terminating job %s', _job.uid)
     job_info.proc.terminate()
     if isinstance(job_info.proc, multiprocessing.Process):
         for _ in range(20):
             if job_info.proc.is_alive():
                 yield coro.sleep(0.1)
             else:
                 logger.debug('Process "%s" for job %s terminated', compute.name, _job.uid)
                 break
         else:
             logger.warning('Could not kill process %s', compute.name)
             return
     else:
         assert isinstance(job_info.proc, subprocess.Popen)
         for attempt in range(20):
             rc = job_info.proc.poll()
             if rc is not None:
                 # Log only once the program has really exited; previously
                 # this was logged with rc=None on every poll.
                 logger.debug('Program "%s" for job %s terminated with %s',
                              compute.name, _job.uid, rc)
                 break
             if attempt == 10:
                 # terminate() has not worked after ~1s; escalate to kill().
                 logger.debug('Killing job %s', _job.uid)
                 job_info.proc.kill()
             yield coro.sleep(0.1)
         else:
             logger.warning('Could not kill process %s', compute.name)
             return
     reply_addr = (addr[0], compute.job_result_port)
     reply = _JobReply(_job, self.ext_ip_addr)
     job_info = _DispyJobInfo(reply, reply_addr, compute)
     reply.status = DispyJob.Terminated
     yield self._send_job_reply(job_info, resending=False, coro=coro)
Пример #7
0
    def listen_udp_proc(self, coro=None):
        """Daemon coroutine: listen for UDP pings and re-broadcast them.

        If a scheduler is already known when the coroutine starts, a single
        ``PING:`` message for it is broadcast to ``self.node_port``. After
        that, pings received on ``self.listen_port`` whose version matches
        ``__version__`` update ``self.scheduler_ip_addrs`` and
        ``self.scheduler_port`` and are broadcast to ``self.node_port``,
        except when they carry a truthy ``relay`` entry.

        :param coro: the coroutine object supplied by asyncoro.
        """
        def new_broadcast_socket():
            # UDP socket with broadcasting enabled.
            sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            return sock

        coro.set_daemon()
        node_addr = ('<broadcast>', self.node_port)

        startup_sock = new_broadcast_socket()
        if self.scheduler_ip_addrs and self.scheduler_port:
            startup_ping = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                            'version': __version__, 'sign': None}
            # NOTE(review): this send is not yielded, unlike the one in the
            # loop below -- confirm against asyncoro that it takes effect.
            startup_sock.sendto('PING:'.encode() + asyncoro.serialize(startup_ping), node_addr)
        startup_sock.close()

        listen_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_sock.bind(('', self.listen_port))

        while True:
            data, peer = yield listen_sock.recvfrom(1024)
            peer_ip = peer[0]
            if not data.startswith('PING:'.encode()):
                logger.debug('Ignoring message "%s" from %s', data[:5], peer_ip)
                continue
            logger.debug('Ping message from %s (%s)', peer_ip, peer[1])
            try:
                info = asyncoro.unserialize(data[len('PING:'.encode()):])
                if info['version'] != __version__:
                    logger.warning('Ignoring %s due to version mismatch: %s / %s',
                                   info['ip_addrs'], info['version'], __version__)
                    continue
                # The address the ping came from is added to the advertised
                # scheduler addresses.
                self.scheduler_ip_addrs = info['ip_addrs'] + [peer_ip]
                self.scheduler_port = info['port']
            except:
                logger.debug('Ignoring ping message from %s (%s)', peer_ip, peer[1])
                logger.debug(traceback.format_exc())
                continue
            if info.get('relay', None):
                logger.debug('Ignoring ping back (from %s)', peer_ip)
                continue
            logger.debug('relaying ping from %s / %s', info['ip_addrs'], peer_ip)
            if self.node_port == self.listen_port:
                # Tag the message so it can be recognised if it loops back.
                info['relay'] = 'y'
            out_sock = new_broadcast_socket()
            yield out_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                                  ('<broadcast>', self.node_port))
            out_sock.close()
Пример #8
0
 def tcp_task(conn, addr, coro=None):
     """Coroutine that handles one TCP ping and relays it as a UDP broadcast.

     Reads ``auth_len`` bytes followed by one message from ``conn`` (5s
     timeout) and closes the connection. The message, minus its 5-byte
     ``PING:`` prefix, is unserialized; if its version matches
     ``__version__`` the relay's scheduler address list and port are updated
     and, unless the ping is marked as relayed, it is broadcast to
     ``self.node_port``.

     Uses ``self`` and ``auth_len`` from the enclosing scope.

     :param conn: connected asynchronous socket for this request.
     :param addr: ``(host, port)`` of the peer.
     :param coro: the coroutine object supplied by asyncoro.
     """
     conn.settimeout(5)
     try:
         # NOTE(review): the authentication bytes are read and discarded
         # without being checked -- confirm this is intended.
         yield conn.recvall(auth_len)
         msg = yield conn.recv_msg()
     except Exception:
         logger.debug(traceback.format_exc())
         logger.debug('Ignoring invalid TCP message from %s:%s',
                      addr[0], addr[1])
         # Plain return: 'raise StopIteration' in a generator is an error
         # under PEP 479.
         return
     finally:
         conn.close()
     logger.debug('Ping message from %s (%s)', addr[0], addr[1])
     try:
         info = asyncoro.unserialize(msg[len('PING:'.encode()):])
         if info['version'] != __version__:
             logger.warning(
                 'Ignoring %s due to version mismatch: %s / %s',
                 info['ip_addrs'], info['version'], __version__)
             # Returning here (instead of raising inside the try) avoids the
             # handler below logging a second, misleading message.
             return
         # Read both values before storing either, so a ping without 'port'
         # cannot leave the address list updated but the port stale.
         ip_addrs = info['ip_addrs'] + [addr[0]]
         port = info['port']
     except Exception:
         logger.debug('Ignoring ping message from %s (%s)', addr[0],
                      addr[1])
         logger.debug(traceback.format_exc())
         return
     # TODO: since dispynetrelay is not aware of computations
     # closing, if more than one client sends ping, nodes will
     # respond to different clients
     self.scheduler_ip_addrs = ip_addrs
     self.scheduler_port = port
     if info.get('relay', None):
         logger.debug('Ignoring ping back (from %s)', addr[0])
         return
     logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
     if self.node_port == self.listen_port:
         # Mark the message so it is recognised if it loops back to self.
         info['relay'] = 'y'
     bc_sock = asyncoro.AsyncSocket(
         socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     try:
         bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
         yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                              ('<broadcast>', self.node_port))
     finally:
         # Close the socket even if the broadcast fails.
         bc_sock.close()
Пример #9
0
 def tcp_task(conn, addr, coro=None):
     """Coroutine that handles one TCP ping and relays it as a UDP broadcast.

     ``auth_len`` bytes and then one message are read from ``conn`` with a
     5s timeout, after which the connection is closed. The message without
     its 5-byte ``PING:`` prefix is unserialized. When the version matches
     ``__version__``, ``self.scheduler_ip_addrs`` and ``self.scheduler_port``
     are updated and the ping is broadcast to ``self.node_port`` unless it
     is marked as relayed.

     Uses ``self`` and ``auth_len`` from the enclosing scope.

     :param conn: connected asynchronous socket for this request.
     :param addr: ``(host, port)`` of the peer.
     :param coro: the coroutine object supplied by asyncoro.
     """
     conn.settimeout(5)
     try:
         # NOTE(review): the authentication bytes are consumed but never
         # verified here -- confirm this is intended.
         yield conn.recvall(auth_len)
         msg = yield conn.recv_msg()
     except Exception:
         logger.debug(traceback.format_exc())
         logger.debug('Ignoring invalid TCP message from %s:%s', addr[0], addr[1])
         # 'return' replaces 'raise StopIteration', which PEP 479 turns into
         # RuntimeError inside generators.
         return
     finally:
         conn.close()
     logger.debug('Ping message from %s (%s)', addr[0], addr[1])
     try:
         info = asyncoro.unserialize(msg[len('PING:'.encode()):])
         if info['version'] != __version__:
             logger.warning('Ignoring %s due to version mismatch: %s / %s',
                            info['ip_addrs'], info['version'], __version__)
             # A return is not caught by the handler below, so a version
             # mismatch is no longer also logged as a parse failure.
             return
         # Collect both values first; assign only once both are available.
         ip_addrs = info['ip_addrs'] + [addr[0]]
         port = info['port']
     except Exception:
         logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
         logger.debug(traceback.format_exc())
         return
     # TODO: since dispynetrelay is not aware of computations
     # closing, if more than one client sends ping, nodes will
     # respond to different clients
     self.scheduler_ip_addrs = ip_addrs
     self.scheduler_port = port
     if info.get('relay', None):
         logger.debug('Ignoring ping back (from %s)', addr[0])
         return
     logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
     if self.node_port == self.listen_port:
         info['relay'] = 'y'  # 'check if this message loops back to self
     bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     try:
         bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
         yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                              ('<broadcast>', self.node_port))
     finally:
         # Always release the socket, also when sending fails.
         bc_sock.close()
Пример #10
0
    def __job_program(self, _job, job_info):
        """Run a program-type job to completion and queue its reply.

        The command line is the computation's ``name`` followed by the
        unserialized ``_job.args``. The program is started from the
        computation's ``dest_path`` with that directory placed first on
        ``PATH``. Its stdout, stderr and exit code are stored on
        ``job_info.job_reply``; on any failure the traceback is stored and
        the status is set to ``DispyJob.Terminated``. The reply is always
        put on ``self.reply_Q``.

        :param _job: job description providing ``compute_id`` and ``args``.
        :param job_info: bookkeeping object; its ``proc`` attribute is set to
            the started ``subprocess.Popen`` instance.
        """
        compute = self.computations[_job.compute_id]
        program = [compute.name]
        program.extend(unserialize(_job.args))
        logger.debug('Executing "%s"', str(program))
        reply = job_info.job_reply
        try:
            # NOTE(review): chdir affects the whole process, not only the
            # child; kept to preserve the existing behaviour.
            os.chdir(compute.dest_path)
            env = dict(os.environ)
            # os.pathsep instead of a hard-coded ':' (wrong on Windows), and
            # tolerate an environment that has no PATH at all.
            search_path = [compute.dest_path]
            if env.get('PATH'):
                search_path.append(env['PATH'])
            env['PATH'] = os.pathsep.join(search_path)
            job_info.proc = subprocess.Popen(program, stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE, env=env)
            reply.stdout, reply.stderr = job_info.proc.communicate()
            reply.result = job_info.proc.returncode
            reply.status = DispyJob.Finished
        except Exception:
            logger.debug('Executing %s failed with %s', str(program), str(sys.exc_info()))
            reply.exception = traceback.format_exc()
            reply.status = DispyJob.Terminated
        self.reply_Q.put(reply)
Пример #11
0
    def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                    scheduler_node=None, scheduler_port=51347):
        """Relay ping messages between a scheduler and nodes; never returns.

        Pings received on ``node_port`` are re-broadcast to ``node_port`` and
        the scheduler address/port they carry are remembered. Pings received
        on ``scheduler_port`` are forwarded to the remembered scheduler.

        :param ip_addr: address of this host, optionally as ``addr/bits``;
            when empty the address of the local host name is used.
        :param netmask: netmask as dotted string or integer; when omitted it
            is derived from the ``/bits`` suffix of ``ip_addr``, falling back
            to '255.255.255.255'.
        :param node_port: UDP port nodes listen on.
        :param scheduler_node: optional scheduler host, resolved with
            ``_node_ipaddr``; if it resolves, an initial ping is broadcast.
        :param scheduler_port: UDP port the scheduler listens on.
        :raises socket.error: if ``ip_addr`` is not a valid IPv4 address.
        """
        netaddr = None
        if not netmask:
            try:
                ip_addr, bits = ip_addr.split('/')
                socket.inet_aton(ip_addr)
                netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except Exception:
                # No usable '/bits' suffix: fall back to a host-only mask.
                netmask = '255.255.255.255'
        if ip_addr:
            socket.inet_aton(ip_addr)
        else:
            ip_addr = socket.gethostbyname(socket.gethostname())
        if not netaddr and netmask:
            try:
                if isinstance(netmask, str):
                    netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
                elif not isinstance(netmask, int):
                    raise ValueError('netmask must be a string or an integer')
                if netmask <= 0:
                    raise ValueError('netmask must be positive')
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except Exception:
                logger.warning('Invalid netmask')

        # Both values must be packable as 32-bit addresses; otherwise the
        # "own ping" filter below is disabled.
        try:
            socket.inet_ntoa(struct.pack('>L', netaddr))
            socket.inet_ntoa(struct.pack('>L', netmask))
        except Exception:
            netaddr = netmask = None

        bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        bc_sock.bind(('', 0))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        scheduler_ip_addr = _node_ipaddr(scheduler_node)
        if scheduler_ip_addr and scheduler_port:
            relay_request = serialize({'ip_addr':scheduler_ip_addr, 'port':scheduler_port,
                                       'version':_dispy_version, 'sign':None})
            bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))

        node_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        node_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        node_sock.bind(('', node_port))
        sched_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sched_sock.bind(('', scheduler_port))
        logger.info('Listening on %s:%s/%s', ip_addr, node_port, scheduler_port)
        while True:
            ready = select.select([node_sock, sched_sock], [], [])[0]
            for sock in ready:
                if sock == node_sock:
                    msg, addr = node_sock.recvfrom(1024)
                    if not msg.startswith('PING:'):
                        logger.debug('Ignoring message "%s" from %s',
                                     msg[:min(len(msg), 5)], addr[0])
                        continue
                    if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                        logger.debug('Ignoring own ping (from %s)', addr[0])
                        continue
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        info = unserialize(msg[len('PING:'):])
                        ping_ip_addr = info['ip_addr']
                        ping_port = info['port']
                        # Explicit checks: asserts are stripped under -O.
                        if info['version'] != _dispy_version:
                            raise ValueError('version mismatch')
                        # scheduler_sign = info['sign']
                        if not isinstance(ping_port, int):
                            raise ValueError('invalid port')
                    except Exception:
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                        logger.debug(traceback.format_exc())
                        continue
                    # Remember the scheduler only after the ping has been
                    # validated, so a rejected ping cannot overwrite it.
                    scheduler_ip_addr = ping_ip_addr
                    scheduler_port = ping_port
                    logger.debug('relaying ping from %s / %s', info['ip_addr'], addr[0])
                    if scheduler_ip_addr is None:
                        # Ping carried no address: use the sender's.
                        info['ip_addr'] = scheduler_ip_addr = addr[0]
                    relay_request = serialize(info)
                    bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))
                else:
                    assert sock == sched_sock
                    msg, addr = sched_sock.recvfrom(1024)
                    if msg.startswith('PING:') and scheduler_ip_addr and scheduler_port:
                        try:
                            # Strip the same prefix that was just tested for.
                            info = unserialize(msg[len('PING:'):])
                            if info['version'] != _dispy_version:
                                raise ValueError('version mismatch')
                            if not isinstance(info['ip_addr'], str):
                                raise ValueError('invalid ip_addr')
                            if not isinstance(info['port'], int):
                                raise ValueError('invalid port')
                            # assert isinstance(info['cpus'], int)
                            info['scheduler_ip_addr'] = scheduler_ip_addr
                            relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                            try:
                                relay_sock.sendto('PING:' + serialize(info),
                                                  (scheduler_ip_addr, scheduler_port))
                            finally:
                                # Close the socket even if sending fails.
                                relay_sock.close()
                        except Exception:
                            logger.debug(traceback.format_exc())
                            logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
Пример #12
0
        def retrieve_job_task(msg):
            """Coroutine body that sends a stored job reply back to a client.

            ``msg`` is a serialized dict with ``uid``, ``hash`` and
            ``compute_id``. If the job is still tracked in ``self.job_infos``
            its current reply is sent. Otherwise every directory under
            ``self.dest_path_prefix`` is searched for a saved
            ``_dispy_job_reply_<uid>`` file whose ``hash`` matches; that reply
            is sent and, once acknowledged with 'ACK', the file is removed and
            the computation's ``pending_results`` count is decremented (the
            computation is cleaned up when it reaches zero). If nothing
            matches, an 'Invalid job' message is sent.

            Uses ``self``, ``conn`` and ``coro`` from the enclosing scope.
            """
            assert coro is not None
            try:
                req = unserialize(msg)
                # Explicit validation: asserts are stripped under -O.
                if (req['uid'] is None or req['hash'] is None or
                        req['compute_id'] is None):
                    raise ValueError('incomplete job request')
            except Exception:
                resp = serialize('Invalid job')
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    logger.debug('Could not send reply for invalid job request')
                # Plain return: 'raise StopIteration' in a generator is an
                # error under PEP 479.
                return

            job_info = self.job_infos.get(req['uid'], None)
            if job_info is not None:
                try:
                    yield conn.send_msg(serialize(job_info.job_reply))
                    # The acknowledgement is read but need not be checked.
                    yield conn.recv_msg()
                except Exception:
                    logger.debug('Could not send reply for job %s', req['uid'])
                return

            for d in os.listdir(self.dest_path_prefix):
                info_file = os.path.join(self.dest_path_prefix, d,
                                         '_dispy_job_reply_%s' % req['uid'])
                if not os.path.isfile(info_file):
                    continue
                try:
                    # The file lives under dest_path_prefix; 'with' closes it
                    # even when unpickling fails.
                    with open(info_file, 'rb') as fd:
                        job_reply = pickle.load(fd)
                except Exception:
                    job_reply = None
                if not (hasattr(job_reply, 'hash') and job_reply.hash == req['hash']):
                    continue
                try:
                    yield conn.send_msg(serialize(job_reply))
                    ack = yield conn.recv_msg()
                    if ack != 'ACK':
                        raise ValueError('reply not acknowledged')
                except Exception:
                    logger.debug('Could not send reply for job %s', req['uid'])
                    return
                try:
                    os.remove(info_file)
                except Exception:
                    logger.debug('Could not remove "%s"', info_file)
                    return
                yield self.lock.acquire()
                try:
                    compute = self.computations.get(req['compute_id'], None)
                    if compute is not None:
                        compute.pending_results -= 1
                        if compute.pending_results == 0:
                            compute.zombie = True
                            self.cleanup_computation(compute)
                except Exception:
                    logger.debug('Could not update computation for job %s', req['uid'])
                finally:
                    # Release the lock even if the bookkeeping fails;
                    # otherwise every later acquire would block forever.
                    self.lock.release()
                return

            # No saved reply matched the request.
            resp = serialize('Invalid job: %s' % req['uid'])
            if resp:
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    logger.debug('Could not send reply for job %s', req['uid'])
Пример #13
0
        def add_computation_task(msg):
            """Coroutine body that registers a computation sent by a scheduler.

            ``msg`` is a serialized computation. The request is rejected with
            'Busy' when this node is already bound to a different scheduler.
            Otherwise a destination directory is created under
            ``self.dest_path_prefix``, function-type code is compiled and
            marshalled, and the list of files that still have to be
            transferred is worked out. On success the computation is stored in
            ``self.computations``, the scheduler address, port and pulse
            interval are recorded and 'ACK' (optionally followed by
            ':XFER_FILES:' and the serialized file list) is sent; otherwise a
            'NACK ...' message is sent and the directory is removed.

            Uses ``self``, ``conn``, ``addr`` and ``coro`` from the enclosing
            scope.

            NOTE(review): ``raise StopIteration`` is used to leave the
            generator; under PEP 479 (Python 3.7+) that becomes RuntimeError.
            ``xrange`` and ``.encode('hex')`` below are Python 2 only.
            NOTE(review): ``self.lock`` is released by hand on each exit path;
            an unexpected exception between acquire and release (for example
            while logging ``xfer_files``) would leave it held.
            """
            assert coro is not None
            try:
                compute = unserialize(msg)
            except:
                logger.debug('Ignoring computation request from %s', addr[0])
                try:
                    yield conn.send_msg('Invalid computation request')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            yield self.lock.acquire()
            # Accept only if the node is free or already serves this scheduler.
            if not ((self.scheduler_ip_addr is None) or
                    (self.scheduler_ip_addr == compute.scheduler_ip_addr and \
                     self.scheduler_port == compute.scheduler_port)):
                logger.debug('Ignoring computation request from %s: %s, %s, %s',
                             compute.scheduler_ip_addr, self.scheduler_ip_addr,
                             self.avail_cpus, self.cpus)
                self.lock.release()
                try:
                    yield conn.send_msg('Busy')
                except:
                    pass
                raise StopIteration

            resp = 'ACK'
            if compute.dest_path and isinstance(compute.dest_path, str):
                # Strip leading/trailing separators so the join below keeps
                # the path under dest_path_prefix.
                compute.dest_path = compute.dest_path.strip(os.sep)
            else:
                # No path requested: try up to 20 random names until one does
                # not exist yet.
                for x in xrange(20):
                    compute.dest_path = os.urandom(8).encode('hex')
                    if compute.dest_path.find(os.sep) >= 0:
                        continue
                    if not os.path.isdir(os.path.join(self.dest_path_prefix, compute.dest_path)):
                        break
                else:
                    logger.warning('Failed to create unique dest_path: %s', compute.dest_path)
                    resp = 'NACK'
            # NOTE(review): processing continues here even when resp is
            # already 'NACK' from the loop above.
            compute.dest_path = os.path.join(self.dest_path_prefix, compute.dest_path)
            try:
                os.makedirs(compute.dest_path)
                # Restrict the directory to the owner (read/write/execute).
                os.chmod(compute.dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
                logger.debug('dest_path for "%s": %s', compute.name, compute.dest_path)
            except:
                logger.warning('Invalid destination path: "%s"', compute.dest_path)
                # NOTE(review): this also runs when makedirs failed because
                # the directory already existed, removing it if it is empty.
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                self.lock.release()
                try:
                    yield conn.send_msg('NACK (Invalid dest_path)')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            if compute.id in self.computations:
                logger.warning('Computation "%s" (%s) is being replaced',
                               compute.name, compute.id)
            # Node-side bookkeeping attributes attached to the computation.
            setattr(compute, 'last_pulse', time.time())
            setattr(compute, 'pending_jobs', 0)
            setattr(compute, 'pending_results', 0)
            setattr(compute, 'zombie', False)
            logger.debug('xfer_files given: %s', ','.join(xf.name for xf in compute.xfer_files))
            if compute.type == _Compute.func_type:
                # Compile once here and keep the marshalled code object.
                try:
                    code = compile(compute.code, '<string>', 'exec')
                except:
                    logger.warning('Computation "%s" could not be compiled', compute.name)
                    if os.path.isdir(compute.dest_path):
                        os.rmdir(compute.dest_path)
                    self.lock.release()
                    try:
                        yield conn.send_msg('NACK (Compilation failed)')
                    except:
                        logger.warning('Failed to send reply to %s', str(addr))
                    raise StopIteration
                compute.code = marshal.dumps(code)
            elif compute.type == _Compute.prog_type:
                assert not compute.code
                # Programs are referred to by their path inside dest_path.
                compute.name = os.path.join(compute.dest_path, os.path.basename(compute.name))

            # Work out which files still need to be sent by the scheduler.
            xfer_files = []
            for xf in compute.xfer_files:
                tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
                try:
                    if _same_file(tgt, xf):
                        # Already present and identical: only count the use.
                        logger.debug('Ignoring file "%s" / "%s"', xf.name, tgt)
                        if tgt not in self.file_uses:
                            self.file_uses[tgt] = 0
                        self.file_uses[tgt] += 1
                        continue
                except:
                    pass
                if self.max_file_size and xf.stat_buf.st_size > self.max_file_size:
                    resp = 'NACK (file "%s" too big)' % xf.name
                else:
                    xfer_files.append(xf)
            if resp == 'ACK' and ((self.scheduler_ip_addr is not None) and \
                                  (self.scheduler_ip_addr != compute.scheduler_ip_addr)):
                resp = 'NACK (busy)'
            if resp == 'ACK':
                # Register the computation and bind this node to its scheduler.
                self.computations[compute.id] = compute
                self.scheduler_ip_addr = compute.scheduler_ip_addr
                self.scheduler_port = compute.scheduler_port
                self.pulse_interval = compute.pulse_interval
                self.lock.release()
                if xfer_files:
                    resp += ':XFER_FILES:' + serialize(xfer_files)
                try:
                    yield conn.send_msg(resp)
                except:
                    # The scheduler never received the ACK: undo the
                    # registration made above.
                    assert self.scheduler_ip_addr == compute.scheduler_ip_addr
                    yield self.lock.acquire()
                    del self.computations[compute.id]
                    self.scheduler_ip_addr = None
                    self.scheduler_port = None
                    self.pulse_interval = None
                    self.lock.release()
                else:
                    self.timer_coro.resume(True)
            else:
                self.lock.release()
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                try:
                    yield conn.send_msg(resp)
                except:
                    pass
Пример #14
0
        def job_request_task(msg):
            """Handle a job request: validate it, save its files and start the job.

            `msg` is the serialized job request. 'ACK' is sent on `conn` when
            the job is accepted; a 'NAK (...)' reply is sent when all CPUs are
            busy, the computation is unknown (or belongs to another
            scheduler), or the computation type is not recognized. A request
            that cannot be unserialized is dropped without a reply.

            A function computation runs `_dispy_job_func` in a new
            `multiprocessing.Process`; a program computation runs
            `self.__job_program` in a new thread.
            """
            assert coro is not None
            try:
                _job = unserialize(msg)
            except Exception:
                logger.debug('Ignoring job request from %s', addr[0])
                logger.debug(traceback.format_exc())
                raise StopIteration
            yield self.lock.acquire()
            compute = self.computations.get(_job.compute_id, None)
            # A computation registered by a different scheduler is treated as unknown.
            if compute is not None:
                if compute.scheduler_ip_addr != self.scheduler_ip_addr:
                    compute = None
            yield self.lock.release()
            if self.avail_cpus == 0:
                logger.warning('All cpus busy')
                try:
                    yield conn.send_msg('NAK (all cpus busy)')
                except Exception:
                    pass
                raise StopIteration
            elif compute is None:
                logger.warning('Invalid computation %s', _job.compute_id)
                try:
                    yield conn.send_msg('NAK (invalid computation %s)' % _job.compute_id)
                except Exception:
                    pass
                raise StopIteration

            reply_addr = (compute.scheduler_ip_addr, compute.job_result_port)
            logger.debug('New job id %s from %s', _job.uid, addr[0])
            # Save the files shipped with the job under the computation's
            # directory; only the base name of each file is used.
            files = []
            for f in _job.files:
                tgt = os.path.join(compute.dest_path, os.path.basename(f['name']))
                try:
                    # 'with' closes the file even if the write fails part way.
                    with open(tgt, 'wb') as fd:
                        fd.write(f['data'])
                except Exception:
                    # A file that cannot be saved is skipped; the job still runs.
                    logger.warning('Could not save file "%s"', tgt)
                    continue
                try:
                    os.utime(tgt, (f['stat'].st_atime, f['stat'].st_mtime))
                    os.chmod(tgt, stat.S_IMODE(f['stat'].st_mode))
                except Exception:
                    logger.debug('Could not set modes for "%s"', tgt)
                files.append(tgt)
            # From here on the job refers to the saved local paths.
            _job.files = files

            if compute.type == _Compute.func_type:
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                args = (job_info, self.certfile, self.keyfile,
                        _job.args, _job.kwargs, self.reply_Q,
                        compute.name, compute.code, compute.dest_path, _job.files)
                try:
                    yield conn.send_msg('ACK')
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                job_info.job_reply.status = DispyJob.Running
                job_info.proc = multiprocessing.Process(target=_dispy_job_func, args=args)
                # Book-keeping is updated under the lock before the process starts.
                yield self.lock.acquire()
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                self.job_infos[_job.uid] = job_info
                self.lock.release()
                job_info.proc.start()
                raise StopIteration
            elif compute.type == _Compute.prog_type:
                try:
                    yield conn.send_msg('ACK')
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                job_info.job_reply.status = DispyJob.Running
                yield self.lock.acquire()
                self.job_infos[_job.uid] = job_info
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                yield self.lock.release()
                prog_thread = threading.Thread(target=self.__job_program, args=(_job, job_info))
                prog_thread.start()
                raise StopIteration
            else:
                try:
                    yield conn.send_msg('NAK (invalid computation type "%s")' % compute.type)
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))
Пример #15
0
    def tcp_serve_task(self, conn, addr, coro=None):
        """Serve one request received on a TCP connection.

        `conn` is wrapped in a non-blocking `AsynCoroSocket` set up with this
        node's key and certificate files. The peer must first send
        `self.auth_code`; the message that follows is dispatched on its prefix
        ('JOB:', 'COMPUTE:', 'FILEXFER:', 'DEL_COMPUTE:', 'TERMINATE_JOB:' or
        'RETRIEVE_JOB:') to the nested handlers defined below, and any other
        message is answered with a 'NAK (invalid command: ...)' reply.

        `addr` is the peer address; `addr[0]` is used in log messages and to
        check the sender of terminate requests. `coro` is the coroutine
        running this task; the nested handlers assert that it is not None.
        """
        conn = AsynCoroSocket(conn, blocking=False,
                              keyfile=self.keyfile, certfile=self.certfile)
        def job_request_task(msg):
            """Handle a job request: validate it, save its files and start the job.

            `msg` is the serialized job request. 'ACK' is sent on `conn` when
            the job is accepted; a 'NAK (...)' reply is sent when all CPUs are
            busy, the computation is unknown (or belongs to another
            scheduler), or the computation type is not recognized. A request
            that cannot be unserialized is dropped without a reply.

            A function computation runs `_dispy_job_func` in a new
            `multiprocessing.Process`; a program computation runs
            `self.__job_program` in a new thread.
            """
            assert coro is not None
            try:
                _job = unserialize(msg)
            except Exception:
                logger.debug('Ignoring job request from %s', addr[0])
                logger.debug(traceback.format_exc())
                raise StopIteration
            yield self.lock.acquire()
            compute = self.computations.get(_job.compute_id, None)
            # A computation registered by a different scheduler is treated as unknown.
            if compute is not None:
                if compute.scheduler_ip_addr != self.scheduler_ip_addr:
                    compute = None
            yield self.lock.release()
            if self.avail_cpus == 0:
                logger.warning('All cpus busy')
                try:
                    yield conn.send_msg('NAK (all cpus busy)')
                except Exception:
                    pass
                raise StopIteration
            elif compute is None:
                logger.warning('Invalid computation %s', _job.compute_id)
                try:
                    yield conn.send_msg('NAK (invalid computation %s)' % _job.compute_id)
                except Exception:
                    pass
                raise StopIteration

            reply_addr = (compute.scheduler_ip_addr, compute.job_result_port)
            logger.debug('New job id %s from %s', _job.uid, addr[0])
            # Save the files shipped with the job under the computation's
            # directory; only the base name of each file is used.
            files = []
            for f in _job.files:
                tgt = os.path.join(compute.dest_path, os.path.basename(f['name']))
                try:
                    # 'with' closes the file even if the write fails part way.
                    with open(tgt, 'wb') as fd:
                        fd.write(f['data'])
                except Exception:
                    # A file that cannot be saved is skipped; the job still runs.
                    logger.warning('Could not save file "%s"', tgt)
                    continue
                try:
                    os.utime(tgt, (f['stat'].st_atime, f['stat'].st_mtime))
                    os.chmod(tgt, stat.S_IMODE(f['stat'].st_mode))
                except Exception:
                    logger.debug('Could not set modes for "%s"', tgt)
                files.append(tgt)
            # From here on the job refers to the saved local paths.
            _job.files = files

            if compute.type == _Compute.func_type:
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                args = (job_info, self.certfile, self.keyfile,
                        _job.args, _job.kwargs, self.reply_Q,
                        compute.name, compute.code, compute.dest_path, _job.files)
                try:
                    yield conn.send_msg('ACK')
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                job_info.job_reply.status = DispyJob.Running
                job_info.proc = multiprocessing.Process(target=_dispy_job_func, args=args)
                # Book-keeping is updated under the lock before the process starts.
                yield self.lock.acquire()
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                self.job_infos[_job.uid] = job_info
                self.lock.release()
                job_info.proc.start()
                raise StopIteration
            elif compute.type == _Compute.prog_type:
                try:
                    yield conn.send_msg('ACK')
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                job_info.job_reply.status = DispyJob.Running
                yield self.lock.acquire()
                self.job_infos[_job.uid] = job_info
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                yield self.lock.release()
                prog_thread = threading.Thread(target=self.__job_program, args=(_job, job_info))
                prog_thread.start()
                raise StopIteration
            else:
                try:
                    yield conn.send_msg('NAK (invalid computation type "%s")' % compute.type)
                except Exception:
                    logger.warning('Failed to send response for new job to %s', str(addr))

        def add_computation_task(msg):
            """Handle a computation request: set up its directory and register it.

            `msg` is the serialized computation. The request is refused with
            'Busy' if this node is already bound to a different scheduler, and
            with a 'NACK...' reply if the destination directory cannot be
            created, the code of a function computation does not compile, or
            a file to be transferred is larger than `self.max_file_size`.

            On success the computation is stored in `self.computations`, the
            node records the requesting scheduler's address, port and pulse
            interval, and 'ACK' is sent, followed by ':XFER_FILES:' and the
            serialized list of files the node still needs (if any). If that
            reply cannot be sent, the registration is rolled back.
            """
            assert coro is not None

            def remove_dest_path():
                # Best-effort removal of the computation's directory. os.rmdir
                # raises OSError (e.g. when the directory is not empty); if
                # that escaped while self.lock is held, the lock would never
                # be released.
                try:
                    if os.path.isdir(compute.dest_path):
                        os.rmdir(compute.dest_path)
                except OSError:
                    logger.debug('Could not remove "%s"', compute.dest_path)

            try:
                compute = unserialize(msg)
            except Exception:
                logger.debug('Ignoring computation request from %s', addr[0])
                try:
                    yield conn.send_msg('Invalid computation request')
                except Exception:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            yield self.lock.acquire()
            # Accept only if the node is free or already serves this scheduler.
            if not ((self.scheduler_ip_addr is None) or
                    (self.scheduler_ip_addr == compute.scheduler_ip_addr and
                     self.scheduler_port == compute.scheduler_port)):
                logger.debug('Ignoring computation request from %s: %s, %s, %s',
                             compute.scheduler_ip_addr, self.scheduler_ip_addr,
                             self.avail_cpus, self.cpus)
                self.lock.release()
                try:
                    yield conn.send_msg('Busy')
                except Exception:
                    pass
                raise StopIteration

            resp = 'ACK'
            if compute.dest_path and isinstance(compute.dest_path, str):
                compute.dest_path = compute.dest_path.strip(os.sep)
            else:
                # No usable name was given: try random names until one is unused.
                for x in xrange(20):
                    compute.dest_path = os.urandom(8).encode('hex')
                    if compute.dest_path.find(os.sep) >= 0:
                        continue
                    if not os.path.isdir(os.path.join(self.dest_path_prefix, compute.dest_path)):
                        break
                else:
                    logger.warning('Failed to create unique dest_path: %s', compute.dest_path)
                    resp = 'NACK'
            compute.dest_path = os.path.join(self.dest_path_prefix, compute.dest_path)
            created_dest_path = False
            try:
                os.makedirs(compute.dest_path)
                created_dest_path = True
                # Restrict the directory to the owner (read/write/search).
                os.chmod(compute.dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
                logger.debug('dest_path for "%s": %s', compute.name, compute.dest_path)
            except Exception:
                logger.warning('Invalid destination path: "%s"', compute.dest_path)
                # Only remove a directory this request created; if makedirs
                # failed because the path already exists, the directory is
                # not ours to delete.
                if created_dest_path:
                    remove_dest_path()
                self.lock.release()
                try:
                    yield conn.send_msg('NACK (Invalid dest_path)')
                except Exception:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            if compute.id in self.computations:
                logger.warning('Computation "%s" (%s) is being replaced',
                               compute.name, compute.id)
            # Node-side book-keeping attributes attached to the computation.
            setattr(compute, 'last_pulse', time.time())
            setattr(compute, 'pending_jobs', 0)
            setattr(compute, 'pending_results', 0)
            setattr(compute, 'zombie', False)
            logger.debug('xfer_files given: %s', ','.join(xf.name for xf in compute.xfer_files))
            if compute.type == _Compute.func_type:
                try:
                    code = compile(compute.code, '<string>', 'exec')
                except Exception:
                    logger.warning('Computation "%s" could not be compiled', compute.name)
                    remove_dest_path()
                    self.lock.release()
                    try:
                        yield conn.send_msg('NACK (Compilation failed)')
                    except Exception:
                        logger.warning('Failed to send reply to %s', str(addr))
                    raise StopIteration
                # Keep the compiled code object in marshalled form.
                compute.code = marshal.dumps(code)
            elif compute.type == _Compute.prog_type:
                assert not compute.code
                compute.name = os.path.join(compute.dest_path, os.path.basename(compute.name))

            # Work out which files must actually be sent by the scheduler.
            xfer_files = []
            for xf in compute.xfer_files:
                tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
                try:
                    if _same_file(tgt, xf):
                        logger.debug('Ignoring file "%s" / "%s"', xf.name, tgt)
                        if tgt not in self.file_uses:
                            self.file_uses[tgt] = 0
                        self.file_uses[tgt] += 1
                        continue
                except Exception:
                    # A failed comparison is treated as "file must be transferred".
                    pass
                if self.max_file_size and xf.stat_buf.st_size > self.max_file_size:
                    resp = 'NACK (file "%s" too big)' % xf.name
                else:
                    xfer_files.append(xf)
            if resp == 'ACK' and ((self.scheduler_ip_addr is not None) and
                                  (self.scheduler_ip_addr != compute.scheduler_ip_addr)):
                resp = 'NACK (busy)'
            if resp == 'ACK':
                self.computations[compute.id] = compute
                self.scheduler_ip_addr = compute.scheduler_ip_addr
                self.scheduler_port = compute.scheduler_port
                self.pulse_interval = compute.pulse_interval
                self.lock.release()
                if xfer_files:
                    resp += ':XFER_FILES:' + serialize(xfer_files)
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    # The scheduler never saw the ACK: undo the registration.
                    assert self.scheduler_ip_addr == compute.scheduler_ip_addr
                    yield self.lock.acquire()
                    del self.computations[compute.id]
                    self.scheduler_ip_addr = None
                    self.scheduler_port = None
                    self.pulse_interval = None
                    self.lock.release()
                else:
                    self.timer_coro.resume(True)
            else:
                self.lock.release()
                remove_dest_path()
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    pass

        def xfer_file_task(msg):
            """Handle a file transfer request for a registered computation.

            `msg` is the serialized description of the file (`xf`). The file
            is stored under the computation's `dest_path` using only the base
            name of `xf.name`. If an identical file is already there, its use
            count in `self.file_uses` is incremented and 'ACK' is sent;
            otherwise `xf.stat_buf.st_size` bytes are read from `conn` into
            the file and the reply is 'ACK', 'NAK (read only N bytes)' for a
            short read, or 'NACK' if copying failed. Requests that cannot be
            unserialized or that name an unknown computation get no reply.
            """
            assert coro is not None
            try:
                xf = unserialize(msg)
            except Exception:
                logger.debug('Ignoring file trasnfer request from %s', addr[0])
                raise StopIteration
            resp = ''
            if xf.compute_id not in self.computations:
                logger.error('computation "%s" is invalid' % xf.compute_id)
                raise StopIteration
            tgt = os.path.join(self.computations[xf.compute_id].dest_path,
                               os.path.basename(xf.name))
            if os.path.isfile(tgt):
                if _same_file(tgt, xf):
                    yield self.lock.acquire()
                    if tgt in self.file_uses:
                        self.file_uses[tgt] += 1
                    else:
                        self.file_uses[tgt] = 1
                    yield self.lock.release()
                    # NOTE(review): no data is read from conn in this case; if
                    # the sender streams the file body unconditionally it is
                    # left unread - confirm against the sending side.
                    resp = 'ACK'
                else:
                    # The existing file is overwritten by the copy below.
                    logger.warning('File "%s" already exists with different status as "%s"',
                                   xf.name, tgt)
            if not resp:
                logger.debug('Copying file %s to %s (%s)', xf.name, tgt, xf.stat_buf.st_size)
                try:
                    fd = open(tgt, 'wb')
                    try:
                        n = 0
                        while n < xf.stat_buf.st_size:
                            data = yield conn.recvall(min(xf.stat_buf.st_size-n, 10240000))
                            if not data:
                                break
                            fd.write(data)
                            n += len(data)
                            if self.max_file_size and n > self.max_file_size:
                                logger.warning('File "%s" is too big (%s); it is truncated', tgt, n)
                                break
                    finally:
                        # Close the file even when receiving or writing fails.
                        fd.close()
                    if n < xf.stat_buf.st_size:
                        resp = 'NAK (read only %s bytes)' % n
                    else:
                        resp = 'ACK'
                        logger.debug('Copied file %s, %s', tgt, resp)
                        os.utime(tgt, (xf.stat_buf.st_atime, xf.stat_buf.st_mtime))
                        os.chmod(tgt, stat.S_IMODE(xf.stat_buf.st_mode))
                        self.file_uses[tgt] = 1
                except Exception:
                    logger.warning('Copying file "%s" failed with "%s"',
                                   xf.name, traceback.format_exc())
                    resp = 'NACK'
            # Reply in every case, including the "identical file already
            # present" case, whose 'ACK' was previously computed but never sent.
            try:
                yield conn.send_msg(resp)
            except Exception:
                logger.debug('Could not send reply for "%s"', xf.name)
            raise StopIteration # xfer_file_task

        def terminate_job_task(msg):
            """Handle a request to terminate a running job.

            `msg` is the serialized job. The request is ignored unless the
            job's computation is registered and the request comes from that
            computation's scheduler address. The job's entry is removed from
            `self.job_infos` and its process is terminated; the task then
            polls for up to 20 intervals of 0.1 second for the process to
            exit (a program that is still running at the 11th poll is also
            sent `kill()`). Once the process has exited, a reply with status
            `DispyJob.Terminated` is sent through `self._send_job_reply`.
            """
            assert coro is not None
            yield self.lock.acquire()
            try:
                _job = unserialize(msg)
                compute = self.computations[_job.compute_id]
                # Explicit check rather than 'assert', which is removed when
                # Python runs with -O and would then let any peer cancel jobs.
                if addr[0] != compute.scheduler_ip_addr:
                    raise ValueError('terminate request is not from the scheduler')
                job_info = self.job_infos.pop(_job.uid, None)
            except Exception:
                logger.debug('Ignoring job request from %s', addr[0])
                raise StopIteration
            finally:
                self.lock.release()
            if job_info is None:
                logger.debug('Job %s completed; ignoring cancel request from %s',
                             _job.uid, addr[0])
                raise StopIteration
            logger.debug('Terminating job %s', _job.uid)
            job_info.proc.terminate()
            if isinstance(job_info.proc, multiprocessing.Process):
                for x in xrange(20):
                    if job_info.proc.is_alive():
                        yield coro.sleep(0.1)
                    else:
                        logger.debug('Process "%s" for job %s terminated', compute.name, _job.uid)
                        break
                else:
                    logger.warning('Could not kill process %s', compute.name)
                    raise StopIteration
            else:
                assert isinstance(job_info.proc, subprocess.Popen)
                for x in xrange(20):
                    rc = job_info.proc.poll()
                    if rc is not None:
                        # Report termination only once the program has exited.
                        logger.debug('Program "%s" for job %s terminated with %s',
                                     compute.name, _job.uid, rc)
                        break
                    if x == 10:
                        # terminate() has not worked so far; escalate to kill().
                        logger.debug('Killing job %s', _job.uid)
                        job_info.proc.kill()
                    yield coro.sleep(0.1)
                else:
                    logger.warning('Could not kill process %s', compute.name)
                    raise StopIteration
            # Build a fresh reply for the cancelled job, addressed to the requester.
            reply_addr = (addr[0], compute.job_result_port)
            reply = _JobReply(_job, self.ext_ip_addr)
            job_info = _DispyJobInfo(reply, reply_addr, compute)
            reply.status = DispyJob.Terminated
            yield self._send_job_reply(job_info, resending=False, coro=coro)

        def retrieve_job_task(msg):
            """Handle a request to fetch the reply of a job.

            `msg` is a serialized mapping with 'uid', 'hash' and 'compute_id'
            entries, none of which may be None; otherwise 'Invalid job' is
            sent. If the job is still in `self.job_infos`, its current reply
            is sent. Otherwise each directory under `self.dest_path_prefix`
            is searched for a saved '_dispy_job_reply_<uid>' file whose
            pickled reply has the requested hash; that reply is sent and,
            once the peer answers 'ACK', the file is removed and the
            computation's `pending_results` count is decremented (cleaning up
            the computation when it reaches zero). If nothing matches,
            'Invalid job: <uid>' is sent.
            """
            assert coro is not None
            try:
                req = unserialize(msg)
                # Explicit validation: 'assert' is removed under python -O.
                if req['uid'] is None or req['hash'] is None or req['compute_id'] is None:
                    raise ValueError('incomplete job retrieval request')
            except Exception:
                resp = serialize('Invalid job')
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    pass
                raise StopIteration

            job_info = self.job_infos.get(req['uid'], None)
            resp = None
            if job_info is not None:
                try:
                    yield conn.send_msg(serialize(job_info.job_reply))
                    ack = yield conn.recv_msg()
                    # no need to check ack
                except Exception:
                    logger.debug('Could not send reply for job %s', req['uid'])
                raise StopIteration

            for d in os.listdir(self.dest_path_prefix):
                info_file = os.path.join(self.dest_path_prefix, d,
                                         '_dispy_job_reply_%s' % req['uid'])
                if os.path.isfile(info_file):
                    try:
                        # NOTE(review): pickle.load executes code embedded in
                        # the file; this presumes only the node itself writes
                        # these reply files - confirm.
                        with open(info_file, 'rb') as fd:
                            job_reply = pickle.load(fd)
                    except Exception:
                        job_reply = None
                    if hasattr(job_reply, 'hash') and job_reply.hash == req['hash']:
                        try:
                            yield conn.send_msg(serialize(job_reply))
                            ack = yield conn.recv_msg()
                            if ack != 'ACK':
                                raise ValueError('reply was not acknowledged')
                        except Exception:
                            logger.debug('Could not send reply for job %s', req['uid'])
                            raise StopIteration
                        try:
                            os.remove(info_file)
                            yield self.lock.acquire()
                            try:
                                compute = self.computations.get(req['compute_id'], None)
                                if compute is not None:
                                    compute.pending_results -= 1
                                    if compute.pending_results == 0:
                                        compute.zombie = True
                                        self.cleanup_computation(compute)
                            finally:
                                # Release the lock even if the cleanup fails.
                                self.lock.release()
                        except Exception:
                            logger.debug('Could not remove "%s"', info_file)
                        raise StopIteration
            else:
                # The loop has no 'break', so this runs whenever no saved
                # reply was delivered above.
                resp = serialize('Invalid job: %s' % req['uid'])

            if resp:
                try:
                    yield conn.send_msg(resp)
                except Exception:
                    pass

        # tcp_serve_task starts
        try:
            req = yield conn.recvall(len(self.auth_code))
            assert req == self.auth_code
        except:
            logger.warning('Ignoring request; invalid client authentication?')
            conn.close()
            raise StopIteration
        msg = yield conn.recv_msg()
        if not msg:
            conn.close()
            raise StopIteration
        if msg.startswith('JOB:'):
            msg = msg[len('JOB:'):]
            yield job_request_task(msg)
            conn.close()
        elif msg.startswith('COMPUTE:'):
            msg = msg[len('COMPUTE:'):]
            yield add_computation_task(msg)
            conn.close()
        elif msg.startswith('FILEXFER:'):
            msg = msg[len('FILEXFER:'):]
            yield xfer_file_task(msg)
            conn.close()
        elif msg.startswith('DEL_COMPUTE:'):
            msg = msg[len('DEL_COMPUTE:'):]
            try:
                info = unserialize(msg)
                compute_id = info['ID']
                yield self.lock.acquire()
                compute = self.computations.get(compute_id, None)
                if compute is None:
                    logger.warning('Computation "%s" is not valid', compute_id)
                else:
                    compute.zombie = True
                    self.cleanup_computation(compute)
                self.lock.release()
            except:
                logger.debug('Deleting computation failed with %s', traceback.format_exc())
                # raise
            conn.close()
        elif msg.startswith('TERMINATE_JOB:'):
            msg = msg[len('TERMINATE_JOB:'):]
            yield terminate_job_task(msg)
            conn.close()
        elif msg.startswith('RETRIEVE_JOB:'):
            msg = msg[len('RETRIEVE_JOB:'):]
            yield retrieve_job_task(msg)
            conn.close()
        else:
            logger.warning('Invalid request "%s" from %s',
                           msg[:min(10, len(msg))], addr[0])
            resp = 'NAK (invalid command: %s)' % (msg[:min(10, len(msg))])
            try:
                yield conn.send_msg(resp)
            except:
                logger.warning('Failed to send reply to %s', str(addr))
            conn.close()
Пример #16
0
    def udp_server(self, scheduler_ip_addr, coro=None):
        """Serve UDP requests on `self.udp_sock` as a daemon coroutine.

        On start-up, `self.send_pong_msg` is called if all CPUs are available
        and, if `scheduler_ip_addr` is given, a 'PONG:' message describing
        this node is sent to it at `self.scheduler_port`. The loop then
        handles three kinds of datagrams:

        * 'PING:' - when no CPU is in use and the request is valid (IPv4
          scheduler address, integer port, matching dispy version), the PONG
          message is sent to the scheduler address given in the request.
        * 'PULSE:' - when it comes from the current scheduler, `last_pulse`
          of every computation is set to the current time.
        * 'SERVERPORT:' - this node's address, port, signature and version
          are sent to the address given in the request.

        Anything else is logged and ignored.
        """
        assert coro is not None
        coro.set_daemon()
        if self.avail_cpus == self.cpus:
            yield self.send_pong_msg(coro=coro)
        pong_msg = {'ip_addr':self.ext_ip_addr, 'name':self.name, 'port':self.address[1],
                    'cpus':self.cpus, 'sign':self.signature, 'version':_dispy_version}
        pong_msg = 'PONG:' + serialize(pong_msg)

        if scheduler_ip_addr:
            sock = AsynCoroSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            try:
                yield sock.sendto(pong_msg, (scheduler_ip_addr, self.scheduler_port))
            except Exception:
                logger.warning("Couldn't send ping message to %s:%s",
                               scheduler_ip_addr, self.scheduler_port)
            finally:
                sock.close()

        while True:
            msg, addr = yield self.udp_sock.recvfrom(1024)
            # TODO: process each message as separate Coro, so
            # exceptions are contained?
            if msg.startswith('PING:'):
                if self.cpus != self.avail_cpus:
                    logger.debug('Busy (%s/%s); ignoring ping message from %s',
                                 self.cpus, self.avail_cpus, addr[0])
                    continue
                try:
                    info = unserialize(msg[len('PING:'):])
                    # inet_aton raises if the address is not a valid IPv4 string.
                    socket.inet_aton(info['scheduler_ip_addr'])
                    # Explicit checks: 'assert' is removed under python -O.
                    if not isinstance(info['scheduler_port'], int):
                        raise ValueError('invalid scheduler port')
                    if info['version'] != _dispy_version:
                        raise ValueError('dispy version mismatch')
                    # Reply to the scheduler named in the request, not the sender.
                    addr = (info['scheduler_ip_addr'], info['scheduler_port'])
                except Exception:
                    logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                    continue
                try:
                    yield self.udp_sock.sendto(pong_msg, addr)
                except Exception:
                    # A failed reply must not terminate the server loop.
                    logger.warning("Couldn't send pong message to %s:%s", addr[0], addr[1])
            elif msg.startswith('PULSE:'):
                try:
                    info = unserialize(msg[len('PULSE:'):])
                    if info['ip_addr'] != self.scheduler_ip_addr:
                        raise ValueError('pulse is not from the current scheduler')
                    yield self.lock.acquire()
                    for compute in self.computations.itervalues():
                        compute.last_pulse = time.time()
                    yield self.lock.release()
                except Exception:
                    logger.warning('Ignoring PULSE from %s', addr[0])
            elif msg.startswith('SERVERPORT:'):
                try:
                    req = unserialize(msg[len('SERVERPORT:'):])
                    reply = {'ip_addr':self.address[0], 'port':self.address[1],
                             'sign':self.signature, 'version':_dispy_version}
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    try:
                        sock = AsynCoroSocket(sock, blocking=False)
                        sock.settimeout(1)
                        yield sock.sendto(serialize(reply), (req['ip_addr'], req['port']))
                    finally:
                        # Close the socket even if wrapping or sending fails.
                        sock.close()
                except Exception:
                    logger.debug(traceback.format_exc())
            else:
                logger.warning('Ignoring ping message from %s', addr[0])
Пример #17
0
    def relay_pings(self,
                    ip_addr='',
                    netmask=None,
                    node_port=51348,
                    scheduler_node=None,
                    scheduler_port=51347):
        netaddr = None
        if not netmask:
            try:
                ip_addr, bits = ip_addr.split('/')
                socket.inet_aton(ip_addr)
                netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
                netaddr = (struct.unpack(
                    '>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                netmask = '255.255.255.255'
        if ip_addr:
            socket.inet_aton(ip_addr)
        else:
            ip_addr = socket.gethostbyname(socket.gethostname())
        if not netaddr and netmask:
            try:
                if isinstance(netmask, str):
                    netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
                else:
                    assert isinstance(netmask, int)
                assert netmask > 0
                netaddr = (struct.unpack(
                    '>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                logger.warning('Invalid netmask')

        try:
            socket.inet_ntoa(struct.pack('>L', netaddr))
            socket.inet_ntoa(struct.pack('>L', netmask))
        except:
            netaddr = netmask = None

        bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        scheduler_ip_addrs = list(
            filter(lambda ip: bool(ip), [_node_ipaddr(scheduler_node)]))
        if scheduler_ip_addrs and scheduler_port:
            relay_request = {
                'ip_addrs': scheduler_ip_addrs,
                'port': scheduler_port,
                'version': _dispy_version,
                'sign': None
            }
            bc_sock.sendto(b'PING:' + serialize(relay_request),
                           ('<broadcast>', node_port))
        bc_sock.close()

        node_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        node_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        node_sock.bind(('', node_port))
        sched_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sched_sock.bind(('', scheduler_port))
        logger.info('Listening on %s:%s/%s', ip_addr, node_port,
                    scheduler_port)
        while True:
            ready = select.select([node_sock, sched_sock], [], [])[0]
            for sock in ready:
                if sock == node_sock:
                    msg, addr = node_sock.recvfrom(1024)
                    if not msg.startswith(b'PING:'):
                        logger.debug('Ignoring message "%s" from %s',
                                     msg[:min(len(msg), 5)], addr[0])
                        continue
                    if netaddr and \
                       (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                        logger.debug('Ignoring ping back (from %s)', addr[0])
                        continue
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        info = unserialize(msg[len(b'PING:'):])
                        if info['version'] != _dispy_version:
                            logger.warning(
                                'Ignoring %s due to version mismatch: %s / %s',
                                info['ip_addrs'], info['version'],
                                _dispy_version)
                            continue
                        scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
                        scheduler_port = info['port']
                    except:
                        logger.debug('Ignoring ping message from %s (%s)',
                                     addr[0], addr[1])
                        logger.debug(traceback.format_exc())
                        continue
                    logger.debug('relaying ping from %s / %s' %
                                 (info['ip_addrs'], addr[0]))
                    bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST,
                                       1)
                    bc_sock.sendto(b'PING:' + serialize(info),
                                   ('<broadcast>', node_port))
                    bc_sock.close()
                else:
                    assert sock == sched_sock
                    msg, addr = sched_sock.recvfrom(1024)
                    if msg.startswith(
                            b'PING:'
                    ) and scheduler_ip_addrs and scheduler_port:
                        try:
                            info = unserialize(msg[len(b'PING:'):])
                            if netaddr and info.get('scheduler_ip_addr', None) and \
                               (struct.unpack('>L', socket.inet_aton(info['scheduler_ip_addr']))[0] & netmask) == netaddr:
                                logger.debug('Ignoring ping back (from %s)' %
                                             addr[0])
                                continue
                            assert info['version'] == _dispy_version
                            # assert isinstance(info['cpus'], int)
                            msg = {
                                'ip_addrs': scheduler_ip_addrs,
                                'port': scheduler_port,
                                'version': _dispy_version
                            }
                            relay_sock = socket.socket(socket.AF_INET,
                                                       socket.SOCK_DGRAM)
                            relay_sock.sendto(b'PING:' + serialize(msg),
                                              (info['ip_addr'], info['port']))
                            relay_sock.close()
                        except:
                            logger.debug(traceback.format_exc())
                            # raise
                            logger.debug('Ignoring ping message from %s (%s)',
                                         addr[0], addr[1])
Пример #18
0
    def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                    scheduler_node=None, scheduler_port=51347):
        """Relay scheduler PING broadcasts and answer node PONG messages.

        Listens on two UDP sockets forever (this method does not return
        normally):

        * ``node_port``: a ``PING:`` datagram received here that did not
          originate from the local network is rebroadcast to
          ``('<broadcast>', node_port)``, and the scheduler address, port and
          version it carries are remembered.
        * ``scheduler_port``: a ``PONG:`` datagram received here is answered
          with a ``PING:`` carrying the remembered scheduler details, sent to
          the ``host`` named in the pong on ``node_port``.

        Args:
            ip_addr: IPv4 address of this host, optionally as
                ``'a.b.c.d/bits'`` (used only when ``netmask`` is not given).
                When empty, ``socket.gethostbyname(socket.gethostname())`` is
                used.
            netmask: Netmask as a dotted-quad string or a positive integer.
                Together with ``ip_addr`` it defines the network whose pings
                are ignored.
            node_port: UDP port on which pings are received and rebroadcast.
            scheduler_node: Passed to ``_node_ipaddr`` to obtain the initial
                scheduler address; if that and ``scheduler_port`` are both
                truthy, one ping is broadcast at startup.
            scheduler_port: UDP port on which pongs are received; also the
                initial scheduler port advertised in relayed pings.

        Raises:
            socket.error: If ``ip_addr`` is given but is not a valid IPv4
                address, or if a socket cannot be created or bound.
        """
        def ping_payload(sched_ip, sched_port, version):
            # Concatenate with a bytes prefix instead of "'PING:%s' % data":
            # on Python 3 the %-form would embed the repr of the serialized
            # bytes and produce a str that sendto() rejects.
            return b'PING:' + serialize({'scheduler_ip_addr': sched_ip,
                                         'scheduler_port': sched_port,
                                         'version': version})

        netaddr = None
        if not netmask:
            try:
                ip_addr, bits = ip_addr.split('/')
                socket.inet_aton(ip_addr)
                netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except Exception:
                # No usable '/bits' suffix: treat this host as its own network.
                netmask = '255.255.255.255'
        if ip_addr:
            # Validation only; an invalid address propagates to the caller.
            socket.inet_aton(ip_addr)
        else:
            ip_addr = socket.gethostbyname(socket.gethostname())
        if not netaddr and netmask:
            try:
                if isinstance(netmask, str):
                    netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
                elif not isinstance(netmask, int):
                    raise ValueError('netmask must be a string or an integer')
                if netmask <= 0:
                    raise ValueError('netmask must be positive')
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except Exception:
                logger.warning('Invalid netmask')

        # Both values must be packable as unsigned 32-bit integers; otherwise
        # the "ignore pings from the local network" filter is disabled.
        try:
            socket.inet_ntoa(struct.pack('>L', netaddr))
            socket.inet_ntoa(struct.pack('>L', netmask))
        except Exception:
            netaddr = netmask = None

        scheduler_version = _dispy_version

        bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        ping_sock = pong_sock = None
        try:
            bc_sock.bind(('', 0))
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

            scheduler_ip_addr = _node_ipaddr(scheduler_node)
            if scheduler_ip_addr and scheduler_port:
                bc_sock.sendto(ping_payload(scheduler_ip_addr, scheduler_port,
                                            scheduler_version),
                               ('<broadcast>', node_port))

            ping_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            ping_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            ping_sock.bind(('', node_port))
            pong_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            pong_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            pong_sock.bind(('', scheduler_port))
            logger.info('Listening on %s:%s', ip_addr, node_port)
            last_ping = 0
            while True:
                ready = select.select([ping_sock, pong_sock], [], [])[0]
                for sock in ready:
                    if sock == ping_sock:
                        msg, addr = ping_sock.recvfrom(1024)
                        if not msg.startswith(b'PING:'):
                            # Log only the first few bytes of unknown traffic.
                            logger.debug('Ignoring message "%s" from %s',
                                         msg[:5], addr[0])
                            continue
                        if netaddr and \
                           (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                            logger.debug('Ignoring own ping (from %s)', addr[0])
                            continue
                        if (time.time() - last_ping) < 10:
                            # Throttle: wait before relaying another ping.
                            logger.warning('Ignoring ping (from %s) for 10 more seconds',
                                           addr[0])
                            time.sleep(10)
                        last_ping = time.time()
                        logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                        # NOTE(review): this datagram comes from the network;
                        # if unserialize is pickle-based, it should not be
                        # applied to untrusted input -- confirm.
                        try:
                            data = unserialize(msg[len(b'PING:'):])
                            new_ip_addr = data['scheduler_ip_addr']
                            new_port = data['scheduler_port']
                            new_version = data['version']
                            if not (isinstance(new_ip_addr, str) and
                                    isinstance(new_port, int)):
                                raise ValueError('malformed ping')
                        except Exception:
                            logger.debug('Ignoring ping message from %s (%s)',
                                         addr[0], addr[1])
                            continue
                        # Commit only after the whole message validated, so a
                        # rejected ping cannot leave partially updated state.
                        scheduler_ip_addr = new_ip_addr
                        scheduler_port = new_port
                        scheduler_version = new_version
                        bc_sock.sendto(ping_payload(scheduler_ip_addr, scheduler_port,
                                                    scheduler_version),
                                       ('<broadcast>', node_port))
                    else:
                        # select() was given only two sockets, so this is pong_sock.
                        msg, addr = pong_sock.recvfrom(1024)
                        if not msg.startswith(b'PONG:'):
                            logger.debug('Ignoring pong message "%s" from %s',
                                         msg[:5], addr[0])
                            continue
                        if not (scheduler_ip_addr and scheduler_port):
                            # No scheduler known yet; nothing to tell the node.
                            logger.debug('Ignoring pong message from %s', str(addr))
                            continue
                        logger.debug('Pong message from %s (%s)', addr[0], addr[1])
                        try:
                            pong = unserialize(msg[len(b'PONG:'):])
                            if not (isinstance(pong['host'], str) and
                                    isinstance(pong['port'], int) and
                                    isinstance(pong['cpus'], int)):
                                raise ValueError('malformed pong')
                            relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                            try:
                                relay_sock.sendto(
                                    ping_payload(scheduler_ip_addr, scheduler_port,
                                                 scheduler_version),
                                    (pong['host'], node_port))
                            finally:
                                relay_sock.close()
                        except Exception:
                            logger.debug('Ignoring pong message from %s (%s)',
                                         addr[0], addr[1])
        finally:
            # Release the sockets when the loop is left by an exception
            # (e.g. KeyboardInterrupt) or when setup fails part-way.
            for opened in (bc_sock, ping_sock, pong_sock):
                if opened is not None:
                    opened.close()