Example #1
class IDallocator(object):
    def __init__(self, start):
        self.mutex = RLock()
        self.start = start
        self.off = start
        self.free = []

    def acquire_id(self):
        with self.mutex:
            if self.free:
                return self.free.pop()
            id = self.off
            self.off += 1
            return id
    
    def release_id(self, id):
        with self.mutex:
            self.free.append(id)
            # if all allocated ids are back in the free pool, reset the pool and the allocator
            if len(self.free) == (self.off - self.start):
                self.off = self.start
                self.free = []
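A quick usage sketch (assuming the example's RLock is threading.RLock; the comments show the ids the allocator would hand out):

alloc = IDallocator(100)
a = alloc.acquire_id()   # 100
b = alloc.acquire_id()   # 101
alloc.release_id(a)
c = alloc.acquire_id()   # 100 again, recycled from the free pool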
Example #2
class RWLock:
    def __init__(self):
        self._w_lock = RLock()
        self._r_lock = RLock()
        self._readers = 0
        self._reader_released_event = Event()
        self.read = LockContextManager(self.acquire_read, self.release_read)
        self.write = LockContextManager(self.acquire_write, self.release_write)

    def acquire_write(self):
        with self._r_lock:
            if self._readers == 0:
                self._w_lock.acquire()
                return
        # readers are active: wait for the last one to leave, then retry
        self._reader_released_event.wait()
        self.acquire_write()

    def release_write(self):
        self._w_lock.release()

    def acquire_read(self):
        with self._w_lock:
            with self._r_lock:
                if self._readers == 0: self._reader_released_event = Event()
                self._readers += 1

    def release_read(self):
        with self._w_lock:
            with self._r_lock:
                self._readers -= 1
                if self._readers == 0: self._reader_released_event.set()
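The example depends on a LockContextManager helper that is not shown above; a minimal sketch of what it presumably looks like, so that `with rw.read:` and `with rw.write:` work:

class LockContextManager:
    def __init__(self, acquire, release):
        self._acquire = acquire
        self._release = release

    def __enter__(self):
        self._acquire()

    def __exit__(self, exc_type, exc, tb):
        self._release()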
Example #3
class WSClientTransport(WebSocketClient):
    APP_FACTORY = None

    def __init__(self, url):
        self._close_event = Event()
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        WebSocketClient.__init__(self, url)
        self._app = None
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            with self._lock:
                _sendall(data)
        self.sock.sendall = sendall

    def connect(self):
        super(WSClientTransport, self).connect()
        self._app = self.APP_FACTORY(self)
        log.info("Connected to websocket server {0}".format(self.url))

    def closed(self, code, reason=None):
        app, self._app = self._app, None
        if app:
            app.on_close()
        self._close_event.set()

    def ponged(self, pong):
        pass

    def received_message(self, message):
        log.debug("Received message {0}".format(message))
        if self._app:
            self._app.on_received_packet(STRING(message))
        else:
            log.warning('Websocket client app already closed')

    def send_packet(self, data):
        log.debug("Sending message {0}".format(data))
        self.send(data)

    def force_shutdown(self):
        # called by the upper layer, and no callback will be possible when closed
        self._app = None
        self.close()
        self._close_event.set()
        log.info('Websocket client closed')

    def wait_close(self):
        self._close_event.wait()

    def app(self):
        return self._app
Example #4
File: ws.py Project: dulton/IVR
class WSServerTransport(WebSocket):
    APP_FACTORY = None

    def __init__(self, *args, **kwargs):
        super(WSServerTransport, self).__init__(*args, **kwargs)
        self._app = None

    def opened(self):
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            with self._lock:
                _sendall(data)

        self.sock.sendall = sendall

        # create app
        if not self.environ.get('QUERY_STRING'):
            query = {}
        else:
            query = urlparse.parse_qs(self.environ['QUERY_STRING'])
        for key, value in query.iteritems():
            query[key] = value[0]
        self._app = self.APP_FACTORY(self, query)

    def closed(self, code, reason=None):
        app, self._app = self._app, None
        if app:
            app.on_close()

    def ponged(self, pong):
        pass

    def received_message(self, message):
        log.debug("Received message {0}".format(message))
        self._app.on_received_packet(STRING(message))

    def send_packet(self, data):
        log.debug("Sending message {0}".format(data))
        self.send(data)

    def force_shutdown(self):
        # called by the upper layer, and no callback will be possible when closed
        log.info("shutdown")
        self._app = None
        self.close()
Example #5
File: ws.py Project: dulton/IVR
class WSClientTransport(WebSocketClient):
    APP_FACTORY = None

    def __init__(self, url):
        self._close_event = Event()
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        WebSocketClient.__init__(self, url)
        self._app = None
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            with self._lock:
                _sendall(data)

        self.sock.sendall = sendall

    def connect(self):
        super(WSClientTransport, self).connect()
        self._app = self.APP_FACTORY(self)
        log.info("Connected to websocket server {0}".format(self.url))

    def closed(self, code, reason=None):
        if self._app:
            self._app.on_close()
        self._close_event.set()

    def ponged(self, pong):
        pass

    def received_message(self, message):
        log.debug("Received message {0}".format(message))
        self._app.on_received_packet(STRING(message))

    def send_packet(self, data):
        log.debug("Sending message {0}".format(data))
        self.send(data)

    def force_shutdown(self):
        # called by the upper layer, and no callback will be possible when closed
        self._app = None
        self.close()
        self._close_event.set()

    def wait_close(self):
        self._close_event.wait()
Example #6
class ThreadSafeFSM(InstrumentFSM):
    def __init__(self, states, events, enter_event, exit_event):
        self._lock = RLock()
        super(ThreadSafeFSM, self).__init__(states, events, enter_event, exit_event)
    def on_event(self, event, *args, **kwargs):
        with self._lock:
            return super(ThreadSafeFSM, self).on_event(event, *args, **kwargs)
    def on_event_if_free(self, event, *args, **kwargs):
        if not self._lock.acquire(blocking=False):
            raise FSMLockedError
        try:
            retval = super(ThreadSafeFSM, self).on_event(event, *args, **kwargs)
        finally:
            self._lock.release()
        return retval
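The non-blocking variant lets callers drop an event instead of queueing behind a running transition; a usage sketch (FSMLockedError is assumed to come from the same module as InstrumentFSM):

try:
    fsm.on_event_if_free('tick')
except FSMLockedError:
    pass  # FSM is busy in another thread; skip this event rather than block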
Example #7
class Client(object):
    class PendingCall(object):
        def __init__(self, id, method, args=None):
            self.id = id
            self.method = method
            self.args = args
            self.result = None
            self.error = None
            self.completed = Event()
            self.callback = None

    class SubscribedEvent(object):
        def __init__(self, name, *filters):
            self.name = name
            self.refcount = 0
            self.filters = filters

        def match(self, name, args):
            if self.name != name:
                return False

            if self.filters:
                return match(args, *self.filters)

    def __init__(self):
        self.pending_calls = {}
        self.pending_events = []
        self.event_handlers = {}
        self.rpc = None
        self.event_callback = None
        self.error_callback = None
        self.rpc_callback = None
        self.receive_thread = None
        self.token = None
        self.event_distribution_lock = RLock()
        self.event_emission_lock = RLock()
        self.default_timeout = 20
        self.scheme = None
        self.transport = None
        self.parsed_url = None
        self.last_event_burst = None
        self.use_bursts = False
        self.event_cv = Event()
        self.event_thread = None

    def __pack(self, namespace, name, args, id=None):
        return dumps({
            'namespace': namespace,
            'name': name,
            'args': args,
            'id': str(id if id is not None else uuid.uuid4())
        })

    def __call_timeout(self, call):
        pass

    def __call(self, pending_call, call_type='call', custom_payload=None):
        if custom_payload is None:
            payload = {
                'method': pending_call.method,
                'args': pending_call.args,
            }
        else:
            payload = custom_payload

        self.__send(self.__pack(
            'rpc',
            call_type,
            payload,
            pending_call.id
        ))

    def __send_event(self, name, params):
        self.__send(self.__pack(
            'events',
            'event',
            {'name': name, 'args': params}
        ))

    def __send_event_burst(self):
        with self.event_emission_lock:
            self.__send(self.__pack(
                'events',
                'event_burst',
                {'events': [{'name': t[0], 'args': t[1]} for t in self.pending_events]},
            ))

            del self.pending_events[:]

    def __send_error(self, id, errno, msg, extra=None):
        payload = {
            'code': errno,
            'message': msg
        }

        if extra is not None:
            payload.update(extra)

        self.__send(self.__pack('rpc', 'error', id=id, args=payload))

    def __send_response(self, id, resp):
        self.__send(self.__pack('rpc', 'response', id=id, args=resp))

    def __send(self, data):
        debug_log('<- {0}', data)
        self.transport.send(data)

    def recv(self, message):
        if isinstance(message, bytes):
            message = message.decode('utf-8')
        debug_log('-> {0}', message)
        try:
            msg = loads(message)
        except ValueError as err:
            if self.error_callback is not None:
                self.error_callback(ClientError.INVALID_JSON_RESPONSE, err)
            return

        self.decode(msg)

    def __process_event(self, name, args):
        with self.event_distribution_lock:
            if name in self.event_handlers:
                for h in self.event_handlers[name]:
                    h(args)

            if self.event_callback:
                self.event_callback(name, args)

    def __event_emitter(self):
        while True:
            self.event_cv.wait()

            while len(self.pending_events) > 0:
                time.sleep(0.1)
                with self.event_emission_lock:
                    self.__send_event_burst()

    def wait_forever(self):
        if os.getenv("DISPATCHERCLIENT_TYPE") == "GEVENT":
            import gevent
            while True:
                gevent.sleep(60)
        else:
            while True:
                time.sleep(60)

    def drop_pending_calls(self):
        message = "Connection closed"
        # iterate over a snapshot: the dict is mutated inside the loop
        for key, call in list(self.pending_calls.items()):
            call.result = None
            call.error = {
                "code":  errno.ECONNABORTED,
                "message": message
            }
            call.completed.set()
            del self.pending_calls[key]

    def decode(self, msg):
        if 'namespace' not in msg:
            self.error_callback(ClientError.INVALID_JSON_RESPONSE)
            return

        if 'name' not in msg:
            self.error_callback(ClientError.INVALID_JSON_RESPONSE)
            return

        if msg['namespace'] == 'events' and msg['name'] == 'event':
            args = msg['args']
            t = spawn_thread(target=self.__process_event, args=(args['name'], args['args']))
            t.start()
            return

        if msg['namespace'] == 'events' and msg['name'] == 'event_burst':
            args = msg['args']
            for i in args['events']:
                t = spawn_thread(target=self.__process_event, args=(i['name'], i['args']))
                t.start()
            return

        if msg['namespace'] == 'events' and msg['name'] == 'logout':
            self.error_callback(ClientError.LOGOUT)
            return

        if msg['namespace'] == 'rpc':
            if msg['name'] == 'call':
                if self.rpc is None:
                    self.__send_error(msg['id'], errno.EINVAL, 'Server functionality is not supported')
                    return

                if 'args' not in msg:
                    self.__send_error(msg['id'], errno.EINVAL, 'Malformed request')
                    return

                args = msg['args']
                if 'method' not in args or 'args' not in args:
                    self.__send_error(msg['id'], errno.EINVAL, 'Malformed request')
                    return

                def run_async(msg, args):
                    try:
                        result = self.rpc.dispatch_call(args['method'], args['args'], sender=self)
                    except rpc.RpcException as err:
                        self.__send_error(msg['id'], err.code, err.message)
                    else:
                        self.__send_response(msg['id'], result)

                t = spawn_thread(target=run_async, args=(msg, args))
                t.start()
                return

            if msg['name'] == 'response':
                if msg['id'] in self.pending_calls.keys():
                    call = self.pending_calls[msg['id']]
                    call.result = msg['args']
                    call.completed.set()
                    if call.callback is not None:
                        call.callback(msg['args'])

                    del self.pending_calls[str(call.id)]
                else:
                    if self.error_callback is not None:
                        self.error_callback(ClientError.SPURIOUS_RPC_RESPONSE, msg['id'])

            if msg['name'] == 'error':
                if msg['id'] in self.pending_calls.keys():
                    call = self.pending_calls[msg['id']]
                    call.result = None
                    call.error = msg['args']
                    call.completed.set()
                    del self.pending_calls[str(call.id)]
                if self.error_callback is not None:
                    self.error_callback(ClientError.RPC_CALL_ERROR)

    def parse_url(self, url):
        self.parsed_url = urlsplit(url, scheme="http")
        self.scheme = self.parsed_url.scheme

    def connect(self, url, **kwargs):
        self.parse_url(url)
        if not self.scheme:
            self.scheme = kwargs.get('scheme', "ws")
        else:
            if 'scheme' in kwargs:
                raise ValueError('Connection scheme cannot be declared in both url and arguments.')
        if self.scheme == "http":
            self.scheme = "ws"

        builder = ClientTransportBuilder()
        self.transport = builder.create(self.scheme)
        self.transport.connect(self.parsed_url, self, **kwargs)
        debug_log('Connection opened, local address {0}', self.transport.address)

        if self.use_bursts:
            self.event_thread = spawn_thread(target=self.__event_emitter, args=())
            self.event_thread.start()

    def login_user(self, username, password, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth', custom_payload={'username': username, 'password': password})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        self.token = call.result[0]

    def login_service(self, name, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth_service', custom_payload={'name': name})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

    def login_token(self, token, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth_token', custom_payload={'token': token})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        self.token = call.result[0]

    def disconnect(self):
        debug_log('Closing connection, local address {0}', self.transport.address)
        self.transport.close()

    def enable_server(self):
        self.rpc = rpc.RpcContext()

    def on_event(self, callback):
        self.event_callback = callback

    def on_call(self, callback):
        self.rpc_callback = callback

    def on_error(self, callback):
        self.error_callback = callback

    def subscribe_events(self, *masks):
        self.__send(self.__pack('events', 'subscribe', masks))

    def unsubscribe_events(self, *masks):
        self.__send(self.__pack('events', 'unsubscribe', masks))

    def register_service(self, name, impl):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.rpc.register_service_instance(name, impl)
        self.call_sync('plugin.register_service', name)

    def unregister_service(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.rpc.unregister_service(name)
        self.call_sync('plugin.unregister_service', name)

    def resume_service(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.resume_service', name)

    def register_schema(self, name, schema):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.register_schema', name, schema)

    def unregister_schema(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.unregister_schema', name)

    def call_async(self, name, callback, *args):
        call = self.PendingCall(uuid.uuid4(), name, args)
        call.callback = callback
        self.pending_calls[str(call.id)] = call
        self.__call(call)
        return call

    def call_sync(self, name, *args, **kwargs):
        timeout = kwargs.pop('timeout', self.default_timeout)
        call = self.PendingCall(uuid.uuid4(), name, args)
        self.pending_calls[str(call.id)] = call
        self.__call(call)

        if not call.completed.wait(timeout):
            if self.error_callback:
                self.error_callback(ClientError.RPC_CALL_TIMEOUT, method=call.method, args=call.args)

            raise rpc.RpcException(errno.ETIMEDOUT, 'Call timed out')

        if call.result is None and call.error is not None:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        return call.result

    def call_task_sync(self, name, *args):
        tid = self.call_sync('task.submit', name, args)
        self.call_sync('task.wait', tid, timeout=3600)
        return self.call_sync('task.status', tid)

    def submit_task(self, name, *args):
        return self.call_sync('task.submit', name, args)

    def emit_event(self, name, params):
        if not self.use_bursts:
            self.__send_event(name, params)
        else:
            self.pending_events.append((name, params))
            self.event_cv.set()
            self.event_cv.clear()

    def register_event_handler(self, name, handler):
        if name not in self.event_handlers:
            self.event_handlers[name] = []

        self.event_handlers[name].append(handler)
        self.subscribe_events(name)
        return handler

    def unregister_event_handler(self, name, handler):
        self.event_handlers[name].remove(handler)

    def exec_and_wait_for_event(self, event, match_fn, fn, timeout=None):
        done = Event()
        self.subscribe_events(event)
        self.event_distribution_lock.acquire()

        try:
            fn()
        except:
            self.event_distribution_lock.release()
            raise

        def handler(args):
            if match_fn(args):
                done.set()

        self.register_event_handler(event, handler)
        self.event_distribution_lock.release()
        done.wait(timeout=timeout)
        self.unregister_event_handler(event, handler)

    def test_or_wait_for_event(self, event, match_fn, initial_condition_fn, timeout=None):
        done = Event()
        self.subscribe_events(event)
        self.event_distribution_lock.acquire()

        if initial_condition_fn():
            self.event_distribution_lock.release()
            return

        def handler(args):
            if match_fn(args):
                done.set()

        self.register_event_handler(event, handler)
        self.event_distribution_lock.release()
        done.wait(timeout=timeout)
        self.unregister_event_handler(event, handler)

    def get_lock(self, name):
        self.call_sync('lock.init', name)
        return rpc.ServerLockProxy(self, name)
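A typical call sequence against this client (a sketch; the URL, credentials and RPC method name below are placeholders):

client = Client()
client.connect('ws://127.0.0.1:5000/socket')
client.login_user('root', 'secret', timeout=10)
print(client.call_sync('system.info.version'))
client.disconnect()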
Example #8
class SubNameBrute(object):
    def __init__(self, *params):
        self.domain, self.options, self.process_num, self.dns_servers, self.next_subs, \
            self.scan_count, self.found_count, self.queue_size_array, tmp_dir = params
        self.dns_count = len(self.dns_servers)
        self.scan_count_local = 0
        self.found_count_local = 0
        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(self.options.threads)]
        for r in self.resolvers:
            r.lifetime = 4
            r.timeout = 10.0
        self.queue = PriorityQueue()
        self.priority = 0
        self.ip_dict = {}
        self.found_subs = set()
        self.timeout_subs = {}
        self.count_time = time.time()
        self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, self.domain, self.process_num), 'w')
        self.normal_names_set = set()
        self.load_sub_names()
        self.lock = RLock()
        self.threads_status = ['1'] * self.options.threads

    def load_sub_names(self):
        normal_lines = []
        wildcard_lines = []
        wildcard_set = set()
        regex_list = []
        lines = set()
        with open(self.options.file) as inFile:
            for line in inFile.readlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)

                brace_count = sub.count('{')
                if brace_count > 0:
                    wildcard_lines.append((brace_count, sub))
                    sub = sub.replace('{alphnum}', '[a-z0-9]')
                    sub = sub.replace('{alpha}', '[a-z]')
                    sub = sub.replace('{num}', '[0-9]')
                    if sub not in wildcard_set:
                        wildcard_set.add(sub)
                        regex_list.append('^' + sub + '$')
                else:
                    normal_lines.append(sub)
                    self.normal_names_set.add(sub)

        if regex_list:
            pattern = '|'.join(regex_list)
            _regex = re.compile(pattern)
            # build a new list: removing items from a list while iterating it skips entries
            normal_lines = [line for line in normal_lines if not _regex.search(line)]

        for _ in normal_lines[self.process_num::self.options.process]:
            self.queue.put((0, _))    # priority set to 0
        for _ in wildcard_lines[self.process_num::self.options.process]:
            self.queue.put(_)

    def scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]] + self.dns_servers

        while True:
            try:
                if time.time() - self.count_time > 1.0:
                    with self.lock:
                        self.scan_count.value += self.scan_count_local
                        self.scan_count_local = 0
                        self.queue_size_array[self.process_num] = self.queue.qsize()
                        if self.found_count_local:
                            self.found_count.value += self.found_count_local
                            self.found_count_local = 0
                        self.count_time = time.time()
                brace_count, sub = self.queue.get_nowait()
                self.threads_status[j] = '1'
                if brace_count > 0:
                    brace_count -= 1
                    if sub.find('{next_sub}') >= 0:
                        for _ in self.next_subs:
                            self.queue.put((0, sub.replace('{next_sub}', _)))
                    if sub.find('{alphnum}') >= 0:
                        for _ in 'abcdefghijklmnopqrstuvwxyz0123456789':
                            self.queue.put((brace_count, sub.replace('{alphnum}', _, 1)))
                    elif sub.find('{alpha}') >= 0:
                        for _ in 'abcdefghijklmnopqrstuvwxyz':
                            self.queue.put((brace_count, sub.replace('{alpha}', _, 1)))
                    elif sub.find('{num}') >= 0:
                        for _ in '0123456789':
                            self.queue.put((brace_count, sub.replace('{num}', _, 1)))
                    continue
            except gevent.queue.Empty as e:
                self.threads_status[j] = '0'
                gevent.sleep(0.5)
                if '1' not in self.threads_status:
                    break
                else:
                    continue

            try:
                if sub in self.found_subs:
                    continue

                self.scan_count_local += 1
                cur_domain = sub + '.' + self.domain
                answers = self.resolvers[j].query(cur_domain)

                if answers:
                    self.found_subs.add(sub)
                    ips = ', '.join(sorted([answer.address for answer in answers]))
                    if ips in ['1.1.1.1', '127.0.0.1', '0.0.0.0', '0.0.0.1']:
                        continue
                    if self.options.i and is_intranet(answers[0].address):
                        continue

                    try:
                        self.scan_count_local += 1
                        answers = self.resolvers[j].query(cur_domain, 'cname')
                        cname = answers[0].target.to_unicode().rstrip('.')
                        if cname.endswith(self.domain) and cname not in self.found_subs:
                            cname_sub = cname[:len(cname) - len(self.domain) - 1]    # new sub
                            if cname_sub not in self.normal_names_set:
                                self.found_subs.add(cname)
                                self.queue.put((0, cname_sub))
                    except Exception as e:
                        pass

                    first_level_sub = sub.split('.')[-1]
                    max_found = 20

                    if self.options.w:
                        first_level_sub = ''
                        max_found = 3

                    if (first_level_sub, ips) not in self.ip_dict:
                        self.ip_dict[(first_level_sub, ips)] = 1
                    else:
                        self.ip_dict[(first_level_sub, ips)] += 1
                        if self.ip_dict[(first_level_sub, ips)] > max_found:
                            continue

                    self.found_count_local += 1

                    self.outfile.write(cur_domain.ljust(30) + '\t' + ips + '\n')
                    self.outfile.flush()
                    try:
                        self.scan_count_local += 1
                        self.resolvers[j].query('lijiejie-test-not-existed.' + cur_domain)
                    except (dns.resolver.NXDOMAIN, ) as e:    # dns.resolver.NoAnswer
                        if self.queue.qsize() < 50000:
                            for _ in self.next_subs:
                                self.queue.put((0, _ + '.' + sub))
                        else:
                            self.queue.put((1, '{next_sub}.' + sub))
                    except Exception as e:
                        pass

            except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                pass
            except dns.resolver.NoNameservers as e:
                self.queue.put((0, sub))    # Retry
            except dns.exception.Timeout as e:
                self.timeout_subs[sub] = self.timeout_subs.get(sub, 0) + 1
                if self.timeout_subs[sub] <= 1:
                    self.queue.put((0, sub))    # Retry
            except Exception as e:
                import traceback
                traceback.print_exc()
                with open('errors.log', 'a') as errFile:
                    errFile.write('[%s] %s\n' % (type(e), str(e)))

    def run(self):
        threads = [gevent.spawn(self.scan, i) for i in range(self.options.threads)]
        gevent.joinall(threads)
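The brace patterns are expanded one placeholder per pass and re-queued with a decremented brace count; a standalone illustration of one expansion step (mirroring the {num} branch above):

sub, brace_count = 'db{num}', 1
step = [(brace_count - 1, sub.replace('{num}', d, 1)) for d in '0123456789']
# [(0, 'db0'), (0, 'db1'), ..., (0, 'db9')] -- each tuple goes back on the queue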
Example #9
class Task:
    """
	Task描述一次服务数据请求处理
	Task任务链式钩挂
	"""

    class Profile:
        def __init__(self):
            self.start_time = 0
            self.end_time = 0
            self.last_watch_time = 0  # time of the most recent watch record
            self.status = JobStatusType.STOPPED
            self.result = None  # task run result

    def __init__(self, proxy, runner):
        self.proxy = proxy
        self.next = None
        self.prev = None
        self.task_id = JobService.instance().generateUniqueID()
        self.runner = runner
        self.profile = Task.Profile()
        self.locker = RLock()

    def chainNext(self, task):
        self.next = task
        self.next.prev = self
        return self

    def getUniqueID(self):
        return self.task_id

    @property
    def ID(self):
        return self.getUniqueID()

    def execute(self, job):
        self.locker.acquire()
        try:
            task_id = self.getUniqueID()
            result = self.proxy.createTask(task_id, job)
            if result.status == CallReturnStatusValueType.SUCC:
                self.profile.start_time = int(time.time())
                self.profile.status = JobStatusType.RUNNING
                JobService.instance().onJobTaskStarted(self)
            return result
        finally:
            self.locker.release()

    def onFinished(self, task_result):
        self.locker.acquire()
        try:
            self.profile.end_time = int(time.time())
            self.profile.status = JobStatusType.FINISHED
            self.profile.result = task_result
            self.runner.getProfile().result = task_result

            self.runner.onTaskFinished(self)
        finally:
            self.locker.release()

    def onError(self, task_result={}):
        self.locker.acquire()
        try:
            self.profile.end_time = int(time.time())
            self.profile.status = JobStatusType.FAILED

            self.runner.getProfile().result = task_result
            self.runner.onTaskError(self)
        finally:
            self.locker.release()

    def onWatchTime(self):
        # watching is currently disabled; the body below is intentionally unreachable
        return
        try:
            result = self.proxy.watchTask(self.getUniqueID())
            self.profile.last_watch_time = int(time.time())
        except:
            traceback.print_exc()
Example #10
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None):
        with self.mutex:
            if not resource:
                raise ResourceError('Invalid resource')

            if self.get_resource(resource.name):
                raise ResourceError('Resource {0} already exists'.format(
                    resource.name))

            self.resources.add_node(resource)
            if not parents:
                parents = ['root']

            for p in parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError(
                        'Invalid parent resource {0}'.format(p))

                self.resources.add_edge(node, resource)

    def remove_resource(self, name):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in nx.descendants(self.resources, resource):
                self.resources.remove_node(i)

            self.resources.remove_node(resource)

    def remove_resources(self, names):
        with self.mutex:
            for name in names:
                resource = self.get_resource(name)

                if not resource:
                    return

                for i in nx.descendants(self.resources, resource):
                    self.resources.remove_node(i)

                self.resources.remove_node(resource)

    def update_resource(self, name, new_parents):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in self.resources.predecessors(resource):
                self.resources.remove_edge(i, resource)

            for p in new_parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError(
                        'Invalid parent resource {0}'.format(p))

                self.resources.add_edge(node, resource)

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Acquiring following resources: %s',
                              ','.join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    raise ResourceError('Resource {0} not found'.format(name))

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        raise ResourceError(
                            'Cannot acquire, some of dependent resources are busy'
                        )

                res.busy = True

    def can_acquire(self, *names):
        if not names:
            return True

        with self.mutex:
            self.logger.log(TRACE, 'Trying to acquire following resources: %s',
                            ','.join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    return False

                if res.busy:
                    return False

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        return False

            return True

    def release(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Releasing following resources: %s',
                              ','.join(names))

            for name in names:
                res = self.get_resource(name)
                res.busy = False
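Usage sketch (assuming a Resource class exposing name and busy attributes, as the graph code requires):

graph = ResourceGraph()
graph.add_resource(Resource('disk'))
graph.add_resource(Resource('volume:tank'), parents=['disk'])

graph.acquire('volume:tank')
try:
    pass  # work with the resource
finally:
    graph.release('volume:tank')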
Example #11
class ScpSever():
    def __init__(self,conn):
        self.conn = conn
        self.closed = False
        self.connerr = None
        self.conn_mutex = RLock()
        self.conn_cond = Semaphore(0)
        
    def read(self,size):
        conn, err = self.acquire_conn()
        if err: #conn is closed
            return '',err
        data,err = conn.read(size)
        if err:
            #freeze, waiting for reuse
            conn.freeze()
            self.connerr = err
        return data, None


    def write(self, data):
        conn, err = self.acquire_conn()
        if err:  # conn is closed
            return err
        err = conn.write(data)
        if err:
            # freeze, waiting for reuse
            conn.freeze()
            self.connerr = err
        return None
    
    @with_goto
    def close(self):
        self.conn_mutex.acquire()
        if self.closed:
            goto .end
        self.conn.close()
        self.closed = True
        self.connerr = IOError('connection closed')  # sentinel error handed to blocked readers/writers
        label .end
        self.conn_cond.release()
        self.conn_mutex.release()
        return self.connerr

    # timeout countdown for connection reuse
    def _star_wait(self):
        reuse_timeout = int(config['listen']['reuse_time'])
        self.time_task = Timer(reuse_timeout,self.close)
        self.time_task.start()

    def _stop_wait(self):
        self.time_task.cancel()

    def _cond_wait(self):
        self.conn_mutex.release()
        self.conn_cond.acquire()
        self.conn_mutex.acquire()

    def acquire_conn(self):
        self.conn_mutex.acquire()
        conn = None
        connerr = None
        while True:
            if self.closed:
                connerr = self.connerr
                break
            elif self.connerr:
                self._star_wait()
                self._cond_wait()
                self._stop_wait()
            else:
                conn = self.conn
                break
        self.conn_mutex.release()
        return conn, connerr

    @with_goto
    def replace_conn(self, conn):
        self.conn_mutex.acquire()
        ret = False
        if self.closed:
            goto .end
        #close old conn
        self.conn.close()
        #set new status
        self.conn = conn
        self.connerr = None
        ret = True
        label .end
        self.conn_cond.release()
        self.conn_mutex.release()
        return ret
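The with_goto blocks only emulate a single forward jump to shared cleanup; the same close() can be sketched without the goto dependency:

    def close(self):
        with self.conn_mutex:
            if not self.closed:
                self.conn.close()
                self.closed = True
                self.connerr = IOError('connection closed')
            self.conn_cond.release()
            return self.connerr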
Example #12
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None):
        self.lock()

        if not resource:
            self.unlock()
            raise ResourceError('Invalid resource')

        if self.get_resource(resource.name):
            self.unlock()
            raise ResourceError('Resource {0} already exists'.format(resource.name))

        self.resources.add_node(resource)
        if not parents:
            parents = ['root']

        for p in parents:
            node = self.get_resource(p)
            if not node:
                self.unlock()
                raise ResourceError('Invalid parent resource {0}'.format(p))

            self.resources.add_edge(node, resource)

        self.unlock()

    def remove_resource(self, name):
        self.lock()
        resource = self.get_resource(name)

        if not resource:
            self.unlock()
            return

        for i in nx.descendants(self.resources, resource):
            self.resources.remove_node(i)

        self.resources.remove_node(resource)
        self.unlock()

    def update_resource(self, name, new_parents):
        self.lock()
        resource = self.get_resource(name)

        if not resource:
            self.unlock()
            return

        for i in nx.descendants(self.resources, resource):
            self.resources.remove_node(i)

        for p in new_parents:
            node = self.get_resource(p)
            if not node:
                self.unlock()
                raise ResourceError('Invalid parent resource {0}'.format(p))

            self.resources.add_edge(node, resource)

        self.unlock()

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        self.lock()
        self.logger.debug('Acquiring following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            if not res:
                self.unlock()
                raise ResourceError('Resource {0} not found'.format(name))

            for i in nx.descendants(self.resources, res):
                if i.busy:
                    self.unlock()
                    raise ResourceError('Cannot acquire, some of dependent resources are busy')

            res.busy = True

        self.unlock()

    def can_acquire(self, *names):
        self.lock()
        self.logger.debug('Trying to acquire following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            if not res:
                self.unlock()
                return False

            if res.busy:
                self.unlock()
                return False

            for i in nx.descendants(self.resources, res):
                if i.busy:
                    self.unlock()
                    return False

        self.unlock()
        return True

    def release(self, *names):
        self.lock()
        self.logger.debug('Releasing following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            res.busy = False

        self.unlock()
Example #13
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None, children=None):
        with self.mutex:
            if not resource:
                raise ResourceError('Invalid resource')
    
            if self.get_resource(resource.name):
                raise ResourceError('Resource {0} already exists'.format(resource.name))
    
            self.resources.add_node(resource)
            if not parents:
                parents = ['root']
    
            for p in parents:
                node = self.get_resource(p)
                if not node:
                    continue
    
                self.resources.add_edge(node, resource)

            for p in children or []:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError('Invalid child resource {0}'.format(p))

                self.resources.add_edge(resource, node)

    def remove_resource(self, name):
        with self.mutex:
            resource = self.get_resource(name)
    
            if not resource:
                return
    
            for i in nx.descendants(self.resources, resource):
                self.resources.remove_node(i)
    
            self.resources.remove_node(resource)

    def remove_resources(self, names):
        with self.mutex:
            for name in names:
                resource = self.get_resource(name)
    
                if not resource:
                    return
    
                for i in nx.descendants(self.resources, resource):
                    self.resources.remove_node(i)
    
                self.resources.remove_node(resource)

    def rename_resource(self, oldname, newname):
        with self.mutex:
            resource = self.get_resource(oldname)

            if not resource:
                return

            resource.name = newname

    def update_resource(self, name, new_parents, new_children=None):
        with self.mutex:
            resource = self.get_resource(name)
    
            if not resource:
                return
    
            for i in self.resources.predecessors(resource):
                self.resources.remove_edge(i, resource)
    
            for p in new_parents:
                node = self.get_resource(p)
                if not node:
                    continue
    
                self.resources.add_edge(node, resource)

            for p in new_children or []:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError('Invalid child resource {0}'.format(p))

                self.resources.add_edge(resource, node)

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Acquiring following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                if not res:
                    raise ResourceError('Resource {0} not found'.format(name))
    
                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        raise ResourceError('Cannot acquire, some of dependent resources are busy')
    
                res.busy = True

    def can_acquire(self, *names):
        if not names:
            return True

        with self.mutex:
            self.logger.log(TRACE, 'Trying to acquire following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                if not res:
                    return False
    
                if res.busy:
                    return False
    
                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        return False
    
            return True

    def release(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Releasing following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                res.busy = False

    def draw(self, path):
        return nx.write_dot(nx.relabel_nodes(self.resources, lambda n: f'"{n.name}"'), path)
Example #14
class HttpScannerOutput(object):

    def __init__(self, args):
        # TODO: make separate queues for fast logging
        self.args = args
        self.lock = RLock()

        # Colorama init
        init()
        # Initialise logging
        self._init_logger()
        # Initialise output
        self._init_output()
        # Stats
        self.urls_scanned = 0

    def _init_output(self):
        # Initialise output
        self._init_requests_output()
        self._init_csv()
        self._init_json()
        self._init_dump()
        self._init_db()

    def _init_logger(self):
        """
        Init logger
        :return: None
        """
        if self.args.log_file is not None:
            self.logger = logging.getLogger('httpscan_logger')
            self.logger.setLevel(logging.DEBUG if self.args.debug else logging.INFO)
            handler = logging.FileHandler(self.args.log_file)
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%d.%m.%Y %H:%M:%S'))
            self.logger.addHandler(handler)
        else:
            self.logger = None

    def _init_requests_output(self):
        """
        Init requests library output
        :return: None
        """
        if self.args.debug:
            # Enable requests lib debug output
            HTTPConnection.debuglevel = 5
            packages.urllib3.add_stderr_logger()
            logging.basicConfig()
            logging.getLogger().setLevel(logging.DEBUG)
            requests_log = logging.getLogger("requests.packages.urllib3")
            requests_log.setLevel(logging.DEBUG)
            requests_log.propagate = True
        else:
            # Suppress InsecureRequestWarning: Unverified HTTPS request is being made
            packages.urllib3.disable_warnings()

    def _init_csv(self):
        """
        Initialise CSV output
        :return:
        """
        if self.args.output_csv is None:
            self.csv = None
        else:
            # TODO: check if file exists
            self.csv = writer(open(self.args.output_csv, 'wb'), delimiter=';', quoting=QUOTE_ALL)
            self.csv.writerow(['url', 'status', 'length', 'headers'])

    def _init_json(self):
        """
        Initialise JSON output
        :return: None
        """
        self.json = None if self.args.output_json is None else io.open(self.args.output_json, 'w', encoding='utf-8')

    def _init_dump(self):
        """
        Initialise dump folder
        :return: None
        """
        self.dump = path.abspath(self.args.dump) if self.args.dump is not None else None
        if self.dump is not None and not path.exists(self.dump):
            makedirs(self.dump)

    def _init_db(self):
        """
        Initialise database output. Create database and table if missing.
        :return: None
        """
        if self.args.output_database is None:
            self.engine = None
            return

        # Check and create database if needed
        if not database_exists(self.args.output_database):
            create_database(self.args.output_database, encoding='utf8')

        # Create table
        self.engine = create_engine(self.args.output_database)
        self.metadata = MetaData()
        self.scan_table = Table('httpscan', self.metadata,
                                Column('id', Integer, primary_key=True),
                                Column('url', String),
                                Column('status', Integer),
                                Column('length', Integer),
                                Column('headers', String)
                                )
        self.metadata.create_all(self.engine)

    def write(self, **kwargs):
        spawn(self.write_func, **kwargs)

    def write_func(self, **kwargs):
        with self.lock:
            # Output
            self._display_progress(**kwargs)
            self._write_log(**kwargs)

            # Check for exception
            if kwargs['exception'] is None:
                self._filter_and_write(**kwargs)

    def _display_progress(self, **kwargs):
        # TODO: add detailed stats
        # Calculate progress
        percentage = '{percent:.2%}'.format(percent=float(self.urls_scanned) / self.args.urls_count)

        # Generate and print colored output
        out = '[%s] [worker:%02i] [%s]\t%s -> status:%i ' % (
            helper.str_now(), kwargs['worker'], percentage, kwargs['url'], kwargs['status'])
        if kwargs['exception'] is not None:
            out += 'error: (%s)' % str(kwargs['exception'])
        else:
            out += 'length: %s' % naturalsize(int(kwargs['length']))
        if kwargs['status'] == 200:
            print(Fore.GREEN + out + Fore.RESET)
        elif 400 <= kwargs['status'] < 500 or kwargs['status'] == -1:
            print(Fore.RED + out + Fore.RESET)
        else:
            print(Fore.YELLOW + out + Fore.RESET)

    def _filter_and_write(self, **kwargs):
        # Filter responses and save responses that are matching ignore, allow rules
        if (self.args.allow is None and self.args.ignore is None) or \
                (self.args.allow is not None and kwargs['status'] in self.args.allow) or \
                (self.args.ignore is not None and kwargs['status'] not in self.args.ignore):
            self._write_csv(**kwargs)
            self._write_json(**kwargs)
            self._write_dump(**kwargs)
            self._write_db(**kwargs)

    def _kwargs_to_params(self, **kwargs):
        return {'url': kwargs['url'], 'status': kwargs['status'], 'length': kwargs['length'],
                'headers': str(kwargs['response'].headers)}

    def _write_log(self, **kwargs):
        # Write to log file
        if self.logger is None:
            return

        out = '[worker:%02i] %s %s %i' % (kwargs['worker'], kwargs['url'], kwargs['status'], kwargs['length'])
        if kwargs['exception'] is None:
            self.logger.info(out)
        else:
            self.logger.error("%s %s" % (out, str(kwargs['exception'])))

    def _write_csv(self, **kwargs):
        if self.csv is not None:
            self.csv.writerow([kwargs['url'], kwargs['status'], kwargs['length'], str(kwargs['response'].headers)])

    def _write_json(self, **kwargs):
        if self.json is None:
            return

        # TODO: bugfix appending json
        self.json.write(unicode(dumps(self._kwargs_to_params(**kwargs), ensure_ascii=False)))

    def _write_dump(self, **kwargs):
        if kwargs['response'] is None or self.dump is None:
            return

        # Generate folder and file path
        parsed = urlparse(kwargs['url'])
        host_folder = path.join(self.dump, parsed.netloc)
        p, f = path.split(parsed.path)
        folder = path.join(host_folder, p[1:])
        if not path.exists(folder):
            makedirs(folder)
        filename = path.join(folder, f)

        # Get all content
        try:
            content = kwargs['response'].content
        except Exception as exception:
            self.write_log('Failed to get content for %s Exception: %s' % (kwargs['url'], str(exception)), logging.ERROR)
            return

        # Save contents to file
        with open(filename, 'wb') as f:
            f.write(content)

    def _write_db(self, **kwargs):
        if self.engine is None:
            return

        # TODO: check if url exists in table
        params = self._kwargs_to_params(**kwargs)
        self.engine.execute(self.scan_table.insert().execution_options(autocommit=True), params)

    def write_log(self, msg, loglevel=logging.INFO):
        """
        Write message to log file
        :param msg:
        :param loglevel:
        :return: None
        """
        if self.logger is None:
            return

        self.lock.acquire()
        if loglevel == logging.INFO:
            self.logger.info(msg)
        elif loglevel == logging.DEBUG:
            self.logger.debug(msg)
        elif loglevel == logging.ERROR:
            self.logger.error(msg)
        elif loglevel == logging.WARNING:
            self.logger.warning(msg)

        self.lock.release()

    def print_and_log(self, msg, loglevel=logging.INFO):
        # TODO: make separate logging
        print('[%s] %s' % (helper.str_now(), msg))
        self.write_log(msg, loglevel)
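
The allow/ignore test in _filter_and_write above boils down to a small predicate; here is a minimal standalone sketch of the same rule (should_write is a hypothetical helper, not part of the scanner):

def should_write(status, allow=None, ignore=None):
    # No filters configured: record every response.
    if allow is None and ignore is None:
        return True
    # An allow list records only the listed status codes.
    if allow is not None and status in allow:
        return True
    # An ignore list records everything except the listed codes.
    if ignore is not None and status not in ignore:
        return True
    return False

assert should_write(200)
assert should_write(200, allow=[200])
assert not should_write(404, allow=[200])
assert not should_write(404, ignore=[404])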
Example #17
class DataMgr(Greenlet):
    pickle_names = ['_msgs', '_users', 'send_queue', 'pending_online_users']
    data_version = 1000

    def __init__(self, logger):
        Greenlet.__init__(self)
        self.logger = logger
        self._users_lock = RLock()
        self._msgs = {}
        self._users = {}
        self.send_queue = Queue()
        self.pending_online_users = Queue()
        self.bootstrap()
        self._dying = False
        self.start()

    def bootstrap(self):
        """Restore data from disk"""
        _ = opath.join(DATA_DIR, DM_PKL_NAME)
        if opath.exists(_):
            _ = pickle.load(file(_, 'rb'))
            if _.get('_version') != DataMgr.data_version:
                raise Exception("pkl file version mismatch: program(%d) file(%s)" % (DataMgr.data_version, _.get('_version')))
            self.__dict__.update(_)

    def shutdown(self):
        """Save data to disk"""
        self._dying = True
        self.logger.debug('[DM] saving data to disk...')
        self._save_cache()

    def reset(self):
        """reset in-memory data and disk data"""
        self.send_queue = Queue()
        self.pending_online_users = Queue()
        _ = opath.join(DATA_DIR, DM_PKL_NAME)
        if opath.exists(_):
            os.remove(_)

    def _save_cache(self):
        # fixme: save to external database not implemented
        _ = {'_version':DataMgr.data_version}
        for k in DataMgr.pickle_names:
            if k in self.__dict__:
                _[k] = self.__dict__[k]
        #pickle.dump(_, file(opath.join(DATA_DIR, DM_PKL_NAME), 'wb'), pickle.HIGHEST_PROTOCOL)

    def msg_add(self, msg):
        """add message to msg_queue

            :param msg: msg to add
            :type msg: MessageObj
        """
        if not isinstance(msg, MessageObj):
            raise ValueError(" argument is not a MessageObj")
        self._msgs[msg.msgid] = msg

    def msg_get(self, msgid):
        """get message by msgid

            :param msgid: message id
            :type msgid: int
        """
        if msgid not in self._msgs:
            raise IndexError("msgid %s not in queue" % msgid)
        return self._msgs[msgid]

    def msg_del(self, msgid):
        """del message by msgid

            :param msgid: message id
            :type msgid: int
        """
        del self._msgs[msgid]

    def msg_set(self, msgid, msg):
        self._msgs[msgid] = msg

    @property
    def msg_count(self):
        """get message queue length
        """
        return len(self._msgs)
    
    def set_user_online(self, guid):
        """set user to online

        this will generate a UserObj instance

            :param guid: user guid
            :type guid: str
        """
        #TODO get userid from rid
        uid = "u" + guid
        u = UserObj(uid, guid)
        self.users_add(u)
        self.pending_online_users.put(guid)

    def set_user_offline(self, guid):
        """set user to offline

            :param guid: user guid
        """
        #TODO get userid from rid
        self.users_del(guid)

    def users_add(self, u):
        """add a user instance to user queue

            :param u: user instance
            :type u: UserObj
        """
        if not isinstance(u, UserObj):
            raise ValueError(" argument is not a UserObj")
        self._users_lock.acquire()
        self._users[u.guid] = u
        self._users_lock.release()

    def users_get(self, guid):
        """get user by guid

            :param guid: user guid
        """
        if guid not in self._users:
            raise IndexError(" guid %s not in users list" % guid)
        return self._users[guid]

    def users_del(self, guid):
        """del user by guid

            :param guid: user guid
        """
        if '-' in guid:  # convert to bytes
            guid = binascii.unhexlify(guid)
        if guid not in self._users:
            raise IndexError(" guid %s not in users list" % guid)
        self._users_lock.acquire()
        del self._users[guid]
        self._users_lock.release()

    @property
    def users_count(self):
        """get user queue length
        """
        return len(self._users)

    def make_bundle(self, send_func, user_keys = None):
        """make bundle and call send_func

            :param send_func: the function to call on generated bundles
            :type send_func: lambda, function, instancemethod
            :param user_keys: list of user guids to match against
            :type user_keys: list

        """
        user_keys = user_keys or self._users.keys()
        self.logger.debug('[DM] begin mapping of %du * %dm' % (len(user_keys), self.msg_count))
        cnt = 0
        user_keys = sorted(user_keys, key = lambda x:self._users[x].pr, reverse = True)
        for k in user_keys:
            u = self._users[k]
            for _k, m in self._msgs.iteritems():
                _ = u.gen_bundle(m)
                if _:
                    cnt += 1
                    send_func(_)
        if cnt:
            self.logger.debug('[DM] queued %d new bundles' % cnt)
        return cnt


    def run(self):
        """the background greenlet that automatically does the n*m mapping
        """
        self.mongo_instance = mongo()
        while not self._dying:
            msgids = self.mongo_instance.event_get_id(0)
            for i in msgids:
                # generate new MessageObj instance
                m = MessageObj(
                    payload_callback = lambda d = i:self.mongo_instance.event_get_single_info(d),
                    msgid = i
                )
                self.msg_add(m)
            gevent.sleep(60)
            self._save_cache()
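
make_bundle above is essentially an n*m product of users and messages, served in descending user-priority order; the following self-contained sketch reproduces just that pairing (UserStub is a hypothetical stand-in for UserObj):

class UserStub(object):
    def __init__(self, guid, pr):
        self.guid, self.pr = guid, pr

    def gen_bundle(self, msg):
        # A real UserObj would filter here; the stub pairs everything.
        return (self.guid, msg)

def make_bundles(users, msgs, send_func):
    cnt = 0
    # Highest-priority users are mapped first, as in DataMgr.make_bundle.
    for u in sorted(users.values(), key=lambda x: x.pr, reverse=True):
        for m in msgs.values():
            bundle = u.gen_bundle(m)
            if bundle:
                send_func(bundle)
                cnt += 1
    return cnt

out = []
users = {1: UserStub(1, pr=5), 2: UserStub(2, pr=9)}
assert make_bundles(users, {'m1': 'hello'}, out.append) == 2
assert out[0][0] == 2  # the higher-priority user is served first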
Example #18
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger("ResourceGraph")
        self.mutex = RLock()
        self.root = Resource("root")
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None):
        with self.mutex:
            if not resource:
                raise ResourceError("Invalid resource")

            if self.get_resource(resource.name):
                raise ResourceError("Resource {0} already exists".format(resource.name))

            self.resources.add_node(resource)
            if not parents:
                parents = ["root"]

            for p in parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError("Invalid parent resource {0}".format(p))

                self.resources.add_edge(node, resource)

    def remove_resource(self, name):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in nx.descendants(self.resources, resource):
                self.resources.remove_node(i)

            self.resources.remove_node(resource)

    def remove_resources(self, names):
        with self.mutex:
            for name in names:
                resource = self.get_resource(name)

                if not resource:
                    continue

                for i in nx.descendants(self.resources, resource):
                    self.resources.remove_node(i)

                self.resources.remove_node(resource)

    def update_resource(self, name, new_parents):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in self.resources.predecessors(resource):
                self.resources.remove_edge(i, resource)

            for p in new_parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError("Invalid parent resource {0}".format(p))

                self.resources.add_edge(node, resource)

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug("Acquiring following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    raise ResourceError("Resource {0} not found".format(name))

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        raise ResourceError("Cannot acquire, some of the dependent resources are busy")

                res.busy = True

    def can_acquire(self, *names):
        if not names:
            return True

        with self.mutex:
            self.logger.log(TRACE, "Trying to acquire following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    return False

                if res.busy:
                    return False

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        return False

            return True

    def release(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug("Releasing following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                res.busy = False
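
The acquire/can_acquire methods above treat a resource as acquirable only when it and every descendant in the DiGraph are free; a minimal sketch of that rule (the two-field Resource class here is a stand-in for the real one):

import networkx as nx

class Resource(object):
    def __init__(self, name):
        self.name, self.busy = name, False

g = nx.DiGraph()
root, disk, volume = Resource('root'), Resource('disk'), Resource('volume')
g.add_edge(root, disk)
g.add_edge(disk, volume)

def can_acquire(res):
    # Acquirable only if the node itself and all of its descendants are free.
    return not res.busy and all(not d.busy for d in nx.descendants(g, res))

volume.busy = True
assert not can_acquire(disk)  # blocked by a busy descendant
volume.busy = False
assert can_acquire(disk)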
Example #20
class Balancer(object):
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.queues = {}
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger('Balancer')
        self.dispatcher.require_collection('tasks', 'serial', type='log')
        self.create_initial_queues()
        self.start_executors()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type('task.changed')

        # Let's try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query('tasks', ('state', 'in', ['EXECUTING', 'WAITING', 'CREATED'])):
            self.logger.info('Stale Task ID: {0} Name: {1} being set to FAILED'.format(
                stale_task['id'],
                stale_task['name']
            ))

            stale_task.update({
                'state': 'FAILED',
                'error': {
                    'message': 'dispatcher process died',
                    'code': errno.EINTR,
                }
            })

            dispatcher.datastore.update('tasks', stale_task['id'], stale_task)

    def create_initial_queues(self):
        self.resource_graph.add_resource(Resource('system'))

    def start_executors(self):
        for i in range(0, self.dispatcher.configstore.get('middleware.executors_count')):
            self.logger.info('Starting task executor #{0}...'.format(i))
            self.executors.append(TaskExecutor(self, i))

    def start(self):
        self.threads.append(gevent.spawn(self.distribution_thread))
        self.logger.info("Started")

    def schema_to_list(self, schema):
        return {
            'type': 'array',
            'items': schema,
            'minItems': sum([1 for x in schema if 'mandatory' in x and x['mandatory']]),
            'maxItems': len(schema)
        }

    def verify_schema(self, clazz, args):
        if not hasattr(clazz, 'params_schema'):
            return []

        schema = self.schema_to_list(clazz.params_schema)
        val = validator.DefaultDraft4Validator(schema, resolver=self.dispatcher.rpc.get_schema_resolver(schema))
        return list(val.iter_errors(args))

    def submit(self, name, args, sender, env=None):
        if name not in self.dispatcher.tasks:
            self.logger.warning("Cannot submit task: unknown task type %s", name)
            raise RpcException(errno.EINVAL, "Unknown task type {0}".format(name))

        task = Task(self.dispatcher, name)
        task.user = sender.user.name
        task.session_id = sender.session_id
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.args = copy.deepcopy(args)

        if env:
            if not isinstance(env, dict):
                raise ValueError('env must be a dict')

            task.environment = copy.deepcopy(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.set_state(TaskState.CREATED)
        self.task_queue.put(task)
        self.logger.info("Task %d submitted (type: %s, class: %s)", task.id, name, task.clazz)
        return task.id

    def verify_subtask(self, parent, name, args):
        clazz = self.dispatcher.tasks[name]
        instance = clazz(self.dispatcher, self.dispatcher.datastore)
        return instance.verify(*args)

    def run_subtask(self, parent, name, args):
        args = list(args)
        task = Task(self.dispatcher, name)
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.args = args
        task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
        task.instance.verify(*task.args)
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.parent = parent

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.set_state(TaskState.CREATED)
        self.task_list.append(task)
        # If we actually have a non `None` parent task then, add
        # the current subtask to the parent task's subtasks list too
        if parent is not None:
            parent.subtask_ids.append(task.id)
        task.start()
        return task

    def join_subtasks(self, *tasks):
        for i in tasks:
            i.join()

    def abort(self, id):
        task = self.get_task(id)
        if not task:
            self.logger.warning("Cannot abort task: unknown task id %d", id)
            return

        success = False
        if task.started_at is None:
            success = True
        else:
            try:
                task.executor.abort()
                # Also try to abort any subtasks that might have been running
                for st in task.subtask_ids:
                    self.abort(st)
            except:
                pass
        if success:
            task.ended.set()
            task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
            self.logger.debug("Task ID: %d, Name: %s aborted by user", task.id, task.name)

    def task_exited(self, task):
        self.resource_graph.release(*task.resources)
        self.schedule_tasks()

    def schedule_tasks(self):
        """
        This function is called when:
        1) any new task is submitted to any of the queues
        2) any task exits

        :return:
        """
        for task in [t for t in self.task_list if t.state == TaskState.WAITING]:
            if not self.resource_graph.can_acquire(*task.resources):
                continue

            self.resource_graph.acquire(*task.resources)
            self.threads.append(task.start())

    def distribution_thread(self):
        while True:
            self.task_queue.peek()
            self.distribution_lock.acquire()
            task = self.task_queue.get()

            try:
                self.logger.debug("Picked up task %d: %s with args %s", task.id, task.name, task.args)

                errors = self.verify_schema(self.dispatcher.tasks[task.name], task.args)
                if len(errors) > 0:
                    errors = list(validator.serialize_errors(errors))
                    self.logger.warning("Cannot submit task {0}: schema verification failed with errors {1}".format(
                        task.name,
                        errors
                    ))
                    raise ValidationException(extra=errors)

                task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
                task.resources = task.instance.verify(*task.args)

                if type(task.resources) is not list:
                    raise ValueError("verify() returned something other than a resource list")

            except Exception as err:
                self.logger.warning("Cannot verify task %d: %s", task.id, err)
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(err))
                self.task_list.append(task)
                task.ended.set()
                self.distribution_lock.release()

                if not isinstance(err, VerifyException):
                    self.dispatcher.report_error('Task {0} verify() method raised invalid exception'.format(err), err)

                continue

            task.set_state(TaskState.WAITING)
            self.task_list.append(task)
            self.distribution_lock.release()
            self.schedule_tasks()
            self.logger.debug("Task %d assigned to resources %s", task.id, ','.join(task.resources))

    def assign_executor(self, task):
        for i in self.executors:
            if i.state == WorkerState.IDLE:
                i.checked_in.wait()
                self.logger.info("Task %d assigned to executor #%d", task.id, i.index)
                task.executor = i
                i.state = WorkerState.EXECUTING
                return

        # Out of executors! Need to spawn new one
        executor = TaskExecutor(self, len(self.executors))
        self.executors.append(executor)
        executor.checked_in.wait()
        executor.state = WorkerState.EXECUTING
        task.executor = executor
        self.logger.info("Task %d assigned to executor #%d", task.id, executor.index)

    def dispose_executors(self):
        for i in self.executors:
            i.die()

    def get_active_tasks(self):
        return [x for x in self.task_list if x.state in (
            TaskState.CREATED,
            TaskState.WAITING,
            TaskState.EXECUTING)]

    def get_tasks(self, type=None):
        if type is None:
            return self.task_list

        return [x for x in self.task_list if x.state == type]

    def get_task(self, id):
        self.distribution_lock.acquire()
        t = first_or_default(lambda x: x.id == id, self.task_list)
        if not t:
            t = first_or_default(lambda x: x.id == id, self.task_queue.queue)

        self.distribution_lock.release()
        return t

    def get_executor_by_key(self, key):
        return first_or_default(lambda t: t.key == key, self.executors)

    def get_executor_by_sender(self, sender):
        return first_or_default(lambda t: t.conn == sender, self.executors)
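
schema_to_list above wraps a task's per-parameter schemas into one JSON-Schema array whose minItems counts the mandatory parameters; a quick illustration with hypothetical parameter schemas:

def schema_to_list(schema):
    return {
        'type': 'array',
        'items': schema,
        'minItems': sum(1 for x in schema if x.get('mandatory')),
        'maxItems': len(schema),
    }

params = [
    {'type': 'string', 'mandatory': True},  # e.g. a pool name
    {'type': 'integer'},                    # an optional size
]
listed = schema_to_list(params)
assert listed['minItems'] == 1 and listed['maxItems'] == 2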
Example #21
class TimerPool(object):
    def __init__(self):
        self.pool = {}
        self.lock = RLock()
        self.config_time = 0
        super(TimerPool, self).__init__()

    def add(self, task_obj):
        self.lock.acquire()

        old_t = timer_model.next_update_timestamp.get(task_obj.get_key())
        old_last_t = timer_model.last_update_timestamp.get(task_obj.get_key())

        if old_t and old_last_t and 0 < old_t <= time.time() and old_t > old_last_t:
            self.pool[str(task_obj.get_key())] = task_obj
            task_obj.next_time = old_t
            task_obj.set_next_time(old_t)
            timer_model.next_update_timestamp[task_obj.get_key()] = int(old_t)
        else:
            self.pool[str(task_obj.get_key())] = task_obj
            timer_model.next_update_timestamp[task_obj.get_key()] = int(
                task_obj.get_next_time())
        timer_model.save()
        self.lock.release()

    def is_config_out(self):
        """Check whether the config versions are outdated and need reloading.

        :return: True if any config version differs
        """
        cv = ConfigVersionManager.get_config_version_obj(
            config_type=getattr(settings, 'CONFIG_TYPE', 1))
        for name in CONFIG_NAME:
            if game_config.config_version.get(name) != cv.versions.get(name):
                return True
        return False

    def start(self):
        """Spawn the timer loop greenlet and wait for it to finish."""
        gevent.joinall([gevent.Greenlet.spawn(self.loop)])

    def loop(self):
        """Tick once per second: reload config when needed and fire due tasks."""
        while 1:
            gevent.sleep(1)
            now = time.time()
            self.lock.acquire()

            if self.config_time % 60 == 0:
                # from logics.share import  debug_sync_change_time
                # NOTE: running this function in production is forbidden
                if settings.DEBUG:
                    debug_sync_change_time(self)

                if self.is_config_out():
                    game_config.load_all()
                    reload_all_config(self)

            pool_copy = copy.copy(self.pool)
            del_list = []
            for k, task_obj in pool_copy.iteritems():
                if now >= int(task_obj.get_next_time()):
                    try:
                        if task_obj.is_global() == 1:
                            # gevent.joinall([gevent.Greenlet.spawn(task_obj.get_func())], raise_error=True)
                            gevent.joinall(
                                [gevent.Greenlet.spawn(task_obj.get_func())])
                        elif task_obj.is_global() == 2:
                            world_ids = list(
                                set([
                                    value['world_id'] for value in
                                    game_config.server_pk_world.itervalues()
                                ]))
                            for world_id in world_ids:
                                print_log(
                                    'world func: ', task_obj.get_func(),
                                    datetime.datetime.fromtimestamp(
                                        task_obj.next_time))
                                # gevent.joinall([gevent.Greenlet.spawn(task_obj.get_func(), world_id)], raise_error=True)
                                gevent.joinall([
                                    gevent.Greenlet.spawn(
                                        task_obj.get_func(), world_id)
                                ])
                        else:
                            for server_name, server_cfg in settings.SERVERS.iteritems():
                                if server_name == 'master':
                                    continue
                                # if server_cfg['config_type'] != CONFIG_TYPE: continue
                                if settings.get_config_type(server_name) != CONFIG_TYPE:
                                    continue
                                print_log(
                                    'func: ', task_obj.get_func(),
                                    datetime.datetime.fromtimestamp(
                                        task_obj.next_time))
                                # gevent.joinall([gevent.Greenlet.spawn(task_obj.get_func(), server_name)], raise_error=True)
                                gevent.joinall([
                                    gevent.Greenlet.spawn(
                                        task_obj.get_func(), server_name)
                                ])
                        timer_model.last_update_timestamp[
                            task_obj.get_key()] = now
                        if task_obj.is_repeat():
                            task_obj.parser()
                            timer_model.next_update_timestamp[
                                task_obj.get_key()] = int(
                                    task_obj.get_next_time())
                        else:
                            del_list.append(k)
                        timer_model.save()
                        print_log('timer, run %s, is_repeat: %s' %
                                  (k, str(task_obj.is_repeat())))
                    except:
                        trackback(msg='timer, timer %s ERROR: ' % str(k))
            for k in del_list:
                del self.pool[k]
            self.config_time += 1
            self.lock.release()
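
TimerPool.loop above is a one-second gevent ticker: tasks whose next_time has passed are spawned, repeating tasks are rescheduled, and one-shot tasks are dropped. A minimal self-contained sketch of that pattern (TaskStub is hypothetical):

import time
import gevent

class TaskStub(object):
    def __init__(self, func, interval, repeat=True):
        self.func, self.interval, self.repeat = func, interval, repeat
        self.next_time = time.time() + interval

def tick(pool):
    now = time.time()
    expired = []
    for key, task in list(pool.items()):
        if now >= task.next_time:
            gevent.joinall([gevent.Greenlet.spawn(task.func)])
            if task.repeat:
                task.next_time = now + task.interval  # reschedule
            else:
                expired.append(key)  # one-shot: drop after running
    for key in expired:
        del pool[key]

pool = {'demo': TaskStub(lambda: None, interval=60)}
pool['demo'].next_time = 0  # pretend it is overdue
tick(pool)
assert pool['demo'].next_time > 0  # the repeating task was rescheduled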
Example #22
class Balancer(object):
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger("Balancer")
        self.dispatcher.require_collection("tasks", "serial", type="log")
        self.create_initial_queues()
        self.start_executors()
        self.schedule_lock = RLock()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type("task.changed")

        # Let's try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query("tasks", ("state", "in", ["EXECUTING", "WAITING", "CREATED"])):
            self.logger.info(
                "Stale task ID: {0}, name: {1} being set to FAILED".format(stale_task["id"], stale_task["name"])
            )

            stale_task.update(
                {
                    "state": "FAILED",
                    "error": {
                        "type": "TaskException",
                        "message": "dispatcher process died",
                        "code": errno.EINTR,
                        "stacktrace": "",
                        "extra": None,
                    },
                }
            )

            dispatcher.datastore.update("tasks", stale_task["id"], stale_task)

    def create_initial_queues(self):
        self.resource_graph.add_resource(Resource("system"))

    def start_executors(self):
        for i in range(0, self.dispatcher.configstore.get("middleware.executors_count")):
            self.logger.info("Starting task executor #{0}...".format(i))
            self.executors.append(TaskExecutor(self, i))

    def start(self):
        self.threads.append(gevent.spawn(self.distribution_thread))
        self.logger.info("Started")

    def schema_to_list(self, schema):
        return {
            "type": "array",
            "items": schema,
            "minItems": sum([1 for x in schema if "mandatory" in x and x["mandatory"]]),
            "maxItems": len(schema),
        }

    def verify_schema(self, clazz, args, strict=False):
        if not hasattr(clazz, "params_schema"):
            return []

        schema = self.schema_to_list(clazz.params_schema)
        val = validator.create_validator(schema, resolver=self.dispatcher.rpc.get_schema_resolver(schema))
        if strict:
            val.fail_read_only = True
        else:
            val.remove_read_only = True

        return list(val.iter_errors(args))

    def submit(self, name, args, sender, env=None):
        if name not in self.dispatcher.tasks:
            self.logger.warning("Cannot submit task: unknown task type %s", name)
            raise RpcException(errno.EINVAL, "Unknown task type {0}".format(name))

        task = Task(self.dispatcher, name)
        task.user = sender.user.name
        task.session_id = sender.session_id
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = copy.deepcopy(args)
        task.strict_verify = "strict_validation" in sender.enabled_features

        if env:
            if not isinstance(env, dict):
                raise ValueError("env must be a dict")

            task.environment = copy.deepcopy(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        if "RUN_AS_USER" in task.environment:
            task.user = task.environment["RUN_AS_USER"]

        task.environment["SENDER_ADDRESS"] = sender.client_address
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.set_state(TaskState.CREATED)
        self.task_queue.put(task)
        self.logger.info("Task %d submitted (type: %s, class: %s)", task.id, name, task.clazz)
        return task.id

    def submit_with_upload(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata["schema"]

        if schema is None:
            raise RpcException(errno.ENOENT, "Task {0} has no schema associated with it".format(task_name))
        upload_token_list = []
        for idx, arg in enumerate(schema):
            if arg.get("type") == "fd":
                rfd, wfd = os.pipe()
                token = self.dispatcher.token_store.issue_token(
                    FileToken(
                        user=sender.user,
                        lifetime=60,
                        direction="upload",
                        file=FileObjectPosix(wfd, "wb", close=True),
                        name=str(uuid.uuid4()),
                        size=None,
                    )
                )
                upload_token_list.append(token)
                args[idx] = FileDescriptor(rfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, upload_token_list

    def submit_with_download(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata["schema"]
        url_list = []

        if schema is None:
            raise RpcException(errno.ENOENT, "Task {0} has no schema associated with it".format(task_name))

        for idx, arg in enumerate(schema):
            if arg.get("type") == "fd":
                rfd, wfd = os.pipe()
                url_list.append(
                    "/dispatcher/filedownload?token={0}".format(
                        self.dispatcher.token_store.issue_token(
                            FileToken(
                                user=sender.user,
                                lifetime=60,
                                direction="download",
                                file=FileObjectPosix(rfd, "rb", close=True),
                                name=args[idx],
                            )
                        )
                    )
                )
                args[idx] = FileDescriptor(wfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, url_list

    def verify_subtask(self, parent, name, args):
        clazz = self.dispatcher.tasks[name]
        instance = clazz(self.dispatcher, self.dispatcher.datastore)
        return instance.verify(*args)

    def run_subtask(self, parent, name, args, env=None):
        args = list(args)
        task = Task(self.dispatcher, name)
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = args
        task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
        task.instance.verify(*task.args)
        task.description = task.instance.describe(*task.args)
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.parent = parent
        task.environment = {}

        if parent:
            task.environment = copy.deepcopy(parent.environment)
            task.environment["parent"] = parent.id
            task.user = parent.user

        if env:
            if not isinstance(env, dict):
                raise ValueError("env must be a dict")

            task.environment.update(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.set_state(TaskState.CREATED)
        self.task_list.append(task)

        task.start()
        return task

    def join_subtasks(self, *tasks):
        for i in tasks:
            i.join()

    def abort(self, id, error=None):
        task = self.get_task(id)
        if not task:
            self.logger.warning("Cannot abort task: unknown task id %d", id)
            return

        success = False
        if task.started_at is None:
            success = True
        else:
            try:
                task.executor.abort()
            except:
                pass
        if success:
            task.ended.set()
            if error:
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(error))
                self.logger.debug("Task ID: %d, name: %s aborted with error", task.id, task.name)
            else:
                task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
                self.logger.debug("Task ID: %d, name: %s aborted by user", task.id, task.name)

    def task_exited(self, task):
        self.resource_graph.release(*task.resources)
        self.schedule_tasks(True)

    def schedule_tasks(self, exit=False):
        """
        This function is called when:
        1) any new task is submitted to any of the queues
        2) any task exits
        """
        with self.schedule_lock:
            started = 0
            executing_tasks = [t for t in self.task_list if t.state == TaskState.EXECUTING]
            waiting_tasks = [t for t in self.task_list if t.state == TaskState.WAITING]

            for task in waiting_tasks:
                if not self.resource_graph.can_acquire(*task.resources):
                    continue

                self.resource_graph.acquire(*task.resources)
                self.threads.append(task.start())
                started += 1

            if not started and not executing_tasks and (exit or len(waiting_tasks) == 1):
                for task in waiting_tasks:
                    # Check whether or not task waits on nonexistent resources. If it does,
                    # abort it 'cause there's no chance anymore that missing resources will appear.
                    if any(self.resource_graph.get_resource(res) is None for res in task.resources):
                        self.logger.warning("Aborting task {0}: deadlock".format(task.id))
                        self.abort(task.id, VerifyException(errno.EBUSY, "Resource deadlock avoided"))

    def distribution_thread(self):
        while True:
            self.task_queue.peek()
            self.distribution_lock.acquire()
            task = self.task_queue.get()

            try:
                self.logger.debug("Picked up task %d: %s with args %s", task.id, task.name, task.args)

                errors = self.verify_schema(self.dispatcher.tasks[task.name], task.args, task.strict_verify)
                if len(errors) > 0:
                    errors = list(validator.serialize_errors(errors))
                    self.logger.warning(
                        "Cannot submit task {0}: schema verification failed with errors {1}".format(task.name, errors)
                    )
                    raise ValidationException(extra=errors)

                task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
                task.resources = task.instance.verify(*task.args)
                task.description = task.instance.describe(*task.args)

                if type(task.resources) is not list:
                    raise ValueError("verify() returned something other than a resource list")

            except Exception as err:
                self.logger.warning("Cannot verify task %d: %s", task.id, err)
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(err))
                task.ended.set()
                self.distribution_lock.release()

                if not isinstance(err, VerifyException):
                    self.dispatcher.report_error("Task {0} verify() method raised invalid exception".format(err), err)

                continue

            task.set_state(TaskState.WAITING)
            self.task_list.append(task)
            self.distribution_lock.release()
            self.schedule_tasks()
            if task.resources:
                self.logger.debug("Task %d assigned to resources %s", task.id, ",".join(task.resources))

    def assign_executor(self, task):
        for i in self.executors:
            with i.cv:
                if i.state == WorkerState.IDLE:
                    self.logger.info("Task %d assigned to executor #%d", task.id, i.index)
                    task.executor = i
                    i.state = WorkerState.ASSIGNED
                    return

        # Out of executors! Need to spawn new one
        executor = TaskExecutor(self, len(self.executors))
        self.executors.append(executor)
        with executor.cv:
            executor.cv.wait_for(lambda: executor.state == WorkerState.IDLE)
            executor.state = WorkerState.ASSIGNED
            task.executor = executor
            self.logger.info("Task %d assigned to executor #%d", task.id, executor.index)

    def dispose_executors(self):
        for i in self.executors:
            i.die()

    def get_active_tasks(self):
        return [x for x in self.task_list if x.state in (TaskState.CREATED, TaskState.WAITING, TaskState.EXECUTING)]

    def get_tasks(self, type=None):
        if type is None:
            return self.task_list

        return [x for x in self.task_list if x.state == type]

    def get_task(self, id):
        self.distribution_lock.acquire()
        t = first_or_default(lambda x: x.id == id, self.task_list)
        if not t:
            t = first_or_default(lambda x: x.id == id, self.task_queue.queue)

        self.distribution_lock.release()
        return t

    def get_executor_by_key(self, key):
        return first_or_default(lambda t: t.key == key, self.executors)

    def get_executor_by_sender(self, sender):
        return first_or_default(lambda t: t.conn == sender, self.executors)
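
submit_with_upload above splits an os.pipe per 'fd' parameter: the write end goes to the uploader behind a FileToken, while the task receives the read end. A stripped-down sketch of that plumbing (plain file descriptors stand in for FileToken/FileDescriptor):

import os

def wire_upload(args, schema):
    write_ends = []
    for idx, arg in enumerate(schema):
        if arg.get('type') == 'fd':
            rfd, wfd = os.pipe()
            write_ends.append(wfd)  # handed to the uploader in the real code
            args[idx] = rfd         # the task reads from the other end
    return write_ends

args = ['myfile', None]
schema = [{'type': 'string'}, {'type': 'fd'}]
write_ends = wire_upload(args, schema)
os.write(write_ends[0], b'payload')
os.close(write_ends[0])
assert os.read(args[1], 7) == b'payload'
os.close(args[1])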
Example #23
class NettingChannel:
    def __init__(
            self,
            jsonrpc_client,
            channel_address,
            poll_timeout=DEFAULT_POLL_TIMEOUT):

        self.address = channel_address
        self.client = jsonrpc_client
        self.poll_timeout = poll_timeout
        # Prevents concurrent deposit, close, or settle operations on the same channel
        self.channel_operations_lock = RLock()
        self.node_address = privatekey_to_address(self.client.privkey)
        self.proxy = jsonrpc_client.new_contract_proxy(
            CONTRACT_MANAGER.get_abi(CONTRACT_NETTING_CHANNEL),
            address_encoder(channel_address),
        )

        # check we are a participant of the given channel
        self.detail()
        self._check_exists()

    def _check_exists(self):
        check_address_has_code(self.client, self.address, 'Netting Channel')

    def _call_and_check_result(self, function_name: str):
        call_result = self.proxy.call(function_name)

        if call_result == b'':
            self._check_exists()
            raise RuntimeError(
                "Call to '{}' returned nothing".format(function_name)
            )

        return call_result

    def token_address(self):
        """ Returns the type of token that can be transferred by the channel.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        address = self._call_and_check_result('tokenAddress')
        return address_decoder(address)

    def detail(self):
        """ Returns a dictionary with the details of the netting channel.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        data = self._call_and_check_result('addressAndBalance')

        settle_timeout = self.settle_timeout()
        our_address = privatekey_to_address(self.client.privkey)

        if address_decoder(data[0]) == our_address:
            return {
                'our_address': address_decoder(data[0]),
                'our_balance': data[1],
                'partner_address': address_decoder(data[2]),
                'partner_balance': data[3],
                'settle_timeout': settle_timeout,
            }

        if address_decoder(data[2]) == our_address:
            return {
                'our_address': address_decoder(data[2]),
                'our_balance': data[3],
                'partner_address': address_decoder(data[0]),
                'partner_balance': data[1],
                'settle_timeout': settle_timeout,
            }

        raise ValueError('We [{}] are not a participant of the given channel ({}, {})'.format(
            pex(our_address),
            data[0],
            data[2],
        ))

    def settle_timeout(self):
        """ Returns the netting channel settle_timeout.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        return self._call_and_check_result('settleTimeout')

    def opened(self):
        """ Returns the block in which the channel was created.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        return self._call_and_check_result('opened')

    def closed(self):
        """ Returns the block in which the channel was closed or 0.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        return self._call_and_check_result('closed')

    def closing_address(self):
        """ Returns the address of the closer, if the channel is closed, None
        otherwise.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        closer = self.proxy.call('closingAddress')

        if closer:
            return address_decoder(closer)

        return None

    def can_transfer(self):
        """ Returns True if the channel is opened and the node has deposit in
        it.

        Note: Having a deposit does not imply having a balance for off-chain
        transfers.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
        """
        closed = self.closed()

        if closed != 0:
            return False

        return self.detail()['our_balance'] > 0

    def deposit(self, amount):
        """ Deposit amount token in the channel.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
            ChannelBusyError: If the channel is busy with another operation
            RuntimeError: If the netting channel token address is empty.
        """
        if not isinstance(amount, int):
            raise ValueError('amount needs to be an integral number.')

        token_address = self.token_address()

        token = Token(
            self.client,
            token_address,
            self.poll_timeout,
        )
        current_balance = token.balance_of(self.node_address)

        if current_balance < amount:
            raise ValueError('deposit [{}] cannot be larger than the available balance [{}].'.format(
                amount,
                current_balance,
            ))

        if log.isEnabledFor(logging.INFO):
            log.info(
                'deposit called',
                node=pex(self.node_address),
                contract=pex(self.address),
                amount=amount,
            )

        # Non-blocking probe: fail fast if another channel operation holds the lock.
        if not self.channel_operations_lock.acquire(0):
            raise ChannelBusyError(
                f'Channel with address {self.address} is '
                f'busy with another ongoing operation.'
            )

        self.channel_operations_lock.release()

        with self.channel_operations_lock:
            transaction_hash = estimate_and_transact(
                self.proxy,
                'deposit',
                amount,
            )

            self.client.poll(
                unhexlify(transaction_hash),
                timeout=self.poll_timeout,
            )

            receipt_or_none = check_transaction_threw(self.client, transaction_hash)
            if receipt_or_none:
                log.critical(
                    'deposit failed',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                )

                self._check_exists()
                raise TransactionThrew('Deposit', receipt_or_none)

            if log.isEnabledFor(logging.INFO):
                log.info(
                    'deposit successful',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    amount=amount,
                )

    def close(self, nonce, transferred_amount, locksroot, extra_hash, signature):
        """ Close the channel using the provided balance proof.

        Raises:
            AddressWithoutCode: If the channel was settled prior to the call.
            ChannelBusyError: If the channel is busy with another operation.
        """

        if log.isEnabledFor(logging.INFO):
            log.info(
                'close called',
                node=pex(self.node_address),
                contract=pex(self.address),
                nonce=nonce,
                transferred_amount=transferred_amount,
                locksroot=encode_hex(locksroot),
                extra_hash=encode_hex(extra_hash),
                signature=encode_hex(signature),
            )

        if not self.channel_operations_lock.acquire(0):
            raise ChannelBusyError(
                f'Channel with address {self.address} is '
                f'busy with another ongoing operation.'
            )

        self.channel_operations_lock.release()

        with self.channel_operations_lock:
            transaction_hash = estimate_and_transact(
                self.proxy,
                'close',
                nonce,
                transferred_amount,
                locksroot,
                extra_hash,
                signature,
            )
            self.client.poll(unhexlify(transaction_hash), timeout=self.poll_timeout)

            receipt_or_none = check_transaction_threw(self.client, transaction_hash)
            if receipt_or_none:
                log.critical(
                    'close failed',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    nonce=nonce,
                    transferred_amount=transferred_amount,
                    locksroot=encode_hex(locksroot),
                    extra_hash=encode_hex(extra_hash),
                    signature=encode_hex(signature),
                )
                self._check_exists()
                raise TransactionThrew('Close', receipt_or_none)

            if log.isEnabledFor(logging.INFO):
                log.info(
                    'close successful',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    nonce=nonce,
                    transferred_amount=transferred_amount,
                    locksroot=encode_hex(locksroot),
                    extra_hash=encode_hex(extra_hash),
                    signature=encode_hex(signature),
                )

    def update_transfer(self, nonce, transferred_amount, locksroot, extra_hash, signature):
        if signature:
            if log.isEnabledFor(logging.INFO):
                log.info(
                    'updateTransfer called',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    nonce=nonce,
                    transferred_amount=transferred_amount,
                    locksroot=encode_hex(locksroot),
                    extra_hash=encode_hex(extra_hash),
                    signature=encode_hex(signature),
                )

            transaction_hash = estimate_and_transact(
                self.proxy,
                'updateTransfer',
                nonce,
                transferred_amount,
                locksroot,
                extra_hash,
                signature,
            )

            self.client.poll(
                unhexlify(transaction_hash),
                timeout=self.poll_timeout,
            )

            receipt_or_none = check_transaction_threw(self.client, transaction_hash)
            if receipt_or_none:
                log.critical(
                    'updateTransfer failed',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    nonce=nonce,
                    transferred_amount=transferred_amount,
                    locksroot=encode_hex(locksroot),
                    extra_hash=encode_hex(extra_hash),
                    signature=encode_hex(signature),
                )
                self._check_exists()
                raise TransactionThrew('Update Transfer', receipt_or_none)

            if log.isEnabledFor(logging.INFO):
                log.info(
                    'updateTransfer successful',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                    nonce=nonce,
                    transferred_amount=transferred_amount,
                    locksroot=encode_hex(locksroot),
                    extra_hash=encode_hex(extra_hash),
                    signature=encode_hex(signature),
                )

    def withdraw(self, unlock_proof):
        if log.isEnabledFor(logging.INFO):
            log.info(
                'withdraw called',
                node=pex(self.node_address),
                contract=pex(self.address),
            )

        if isinstance(unlock_proof.lock_encoded, messages.Lock):
            raise ValueError('unlock must be called with a lock encoded `.as_bytes`')

        merkleproof_encoded = b''.join(unlock_proof.merkle_proof)

        transaction_hash = estimate_and_transact(
            self.proxy,
            'withdraw',
            unlock_proof.lock_encoded,
            merkleproof_encoded,
            unlock_proof.secret,
        )

        self.client.poll(unhexlify(transaction_hash), timeout=self.poll_timeout)
        receipt_or_none = check_transaction_threw(self.client, transaction_hash)

        if receipt_or_none:
            log.critical(
                'withdraw failed',
                node=pex(self.node_address),
                contract=pex(self.address),
                lock=unlock_proof,
            )
            self._check_exists()
            raise TransactionThrew('Withdraw', receipt_or_none)

        elif log.isEnabledFor(logging.INFO):
            log.info(
                'withdraw successful',
                node=pex(self.node_address),
                contract=pex(self.address),
                lock=unlock_proof,
            )

    def settle(self):
        """ Settle the channel.

        Raises:
            ChannelBusyError: If the channel is busy with another operation
        """
        if log.isEnabledFor(logging.INFO):
            log.info(
                'settle called',
                node=pex(self.node_address),
            )

        if not self.channel_operations_lock.acquire(0):
            raise ChannelBusyError(
                f'Channel with address {self.address} is '
                f'busy with another ongoing operation'
            )

        self.channel_operations_lock.release()

        with self.channel_operations_lock:
            transaction_hash = estimate_and_transact(
                self.proxy,
                'settle',
            )

            self.client.poll(unhexlify(transaction_hash), timeout=self.poll_timeout)
            receipt_or_none = check_transaction_threw(self.client, transaction_hash)
            if receipt_or_none:
                log.critical(
                    'settle failed',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                )
                self._check_exists()
                raise TransactionThrew('Settle', receipt_or_none)

            if log.isEnabledFor(logging.INFO):
                log.info(
                    'settle successful',
                    node=pex(self.node_address),
                    contract=pex(self.address),
                )

    def events_filter(
            self,
            topics: Optional[List],
            from_block: Optional[int] = None,
            to_block: Optional[int] = None) -> Filter:
        """ Install a new filter for an array of topics emitted by the netting contract.
        Args:
            topics: A list of event ids to filter for. Can also be None,
                    in which case all events are queried.
            from_block: The block number at which to start looking for events.
            to_block: The block number at which to stop looking for events.
        Return:
            Filter: The filter instance.
        """
        netting_channel_address_bin = self.proxy.contract_address
        filter_id_raw = new_filter(
            self.client,
            netting_channel_address_bin,
            topics=topics,
            from_block=from_block,
            to_block=to_block
        )

        return Filter(
            self.client,
            filter_id_raw,
        )

    def all_events_filter(self, from_block=None, to_block=None):
        """ Install a new filter for all the events emitted by the current netting channel contract

        Return:
            Filter: The filter instance.
        """
        return self.events_filter(None, from_block, to_block)
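
The guard used by close() and settle() above is a small reusable pattern: probe the lock without blocking so a busy channel fails fast, then hold the lock for the whole transaction. A standalone sketch (class and method names hypothetical):

from gevent.lock import RLock

class ChannelBusyError(Exception):
    pass

class GuardedProxy(object):
    def __init__(self):
        self.channel_operations_lock = RLock()

    def operation(self):
        # Fail fast if another greenlet is mid-operation.
        if not self.channel_operations_lock.acquire(blocking=False):
            raise ChannelBusyError('busy with another ongoing operation')
        self.channel_operations_lock.release()
        # Hold the lock for the whole transact-and-poll sequence.
        with self.channel_operations_lock:
            pass  # estimate_and_transact / poll / receipt check go here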
Пример #24
0
class Crawler(object):
    """定向爬虫类"""

    http_debuglevel = 0

    #: 预定义网页编码。
    encoding = None

    #: 设置User Agent,有时候模拟Google Bot会有事倍功半的效果。
    user_agent = 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'

    # 页面语言,有些网站会以这个为标记实现国际化
    accept_language = 'zh_CN'

    # 可接受的数据类型
    accept_mine = 'text/html,application/xhtml+xml,' \
                  'application/xml;q=0.9,*/*;q=0.8'

    #: 最大重定向次数,防止重定向陷阱。
    max_redirects = 20

    #: 每个爬虫的最大并发连接数。
    max_connections = 10

    #: 超时。
    timeout = 360

    #: 最大失败尝试次数。
    max_retries = 1000

    #: 每次尝试后递增休眠间隔。
    #: 例如 ``sleep_seconds = 2`` ,那么第一次连接失败会休眠2秒,第二次会休眠4秒,第三次会休眠6秒。
    sleep_seconds = 1

    #: Bloom容量
    bloom_capacity = 10000000

    #: Bloom预计错误率
    bloom_error_rate = 0.0001

    #: HTTP代理
    proxies = None

    #: 错误日志存放处
    dump_dir = 'dump/'
    is_stop = True
    stopped = False
    name = None

    retry_with_broken_content = False
    retry_with_no_content = False

    #: 如果服务器遇到这些error code,当做正常页面处理
    ignore_server_error_code = ()

    #: 如果服务器遇到这些error code,不进行重试,直接忽略掉
    do_not_retry_with_server_error_code = ()

    lock = None
    logger = logging.getLogger('Crawler')


    def __init__(self):
        httplib.HTTPConnection.debuglevel = self.http_debuglevel
        self.network = NetworkManager(crawler=self)

        self.pool = Pool()
        self.lock = RLock()
        self.bloom_filters = {}
        self.name = self.__class__.__name__
        self._status = {
            'process_count': 0,
            'is_stop': True,
            'run_seconds': 0,
            'crawler_name': self.name,
        }



        # def sync_bloom(self):
        #     """Force-sync the Bloom filters to file."""
        #     while not self.is_stop:
        #         for key in self.bloom_filters.keys():
        #             self.bloom_filters[key].sync()
        #         gevent.sleep(1)



    def work(self):
        """启动爬虫。"""

        if self.lock.acquire(blocking=False):
            self.logger.info('Starting crawler %s' % self.name)
            self.stopped = False
            self._status['is_stop'] = False
            self.pool.spawn(self.run)
            self.pool.join()
            self.network.join()
            self._status['is_stop'] = True
            self.logger.info('Finished crawler %s' % self.name)
            self.lock.release()

    def on_server_error(self, response):
        """服务器错误回调。

        :param response:
        :raise ServerError:
        """
        self.logger.warning('Something wrong with server.')
        raise ServerError('Error Code:%s' % response.status_code)


    def on_proxies_error(self, proxy):
        pass

    def on_parse_error(self, error):
        """页面分析错误回调

        :param error:
        """

    def fetch_proxies(self):
        pass

    def stop(self):
        """停止爬虫。


        """
        self.logger.info('Stopping crawler %s' % self.name)
        self.stopped = True
        while not self.network._request_queue.empty():
            self.network._request_queue.get()


    def status(self):
        """返回爬虫状态。


        :return: :rtype:
        """
        return self._status


    def run(self):
        """这里编写启动爬虫的工作。
        必须重载此函数,推倒第一块多米诺骨牌。

        """
        raise NotImplementedError
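
work() uses the same non-blocking acquire trick to make start-up idempotent: while a crawl is running, a second call to work() simply returns instead of spawning a duplicate run. The guard in isolation (a minimal sketch):

from gevent.lock import RLock

class OneShotWorker(object):
    def __init__(self):
        self.lock = RLock()

    def work(self):
        # Only one concurrent work() proceeds; late callers return quietly.
        if not self.lock.acquire(blocking=False):
            return
        try:
            print('crawling...')  # spawn run(), join pools, etc.
        finally:
            self.lock.release()

w = OneShotWorker()
w.work()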
Пример #25
0
class SubNameBrute(object):
    def __init__(self, *params):
        (
            self.domain,
            self.options,
            self.process_num,
            self.dns_servers,
            self.next_subs,
            self.scan_count,
            self.found_count,
            self.queue_size_array,
            tmp_dir,
        ) = params
        self.dns_count = len(self.dns_servers)
        self.scan_count_local = 0
        self.found_count_local = 0
        self.resolvers = [
            dns.resolver.Resolver(configure=False)
            for _ in range(self.options.threads)
        ]
        for r in self.resolvers:
            r.lifetime = r.timeout = 10.0
        self.queue = PriorityQueue()
        self.priority = 0
        self.ip_dict = {}
        self.found_subs = set()
        self.timeout_subs = {}
        self.count_time = time.time()
        self.outfile = open(
            "%s/%s_part_%s.txt" % (tmp_dir, self.domain, self.process_num),
            "w")
        self.normal_names_set = set()
        self.load_sub_names()
        self.lock = RLock()

    def load_sub_names(self):
        normal_lines = []
        wildcard_lines = []
        wildcard_set = set()
        regex_list = []
        lines = set()
        with open(self.options.file) as inFile:
            for line in inFile.readlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)

                brace_count = sub.count("{")
                if brace_count > 0:
                    wildcard_lines.append((brace_count, sub))
                    sub = sub.replace("{alphnum}", "[a-z0-9]")
                    sub = sub.replace("{alpha}", "[a-z]")
                    sub = sub.replace("{num}", "[0-9]")
                    if sub not in wildcard_set:
                        wildcard_set.add(sub)
                        regex_list.append("^" + sub + "$")
                else:
                    normal_lines.append(sub)
                    self.normal_names_set.add(sub)

        if regex_list:
            pattern = "|".join(regex_list)
            _regex = re.compile(pattern)
            # filter instead of remove()-ing while iterating, which skips items
            normal_lines = [
                line for line in normal_lines if not _regex.search(line)
            ]

        for _ in normal_lines[self.process_num::self.options.process]:
            self.queue.put((0, _))  # priority set to 0
        for _ in wildcard_lines[self.process_num::self.options.process]:
            self.queue.put(_)

    def scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]
                                         ] + self.dns_servers

        while True:
            try:
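                # Flush the per-worker counters into the shared values at most
                # once per second, holding the lock only for the update.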
                with self.lock:
                    if time.time() - self.count_time > 1.0:
                        self.scan_count.value += self.scan_count_local
                        self.scan_count_local = 0
                        self.queue_size_array[
                            self.process_num] = self.queue.qsize()
                        if self.found_count_local:
                            self.found_count.value += self.found_count_local
                            self.found_count_local = 0
                        self.count_time = time.time()
                brace_count, sub = self.queue.get(timeout=3.0)
                if brace_count > 0:
                    brace_count -= 1
                    if sub.find("{next_sub}") >= 0:
                        for _ in self.next_subs:
                            self.queue.put((0, sub.replace("{next_sub}", _)))
                    if sub.find("{alphnum}") >= 0:
                        for _ in "abcdefghijklmnopqrstuvwxyz0123456789":
                            self.queue.put(
                                (brace_count, sub.replace("{alphnum}", _, 1)))
                    elif sub.find("{alpha}") >= 0:
                        for _ in "abcdefghijklmnopqrstuvwxyz":
                            self.queue.put(
                                (brace_count, sub.replace("{alpha}", _, 1)))
                    elif sub.find("{num}") >= 0:
                        for _ in "0123456789":
                            self.queue.put(
                                (brace_count, sub.replace("{num}", _, 1)))
                    continue
            except gevent.queue.Empty as e:
                break

            try:

                if sub in self.found_subs:
                    continue

                self.scan_count_local += 1
                cur_domain = sub + "." + self.domain
                answers = self.resolvers[j].query(cur_domain)

                if answers:
                    self.found_subs.add(sub)
                    ips = ", ".join(
                        sorted([answer.address for answer in answers]))
                    if ips in ["1.1.1.1", "127.0.0.1", "0.0.0.0", "0.0.0.1"]:
                        continue
                    if self.options.i and is_intranet(answers[0].address):
                        continue

                    try:
                        self.scan_count_local += 1
                        answers = self.resolvers[j].query(cur_domain, "cname")
                        cname = answers[0].target.to_unicode().rstrip(".")
                        if cname.endswith(
                                self.domain) and cname not in self.found_subs:
                            cname_sub = cname[:len(cname) - len(self.domain) -
                                              1]  # new sub
                            if cname_sub not in self.normal_names_set:
                                self.found_subs.add(cname)
                                self.queue.put((0, cname_sub))
                    except Exception as e:
                        pass

                    first_level_sub = sub.split(".")[-1]
                    if (first_level_sub, ips) not in self.ip_dict:
                        self.ip_dict[(first_level_sub, ips)] = 1
                    else:
                        self.ip_dict[(first_level_sub, ips)] += 1
                        if self.ip_dict[(first_level_sub, ips)] > 30:
                            continue

                    self.found_count_local += 1

                    self.outfile.write(
                        cur_domain.ljust(30) + "\t" + ips + "\n")
                    self.outfile.flush()
                    try:
                        self.scan_count_local += 1
                        self.resolvers[j].query("test-not-existed." +
                                                cur_domain)
                    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                        if self.queue.qsize() < 10000:
                            for _ in self.next_subs:
                                self.queue.put((0, _ + "." + sub))
                        else:
                            self.queue.put((1, "{next_sub}." + sub))
                    except Exception as e:
                        pass

            except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                pass
            except dns.resolver.NoNameservers as e:
                self.queue.put((0, sub))  # Retry
            except dns.exception.Timeout as e:
                self.timeout_subs[sub] = self.timeout_subs.get(sub, 0) + 1
                if self.timeout_subs[sub] <= 2:
                    self.queue.put((0, sub))  # Retry
            except Exception as e:
                import traceback

                traceback.print_exc()
                with open("errors.log", "a") as errFile:
                    errFile.write("[%s] %s\n" % (type(e), str(e)))

    def run(self):
        threads = [
            gevent.spawn(self.scan, i) for i in range(self.options.threads)
        ]
        gevent.joinall(threads)
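
The {alphnum}/{alpha}/{num} patterns expand breadth-first through the priority queue: each get() substitutes one placeholder, decrements the brace count, and re-queues the result until no braces remain. A toy reduction of that loop:

from gevent.queue import PriorityQueue

queue = PriorityQueue()
queue.put((1, 'a{num}'))  # one placeholder left to expand

while not queue.empty():
    brace_count, sub = queue.get()
    if brace_count > 0:
        for ch in '0123456789':
            # replace a single occurrence, re-queue with one less brace
            queue.put((brace_count - 1, sub.replace('{num}', ch, 1)))
        continue
    print(sub)  # a0 .. a9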
Пример #26
0
class QueueHandler(object):

    def __init__(self, logger, pending_online_users, make_func, send_func):
        """Initialize Queue Handler

            :param logger: logger object
            :type logger: Logger
            :param pending_online_users: online users queue
            :type pending_online_users: gevent.queue
            :param make_func: the function to make bundle
            :type make_func: lambda,instancemethod,function
            :param send_func: the function to send bundle
            :type send_func: lambda,instancemethod,function
        """
        self.alive = True
        self.last_idx = None
        self.logger = logger
        self.pending_online_users = pending_online_users
        self._pause_lock = RLock()
        self._make_func = make_func
        self._send_func = send_func
        #self.daemon = True
        #self.start()

    def shutdown(self):
        self.alive = False
        # kill the running greenlets to stop both loops
        gevent.killall(self.greenlets)

    def run(self):
        self.greenlets = [
            gevent.spawn(self.main_loop),
            gevent.spawn(self.online_loop)
        ]

    def pause(self):
        self._pause_lock.acquire()

    def resume(self):
        self._pause_lock.release()

    @property
    def qsize(self):
        return self.bundle_queue.qsize()

    def main_loop(self):
        while True:
            with self._pause_lock:
                # call DataMgr.make_bundle to build the full m*n bundle,
                # passing _send_func (GatewayMgr.send_push) as the callback
                self._make_func(self._send_func)
            #TODO sleep longer
            #gevent.sleep(random.random())
            gevent.sleep(MSG_CHECK_INTERV)

    def online_loop(self):
        while True:
            u = self.pending_online_users.get()
            # call DataMgr.make_bundle to make bundle of full m*1 map for specific user
            self._make_func(self._send_func, user_keys = [u])
            # context switch
            gevent.sleep(0)
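
pause() and resume() use the RLock itself as a gate: main_loop takes and drops the lock on every pass, so any greenlet holding it stalls the loop at the top of its next iteration. The mechanism in isolation:

import gevent
from gevent.lock import RLock

pause_lock = RLock()

def main_loop():
    for i in range(3):
        with pause_lock:  # stalls here while pause() holds the lock
            print('tick', i)
        gevent.sleep(0)

g = gevent.spawn(main_loop)
pause_lock.acquire()   # pause()
gevent.sleep(0)        # the loop is now blocked on the gate
pause_lock.release()   # resume()
g.join()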
Пример #27
0
class Balancer(object):
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger('Balancer')
        self.dispatcher.require_collection('tasks', 'serial', type='log')
        self.create_initial_queues()
        self.start_executors()
        self.schedule_lock = RLock()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type('task.changed')

        # Lets try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query(
                'tasks', ('state', 'in', ['EXECUTING', 'WAITING', 'CREATED'])):
            self.logger.info(
                'Stale task ID: {0}, name: {1} being set to FAILED'.format(
                    stale_task['id'], stale_task['name']))

            stale_task.update({
                'state': 'FAILED',
                'error': {
                    'type': 'TaskException',
                    'message': 'dispatcher process died',
                    'code': errno.EINTR,
                    'stacktrace': '',
                    'extra': None
                }
            })

            dispatcher.datastore.update('tasks', stale_task['id'], stale_task)

    def create_initial_queues(self):
        self.resource_graph.add_resource(Resource('system'))

    def start_executors(self):
        for i in range(
                0,
                self.dispatcher.configstore.get('middleware.executors_count')):
            self.logger.info('Starting task executor #{0}...'.format(i))
            self.executors.append(TaskExecutor(self, i))

    def start(self):
        self.threads.append(gevent.spawn(self.distribution_thread))
        self.logger.info("Started")

    def schema_to_list(self, schema):
        return {
            'type': 'array',
            'items': schema,
            'minItems': sum(
                1 for x in schema if 'mandatory' in x and x['mandatory']
            ),
            'maxItems': len(schema),
        }

    def verify_schema(self, clazz, args, strict=False):
        if not hasattr(clazz, 'params_schema'):
            return []

        schema = self.schema_to_list(clazz.params_schema)
        val = validator.create_validator(
            schema, resolver=self.dispatcher.rpc.get_schema_resolver(schema))
        if strict:
            val.fail_read_only = True
        else:
            val.remove_read_only = True

        return list(val.iter_errors(args))

    def submit(self, name, args, sender, env=None):
        if name not in self.dispatcher.tasks:
            self.logger.warning("Cannot submit task: unknown task type %s",
                                name)
            raise RpcException(errno.EINVAL,
                               "Unknown task type {0}".format(name))

        task = Task(self.dispatcher, name)
        task.user = sender.user.name
        task.session_id = sender.session_id
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = copy.deepcopy(args)
        task.strict_verify = 'strict_validation' in sender.enabled_features

        if env:
            if not isinstance(env, dict):
                raise ValueError('env must be a dict')

            task.environment = copy.deepcopy(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        if 'RUN_AS_USER' in task.environment:
            task.user = task.environment['RUN_AS_USER']

        task.environment['SENDER_ADDRESS'] = sender.client_address
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.set_state(TaskState.CREATED)
        self.task_queue.put(task)
        self.logger.info("Task %d submitted (type: %s, class: %s)", task.id,
                         name, task.clazz)
        return task.id

    def submit_with_upload(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata['schema']

        if schema is None:
            raise RpcException(
                errno.ENOENT,
                "Task {0} has no schema associated with it".format(task_name))
        upload_token_list = []
        for idx, arg in enumerate(schema):
            if arg.get('type') == 'fd':
                rfd, wfd = os.pipe()
                token = self.dispatcher.token_store.issue_token(
                    FileToken(user=sender.user,
                              lifetime=60,
                              direction='upload',
                              file=FileObjectPosix(wfd, 'wb', close=True),
                              name=str(uuid.uuid4()),
                              size=None))
                upload_token_list.append(token)
                args[idx] = FileDescriptor(rfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, upload_token_list

    def submit_with_download(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata['schema']
        url_list = []

        if schema is None:
            raise RpcException(
                errno.ENOENT,
                "Task {0} has no schema associated with it".format(task_name))

        for idx, arg in enumerate(schema):
            if arg.get('type') == 'fd':
                rfd, wfd = os.pipe()
                url_list.append("/dispatcher/filedownload?token={0}".format(
                    self.dispatcher.token_store.issue_token(
                        FileToken(user=sender.user,
                                  lifetime=60,
                                  direction='download',
                                  file=FileObjectPosix(rfd, 'rb', close=True),
                                  name=args[idx]))))
                args[idx] = FileDescriptor(wfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, url_list

    def verify_subtask(self, parent, name, args):
        clazz = self.dispatcher.tasks[name]
        instance = clazz(self.dispatcher, self.dispatcher.datastore)
        return instance.verify(*args)

    def run_subtask(self, parent, name, args, env=None):
        args = list(args)
        task = Task(self.dispatcher, name)
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = args
        task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
        task.instance.verify(*task.args)
        task.description = task.instance.describe(*task.args)
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.parent = parent
        task.environment = {}

        if parent:
            task.environment = copy.deepcopy(parent.environment)
            task.environment['parent'] = parent.id
            task.user = parent.user

        if env:
            if not isinstance(env, dict):
                raise ValueError('env must be a dict')

            task.environment.update(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.set_state(TaskState.CREATED)
        self.task_list.append(task)

        task.start()
        return task

    def join_subtasks(self, *tasks):
        for i in tasks:
            i.join()

    def abort(self, id, error=None):
        task = self.get_task(id)
        if not task:
            self.logger.warning("Cannot abort task: unknown task id %d", id)
            return

        success = False
        if task.started_at is None:
            success = True
        else:
            try:
                task.executor.abort()
            except:
                pass
        if success:
            task.ended.set()
            if error:
                task.set_state(TaskState.FAILED, TaskStatus(0),
                               serialize_error(error))
                self.logger.debug("Task ID: %d, name: %s aborted with error",
                                  task.id, task.name)
            else:
                task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
                self.logger.debug("Task ID: %d, name: %s aborted by user",
                                  task.id, task.name)

    def task_exited(self, task):
        self.resource_graph.release(*task.resources)
        self.schedule_tasks(True)

    def schedule_tasks(self, exit=False):
        """
        This function is called when:
        1) any new task is submitted to any of the queues
        2) any task exits
        """
        with self.schedule_lock:
            started = 0
            executing_tasks = [
                t for t in self.task_list if t.state == TaskState.EXECUTING
            ]
            waiting_tasks = [
                t for t in self.task_list if t.state == TaskState.WAITING
            ]

            for task in waiting_tasks:
                if not self.resource_graph.can_acquire(*task.resources):
                    continue

                self.resource_graph.acquire(*task.resources)
                self.threads.append(task.start())
                started += 1

            if not started and not executing_tasks and (
                    exit or len(waiting_tasks) == 1):
                for task in waiting_tasks:
                    # Check whether or not task waits on nonexistent resources. If it does,
                    # abort it 'cause there's no chance anymore that missing resources will appear.
                    if any(
                            self.resource_graph.get_resource(res) is None
                            for res in task.resources):
                        self.logger.warning(
                            'Aborting task {0}: deadlock'.format(task.id))
                        self.abort(
                            task.id,
                            VerifyException(errno.EBUSY,
                                            'Resource deadlock avoided'))

    def distribution_thread(self):
        while True:
            self.task_queue.peek()
            self.distribution_lock.acquire()
            task = self.task_queue.get()

            try:
                self.logger.debug("Picked up task %d: %s with args %s",
                                  task.id, task.name, task.args)

                errors = self.verify_schema(self.dispatcher.tasks[task.name],
                                            task.args, task.strict_verify)
                if len(errors) > 0:
                    errors = list(validator.serialize_errors(errors))
                    self.logger.warning(
                        "Cannot submit task {0}: schema verification failed with errors {1}"
                        .format(task.name, errors))
                    raise ValidationException(extra=errors)

                task.instance = task.clazz(self.dispatcher,
                                           self.dispatcher.datastore)
                task.resources = task.instance.verify(*task.args)
                task.description = task.instance.describe(*task.args)

                if type(task.resources) is not list:
                    raise ValueError(
                        "verify() returned something else than resource list")

            except Exception as err:
                self.logger.warning("Cannot verify task %d: %s", task.id, err)
                task.set_state(TaskState.FAILED, TaskStatus(0),
                               serialize_error(err))
                task.ended.set()
                self.distribution_lock.release()

                if not isinstance(err, VerifyException):
                    self.dispatcher.report_error(
                        'Task {0} verify() method raised invalid exception'.
                        format(err), err)

                continue

            task.set_state(TaskState.WAITING)
            self.task_list.append(task)
            self.distribution_lock.release()
            self.schedule_tasks()
            if task.resources:
                self.logger.debug("Task %d assigned to resources %s", task.id,
                                  ','.join(task.resources))

    def assign_executor(self, task):
        for i in self.executors:
            with i.cv:
                if i.state == WorkerState.IDLE:
                    self.logger.info("Task %d assigned to executor #%d",
                                     task.id, i.index)
                    task.executor = i
                    i.state = WorkerState.ASSIGNED
                    return

        # Out of executors! Need to spawn new one
        executor = TaskExecutor(self, len(self.executors))
        self.executors.append(executor)
        with executor.cv:
            executor.cv.wait_for(lambda: executor.state == WorkerState.IDLE)
            executor.state = WorkerState.ASSIGNED
            task.executor = executor
            self.logger.info("Task %d assigned to executor #%d", task.id,
                             executor.index)

    def dispose_executors(self):
        for i in self.executors:
            i.die()

    def get_active_tasks(self):
        return [
            x for x in self.task_list
            if x.state in (TaskState.CREATED, TaskState.WAITING,
                           TaskState.EXECUTING)
        ]

    def get_tasks(self, type=None):
        if type is None:
            return self.task_list

        return [x for x in self.task_list if x.state == type]

    def get_task(self, id):
        self.distribution_lock.acquire()
        t = first_or_default(lambda x: x.id == id, self.task_list)
        if not t:
            t = first_or_default(lambda x: x.id == id, self.task_queue.queue)

        self.distribution_lock.release()
        return t

    def get_executor_by_key(self, key):
        return first_or_default(lambda t: t.key == key, self.executors)

    def get_executor_by_sender(self, sender):
        return first_or_default(lambda t: t.conn == sender, self.executors)
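
distribution_thread shows the queue half of the design: it blocks on peek() without holding the lock, then takes distribution_lock only around the dequeue-and-verify step, so get_task() and submitters are never stalled by an empty queue. Reduced to its synchronization skeleton (using a with-block where the original releases manually):

from gevent.lock import RLock
from gevent.queue import Queue

task_queue = Queue()
distribution_lock = RLock()

def distribution_thread():
    while True:
        # wait for work without holding the lock
        task_queue.peek()
        with distribution_lock:
            task = task_queue.get()
            print('verifying', task)  # verify + resource assignment go here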
Пример #28
0
class GPlayerMgr(object):
    """ 联合进程使用的总角色管理类 """
    _rpc_name_ = 'rpc_player_mgr'
    TIME_OUT = 0.1

    def __init__(self):
        self._players = {}  # {pid: sub_mgr_id} maps each player to a child process
        self.app_pids = {}
        self.pid_login_waiter = {}
        self.account_token = []
        self.lock = RLock()

        # cache optimizations
        self._logouts = {}  # {pid: sub_mgr_id} child-process cache for logged-out players
        self.logons = TimeMemCache(size=1000,
                                   default_timeout=3,
                                   name='rpc_player_mgr.logons')
        # re-login tokens
        self.relogin_cache = TimeMemCache(size=2000,
                                          default_timeout=3600,
                                          name='rpc_player_mgr.relogin_cache')
        self.notoken_cache = TimeMemCache(size=10,
                                          default_timeout=300,
                                          name='rpc_player_mgr.notoken_cache')

        # name lookups
        self.name2pids = {}
        self.pid2names = {}
        self.paused = False  # service paused

        Game.sub(MSG_FRAME_APP_ADD, self._msg_app_add)
        Game.sub(MSG_FRAME_APP_DEL, self._msg_app_del)

    def SetNoTokenLogin(self, pid):
        self.notoken_cache.set(pid, 1)

    def start(self):
        pass

    def pause(self):
        self.paused = True

    def resume(self):
        self.paused = False

    def gw_open(self, processor):
        """ 连接建立 """
        hd = PlayerMgrHandler()
        hd.set_rpc(processor)
        if not self.paused:
            sleep(30)
        hd.stop()

    def _msg_app_add(self, app_name, addr, names):
        if SubPlayerMgr._rpc_name_ not in names:
            return
        log.info('[player_mgr]reg sub_player_mgr:%s', app_name)
        self.app_pids[app_name] = set()
        if len(self.app_pids) == config.logic_pool:
            Game.safe_pub(MSG_ALL_LOGIC_START)

    def _msg_app_del(self, app_name, addr, names):
        """ 子进程退出,清理数据 """
        if SubPlayerMgr._rpc_name_ not in names:
            return
        log.info('[player_mgr]unreg sub_player_mgr:%s', app_name)
        self.app_pids.pop(app_name, None)

        # purge caches and log the affected players out
        def _unreg():
            pids = [
                pid for pid, _app_name in self._logouts.items()
                if _app_name == app_name
            ]
            for pid in pids:
                self._logouts.pop(pid, None)
            # iterate over a copy: del_player() mutates self._players
            for pid, _app_name in list(self._players.items()):
                if _app_name == app_name:
                    self.del_player(_app_name, pid, str(uuid.uuid1()))

        spawn(_unreg)

    def choice_sub_mgr(self):
        """ 选择一个逻辑进程,
        @result: sub_name, sub_mgr """
        keys = list(self.app_pids.keys())
        nofull_keys = [
            app_name for app_name in keys
            if len(self.app_pids[app_name]) < config.logic_players
        ]
        if nofull_keys:
            app_name = random.choice(nofull_keys)
        else:
            # all full: pick one at random
            app_name = random.choice(keys)
        return app_name, Game.get_service(app_name, SubPlayerMgr._rpc_name_)

    def get_sub_mgr(self, pid, offline=False, forced=True):
        """ 获取玩家所在进程的player_mgr对象
        @param:
            pid: 玩家id
            offline: 是否离线也选择一个子进程
            forced: 是否离线也强制选一个
        @return:
            app_name, rpc_sub_mgr
        """
        with self.lock:
            try:
                app_name = self._players.get(pid)
                ensure = False
                if not app_name and offline:
                    ensure = True
                    app_name = self._logouts.get(pid)
                if not app_name and forced:
                    app_name, _ = self.choice_sub_mgr()
                if not app_name:
                    return '', None
                sub_mgr = Game.get_service(app_name, SubPlayerMgr._rpc_name_)
                if ensure:
                    rs = sub_mgr.load_player(pid)
                    if not rs:
                        return '', None
                    self._logouts[pid] = app_name
                return app_name, sub_mgr
            except:
                log.log_except()
                return '', None

    def get_sub_mgr_addr(self, pid, offline=False, forced=True):
        try:
            app_name, sub_mgr = self.get_sub_mgr(pid,
                                                 offline=offline,
                                                 forced=forced)
            addr = sub_mgr.get_addr()
            return app_name, addr
        except:
            return '', None

    def add_player(self, app_name, pid, name):
        """ 玩家登陆,防止在短时间内重复登录 """
        self._add_name_id(pid, name)
        self._players[pid] = app_name
        self.app_pids[app_name].add(pid)
        self.relogin_cache.delete(pid)
        return True

    def del_player(self, app_name, pid, token):
        """ sub_mgr调用,通知玩家退出 """
        self._logouts[pid] = app_name
        self._players.pop(pid, None)
        if app_name in self.app_pids:
            pids = self.app_pids[app_name]
            if pid in pids:
                pids.remove(pid)
        self.relogin_cache.set(pid, token)
        Game.safe_pub(MSG_LOGOUT, pid)

    def remove_pids(self, pids):
        """ 清理sub_mgr的pid缓存 """
        for pid in pids:
            self._logouts.pop(pid, None)

    @property
    def count(self):
        return len(self._players)

    def get_count(self):
        return self.count

    def _add_name_id(self, pid, name):
        if pid in self.pid2names or not name:
            return
        self.name2pids[name] = pid
        self.pid2names[pid] = name

    def get_online_ids(self, pids=None, random_num=None):
        """ 返回在线的玩家id列表,
        pids: 查询的玩家列表,返回在线的ids
        random:整形, 随机选择random个pid返回
        """
        if random_num is not None:
            if len(self._players) <= random_num:
                return list(self._players.keys())
            return random.sample(list(self._players), random_num)
        if not pids:
            return list(self._players.keys())
        return [pid for pid in pids if pid in self._players]

    def is_online(self, pid):
        return pid in self._players

    def _login_check_player(self, pid):
        """ 检查登陆情况,防止重复登陆,短时登陆 """
        if not pid:
            return True
        if self.logons.get(pid):
            log.info('blocking rapid re-login for player (%s)', pid)
            return False
        self.logons.set(pid, 1)
        return True

    def _login(self, processor, pid):
        """ 玩家登陆请求
        返回: isOk, error or sid
        """
        if self.count >= config.max_players:
            return 0, errcode.EC_USER_MAX
        elif not self._login_check_player(pid):
            return 0, errcode.EC_LOGIN_ERR

        app_name, sub_mgr = self.get_sub_mgr(pid, offline=True)
        # log in
        rs = sub_mgr.player_login(processor.pid, pid)
        # have the gateway connect to the child process
        addr = sub_mgr.get_addr()
        # wait for the connection to succeed before returning; otherwise data
        # the client sends right after getting the reply could be lost
        processor.connect(*addr)

        log.debug('loginSNS finish:%s', rs)
        return 1, rs

    def sub_login_finish(self, pid):
        if pid in self.pid_login_waiter:
            waiter = self.pid_login_waiter.pop(pid)
            waiter.set()

    def broadcast(self, proto, data, exclude=[]):
        for app in self.app_pids:
            sub_mgr = Game.get_service(app, SubPlayerMgr._rpc_name_)
            sub_mgr.broadcast(proto, data, exclude)

#--------------------  business flow -------------------------------
# def rc_register(self, account, password):
#     """ Register an account. """
#     log.debug('account registration request: account(%s)', account)
#     if not account:
#         return 0, errcode.EC_FORBID_STRING
#     d = Game.store.query_loads(Player.DATA_CLS.TABLE_NAME, dict(account=account))
#     if d:
#         return 0, errcode.EC_SAME_ACCOUNT
#     data = dict(account=account, password=password, newTime=current_time())
#     try:
#         Game.store.insert(Player.DATA_CLS.TABLE_NAME, data)
#     except:
#         return 0, errcode.EC_SAME_ACCOUNT
#     return 1, None
#
# def rc_login(self, processor, account, password):
#     """ Player login request.
#     Returns: isOk, error or sid
#     """
#     log.debug('user login request: account(%s)', account)
#     d = Game.store.query_loads(Player.DATA_CLS.TABLE_NAME, dict(account=account, password=password))
#     if not d:
#         return 0, errcode.EC_LOGIN_ERR
#     log.debug("%s", d)
#     pid = d[0]['id']
#     rs, d = self._login(processor, pid)
#     if not rs:
#         return rs, d
#     return rs, d

    def rc_relogin(self, processor, pid, token):
        """玩家重连"""
        notoken = self.notoken_cache.get(pid, 0)
        if not notoken:
            cache_token = self.relogin_cache.get(pid)
            if not cache_token:
                return 0, errcode.EC_LOGIN_ERR
            if cache_token != token:
                return 0, errcode.EC_LOGIN_ERR
        self.pid_login_waiter[pid] = Event()
        rs, d = self._login(processor, pid)
        if not rs:
            return rs, d
        if pid in self.pid_login_waiter:
            waiter = self.pid_login_waiter[pid]
            waiter.wait(10)
        if pid in self.pid_login_waiter:
            self.pid_login_waiter.pop(pid, None)
        return rs, d

    def rc_tokenLogin(self, processor, account, token, serverno, platform):
        """玩家令牌登陆"""
        if (account, token) in self.account_token:
            return 0, errcode.EC_TOKEN_ERR

        self.account_token.append((account, token))
        # validate the token first
        if config.serverNo == "039999":  # skip login verification, for stress testing
            chn = 3
        else:
            request = utility.post(config.login_url,
                                   json=dict(uid=account,
                                             token=token,
                                             server_id=config.serverNo))
            task = spawn(request.send)
            gevent.joinall([task], timeout=3)

            try:
                respData = ujson.loads(request.response.content)
                request.response.close()
                request.session.close()
            except:
                return 0, errcode.EC_LOGIN_ERR

            rs = respData.get("success", 0)
            chn = respData.get("channel_id", 0)
            err = respData.get("err_msg", "")
            if not rs:
                Game.glog.log2File(
                    "rc_tokenLoginError", "%s|%s|%s|%s|%s" %
                    (config.login_url, config.serverNo, account, token, err))
                return 0, errcode.EC_TOKEN_ERR

        d = Game.store.query_loads(Player.DATA_CLS.TABLE_NAME,
                                   dict(account=account))

        if not d:
            data = dict(account=account,
                        channel=chn,
                        platform=platform,
                        newTime=current_time())
            try:
                _id = Game.store.insert(Player.DATA_CLS.TABLE_NAME, data)
                data['id'] = _id
                d = [data]
            except:
                return 0, errcode.EC_SAME_ACCOUNT
        if not d:
            return 0, errcode.EC_LOGIN_ERR
        # log.debug("%s", d)
        pid = None
        for one in d:
            pid = one.get('id')
            if not pid:
                continue
            if str(pid)[-6:] == serverno:
                break
        if not pid:
            return 0, errcode.EC_PLAYER_EMPTY
        self.pid_login_waiter[pid] = Event()
        rs, d = self._login(processor, pid)
        if not rs:
            return rs, d
        if pid in self.pid_login_waiter:
            waiter = self.pid_login_waiter[pid]
            waiter.wait(10)
        if pid in self.pid_login_waiter:
            self.pid_login_waiter.pop(pid, None)
        self.account_token.remove((account, token))
        return rs, d
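
choice_sub_mgr is a two-tier random balancer: prefer logic processes still below the per-process player cap, and only when all of them are full fall back to a fully random pick. Standalone (config.logic_players stubbed as a constant):

import random

logic_players = 500  # per-process player cap (config value assumed)
app_pids = {
    'logic-1': set(range(500)),  # full
    'logic-2': set(range(120)),  # has room
}

def choice_sub_mgr():
    keys = list(app_pids.keys())
    nofull_keys = [k for k in keys if len(app_pids[k]) < logic_players]
    # prefer non-full processes; fall back to any process
    return random.choice(nofull_keys or keys)

print(choice_sub_mgr())  # always 'logic-2' here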
Пример #29
0
class ComponentRunner(Greenlet):
    """
    A ``ComponentRunner`` is a specialized ``gevent.Greenlet`` sub-class
    that manages a ``Component`` instance during the execution of a
    ``Network``.

    While the ``Component`` class provides the public API for defining the
    behavior of a component subclass, the ``ComponentRunner`` provides the
    private API used by the Network and port classes.
    """
    logger = logger

    def __init__(self, component, parent):
        """
        Parameters
        ----------
        component : ``rill.engine.component.Component``
        parent : ``rill.engine.network.Network``
        """
        Greenlet.__init__(self)

        self.component = component

        self._lock = RLock()
        self._can_go = Condition(self._lock)

        # the "automatic" input port
        self._null_input = None
        # the "automatic" output port
        self._null_output = None

        # the component's immediate network parent
        self.parent_network = parent

        self.has_run = False

        # used when evaluating component statuses for deadlocks
        self.curr_conn = None  # set externally
        self.curr_outport = None  # set externally

        # FIXME: allow this value to be set.  should we read it from the Network, or do we need per-component control?
        # FIXME: this feature is broken right now due to multiple output ports
        self.ignore_packet_count_error = True
        self._status = StatusValues.NOT_STARTED

    def __str__(self):
        return self.component.get_full_name()

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                           self.component.get_full_name())

    def __getstate__(self):
        data = self.__dict__.copy()
        for k in ('_lock', '_can_go'):
            data.pop(k)
        return data

    # FIXME: rename to root_network
    @property
    def network(self):
        """
        The root network.

        Returns
        -------
        ``rill.engine.network.Network``
        """
        return self.get_parents()[0]

    @cache
    def get_parents(self):
        """
        Returns
        -------
        List[``rill.engine.network.Network``]
        """
        parent = self.parent_network
        parents = []
        while True:
            if parent is None:
                break
            parents.append(parent)
            parent = parent.parent_network
        parents.reverse()
        return parents

    def error(self, msg, errtype=FlowError):
        self.component.error(msg, errtype)

    # FIXME: figure out the logging stuff
    def trace_funcs(self, msg, section='funcs'):
        self.logger.debug(msg)
        # self.parent_network.trace_funcs(self, msg)

    def trace_locks(self, msg, **kwargs):
        self.logger.debug(msg, section='locks', **kwargs)
        # self.parent_network.trace_locks(self, msg)

    # Ports --

    def open_ports(self):
        """
        Open all ports.
        """
        for port in self.component.ports:
            port.open()

    def close_ports(self):
        """
        Close all ports.
        """
        for port in self.component.ports:
            port.close()

    # Statuses --

    @property
    def status(self):
        """
        Get the component's current status.

        Returns
        -------
        status : str
            one of ``rill.engine.status.StatusValues``
        """
        return self._status

    @status.setter
    def status(self, new_status):
        if new_status != self._status:
            self.logger.debug("Changing status {} -> {}".format(
                self._status, new_status),
                              component=self)
            self._status = new_status

    def is_terminated(self):
        """
        Return whether the component has terminated.

        Returns
        -------
        bool
        """
        return self.status == StatusValues.TERMINATED

    def has_error(self):
        """
        Return whether the component has an error.

        Returns
        -------
        bool
        """
        return self.status == StatusValues.ERROR

    def terminate(self, new_status=StatusValues.TERMINATED):
        """
        Terminate the component.

        Parameters
        ----------
        new_status : int
            one of ``rill.engine.status.StatusValues`` (usually "TERMINATED" or
            "ERROR")
        """
        for child in self.component.get_children():
            # may be None if the subgraph has not started yet
            if child._runner is not None:
                child._runner.terminate(new_status)
        self.logger.debug("Terminated", component=self)
        self.status = new_status
        # self.parent_network.indicate_terminated(self)
        # FIXME: Thread.interrupt()

    # def long_wait_start(self, intvl):  # interval in seconds!
    #     self.timeout = TimeoutHandler(intvl, self)
    #     self._addto_timeouts(self.timeout)
    #
    # def _addto_timeouts(self, t):
    #     """
    #     t : TimeoutHandler
    #     """
    #     # synchronized (network)
    #     self.network.timeouts[self] = t
    #     self.status = StatusValues.LONG_WAIT
    #
    # def long_wait_end(self):
    #     self.timeout.dispose(self)

    def activate(self):
        """
        Called from other parts of the system to activate this Component.

        This will start its thread or will notify it to continue.
        """
        if self.is_terminated():
            return
        if not self.active():
            self.start()
        else:
            self.trace_locks("act - lock")
            try:
                with self._lock:
                    if self.status in (StatusValues.DORMANT,
                                       StatusValues.SUSP_FIPE):
                        self._can_go.notify()
                        self.trace_locks("act - signal")
            except GreenletExit as e:
                return
            finally:
                self.trace_locks("act - unlock")

    @property
    def self_starting(self):
        """
        True if the component has no connected input ports or has been
        explicitly specified as self-starting.

        This is only considered the first time the component is activated.

        Returns
        -------
        bool
        """
        if self.has_run:
            return False
        if self.component._self_starting:
            return True
        for port in self.component.inports:
            if port.is_connected() and not port.is_initialized():
                return False
        return True

    @property
    def must_run(self):
        """
        Returns
        -------
        bool
        """
        return not self.has_run and self.component._must_run

    def is_all_drained(self):
        """
        Wait for packets to arrive or for all ports to be drained.

        Returns
        -------
        bool
            all input ports are drained
        """
        try:
            self.trace_locks("input states - acquired")
            with self._lock:
                while True:
                    conns = [
                        inp._connection for inp in self.component.inports
                        if inp.is_connected() and not inp.is_null()
                    ]
                    all_drained = all(c.is_drained() for c in conns)
                    has_data = any(not c.is_empty() for c in conns)

                    if has_data or all_drained:
                        return all_drained

                    self.status = StatusValues.DORMANT
                    self.trace_funcs("Dormant")

                    # wait for something to change
                    self.trace_locks("input state - wait")
                    self._can_go.wait()
                    self.trace_locks("input state - wait ended")

                    self.status = StatusValues.ACTIVE
                    self.trace_funcs("Active")
        finally:
            self.trace_locks("input states - unlocked")  # while

    # override of Greenlet._run
    def _run(self):
        try:
            if self.is_terminated() or self.has_error():
                if self._lock._is_owned():
                    self._lock.release()
                    self.trace_locks("run - unlock")
                return

            self.status = StatusValues.ACTIVE
            self.trace_funcs("Started")
            if self.component.ports[IN_NULL].is_connected():
                self._null_input = self.component.ports[IN_NULL]
                # block here until null input receives a packet
                self._null_input.receive_once()
            if self.component.ports[OUT_NULL].is_connected():
                self._null_output = self.component.ports[OUT_NULL]

            self_started = self.self_starting

            while (self_started or not self.is_all_drained()
                   or self._null_input is not None
                   or (self.is_all_drained() and self.must_run)
                   or self.component.stack_size() > 0):
                self._null_input = None
                self.has_run = True

                # FIXME: added has_error to allow this loop to exit if another
                # thread calls parent.signal_error() to set our status to ERROR
                if self.is_terminated() or self.has_error():
                    break

                for inp in self.component.inports:
                    if inp.is_initialized() and not inp.is_null():
                        inp.open()

                self.trace_funcs(colored("Activated", attrs=['bold']))

                self.component.execute()

                self.trace_funcs(colored("Deactivated", attrs=['bold']))

                if self.component._packet_count != 0 and not self.ignore_packet_count_error:
                    self.trace_funcs("deactivated holding {} packets".format(
                        self.component._packet_count))
                    self.error("{} packets not disposed of during component "
                               "deactivation".format(
                                   self.component._packet_count))

                # FIXME: what is the significance of closing and reopening the InitializationConnections?
                # - is_all_drained only checks Connections.
                # - tests succeed if we simply hard-wire InitializationConnection to always open
                # - it ensures that it yields a new result when component is re-activated
                for inp in self.component.inports:
                    if inp.is_initialized() and not inp.is_null():
                        inp.close()
                        # if (not icp.is_closed()):
                        #  raise FlowError("Component deactivated with IIP port not closed: " + self.get_name())
                        #

                if self_started:
                    break

                if self.is_all_drained() and self.component.stack_size() == 0:
                    break  # while

            if self._null_output is not None:
                # p = create("")
                # self._null_output.send(p)
                self._null_output.close()

            self.close_ports()

            if self.component.stack_size() != 0:
                self.error("Compodenent terminated with stack not empty")
            self.parent_network.indicate_terminated(self)

        except ComponentError as e:
            # FIXME:
            if e.get_value() > 0:
                self.trace_funcs("Component exception: " + e.get_value())
                if e.get_value() > 999:
                    self.logger.error("terminated with exception code " +
                                      e.get_value())

                    if self.parent_network is not None:
                        # record the error and terminate siblings
                        self.parent_network.signal_error(e)
                    self.close_ports()
            raise GreenletExit()

        except Exception as err:
            # don't tell the parent if we are already in the ERROR or TERMINATE state
            # because then the parent told us to terminate
            if self.is_terminated() or self.has_error():
                # if we are in the TERMINATED or ERROR state we terminated
                # intentionally
                return

            import traceback
            traceback.print_exc()

            self.status = StatusValues.ERROR

            if self.parent_network is not None:
                # record the error and terminate siblings
                self.parent_network.signal_error(err)
            self.close_ports()

    def active(self):
        return bool(self)
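
A note on the has_error FIXME in run() above: the loop can only notice that a
sibling greenlet flagged an error because it re-checks the status flag on
every iteration. A minimal sketch of that check-flag-under-lock pattern
(Runner and all names below are hypothetical stand-ins, not the component API
above):

import gevent
from gevent.lock import RLock

class Runner(object):
    def __init__(self):
        self._lock = RLock()
        self._error = False

    def signal_error(self):
        # called from a sibling greenlet when it hits an error
        with self._lock:
            self._error = True

    def has_error(self):
        with self._lock:
            return self._error

    def run(self):
        # re-check the flag on every iteration so an external
        # signal_error() call can stop the loop
        while not self.has_error():
            gevent.sleep(0.01)  # stand-in for one unit of component work

r = Runner()
worker = gevent.spawn(r.run)
gevent.spawn_later(0.05, r.signal_error)
worker.join()
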
Example #30
class Server(object):
    def __init__(self):
        self.conn_pairs = {}
        self.id_allocator = idallocator.IDallocator(1)
        self.conn_pair_mutex = RLock()

    def acquire_id(self):
        return self.id_allocator.acquire_id()

    def query_by_id(self, id):
        self.conn_pair_mutex.acquire()
        # use .get() so a missing id returns None instead of raising
        # KeyError; callers truth-test the result
        pair = self.conn_pairs.get(id)
        self.conn_pair_mutex.release()
        return pair

    def _add_pair_id(self, conn_id, conn_pair):
        self.conn_pair_mutex.acquire()
        self.conn_pairs[conn_id] = conn_pair
        self.conn_pair_mutex.release()

    def _on_reuse_conn(self, conn_obj):
        conn_id = conn_obj.get_id()
        pair = self.query_by_id(conn_id)
        if not pair:
            print(__file__, sys._getframe().f_lineno, "reuse pair not found")
            return False
        ret = pair.remote_conn.replace_conn(conn_obj)
        if not ret:
            print(__file__, sys._getframe().f_lineno, "replace conn failed")
            return False
        print(__file__, sys._getframe().f_lineno, "replace conn succeeded")
        return True

    def _on_new_conn(self, conn_obj):
        conn_id = conn_obj.get_id()
        remote_conn = scpconn.ScpSever(conn_obj)
        local_conn = upstream.Upstream()

        conn_pair = ConnPair(remote_conn, local_conn)
        self._add_pair_id(conn_id, conn_pair)
        conn_pair.pump()
        return True

    def _handle_conn(self, rd_socket):
        conn_obj = conn.Scon(self, rd_socket)
        ret = conn_obj.hand_shake()
        if not ret:
            # handshake failed: close and stop here, otherwise we would
            # go on to use a half-initialized connection below
            conn_obj.close()
            return

        if conn_obj.is_reused():
            ret = self._on_reuse_conn(conn_obj)
        else:
            ret = self._on_new_conn(conn_obj)
        if not ret:
            conn_obj.close()

    def server(self):
        addr = str(config['listen']['addr'])
        port = int(config['listen']['port'])
        max_accept = int(config['listen']['max_accept'])
        timeout = int(config['listen']['timeout'])

        listen_socket = socket.socket()
        address = (addr, port)
        try:
            #set socket reuse
            listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            listen_socket.bind(address)
            listen_socket.listen(max_accept)
        except Exception as e:
            print(__file__, sys._getframe().f_lineno, "listen failed:", e)
            sys.exit(1)

        while True:
            try:
                # peer_addr avoids shadowing the configured listen addr above
                rd_socket, peer_addr = listen_socket.accept()
                rd_socket.settimeout(timeout)
                print(
                    __file__,
                    sys._getframe().f_lineno,
                    "new connect fd:%d, addr:%s" %
                    (rd_socket.fileno(), peer_addr))
                gevent.spawn(self._handle_conn, rd_socket)
            except Exception as e:
                print(__file__, sys._getframe().f_lineno, "accept failed:", e)
                sys.exit(1)
        listen_socket.close()
class Scon(object):
    def __init__(self, server, conn):
        self.server = server
        self.conn = conn
        self.reused = False
        self.handshakes = 0
        self.id = 0
        self.frozen = False
        self.conn_mutex = RLock()

    def get_id(self):
        return self.id

    def gen_rc4_key(self, v1, v2):
        h = serialization.hmac(v1, v2)
        return h[:]

    def deepcopy_cipherreader(self, old):
        new = CipherReader(old.cipher)
        new.count = old.count
        return new

    def deepcopy_cipherwriter(self, old):
        new = CipherWriter(old.cipher)
        new.count = old.count
        return new

    def _new_rc4_cipher(self, secret):
        # derive a 32-byte RC4 key from the shared secret: four 8-byte
        # HMAC blocks, each keyed by its block index 0..3
        key = bytearray()
        for i in range(4):
            key += self.gen_rc4_key(secret, serialization.to_byte8(i))
        return ARC4.new(bytes(key))

    def _new_conn_reader(self, secret):
        # reader and writer get separate RC4 instances (same key,
        # independent stream state)
        cipher_reader = CipherReader(self._new_rc4_cipher(secret))
        cipher_reader.set_read_conn(self.conn)
        return cipher_reader

    def _new_conn_writer(self, secret):
        cipher_writer = CipherWriter(self._new_rc4_cipher(secret))
        cipher_writer.set_write_conn(self.conn)
        return cipher_writer

    def _read_record(self):
        # records are a 2-byte big-endian length prefix followed by the
        # payload. Note that recv() may return fewer bytes than requested;
        # see the framing sketch after this example for a robust variant.
        (size,) = struct.unpack('>H', self.conn.recv(2))
        data = self.conn.recv(size)
        return data

    def _write_record(self, data):
        size = struct.pack('>H', len(data))
        # sendall, unlike send, retries until the whole buffer is written
        self.conn.sendall(size)
        self.conn.sendall(data)

    def _new_handshake(self, conn_req):
        self.reused = False
        self.id = self.server.acquire_id()
        print(__file__,
              sys._getframe().f_lineno, "new handshake id = ", self.id)

        prikey = dh64.private_key()
        pubkey = dh64.public_key(prikey)
        pubkey = serialization.to_byte8(pubkey)

        resp = messages.NewConnResp(self.id, pubkey)
        data = resp.marshal()
        self._write_record(data)

        conn_req_key = serialization.unit64(conn_req.key)
        secret = dh64.secret(prikey, conn_req_key)

        self.secret = serialization.to_byte8(secret)
        self.reader = self._new_conn_reader(self.secret)
        self.writer = self._new_conn_writer(self.secret)
        return True

    def _spawn(self, new):
        # hand this conn's identity and cipher state over to the new
        # physical connection so the stream resumes where it left off
        self.conn_mutex.acquire()
        self.freeze()
        new.id = self.id
        new.secret = self.secret
        # copy reader/writer so the byte counters survive the takeover
        new.reader = self.deepcopy_cipherreader(self.reader)
        new.writer = self.deepcopy_cipherwriter(self.writer)

        new.reader.set_read_conn(new.conn)
        new.writer.set_write_conn(new.conn)
        new.reused = True
        self.conn_mutex.release()

    def _reuse_handshake(self, conn_req):
        diff = 0
        resp = messages.ReuseConnResp()

        while True:
            pair = self.server.query_by_id(conn_req.id)
            if not pair:
                # unknown id: query_by_id returned None
                resp.code = ErrCode['SCPStatusIDNotFound']
                break
            old_conn = pair.remote_conn.conn
            if not old_conn:
                resp.code = ErrCode['SCPStatusIDNotFound']
                break
            if not conn_req.verify_sum(old_conn.secret):
                resp.code = ErrCode['SCPStatusUnauthorized']
                break
            if old_conn.handshakes >= conn_req.handshakes:
                resp.code = ErrCode['SCPStatusExpired']
                break
            self.handshakes = conn_req.handshakes
            # all checks passed, take over the old conn's cipher state
            old_conn._spawn(self)
            # diff is how many of our written bytes the client never saw;
            # e.g. writer.count 1000 with received 990 means the last 10
            # bytes must be replayed from the reuse buffer below
            diff = self.writer.count - conn_req.received
            if diff < 0:
                resp.code = ErrCode['SCPStatusNotAcceptable']
                break
            resp.received = self.reader.count
            break
        data = resp.marshal()
        self._write_record(data)
        if diff > 0:
            last_bytes = self.writer.reuse_buffer.read_last_bytes(diff)
            self.write(last_bytes)
        return True

    def hand_shake(self):
        self.conn_mutex.acquire()
        data = self._read_record()
        sq = messages.ServerReq()
        q = sq.unmarshal(data)
        ret = False
        if isinstance(q, messages.NewConnReq):
            ret = self._new_handshake(q)
        elif isinstance(q, messages.ReuseConnReq):
            ret = self._reuse_handshake(q)
        else:
            print(__file__, sys._getframe().f_lineno, "hand_shake error")
        self.conn_mutex.release()
        return ret

    def is_reused(self):
        return self.reused

    def read(self, size):
        return self.reader.read(size)

    def write(self, data):
        return self.writer.write(data)

    # Close closes the raw conn and releases all resources. After close, the conn can't be reused.
    def close(self):
        self.freeze()
        print(__file__, sys._getframe().f_lineno, "remote conn close")

    # Freeze marks the conn frozen and closes the raw socket; the cipher state is kept so the conn can be reused later.
    def freeze(self):
        if self.frozen:
            return
        self.frozen = True

        self.conn.close()
        print(__file__, sys._getframe().f_lineno, "remote conn freeze")
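
_read_record above assumes conn.recv() returns as many bytes as requested,
which plain sockets do not guarantee. A sketch of the same 2-byte big-endian
length-prefixed framing with an explicit read loop (recv_exact and these
function names are illustrative, not part of the example):

import struct

def recv_exact(sock, n):
    # keep reading until exactly n bytes arrive; recv may return short
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise EOFError('connection closed mid-record')
        buf += chunk
    return buf

def read_record(sock):
    (size,) = struct.unpack('>H', recv_exact(sock, 2))
    return recv_exact(sock, size)

def write_record(sock, data):
    # the 2-byte length prefix caps records at 65535 bytes
    sock.sendall(struct.pack('>H', len(data)) + data)
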
Example #32
class HttpScannerOutput(object):
    def __init__(self, args):
        # TODO: make separate queues for fast logging
        self.args = args
        self.lock = RLock()

        # Colorama init
        init()
        # Initialise logging
        self._init_logger()
        # Initialise output
        self._init_output()
        # Stats
        self.urls_scanned = 0

    def _init_output(self):
        # Initialise output
        self._init_requests_output()
        self._init_csv()
        self._init_json()
        self._init_dump()
        self._init_db()

    def _init_logger(self):
        """
        Init logger
        :return: None
        """
        if self.args.log_file is not None:
            self.logger = logging.getLogger('httpscan_logger')
            self.logger.setLevel(
                logging.DEBUG if self.args.debug else logging.INFO)
            handler = logging.FileHandler(self.args.log_file)
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s',
                                  datefmt='%d.%m.%Y %H:%M:%S'))
            self.logger.addHandler(handler)
        else:
            self.logger = None

    def _init_requests_output(self):
        """
        Init requests library output
        :return: None
        """
        if self.args.debug:
            # Enable requests lib debug output
            HTTPConnection.debuglevel = 5
            packages.urllib3.add_stderr_logger()
            logging.basicConfig()
            logging.getLogger().setLevel(logging.DEBUG)
            requests_log = logging.getLogger("requests.packages.urllib3")
            requests_log.setLevel(logging.DEBUG)
            requests_log.propagate = True
        else:
            # Suppress InsecureRequestWarning: Unverified HTTPS request is being made
            packages.urllib3.disable_warnings()

    def _init_csv(self):
        """
        Initialise CSV output
        :return:
        """
        if self.args.output_csv is None:
            self.csv = None
        else:
            # TODO: check if file exists
            self.csv = writer(open(self.args.output_csv, 'wb'),
                              delimiter=';',
                              quoting=QUOTE_ALL)
            self.csv.writerow(['url', 'status', 'length', 'headers'])

    def _init_json(self):
        """
        Initialise JSON output
        :return: None
        """
        self.json = None if self.args.output_json is None else io.open(
            self.args.output_json, 'w', encoding='utf-8')

    def _init_dump(self):
        """
        Initialise dump folder
        :return: None
        """
        self.dump = path.abspath(
            self.args.dump) if self.args.dump is not None else None
        if self.dump is not None and not path.exists(self.dump):
            makedirs(self.dump)

    def _init_db(self):
        """
        Initialise database output. Create database and table if missing.
        :return: None
        """
        if self.args.output_database is None:
            self.engine = None
            return

        # Check and create database if needed
        if not database_exists(self.args.output_database):
            create_database(self.args.output_database, encoding='utf8')

        # Create table
        self.engine = create_engine(self.args.output_database)
        self.metadata = MetaData()
        self.scan_table = Table('httpscan', self.metadata,
                                Column('id', Integer, primary_key=True),
                                Column('url', String),
                                Column('status', Integer),
                                Column('length', Integer),
                                Column('headers', String))
        self.metadata.create_all(self.engine)

    def write(self, **kwargs):
        spawn(self.write_func, **kwargs)

    def write_func(self, **kwargs):
        # Serialize all output through the lock; the with-block releases
        # it even if one of the output handlers raises
        with self.lock:
            # Output
            self._display_progress(**kwargs)
            self._write_log(**kwargs)

            # Only write results when the request produced no exception
            if kwargs['exception'] is None:
                self._filter_and_write(**kwargs)

    def _display_progress(self, **kwargs):
        # TODO: add detailed stats
        # Calculate progress
        percentage = '{percent:.2%}'.format(percent=float(self.urls_scanned) /
                                            self.args.urls_count)

        # Generate and print colored output
        out = '[%s] [worker:%02i] [%s]\t%s -> status:%i ' % (helper.str_now(
        ), kwargs['worker'], percentage, kwargs['url'], kwargs['status'])
        if kwargs['exception'] is not None:
            out += 'error: (%s)' % str(kwargs['exception'])
        else:
            out += 'length: %s' % naturalsize(int(kwargs['length']))
        if kwargs['status'] == 200:
            print(Fore.GREEN + out + Fore.RESET)
        elif 400 <= kwargs['status'] < 500 or kwargs['status'] == -1:
            print(Fore.RED + out + Fore.RESET)
        else:
            print(Fore.YELLOW + out + Fore.RESET)

    def _filter_and_write(self, **kwargs):
        # Filter responses: write everything when neither list is set; otherwise
        # keep statuses in the allow list or statuses absent from the ignore
        # list (see the predicate sketch after this example)
        if (self.args.allow is None and self.args.ignore is None) or \
                (self.args.allow is not None and kwargs['status'] in self.args.allow) or \
                (self.args.ignore is not None and kwargs['status'] not in self.args.ignore):
            self._write_csv(**kwargs)
            self._write_json(**kwargs)
            self._write_dump(**kwargs)
            self._write_db(**kwargs)

    def _kwargs_to_params(self, **kwargs):
        return {
            'url': kwargs['url'],
            'status': kwargs['status'],
            'length': kwargs['length'],
            'headers': str(kwargs['response'].headers)
        }

    def _write_log(self, **kwargs):
        # Write to log file
        if self.logger is None:
            return

        out = '[worker:%02i] %s %s %i' % (kwargs['worker'], kwargs['url'],
                                          kwargs['status'], kwargs['length'])
        if kwargs['exception'] is None:
            self.logger.info(out)
        else:
            self.logger.error("%s %s" % (out, str(kwargs['exception'])))

    def _write_csv(self, **kwargs):
        if self.csv is not None:
            self.csv.writerow([
                kwargs['url'], kwargs['status'], kwargs['length'],
                str(kwargs['response'].headers)
            ])

    def _write_json(self, **kwargs):
        if self.json is None:
            return

        # TODO: fix appending to the JSON file; entries are written
        # back-to-back, so the result is not a single valid JSON document
        self.json.write(
            unicode(dumps(self._kwargs_to_params(**kwargs),
                          ensure_ascii=False)))

    def _write_dump(self, **kwargs):
        if kwargs['response'] is None or self.dump is None:
            return

        # Generate folder and file path
        parsed = urlparse(kwargs['url'])
        host_folder = path.join(self.dump, parsed.netloc)
        p, f = path.split(parsed.path)
        folder = path.join(host_folder, p[1:])
        if not path.exists(folder):
            makedirs(folder)
        filename = path.join(folder, f)

        # Get all content
        try:
            content = kwargs['response'].content
        except Exception as exception:
            self.write_log('Failed to get content for %s Exception: %s' %
                           (kwargs['url'], str(exception)))
            return

        # Save contents to file
        with open(filename, 'wb') as f:
            f.write(content)

    def _write_db(self, **kwargs):
        if self.engine is None:
            return

        # TODO: check if url exists in table
        params = self._kwargs_to_params(**kwargs)
        self.engine.execute(
            self.scan_table.insert().execution_options(autocommit=True),
            params)

    def write_log(self, msg, loglevel=logging.INFO):
        """
        Write message to log file
        :param msg:
        :param loglevel:
        :return: None
        """
        if self.logger is None:
            return

        with self.lock:
            # Logger.log dispatches on the numeric level, replacing the
            # per-level elif chain
            self.logger.log(loglevel, msg)

    def print_and_log(self, msg, loglevel=logging.INFO):
        # TODO: make separate logging
        print('[%s] %s' % (helper.str_now(), msg))
        self.write_log(msg, loglevel)
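
The three-way condition in _filter_and_write above is easy to misread. The
same rule written as a standalone predicate (should_write is an illustrative
helper, not part of the scanner):

def should_write(status, allow=None, ignore=None):
    # neither list set -> write everything
    # allow set        -> keep only statuses in allow
    # ignore set       -> keep statuses absent from ignore
    if allow is None and ignore is None:
        return True
    if allow is not None and status in allow:
        return True
    if ignore is not None and status not in ignore:
        return True
    return False

assert should_write(200) is True
assert should_write(404, allow=[200]) is False
assert should_write(200, ignore=[404]) is True
assert should_write(404, ignore=[404]) is False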