class InputStream(object):
    """Buffer for an FCGI_STDIN or FCGI_DATA stream.

    Received chunks are written to a ``SpooledTemporaryFile`` created with
    ``max_mem`` as its in-memory threshold.  Every read-style accessor
    blocks until the EOF mark (an empty chunk) has been fed.
    """

    def __init__(self, max_mem=1024):
        self._eof_received = Event()
        self._file = SpooledTemporaryFile(max_mem)

    def feed(self, data):
        """Append *data* to the stream; a falsy chunk is the EOF mark.

        Raises:
            IOError: if anything is fed after the EOF mark.
        """
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')

        if data:
            self._file.write(data)
            return

        # EOF mark: rewind so that readers start at the first byte.
        self._file.seek(0)
        self._eof_received.set()

    def _complete_file(self):
        """Block until EOF has been fed, then hand back the backing file."""
        self._eof_received.wait()
        return self._file

    def __iter__(self):
        return iter(self._complete_file())

    def read(self, size=-1):
        """Read up to *size* bytes once the whole stream has arrived."""
        return self._complete_file().read(size)

    def readlines(self, sizehint=0):
        """Return the remaining lines once the whole stream has arrived."""
        return self._complete_file().readlines(sizehint)

    @property
    def eof_received(self):
        """True once the EOF mark has been fed."""
        return self._eof_received.is_set()
def __call__(self, environ, start_response):
    """WSGI entry point that serves websocket routes through uwsgi.

    Requests whose ``PATH_INFO`` has no entry in ``self.websocket.routes``
    are passed straight to ``self.wsgi_app``.  For a websocket route the
    handshake is performed, the route handler is spawned with a client
    object, and this method then relays messages between the two
    single-slot queues and the uwsgi websocket API until the client
    disconnects or the handler finishes.  Returns ``''`` in both cases.
    """
    handler = self.websocket.routes.get(environ['PATH_INFO'])

    if not handler:
        return self.wsgi_app(environ, start_response)

    # do handshake
    uwsgi.websocket_handshake(environ['HTTP_SEC_WEBSOCKET_KEY'],
                              environ.get('HTTP_ORIGIN', ''))

    # setup events: each direction has a flag plus a one-slot queue
    send_event = Event()
    send_queue = Queue(maxsize=1)

    recv_event = Event()
    recv_queue = Queue(maxsize=1)

    # create websocket client
    client = self.client(environ, uwsgi.connection_fd(), send_event,
                         send_queue, recv_event, recv_queue,
                         self.websocket.timeout)

    # spawn handler (the name now refers to the spawned task, not the route)
    handler = spawn(handler, client)

    # spawn recv listener: flags recv_event once select() returns, whether
    # the fd became readable or the timeout elapsed (the result is unused)
    def listener(client):
        ready = select([client.fd], [], [], client.timeout)
        recv_event.set()
    listening = spawn(listener, client)

    while True:
        if not client.connected:
            # None on the receive queue is what the handler gets on disconnect
            recv_queue.put(None)
            listening.kill()
            handler.join(client.timeout)
            return ''

        # wait for event to draw our attention
        # NOTE(review): presumably gevent.wait(objects, timeout=None, count=1)
        # -- confirm against the file's imports; the result is unused.
        ready = wait([handler, send_event, recv_event], None, 1)

        # handle send events
        if send_event.is_set():
            try:
                uwsgi.websocket_send(send_queue.get())
                send_event.clear()
            except IOError:
                client.connected = False

        # handle receive events
        elif recv_event.is_set():
            recv_event.clear()
            try:
                recv_queue.put(uwsgi.websocket_recv_nb())
                # the listener is one-shot, so start a fresh one
                listening = spawn(listener, client)
            except IOError:
                client.connected = False

        # handler done, we're outta here
        elif handler.ready():
            listening.kill()
            return ''
def retry_with_recovery(
        transport: UDPTransport,
        messagedata: bytes,
        message_id: typing.MessageID,
        recipient: typing.Address,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        backoff: typing.Generator[int, None, None],
) -> bool:
    """ Keep sending messagedata to a healthy node until it is acknowledged.

    While the recipient is flagged unhealthy the task parks in
    `wait_recovery` instead of sending. Returns the last acknowledgement
    status, which is False if the stop event ended the task first.

    Note:
        backoff must be an infinite iterator, otherwise this task will
        become a hot loop.
    """
    # event_unhealthy gets cleared by its owner; the combined event built
    # here has to be cleared by hand once recovery is over.
    stop_or_unhealthy = event_first_of(event_stop, event_unhealthy)

    acknowledged = False
    while True:
        if event_stop.is_set() or acknowledged:
            break

        # Never send to an unhealthy node; wait for it to recover first.
        if event_unhealthy.is_set():
            wait_recovery(event_stop, event_healthy)

            # Assume the wait ended because unhealthy was cleared. That is
            # safe because event_stop is re-checked right below.
            stop_or_unhealthy.clear()

            if event_stop.is_set():
                return acknowledged

        # retry gives up as soon as stop_or_unhealthy is set, which lets this
        # task either wait for recovery or quit. The same backoff object is
        # passed on purpose so it resumes from the last timeout/iteration.
        acknowledged = retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_or_unhealthy,
            backoff,
        )

    return acknowledged
class Signals(object):
    """Lets one request handler wake the handlers waiting for a new user."""

    def __init__(self):
        self.new_user_event = Event()

    def signaluser(self, request):
        """Pulse ``new_user_event`` and acknowledge the request.

        The event is set and cleared back to back, so it never stays set
        between requests.
        """
        # print() with one argument emits the same text on Python 2 and 3,
        # unlike the Python-2-only print statement it replaces.
        print(self.new_user_event.is_set())
        self.new_user_event.set()
        self.new_user_event.clear()
        return HttpResponse("New User Signaled")

    def receiveuser(self, request):
        """Block until ``new_user_event`` is signalled, then respond."""
        self.new_user_event.wait()
        return HttpResponse("New User Received")
class GServer(ProtoBufRPCServer):
    """RPC server on a ZeroMQ ROUTER socket; one pooled task per request."""

    def __init__(self, host, port, service, poolsize=128):
        """Bind a ROUTER socket to ``tcp://host:port``.

        Args:
            host: interface to bind to.
            port: TCP port to bind to.
            service: stored on ``self.service``.
            poolsize: size passed to the request ``Pool``.
        """
        self.gpool = Pool(poolsize)
        self.stop_event = Event()
        context = zmq.Context()
        self.port = port
        self.socket = context.socket(zmq.ROUTER)
        self.socket.bind("tcp://%s:%s" % (host, port))
        self.service = service

    def serve_forever(self,):
        """Receive requests until ``shutdown`` is called.

        Raises:
            zmq.ZMQError: a receive error that was not caused by the socket
                having been closed.
        """
        while not self.stop_event.is_set():
            try:
                msg = self.socket.recv_multipart()
            except zmq.ZMQError:
                if self.socket.closed:
                    break
                # Bare raise: the previous `raise e` referenced a name that
                # was never bound and produced a NameError instead.
                raise
            self.gpool.spawn(self.handle_request, msg)

    def shutdown(self,):
        """Stop the serve loop and close the socket."""
        # Flag first so the loop condition is already false if close() fails.
        self.stop_event.set()
        self.socket.close()

    def handle_request(self, msg):
        """Handle one ``[id, '', request]`` message and send the reply back."""
        assert len(msg) == 3
        (id_, null, request) = msg
        assert null == ''
        response = self.handle(request)
        self.socket.send_multipart([id_, null, response.SerializeToString()])
class ConditionPoller(Thread):
    """
    generic polling mechanism: every interval seconds, check if condition
    returns a true value.  if so, pass the value to callback.
    if condition or callback raise exception, stop polling.
    """

    def __init__(self, condition, condition_callback, exception_callback, interval):
        """
        Args:
            condition: zero-argument callable polled on every cycle.
            condition_callback: called with the condition's value when truthy.
            exception_callback: called with the exception when polling
                fails; may be falsy to disable.
            interval: seconds to wait between polls.
        """
        super(ConditionPoller, self).__init__()
        self.polling_interval = interval
        # Initialised here so the attribute can be read before shutdown().
        self.is_shutting_down = False
        self._shutdown_now = Event()
        self._condition = condition
        self._callback = condition_callback
        self._on_exception = exception_callback

    def shutdown(self):
        """Ask the polling loop to stop; wakes it if it is sleeping."""
        self.is_shutting_down = True
        self._shutdown_now.set()

    def run(self):
        """Thread body: poll, then sleep up to ``polling_interval`` seconds."""
        try:
            while not self._shutdown_now.is_set():
                self._check_condition()
                # Waiting on the event makes shutdown() take effect at once.
                self._shutdown_now.wait(self.polling_interval)
        except Exception:
            log.error('thread failed', exc_info=True)

    def _check_condition(self):
        """Run one poll; on any exception stop polling and report it."""
        try:
            value = self._condition()
            if value:
                self._callback(value)
        except Exception as e:
            log.debug('stopping poller after exception', exc_info=True)
            self.shutdown()
            if self._on_exception:
                self._on_exception(e)
class C2DMService(object):
    """Queue-backed sender for C2DM notifications."""

    def __init__(self, source, email, password):
        self.source = source
        self.email = email
        self.password = password
        self._send_queue = Queue()
        # Set up front so the attribute exists before the loop first runs.
        self._send_greenlet = None
        self._send_queue_cleared = Event()
        self.log = logging.getLogger('pulsus.service.c2dm')

    def _send_loop(self):
        """Push queued notifications forever, retrying failures.

        A notification whose push raises is put back on the queue and the
        loop pauses for five seconds before carrying on.
        """
        # NOTE(review): indentation was reconstructed from a collapsed line;
        # `finally` is attached to the outer `try` because that is the only
        # well-formed reading -- confirm against the original layout.
        self._send_greenlet = gevent.getcurrent()
        try:
            self.log.info("C2DM service started")
            while True:
                notification = self._send_queue.get()
                try:
                    self._do_push(notification)
                except Exception:
                    self.log.exception("Error while pushing")
                    self._send_queue.put(notification)
                    gevent.sleep(5.0)
        finally:
            if self._send_queue.qsize() < 1 and \
                    not self._send_queue_cleared.is_set():
                self._send_queue_cleared.set()
class BlackBerryPushService(object):
    """Queue-backed sender for BlackBerry Push notifications."""

    def __init__(self, app_id, password, push_url):
        self.app_id = app_id
        self.password = password
        self.push_url = push_url
        self._send_queue = Queue()
        # Set up front so the attribute exists before the loop first runs.
        self._send_greenlet = None
        self._send_queue_cleared = Event()
        self.log = logging.getLogger('pulsus.service.bbp')

    def _send_loop(self):
        """Push queued notifications forever, retrying failures.

        A notification whose push raises is put back on the queue and the
        loop pauses for five seconds before carrying on.
        """
        # NOTE(review): indentation was reconstructed from a collapsed line;
        # `finally` is attached to the outer `try` because that is the only
        # well-formed reading -- confirm against the original layout.
        self._send_greenlet = gevent.getcurrent()
        try:
            self.log.info("BlackBerry Push service started")
            while True:
                notification = self._send_queue.get()
                try:
                    self._do_push(notification)
                except Exception:
                    # Goes through the service logger (with traceback)
                    # instead of a bare print to stdout.
                    self.log.exception("Error while pushing")
                    self._send_queue.put(notification)
                    gevent.sleep(5.0)
        finally:
            if self._send_queue.qsize() < 1 and \
                    not self._send_queue_cleared.is_set():
                self._send_queue_cleared.set()
class Lock(object):
    """Lock stored under an etcd key and kept alive by a heartbeat task."""

    def __init__(self, etcd, key, name, ttl=30):
        """
        Args:
            etcd: client object providing ``get``, ``set`` and ``testandset``.
            key: etcd key that represents the lock.
            name: value written to the key by this holder.
            ttl: time-to-live passed on every heartbeat refresh.
        """
        self.etcd = etcd
        self.key = key
        self.name = name
        self._gthread = None
        self._ttl = ttl
        self._stopped = Event()

    def _heartbeat(self):
        """Refresh the key every ``ttl / 2`` until ``_stopped`` is set."""
        while True:
            self._stopped.wait(self._ttl / 2)
            if self._stopped.is_set():
                break
            self.etcd.testandset(self.key, self.name, self.name,
                                 ttl=self._ttl)

    def lock(self):
        """Block until the lock is taken, then start the heartbeat."""
        # This is to work around bugs in etcd.  Not very atomic
        # at all :(
        while True:
            try:
                self.etcd.get(self.key)
            except EtcdError as err:
                # A failed read is treated as "nobody holds the key".
                logging.error("lock: %s: error: %r" % (
                    self.key, err))
                self.etcd.set(self.key, self.name)
                self._gthread = gevent.spawn(self._heartbeat)
                break
            else:
                # Key is readable, so wait half a ttl and look again.
                time.sleep(self._ttl / 2)
def test_semaphore(self):
    """With one allowed caller, a second call waits for the first to end."""
    edge = APIEdge(MockApp(), self.get_settings())
    api = edge.app.api
    edge.max_concurrent_calls = 1

    first_entered = Event()
    first_may_finish = Event()
    second_entered = Event()

    def first_method():
        first_entered.set()
        first_may_finish.wait()

    def second_method():
        second_entered.set()

    api.first_method = first_method
    api.second_method = second_method

    # Occupy the only slot and make sure the first call is really running.
    gevent.spawn(edge.execute, Call("first_method"))
    first_entered.wait()

    # The second call has to be refused entry for the time being.
    gevent.spawn(edge.execute, Call("second_method"))
    gevent.sleep(0)
    assert_logged("too many concurrent callers")
    assert not second_entered.is_set()

    # Releasing the first call lets the second one through.
    first_may_finish.set()
    second_entered.wait()
    self.assert_edge_clean(edge)
class RecurringTask(object):
    """Call ``fn`` every ``interval`` seconds, or sooner when touched."""

    def __init__(self, interval, fn):
        self.fn = fn
        self.interval = interval
        self._gthread = None
        self._stopped = Event()
        self._wakeup = Event()

    def touch(self):
        """Make sure the task is executed now."""
        self._wakeup.set()

    def start(self):
        """Spawn the background loop."""
        self._gthread = gevent.spawn(self._run)

    def stop(self):
        """Flag the loop to end and wake it so it notices right away."""
        self._stopped.set()
        self._wakeup.set()

    def _run(self):
        """Loop body: run ``fn``, then sleep until the interval or a wake-up."""
        while True:
            if self._stopped.is_set():
                return
            self.fn()
            self._wakeup.wait(timeout=self.interval)
            self._wakeup.clear()
def test_spawning(defer):
    """Spawning is asynchronous: neither hook has run when spawn() returns."""
    constructed = Event()
    started = Event()

    class MyActor(Actor):
        def __init__(self):
            constructed.set()

        def pre_start(self):
            started.set()

    node = DummyNode()
    defer(node.stop)

    node.spawn(MyActor)

    # Nothing has happened yet at this point...
    ok_(not constructed.is_set())
    ok_(not started.is_set())

    # ...but once pre_start has run, __init__ must have run as well.
    started.wait()
    ok_(constructed.is_set())
def test_job_queue_join_workers():
    """JobQueue.join() returns only after the job's link callback has fired."""
    finished = Event()

    def mark_finished(greenlet):
        finished.set()

    job_queue = lets.JobQueue()
    job = job_queue.put(Greenlet(gevent.sleep, 0.1))
    job.link(mark_finished)

    # Before 0.0.24, JobQueue.join() doesn't guarantee finish of all workers.
    job_queue.join()
    assert finished.is_set()
class NotificationService(object):
    """Queue-backed client for the Apple push and feedback gateways."""

    def __init__(self, sandbox = True, **kwargs):
        """
        Args:
            sandbox: use the sandbox gateways instead of production.
            **kwargs: passed to ``ssl.wrap_socket``; must include
                ``certfile``.

        Raises:
            ValueError: if ``certfile`` is missing.
        """
        if "certfile" not in kwargs:
            raise ValueError(u"Must specify a PEM bundle.")
        self._sslargs = kwargs
        self._push_connection = None
        self._feedback_connection = None
        self._sandbox = sandbox
        self._send_queue = Queue()
        self._error_queue = Queue()
        self._feedback_queue = Queue()
        self._send_greenlet = None
        self._error_greenlet = None
        self._feedback_greenlet = None
        self._send_queue_cleared = Event()

    def _open_connection(self, production_host, sandbox_host, port):
        """Return an SSL socket pointed at the selected gateway."""
        # PROTOCOL_SSLv23 negotiates the best version both sides support.
        # SSLv3 is obsolete and its constant is missing from many builds.
        s = ssl.wrap_socket(socket(AF_INET, SOCK_STREAM, 0),
                            ssl_version=ssl.PROTOCOL_SSLv23,
                            **self._sslargs)
        host = sandbox_host if self._sandbox else production_host
        # connect_ex returns an error code instead of raising; a failed
        # connect shows up on the first send and triggers a reconnect.
        s.connect_ex((host, port))
        return s

    def _check_send_connection(self):
        """Open the push connection and start the error loop if needed."""
        if self._push_connection is None:
            self._push_connection = self._open_connection(
                "gateway.push.apple.com",
                "gateway.sandbox.push.apple.com",
                2195)
            self._error_greenlet = gevent.spawn(self._error_loop)

    def _check_feedback_connection(self):
        """Open the feedback connection if there is none."""
        if self._feedback_connection is None:
            self._feedback_connection = self._open_connection(
                "feedback.push.apple.com",
                "feedback.sandbox.push.apple.com",
                2196)

    def _send_loop(self):
        """Send queued messages forever, reconnecting after failures.

        A message whose send raises is put back on the queue, the
        connection is dropped and the loop pauses for five seconds.
        """
        # NOTE(review): indentation was reconstructed from a collapsed line;
        # `finally` is attached to the outer `try` because that is the only
        # well-formed reading -- confirm against the original layout.
        self._send_greenlet = gevent.getcurrent()
        try:
            while True:
                msg = self._send_queue.get()
                self._check_send_connection()
                try:
                    self._push_connection.send(str(msg))
                except Exception:
                    self._send_queue.put(msg)
                    self._push_connection.close()
                    self._push_connection = None
                    gevent.sleep(5.0)
        finally:
            if self._send_queue.qsize() < 1 and \
                    not self._send_queue_cleared.is_set():
                self._send_queue_cleared.set()
class MsgGenerator(gevent.Greenlet):
    """Greenlet that emits heartbeat and job events to registered instances."""

    def __init__(self):
        super(MsgGenerator, self).__init__()
        self._complete = Event()
        self._jobs = {}
        self._instances = []

        # FIXME: monkey patch the whole world
        # because the python side of librados
        # uses threading.Thread.  However, rados
        # itself will still do blocking on e.g.
        # connect(), so we probably need to wrap
        # librados in its own non-gevent python
        # process and RPC to it.
        from gevent import monkey
        monkey.patch_all()
        monkey.patch_subprocess()

    def register(self, instance):
        """Add *instance* to the receivers unless it is already there."""
        if instance in self._instances:
            return
        self._instances.append(instance)

    def _emit(self, msg_event):
        """Hand *msg_event* to every instance with at least one subscriber."""
        for receiver in self._instances:
            # GMENO theory about memory leak
            if receiver.subscribed > 0:
                receiver.put(msg_event)

    def complete(self, jid, event):
        """Forget job *jid* and emit its completion *event*."""
        del self._jobs[jid]
        self._emit(event)

    def running_jobs(self):
        """Emit a RUNNING_JOBS event listing the ids of all tracked jobs."""
        job_list = [{'jid': job_id} for job_id in self._jobs]
        self._emit(MsgEvent(RUNNING_JOBS, job_list))

    def run_job(self, fqdn, cmd, args):
        """Start *cmd* in a new greenlet and return the new job id.

        Raises:
            Unavailable: if *fqdn* is not this host's FQDN.
        """
        if fqdn != socket.getfqdn():
            raise Unavailable()

        jid = str(uuid.uuid4())
        self._jobs[jid] = gevent.spawn(run_job_thread, self, jid, cmd, args)
        return jid

    def _run(self):
        """Emit heartbeats every HEARTBEAT_PERIOD until ``_complete`` is set."""
        try:
            while not self._complete.is_set():
                server_heartbeat, cluster_heartbeat = get_heartbeats()
                log.debug("server_heartbeat: %s" % server_heartbeat)
                log.debug("cluster_heartbeat: %s" % cluster_heartbeat)

                if server_heartbeat:
                    self._emit(MsgEvent(SERVER_HEARTBEAT, server_heartbeat))
                if cluster_heartbeat:
                    self._emit(MsgEvent(HEARTBEAT, cluster_heartbeat))

                self._complete.wait(HEARTBEAT_PERIOD)
        except:
            # Log the traceback, then let the failure propagate.
            log.error(traceback.format_exc())
            raise
class Queue(gqueue.Queue):
    '''A subclass of gevent.queue.Queue used to organize communication
    messaging between Compysition Actors.

    Parameters:

        name (str):
           | The name of this queue. Used in certain actors to determine
           | origin faster than reverse key-value lookup
    '''

    def __init__(self, name, *args, **kwargs):
        super(Queue, self).__init__(*args, **kwargs)
        self.name = name
        # A new Event starts cleared; it mirrors "queue has content".
        self.__has_content = Event()

    def get(self, block=False, *args, **kwargs):
        '''Gets an element from the queue.

        Non-blocking by default.  Raises QueueEmpty when nothing is
        available.
        '''
        try:
            # `block` is passed positionally so that an extra positional
            # argument (the timeout) cannot collide with it.
            element = super(Queue, self).get(block, *args, **kwargs)
        except gqueue.Empty:
            self.__has_content.clear()
            raise QueueEmpty("Queue {0} has no waiting events".format(self.name))

        # qsize is a method; comparing the bound method to 0 was never true.
        if self.qsize() == 0:
            self.__has_content.clear()

        return element

    def put(self, element, *args, **kwargs):
        '''Puts element in queue.  Raises QueueFull when there is no room.'''
        try:
            super(Queue, self).put(element, *args, **kwargs)
            self.__has_content.set()
        except gqueue.Full:
            raise QueueFull("Queue {0} is full".format(self.name))

    def wait_until_content(self):
        '''Blocks until at least 1 slot is taken.'''
        self.__has_content.wait()

    def wait_until_empty(self):
        '''Blocks until the queue is completely empty.'''
        # Yield to other greenlets for as long as elements remain.
        while self.qsize() > 0:
            sleep(0)

    def dump(self, other_queue):
        """**Dump all items on this queue to another queue**"""
        try:
            while True:
                other_queue.put(self.next())
        except Exception:
            # Best effort: stops on the first error, normally QueueEmpty once
            # this queue is drained.  BaseException is no longer swallowed.
            pass
class ProcessDispatcherExecutorBase(object):
    """ Base class for PD Executors.

    Actions are ``(name, AsyncResult, kwargs)`` tuples.  They are queued,
    picked up by a dispatcher loop and run on a bounded pool by the
    ``_action_<name>`` method of the concrete executor.
    """

    def __init__(self, pd_core):
        self._pd_core = pd_core
        self.container = self._pd_core.container
        self.queue = Queue()
        self.quit_event = Event()

        # Pool size comes from configuration, defaults to 1, capped at 10.
        self.exec_pool_size = min(int(get_safe(self._pd_core.pd_cfg, "executor.pool_size") or 1), 10)
        self.exec_pool = Pool(size=self.exec_pool_size)

    def start(self):
        """Spawn the dispatcher loop."""
        self._pool_gl = spawn(self._action_loop)

    def stop(self):
        """Signal the loop to quit, kill running actions and wait briefly."""
        self.quit_event.set()
        # The sentinel unblocks the queue iteration so the loop sees the flag.
        self.queue.put("__QUIT__", None, None)
        self.exec_pool.kill()
        self.exec_pool.join(timeout=2)
        self._pool_gl.join(timeout=2)

    def add_action(self, action_tuple):
        """Validate and enqueue an action without waiting for its result.

        Raises:
            BadRequest: if the tuple is not ``(str, AsyncResult, dict)``.
        """
        if not action_tuple or len(action_tuple) != 3 or not isinstance(action_tuple[0], basestring) or \
                not isinstance(action_tuple[1], AsyncResult) or not isinstance(action_tuple[2], dict):
            raise BadRequest("Invalid action")
        self.queue.put(action_tuple)

    def execute_action(self, action_tuple):
        """Enqueue an action and block until its AsyncResult is resolved."""
        self.add_action(action_tuple)
        action_res = action_tuple[1]
        return action_res.get()   # Blocking on AsyncResult

    def _action_loop(self):
        """Dispatch queued actions to the pool until the quit flag is set."""
        for action in self.queue:
            if self.quit_event.is_set():
                break
            try:
                self.exec_pool.spawn(self._process_action, action)
            except Exception:
                log.exception("Error in PD Executor action")

    def _process_action(self, action):
        """Run one action and resolve its AsyncResult with result or error."""
        log.debug("PD execute action %s", action)
        action_name, action_asyncres, action_kwargs = action
        action_funcname = "_action_%s" % action_name
        action_func = getattr(self, action_funcname, None)
        if not action_func:
            log.warn("Action function not found")
            # Resolve the result, otherwise a caller blocked in
            # execute_action() would wait forever.
            action_asyncres.set_exception(
                BadRequest("Action function not found: %s" % action_funcname))
            return
        try:
            action_res = action_func(action_kwargs)
            action_asyncres.set(action_res)
        except Exception as ex:
            log.exception("Error executing action")
            action_asyncres.set_exception(ex)
class StopGreenlet(Greenlet):
    """Greenlet carrying a cooperative stop flag.

    Note that constructor arguments are accepted but not forwarded to
    ``Greenlet.__init__``.
    """

    def __init__(self, *args, **kwargs):
        self._stop_event = Event()
        super(StopGreenlet, self).__init__()

    def stop(self):
        """Raise the stop flag."""
        self._stop_event.set()

    def stopped(self):
        """Return True once ``stop()`` has been called."""
        flag = self._stop_event
        return flag.is_set()
def wait_recovery(event_stop: Event, event_healthy: Event):
    """Block until either the stop or the healthy event fires.

    When woken by anything other than a stop request, sleep for a random
    fraction of a second before returning.
    """
    stop_or_healthy = event_first_of(event_stop, event_healthy)
    stop_or_healthy.wait()

    if not event_stop.is_set():
        # There may be multiple threads waiting, do not restart them all at
        # once to avoid message flood.
        gevent.sleep(random.random())
class RampingShareAdjuster(ShareAdjuster):
    """Share adjuster whose factor follows a curve over a ramp period."""

    def __init__(self, endpoint, signal_update_fn, ramp_delay, ramp_seconds,
                 curve='linear', update_frequency=10, as_of=None):
        super(RampingShareAdjuster, self).__init__(endpoint, signal_update_fn)
        self._start_time = as_of
        self._ramp_delay = ramp_delay
        self._ramp_seconds = ramp_seconds
        self._update_frequency = update_frequency
        self._curve_fn = _CURVE_FNS[curve]
        self._stop_event = Event()

    @property
    def _end_time(self):
        """Moment at which the ramp is complete."""
        ramp_length = timedelta(seconds=self._ramp_seconds)
        return self._start_time + ramp_length

    def start(self):
        """Start maintaining share adjustment factor for endpoint.
        """
        if not self._start_time:
            delay = timedelta(seconds=self._ramp_delay)
            self._start_time = datetime.now() + delay
        spawn_later(self._update_frequency, self._update)

    def stop(self):
        """Stop maintaining share adjustment factor for endpoint.
        """
        self._stop_event.set()

    def _update(self):
        """Signal an update, then reschedule or stop once the ramp is over."""
        if self._stop_event.is_set():
            return

        try:
            self._signal_update_fn()
        finally:
            # Runs even when the signal callback raises.
            ramp_finished = datetime.now() > self._end_time
            if ramp_finished:
                self.stop()
            else:
                spawn_later(self._update_frequency, self._update)

    @property
    def auditable_share(self):
        """Return current share adjustment factor.
        """
        now = datetime.now()
        share = self._curve_fn(self._start_time, self._end_time, now)
        return share, AuditItem('ramp', str(share))
def test_actors_are_garbage_collected_on_termination(defer):
    """A stopped actor must be collectable: its ``__del__`` has to run."""
    class MyActor(Actor):
        def __del__(self):
            del_called.set()

    node = DummyNode()
    defer(node.stop)

    del_called = Event()

    # The spawned ref is deliberately not bound to a name, so this test
    # holds no reference to the actor.
    node.spawn(MyActor).stop()
    idle()
    gc.collect()

    ok_(del_called.is_set())
class Watcher(Greenlet):
    """
    A Greenlet to watch web server internals
    """

    def __init__(self, stats, reader_clients, writer_clients, event_push_client):
        Greenlet.__init__(self)
        self._log = logging.getLogger(str(self))
        self._halt_event = Event()
        self._stats = stats
        self._reader_clients = reader_clients
        self._writer_clients = writer_clients
        self._event_push_client = event_push_client

    def __str__(self):
        return "StatsReporter"

    def _run(self):
        """Report stats and client queue sizes until halted."""
        self._log.debug("starting")

        while not self._halt_event.is_set():
            reader_info = [c.queue_size for c in self._reader_clients]
            writer_info = [c.queue_size for c in self._writer_clients]

            summary = "archives: %(archives)s; retrieves: %(retrieves)s" \
                % self._stats
            self._log.info(summary)

            self._event_push_client.info(
                "web-server-stats",
                "web server stats",
                stats=self._stats,
                reader=reader_info,
                writer=writer_info
            )

            # Doubles as the sleep between reports; join() cuts it short.
            self._halt_event.wait(_interval)

        self._log.debug("ending")

    def join(self, timeout=None):
        """Ask the loop to halt, then join the greenlet."""
        self._log.debug("joining")
        self._halt_event.set()
        Greenlet.join(self, timeout)
        self._log.debug("join complete")
def test_object_pool_discard_later_with_slow_destroy():
    """A slow destroy of one object must not hold up the next reservation."""
    destroy_started = Event()
    destroy_ended = Event()

    def slow_destroy(obj):
        # Signals when destruction begins and, 10 seconds later, when it ends.
        destroy_started.set()
        gevent.sleep(10)
        destroy_ended.set()

    pool = lets.ObjectPool(1, object, slow_destroy, discard_later=0.1)
    with pool.reserve() as a:
        pass
    destroy_started.wait()
    assert destroy_started.is_set()
    # NOTE(review): indentation was reconstructed from a collapsed line; the
    # asserts below are assumed to follow the second `with` block -- confirm.
    with pool.reserve() as b:
        pass
    # 'a' is still being destroyed.
    assert not destroy_ended.is_set()
    # 'b' should not be destroying 'a'.
    assert a is not b
class Miner(gevent.Greenlet):
    """Greenlet that mines one block, sleeps, and repeats until stopped.

    Setting the ``stop`` event ends the loop after the current sleep.
    """

    def __init__(self, web3, mine_sleep=1):
        super().__init__()
        self.stop = Event()
        self.mine_sleep = mine_sleep
        self.web3 = web3

    def _run(self):
        while not self.stop.is_set():
            # tester miner sleeps for 1 sec by default, which is the same
            # period as tester geth is using
            # (see: raiden/tests/utils/geth.py:geth_generate_poa_genesis())
            self.web3.testing.mine(1)
            gevent.sleep(self.mine_sleep)
def expire_zookeeper_client_session(client, timeout=10):
    """Expire the zookeeper session of the given client.

    This function should only be used for testing purposes.  It induces an
    EXPIRED_SESSION_STATE event in the given client by connecting a second
    client with the same session id and stopping it right away.

    Args:
        client: GZookeeperClient object
        timeout: optional timeout in seconds to wait for the session
            to expire.  If None, this call will block.  This is not
            recommended.
    Returns:
        True if session expiration occurred within timeout seconds,
        False otherwise.
    """
    # session expiration event to wait on
    session_expiration_event = Event()

    def observer(event):
        if event.state_name == "EXPIRED_SESSION_STATE":
            session_expiration_event.set()

    client.add_session_observer(observer)
    try:
        # construct new client with same session_id
        # so we can cause a session expiration event
        # in our other client.
        zookeeper_client = GZookeeperClient(
            client.servers,
            client.session_id,
            client.session_password)

        def zookeeper_observer(event):
            # Upon connection, immediately stop the client
            # which will cause a session expiration in
            # the client under test.
            if event.state_name == "CONNECTED_STATE":
                zookeeper_client.stop()

        zookeeper_client.add_session_observer(zookeeper_observer)
        zookeeper_client.start()
        zookeeper_client.join()

        session_expiration_event.wait(timeout)
    finally:
        # Always detach the observer, even if the helper client failed.
        client.remove_session_observer(observer)

    return session_expiration_event.is_set()
def test_stopping_in_pre_start_directs_any_refs_to_deadletters(defer):
    """A message to an actor that stopped in pre_start is a dead letter."""
    got_message = Event()

    class MyActor(Actor):
        def pre_start(self):
            self.stop()

        def receive(self, message):
            got_message.set()

    node = DummyNode()
    defer(node.stop)

    ref = node.spawn(MyActor)

    with expect_one_event(DeadLetter(ref, 'dummy', sender=None)):
        ref << 'dummy'

    # receive() must never have seen the message.
    ok_(not got_message.is_set())
def test_stopping_an_actor_prevents_it_from_processing_any_more_messages(defer):
    """After stop(), receive() is not called again and sends dead-letter."""
    got_message = Event()

    class MyActor(Actor):
        def receive(self, _):
            got_message.set()

    node = DummyNode()
    defer(node.stop)

    ref = node.spawn(MyActor)

    # A live actor receives the message.
    ref << None
    got_message.wait()
    got_message.clear()

    # Stopping it must not surface as a receivable message.
    ref.stop()
    sleep(.001)
    ok_(not got_message.is_set(), "the '_stop' message should not be receivable in the actor")

    # Anything sent from now on is a dead letter.
    with expect_one_event(DeadLetter(ref, None, sender=None)):
        ref << None
def test_stopping_waits_till_the_ongoing_receive_is_complete(defer):
    """post_stop must not run while a receive() call is still in progress."""
    class MyActor(Actor):
        def receive(self, message):
            # Stays inside receive() until the test releases it.
            released.wait()

        def post_stop(self):
            stopped.set()

    node = DummyNode()
    defer(node.stop)

    stopped, released = Event(), Event()

    a = node.spawn(MyActor) << 'foo'
    # Short sleep, presumably to let the actor enter receive() -- confirm.
    sleep(.001)
    a.stop()
    sleep(.001)
    # Still blocked in receive(), so post_stop has not been called yet.
    ok_(not stopped.is_set())

    released.set()
    stopped.wait()
def test_stopping_waits_till_process_is_done_handling_a_message(defer):
    """A process is not interrupted in the middle of handling a message."""
    class MyProc(Actor):
        def run(self):
            self.get()
            try:
                # Handling of the first message lasts until the test releases
                # it; GreenletExit is expected no earlier than the next get().
                released.wait()
                self.get()
            except GreenletExit:
                exited.set()

    node = DummyNode()
    defer(node.stop)

    exited, released = Event(), Event()

    r = node.spawn(MyProc)
    r << 'foo'
    # Short sleep, presumably to let the process pick up 'foo' -- confirm.
    sleep(.001)
    r.stop()
    sleep(.001)
    # Still busy with the first message, so it must not have exited.
    ok_(not exited.is_set())

    released.set()
    exited.wait()
class FtpWithRpcTestCase(TemporaryBaseDirectoryTestCase):
    """Checks that an FTP upload triggers processing of the uploaded file."""

    def setUp(self):
        from gevent.event import Event
        super(FtpWithRpcTestCase, self).setUp()
        self.config = self._get_config_for_test()
        ensure_directory_exists(path.join(self.config.incoming_directory, 'main-stable'))
        # Set by mark_success once the patched processing hook is called.
        self.test_succeded = Event()

    def mark_success(self, config, index, filepath):
        """Stand-in for ``process_filepath_by_name``; records the call."""
        self.test_succeded.set()

    def test_upload(self):
        """Upload a file over FTP and wait up to a second for the hook."""
        from infi.app_repo import service
        with patch("infi.app_repo.service.process_filepath_by_name") as process_filepath_by_name:
            process_filepath_by_name.side_effect = self.mark_success
            fd = StringIO("hello world")
            # NOTE(review): nesting was reconstructed from a collapsed line;
            # the wait is assumed to happen while the servers are still up.
            with self.ftp_server_context(self.config), self.ftp_client_context(self.config, True) as client:
                with self.rpc_server_context(self.config) as server:
                    client.storbinary("STOR main-stable/testfile", fd)
                    self.test_succeded.wait(1)
        self.assertTrue(self.test_succeded.is_set())
class RaidenService: """ A Raiden node. """ def __init__(self, chain, default_registry, private_key_bin, transport, discovery, config): if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32: raise ValueError('invalid private_key') invalid_timeout = ( config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX) if invalid_timeout: raise ValueError('settle_timeout must be in range [{}, {}]'.format( NETTINGCHANNEL_SETTLE_TIMEOUT_MIN, NETTINGCHANNEL_SETTLE_TIMEOUT_MAX)) self.tokens_to_connectionmanagers = dict() self.identifier_to_results = defaultdict(list) # This is a map from a hashlock to a list of channels, the same # hashlock can be used in more than one token (for tokenswaps), a # channel should be removed from this list only when the lock is # released/withdrawn but not when the secret is registered. self.token_to_hashlock_to_channels = defaultdict( lambda: defaultdict(list)) self.chain = chain self.default_registry = default_registry self.config = config self.privkey = private_key_bin self.address = privatekey_to_address(private_key_bin) endpoint_registration_event = gevent.spawn( discovery.register, self.address, config['external_ip'], config['external_port'], ) endpoint_registration_event.link_exception( endpoint_registry_exception_handler) self.private_key = PrivateKey(private_key_bin) self.pubkey = self.private_key.public_key.format(compressed=False) self.protocol = RaidenProtocol( transport, discovery, self, config['protocol']['retry_interval'], config['protocol']['retries_before_backoff'], config['protocol']['nat_keepalive_retries'], config['protocol']['nat_keepalive_timeout'], config['protocol']['nat_invitation_timeout'], ) # TODO: remove this cyclic dependency transport.protocol = self.protocol self.blockchain_events = BlockchainEvents() self.alarm = AlarmTask(chain) self.shutdown_timeout = config['shutdown_timeout'] self._block_number = None self.stop_event = Event() 
self.start_event = Event() self.chain.client.inject_stop_event(self.stop_event) self.wal = None self.database_path = config['database_path'] if self.database_path != ':memory:': database_dir = os.path.dirname(config['database_path']) os.makedirs(database_dir, exist_ok=True) self.database_dir = database_dir # Prevent concurrent acces to the same db self.lock_file = os.path.join(self.database_dir, '.lock') self.db_lock = filelock.FileLock(self.lock_file) else: self.database_path = ':memory:' self.database_dir = None self.lock_file = None self.serialization_file = None self.db_lock = None # If the endpoint registration fails the node will quit, this must # finish before starting the protocol endpoint_registration_event.join() # Lock used to serialize calls to `poll_blockchain_events`, this is # important to give a consistent view of the node state. self.event_poll_lock = gevent.lock.Semaphore() self.start() def start(self): """ Start the node. """ # XXX Should this really be here? Or will start() never be called again # after stop() in the lifetime of Raiden apart from the tests? This is # at least at the moment prompted by tests/integration/test_transer.py if self.stop_event and self.stop_event.is_set(): self.stop_event.clear() if self.database_dir is not None: self.db_lock.acquire(timeout=0) assert self.db_lock.is_locked # The database may be :memory: storage = sqlite.SQLiteStorage(self.database_path, serialize.PickleSerializer()) self.wal, unapplied_events = wal.restore_from_latest_snapshot( node.state_transition, storage, ) # First run, initialize the basic state if self.wal.state_manager.current_state is None: block_number = self.chain.block_number() first_run = True state_change = ActionInitNode(block_number) self.wal.log_and_dispatch(state_change, block_number) # The alarm task must be started after the snapshot is loaded or the # state is primed, the callbacks assume the node is initialized. 
self.alarm.start() self.alarm.register_callback(self.poll_blockchain_events) self.alarm.register_callback(self.set_block_number) self._block_number = self.chain.block_number() # Registry registration must start *after* the alarm task. This # avoids corner cases where the registry is queried in block A, a new # block B is mined, and the alarm starts polling at block C. if first_run: self.register_payment_network(self.default_registry.address) # Start the protocol after the registry is queried to avoid warning # about unknown channels. self.protocol.start() # Health check needs the protocol layer self.start_neighbours_healthcheck() self.start_event.set() for event in unapplied_events: on_raiden_event(self, event) def start_neighbours_healthcheck(self): for neighbour in views.all_neighbour_nodes( self.wal.state_manager.current_state): if neighbour != ConnectionManager.BOOTSTRAP_ADDR: self.start_health_check_for(neighbour) def stop(self): """ Stop the node. """ # Needs to come before any greenlets joining self.stop_event.set() self.protocol.stop_and_wait() self.alarm.stop_async() wait_for = [self.alarm] wait_for.extend(self.protocol.greenlets) # We need a timeout to prevent an endless loop from trying to # contact the disconnected client gevent.wait(wait_for, timeout=self.shutdown_timeout) # Filters must be uninstalled after the alarm task has stopped. Since # the events are polled by an alarm task callback, if the filters are # uninstalled before the alarm task is fully stopped the callback # `poll_blockchain_events` will fail. 
# # We need a timeout to prevent an endless loop from trying to # contact the disconnected client try: with gevent.Timeout(self.shutdown_timeout): self.blockchain_events.uninstall_all_event_listeners() except (gevent.timeout.Timeout, RaidenShuttingDown): pass if self.db_lock is not None: self.db_lock.release() def __repr__(self): return '<{} {}>'.format(self.__class__.__name__, pex(self.address)) def set_block_number(self, block_number): state_change = Block(block_number) self.handle_state_change(state_change, block_number) # To avoid races, only update the internal cache after all the state # tasks have been updated. self._block_number = block_number def handle_state_change(self, state_change, block_number=None): is_logging = log.isEnabledFor(logging.DEBUG) if is_logging: log.debug('STATE CHANGE', node=pex(self.address), state_change=state_change) if block_number is None: block_number = self.get_block_number() event_list = self.wal.log_and_dispatch(state_change, block_number) for event in event_list: if is_logging: log.debug('EVENT', node=pex(self.address), event=event) on_raiden_event(self, event) return event_list def set_node_network_state(self, node_address, network_state): state_change = ActionChangeNodeNetworkState(node_address, network_state) self.wal.log_and_dispatch(state_change, self.get_block_number()) def start_health_check_for(self, node_address): self.protocol.start_health_check(node_address) def get_block_number(self): return views.block_number(self.wal.state_manager.current_state) def poll_blockchain_events(self, current_block=None): # pylint: disable=unused-argument with self.event_poll_lock: for event in self.blockchain_events.poll_blockchain_events(): on_blockchain_event(self, event) def sign(self, message): """ Sign message inplace. 
""" if not isinstance(message, SignedMessage): raise ValueError('{} is not signable.'.format(repr(message))) message.sign(self.private_key, self.address) def send_async(self, recipient, message): """ Send `message` to `recipient` using the raiden protocol. The protocol will take care of resending the message on a given interval until an Acknowledgment is received or a given number of tries. """ if not isaddress(recipient): raise ValueError('recipient is not a valid address.') if recipient == self.address: raise ValueError('programming error, sending message to itself') return self.protocol.send_async(recipient, message) def send_and_wait(self, recipient, message, timeout): """ Send `message` to `recipient` and wait for the response or `timeout`. Args: recipient (address): The address of the node that will receive the message. message: The transfer message. timeout (float): How long should we wait for a response from `recipient`. Returns: None: If the wait timed out object: The result from the event """ if not isaddress(recipient): raise ValueError('recipient is not a valid address.') self.protocol.send_and_wait(recipient, message, timeout) def register_payment_network(self, registry_address): proxies = get_relevant_proxies( self.chain, self.address, registry_address, ) # Install the filters first to avoid missing changes, as a consequence # some events might be applied twice. 
self.blockchain_events.add_proxies_listeners(proxies) token_network_list = list() for manager in proxies.channel_managers: manager_address = manager.address netting_channel_proxies = proxies.channelmanager_nettingchannels[ manager_address] network = get_token_network_state_from_proxies( self, manager, netting_channel_proxies) token_network_list.append(network) payment_network = PaymentNetworkState( registry_address, token_network_list, ) state_change = ContractReceiveNewPaymentNetwork(payment_network) self.handle_state_change(state_change) def connection_manager_for_token(self, token_address): if not isaddress(token_address): raise InvalidAddress('token address is not valid.') registry_address = self.default_registry.address known_token_networks = views.get_token_network_addresses_for( self.wal.state_manager.current_state, registry_address, ) if token_address not in known_token_networks: raise InvalidAddress('token is not registered.') manager = self.tokens_to_connectionmanagers.get(token_address) if manager is None: manager = ConnectionManager(self, token_address) self.tokens_to_connectionmanagers[token_address] = manager return manager def leave_all_token_networks(self): state_change = ActionLeaveAllNetworks() self.wal.log_and_dispatch(state_change, self.get_block_number()) def close_and_settle(self): log.info('raiden will close and settle all channels now') self.leave_all_token_networks() connection_managers = [ self.tokens_to_connectionmanagers[token_address] for token_address in self.tokens_to_connectionmanagers ] if connection_managers: waiting.wait_for_settle_all_channels( self, self.alarm.wait_time, ) def mediated_transfer_async(self, token_address, amount, target, identifier): """ Transfer `amount` between this node and `target`. This method will start an asyncronous transfer, the transfer might fail or succeed depending on a couple of factors: - Existence of a path that can be used, through the usage of direct or intermediary channels. 
- Network speed, making the transfer sufficiently fast so it doesn't expire. """ async_result = self.start_mediated_transfer( token_address, amount, identifier, target, ) return async_result def direct_transfer_async(self, token_address, amount, target, identifier): """ Do a direct transfer with target. Direct transfers are non cancellable and non expirable, since these transfers are a signed balance proof with the transferred amount incremented. Because the transfer is non cancellable, there is a level of trust with the target. After the message is sent the target is effectively paid and then it is not possible to revert. The async result will be set to False iff there is no direct channel with the target or the payer does not have balance to complete the transfer, otherwise because the transfer is non expirable the async result *will never be set to False* and if the message is sent it will hang until the target node acknowledge the message. This transfer should be used as an optimization, since only two packets are required to complete the transfer (from the payers perspective), whereas the mediated transfer requires 6 messages. 
""" self.protocol.start_health_check(target) if identifier is None: identifier = create_default_identifier() registry_address = self.default_registry.address direct_transfer = ActionTransferDirect( registry_address, token_address, target, identifier, amount, ) self.handle_state_change(direct_transfer) def start_mediated_transfer(self, token_address, amount, identifier, target): self.protocol.start_health_check(target) if identifier is None: identifier = create_default_identifier() assert identifier not in self.identifier_to_results async_result = AsyncResult() self.identifier_to_results[identifier].append(async_result) secret = random_secret() init_initiator_statechange = initiator_init( self, identifier, amount, secret, token_address, target, ) # TODO: implement the network timeout raiden.config['msg_timeout'] and # cancel the current transfer if it happens (issue #374) # # Dispatch the state change even if there are no routes to create the # wal entry. self.handle_state_change(init_initiator_statechange) return async_result def mediate_mediated_transfer(self, transfer): init_mediator_statechange = mediator_init(self, transfer) self.handle_state_change(init_mediator_statechange) def target_mediated_transfer(self, transfer): init_target_statechange = target_init(self, transfer) self.handle_state_change(init_target_statechange)
x.kill() with no_time(): result = gevent.wait() assert result is True # exiting because of event (the spawned greenlet still runs) for _ in xrange(2): x = gevent.spawn_later(10, lambda: 5) event = Event() event_set = gevent.spawn_later(SMALL, event.set) with expected_time(SMALL): result = gevent.wait([event]) assert result == [event], repr(result) assert not x.dead, x assert event_set.dead assert event.is_set() x.kill() with no_time(): result = gevent.wait() assert result is True # checking "ref=False" argument for _ in xrange(2): gevent.get_hub().loop.timer(10, ref=False).start(lambda: None) with no_time(): result = gevent.wait() assert result is True # checking "ref=False" attribute for _d in xrange(2): w = gevent.get_hub().loop.timer(10)
class MDSThrasher(Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  To always use the maximum
    value, set randomize to false.  The config is a dict containing some or all of:

    max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at
      any given time.

    max_thrash_delay: [default: 120] maximum number of seconds to delay before
      thrashing again.  The legacy key thrash_delay is also accepted and is
      used when max_thrash_delay is not given.

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS.

    randomize: [default: true] enables randomization and use the max/min values

    seed: [no default] seed the random number generator

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay.  Value should be between 0.0 and 1.0.

    thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
      cluster will be modified to a value [1, current) or (current, starting
      max_mds]. Value should be between 0.0 and 1.0.

    thrash_while_stopping: [default: false] thrash an MDS while there
      are MDS in up:stopping (because max_mds was changed and some
      MDS were deactivated).

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.
      This option overrides anything specified by max_thrash.  This option is a
      dict containing mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b:
      0.3, mds.c: 0.0].  Each weight is a value from 0.0 to 1.0.  Any MDSs not
      specified will be automatically given a weight of 0.0 (not thrashed).
      For a given MDS, by default the thrasher delays for up to
      max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.
      If a non-zero weight is specified for an MDS, for each iteration the
      thrasher chooses whether to thrash during that iteration based on a
      random value [0-1] not exceeding the weight of that MDS.

    Examples::


      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            - mds.a: 0.8
            - mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4

    """

    # Number of 2-second polls allowed before giving up (150 * 2s = 5 minutes).
    _MAX_POLLS = 150

    def __init__(self, ctx, manager, config, fs, max_mds):
        """
        Store the collaborators and read the tunables from `config`.

        :param ctx: test context; `ctx.cluster` and `ctx.daemons` are used.
        :param manager: used for `make_admin_daemon_dir` when power cycling.
        :param config: dict of options, see the class docstring.
        :param fs: filesystem to thrash; `name`, `id`, `status()` and
                   `set_max_mds()` are used.
        :param max_mds: upper bound used when picking a new max_mds value.
        """
        Greenlet.__init__(self)

        self.config = config
        self.ctx = ctx
        # Exception raised by do_thrash(), if any (see _run).
        self.e = None
        self.logger = log.getChild('fs.[{f}]'.format(f = fs.name))
        self.fs = fs
        self.manager = manager
        self.max_mds = max_mds
        self.name = 'thrasher.fs.[{f}]'.format(f = fs.name)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        # ``max_thrash_delay`` is the documented key; ``thrash_delay`` is the
        # key historically read here, so it is still honoured as a fallback.
        self.max_thrash_delay = float(self.config.get(
            'max_thrash_delay', self.config.get('thrash_delay', 120.0)))
        self.thrash_in_replay = float(self.config.get('thrash_in_replay', False))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
            v=self.thrash_in_replay)
        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

    def _run(self):
        """Greenlet entry point: run do_thrash(), recording any exception in self.e."""
        try:
            self.do_thrash()
        except Exception as e:
            # Log exceptions here so we get the full backtrace (gevent loses them).
            # Also allow successful completion as gevent exception handling is a broken mess:
            #
            # 2017-02-03T14:34:01.259 CRITICAL:root:  File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
            #     self.print_exception(context, type, value, tb)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
            #     traceback.print_exception(type, value, tb, file=errstream)
            #   File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
            #     _print(file, 'Traceback (most recent call last):')
            #   File "/usr/lib/python2.7/traceback.py", line 13, in _print
            #     file.write(str+terminator)
            # 2017-02-03T14:34:01.261 CRITICAL:root:IOError
            self.e = e
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...

    def log(self, x):
        """Write data to logger assigned to this MDThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask the thrash loop to stop by setting the `stopping` event."""
        self.stopping.set()

    def _pick_delay(self, max_delay):
        """Return max_delay, or a uniform random value in [0, max_delay] when randomizing."""
        if self.randomize:
            # randrange() only works on integers; uniform() is the float form.
            return random.uniform(0.0, max_delay)
        return max_delay

    def kill_mds(self, mds):
        """
        Stop the given mds: power off its remote when `powercycle` is
        configured, otherwise stop the daemon.
        """
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('kill_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_off()
        else:
            self.ctx.daemons.get_daemon('mds', mds).stop()

    @staticmethod
    def _assert_ipmi(remote):
        """Assert that `remote.console` has IPMI credentials."""
        assert remote.console.has_ipmi_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized.  Check ipmi config.")

    def revive_mds(self, mds):
        """
        Revive mds -- do an ipmpi powercycle (if indicated by the config)
        and then restart.
        """
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('revive_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)
        self.ctx.daemons.get_daemon('mds', mds).restart()

    def wait_for_stable(self, rank = None, gid = None):
        """
        Poll the filesystem status every 2 seconds until it is stable.

        With `rank` given, return once that rank is up:active under a gid
        different from `gid`, or once the number of actives has reached
        max_mds (no replacement can occur).  Without `rank`, return once the
        number of actives equals max_mds.

        :return: the last status when `rank` is given; the tuple
                 `(status, None)` when `rank` is None (kept as-is for
                 existing callers).
        :raises RuntimeError: if the cluster does not stabilize in ~5 minutes.
        """
        self.log('waiting for mds cluster to stabilize...')
        thrash_while_stopping = bool(self.config.get('thrash_while_stopping', False))
        for itercount in itertools.count():
            status = self.fs.status()
            max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
            ranks = list(status.get_ranks(self.fs.id))
            stopping = sum(1 for r in ranks if "up:stopping" == r['state'])
            actives = sum(1 for r in ranks
                          if "up:active" == r['state'] and "laggy_since" not in r)

            if not thrash_while_stopping and stopping > 0:
                if itercount % 5 == 0:
                    self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)')
            else:
                if rank is not None:
                    try:
                        info = status.get_rank(self.fs.id, rank)
                        if info['gid'] != gid and "up:active" == info['state']:
                            self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid))
                            return status
                    except Exception:
                        # no rank present.  Deliberately not a bare except so
                        # greenlet kills / KeyboardInterrupt still propagate.
                        pass
                    if actives >= max_mds:
                        # no replacement can occur!
                        self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format(
                            actives=actives, max_mds=max_mds, rank=rank))
                        return status
                else:
                    if actives == max_mds:
                        self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives))
                        return status, None
            if itercount > self._MAX_POLLS: # 5 minutes
                raise RuntimeError('timeout waiting for cluster to stabilize')
            elif itercount % 5 == 0:
                self.log('mds map: {status}'.format(status=status))
            else:
                self.log('no change')
            sleep(2)

    def do_thrash(self):
        """
        Perform the random thrashing action
        """

        self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name))
        stats = {
            "max_mds": 0,
            "deactivate": 0,
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self._pick_delay(self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            status = self.fs.status()

            if random.random() <= self.thrash_max_mds:
                max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
                # Any value in [1, self.max_mds] other than the current one.
                options = list(range(1, max_mds))+list(range(max_mds+1, self.max_mds+1))
                if len(options) > 0:
                    sample = random.sample(options, 1)
                    new_max_mds = sample[0]
                    self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds))
                    self.fs.set_max_mds(new_max_mds)
                    stats['max_mds'] += 1
                    self.wait_for_stable()

            count = 0
            for info in status.get_ranks(self.fs.id):
                name = info['name']
                label = 'mds.' + name
                rank = info['rank']
                gid = info['gid']

                # if thrash_weights isn't specified and we've reached max_thrash,
                # we're done
                count = count + 1
                if 'thrash_weights' not in self.config and count > self.max_thrash:
                    break

                weight = 1.0
                if 'thrash_weights' in self.config:
                    # Unlisted MDSs get weight 0.0; coerce to float so the
                    # comparison below is numeric.
                    weight = float(self.config['thrash_weights'].get(label, 0.0))
                # Uniform value in [0.0, 1.0): weight 0.0 always skips and
                # weight 1.0 never does.
                skip = random.random()
                if weight <= skip:
                    self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight))
                    continue

                self.log('kill {label} (rank={rank})'.format(label=label, rank=rank))
                self.kill_mds(name)
                stats['kill'] += 1

                # wait for mon to report killed mds as crashed
                last_laggy_since = None
                itercount = 0
                while True:
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if not info:
                        break
                    if 'laggy_since' in info:
                        last_laggy_since = info['laggy_since']
                        break
                    if any(f == name for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']):
                        break
                    self.log(
                        'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format(
                            label=label))
                    itercount = itercount + 1
                    if itercount > 10:
                        self.log('mds map: {status}'.format(status=status))
                    sleep(2)

                if last_laggy_since:
                    self.log(
                        '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since))
                else:
                    self.log('{label} down, removed from mdsmap'.format(label=label))

                # wait for a standby mds to takeover and become active
                status = self.wait_for_stable(rank, gid)

                # wait for a while before restarting old active to become new
                # standby
                delay = self._pick_delay(self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving {label}'.format(
                    delay=delay, label=label))
                sleep(delay)

                self.log('reviving {label}'.format(label=label))
                self.revive_mds(name)

                for itercount in itertools.count():
                    if itercount > self._MAX_POLLS: # 5 minutes
                        raise RuntimeError('timeout waiting for MDS to revive')
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'):
                        self.log('{label} reported in {state} state'.format(label=label, state=info['state']))
                        break
                    self.log(
                        'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label))
                    sleep(2)

        for stat, value in stats.items():
            self.log("stat['{key}'] = {value}".format(key = stat, value = value))
class _FelixEtcdWatcher(gevent.Greenlet): """ Greenlet that communicates with the etcd driver over a socket. * Does the initial handshake with the driver, sending it the init message. * Receives the pre-loaded config from the driver and uses that to do Felix's one-off configuration. * Sends the relevant config back to the driver. * Processes the event stream from the driver, sending it on to the splitter. This class is similar to the EtcdWatcher class in that it uses a PathDispatcher to fan out updates but it doesn't own an etcd connection of its own. """ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): super(_FelixEtcdWatcher, self).__init__() self._config = config self._etcd_api = etcd_api self._status_reporter = status_reporter self.hosts_ipset = hosts_ipset # Whether we've been in sync with etcd at some point. self._been_in_sync = False # Keep track of the config loaded from etcd so we can spot if it # changes. self.last_global_config = None self.last_host_config = None self.my_config_dir = dir_for_per_host_config(self._config.HOSTNAME) # Events triggered by the EtcdAPI Actor to tell us to load the config # and start polling. These are one-way flags. self.load_config = Event() self.begin_polling = Event() # Event that we trigger once the config is loaded. self.configured = Event() # Polling state initialized at poll start time. self.splitter = None # Next-hop IP addresses of our hosts, if populated in etcd. self.ipv4_by_hostname = {} # Forces a resync after the current poll if set. Safe to set from # another thread. Automatically reset to False after the resync is # triggered. self.resync_requested = False self.dispatcher = PathDispatcher() # The Popen object for the driver. self._driver_process = None # Stats. self.read_count = 0 self.msgs_processed = 0 self.last_rate_log_time = monotonic_time() # Register for events when values change. self._register_paths() def _register_paths(self): """ Program the dispatcher with the paths we care about. 
""" reg = self.dispatcher.register # Profiles and their contents. reg(TAGS_KEY, on_set=self.on_tags_set, on_del=self.on_tags_delete) reg(RULES_KEY, on_set=self.on_rules_set, on_del=self.on_rules_delete) reg(PROFILE_LABELS_KEY, on_set=self.on_prof_labels_set, on_del=self.on_prof_labels_delete) # Tiered policy reg(TIER_DATA, on_set=self.on_tier_data_set, on_del=self.on_tier_data_delete) reg(TIERED_PROFILE, on_set=self.on_tiered_policy_set, on_del=self.on_tiered_policy_delete) # Hosts and endpoints. reg(HOST_IP_KEY, on_set=self.on_host_ip_set, on_del=self.on_host_ip_delete) reg(PER_ENDPOINT_KEY, on_set=self.on_endpoint_set, on_del=self.on_endpoint_delete) reg(CIDR_V4_KEY, on_set=self.on_ipam_v4_pool_set, on_del=self.on_ipam_v4_pool_delete) # Configuration keys. If any of these is changed or created, we'll # restart to pick up the change. reg(CONFIG_PARAM_KEY, on_set=self._on_config_updated, on_del=self._on_config_updated) reg(PER_HOST_CONFIG_PARAM_KEY, on_set=self._on_host_config_updated, on_del=self._on_host_config_updated) @logging_exceptions def _run(self): # Don't do anything until we're told to load the config. _log.info("Waiting for load_config event...") self.load_config.wait() _log.info("...load_config set. Starting driver read %s loop", self) # Start the driver process and wait for it to connect back to our # socket. self._msg_reader, self._msg_writer = self._start_driver() # Loop reading from the socket and processing messages. self._loop_reading_from_driver() def _loop_reading_from_driver(self): while True: try: # Note: self._msg_reader.new_messages() returns iterator so # whole for loop must be inside the try. 
for msg_type, msg in self._msg_reader.new_messages(timeout=1): self._dispatch_msg_from_driver(msg_type, msg) except SocketClosed: _log.critical("The driver process closed its socket, Felix " "must exit.") die_and_restart() if self.resync_requested: _log.info("Resync requested, sending resync request to driver") self.resync_requested = False self._msg_writer.send_message(MSG_TYPE_RESYNC) # Check that the driver hasn't died. The recv() call should # raise an exception when the buffer runs dry but this usually # gets hit first. driver_rc = self._driver_process.poll() if driver_rc is not None: _log.critical( "Driver process died with RC = %s. Felix must " "exit.", driver_rc) die_and_restart() def _dispatch_msg_from_driver(self, msg_type, msg): # Optimization: put update first in the "switch" block because # it's on the critical path. if msg_type == MSG_TYPE_UPDATE: _stats.increment("Update messages from driver") self._on_update_from_driver(msg) elif msg_type == MSG_TYPE_CONFIG_LOADED: _stats.increment("Config loaded messages from driver") self._on_config_loaded_from_driver(msg) elif msg_type == MSG_TYPE_STATUS: _stats.increment("Status messages from driver") self._on_status_from_driver(msg) else: raise RuntimeError("Unexpected message %s" % msg) self.msgs_processed += 1 if self.msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: # Yield to ensure that other actors make progress. (gevent only # yields for us if the socket would block.) The sleep must be # non-zero to work around gevent issue where we could be # immediately rescheduled. gevent.sleep(0.000001) def _on_update_from_driver(self, msg): """ Called when the driver sends us a key/value pair update. After the initial handshake, the stream of events consists entirely of updates unless something happens to change the state of the driver. :param dict msg: The message received from the driver. 
""" assert self.configured.is_set(), "Received update before config" # The driver starts polling immediately, make sure we block until # everyone else is ready to receive updates. self.begin_polling.wait() # Unpack the message. key = msg[MSG_KEY_KEY] value = msg[MSG_KEY_VALUE] _log.debug("Update from driver: %s -> %s", key, value) # Output some very coarse stats. self.read_count += 1 if self.read_count % 1000 == 0: now = monotonic_time() delta = now - self.last_rate_log_time _log.info("Processed %s updates from driver " "%.1f/s", self.read_count, 1000.0 / delta) self.last_rate_log_time = now # Wrap the update in an EtcdEvent object so we can dispatch it via the # PathDispatcher. n = EtcdEvent("set" if value is not None else "delete", key, value) self.dispatcher.handle_event(n) def _on_config_loaded_from_driver(self, msg): """ Called when we receive a config loaded message from the driver. This message is expected once per resync, when the config is pre-loaded by the driver. On the first call, responds to the driver synchronously with a config response. If the config has changed since a previous call, triggers Felix to die. """ global_config = msg[MSG_KEY_GLOBAL_CONFIG] host_config = msg[MSG_KEY_HOST_CONFIG] _log.info("Config loaded by driver:\n" "Global: %s\nPer-host: %s", global_config, host_config) if self.configured.is_set(): # We've already been configured. We don't yet support # dynamic config update so instead we check if the config # has changed and die if it has. _log.info("Checking configuration for changes...") if (host_config != self.last_host_config or global_config != self.last_global_config): _log.warning("Felix configuration has changed, " "felix must restart.") _log.info("Old host config: %s", self.last_host_config) _log.info("New host config: %s", host_config) _log.info("Old global config: %s", self.last_global_config) _log.info("New global config: %s", global_config) die_and_restart() else: # First time loading the config. 
Report it to the config # object. Take copies because report_etcd_config is # destructive. self.last_host_config = host_config.copy() self.last_global_config = global_config.copy() self._config.report_etcd_config(host_config, global_config) # Config now fully resolved, inform the driver. driver_log_file = self._config.DRIVERLOGFILE self._msg_writer.send_message( MSG_TYPE_CONFIG, { MSG_KEY_LOG_FILE: driver_log_file, MSG_KEY_SEV_FILE: self._config.LOGLEVFILE, MSG_KEY_SEV_SCREEN: self._config.LOGLEVSCR, MSG_KEY_SEV_SYSLOG: self._config.LOGLEVSYS, }) self.configured.set() def _on_status_from_driver(self, msg): """ Called when we receive a status update from the driver. The driver sends us status messages whenever its status changes. It moves through these states: (1) wait-for-ready (waiting for the global ready flag to become set) (2) resync (resyncing with etcd, processing a snapshot and any concurrent events) (3) in-sync (snapshot processsing complete, now processing only events from etcd) If the driver falls out of sync with etcd then it will start again from (1). If the status is in-sync, triggers the relevant processing. """ status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) if status == STATUS_IN_SYNC and not self._been_in_sync: # We're now in sync, tell the Actors that need to do start-of-day # cleanup. self.begin_polling.wait() # Make sure splitter is set. self._been_in_sync = True self.splitter.on_datamodel_in_sync() if self._config.REPORT_ENDPOINT_STATUS: self._status_reporter.clean_up_endpoint_statuses(async=True) self._update_hosts_ipset() def _start_driver(self): """ Starts the driver subprocess, connects to it over the socket and sends it the init message. Stores the Popen object in self._driver_process for future access. :return: the connected socket to the driver. 
""" _log.info("Creating server socket.") try: os.unlink("/run/felix-driver.sck") except OSError: _log.debug("Failed to delete driver socket, assuming it " "didn't exist.") update_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) update_socket.bind("/run/felix-driver.sck") update_socket.listen(1) self._driver_process = subprocess.Popen([ sys.executable, "-m", "calico.etcddriver", "/run/felix-driver.sck" ]) _log.info("Started etcd driver with PID %s", self._driver_process.pid) update_conn, _ = update_socket.accept() _log.info("Accepted connection on socket") # No longer need the server socket, remove it. try: os.unlink("/run/felix-driver.sck") except OSError: # Unexpected but carry on... _log.exception("Failed to unlink socket") else: _log.info("Unlinked server socket") # Wrap the socket in reader/writer objects that simplify using the # protocol. reader = MessageReader(update_conn) writer = MessageWriter(update_conn) # Give the driver its config. writer.send_message( MSG_TYPE_INIT, { MSG_KEY_ETCD_URLS: [ self._config.ETCD_SCHEME + "://" + addr for addr in self._config.ETCD_ADDRS ], MSG_KEY_HOSTNAME: self._config.HOSTNAME, MSG_KEY_KEY_FILE: self._config.ETCD_KEY_FILE, MSG_KEY_CERT_FILE: self._config.ETCD_CERT_FILE, MSG_KEY_CA_FILE: self._config.ETCD_CA_FILE }) return reader, writer def on_endpoint_set(self, response, hostname, orchestrator, workload_id, endpoint_id): """Handler for endpoint updates, passes the update to the splitter.""" combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) _log.debug("Endpoint %s updated", combined_id) _stats.increment("Endpoint created/updated") endpoint = parse_endpoint(self._config, combined_id, response.value) self.splitter.on_endpoint_update(combined_id, endpoint) def on_endpoint_delete(self, response, hostname, orchestrator, workload_id, endpoint_id): """Handler for endpoint deleted, passes the update to the splitter.""" combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) 
_log.debug("Endpoint %s deleted", combined_id) _stats.increment("Endpoint deleted") self.splitter.on_endpoint_update(combined_id, None) def on_rules_set(self, response, profile_id): """Handler for rules updates, passes the update to the splitter.""" _log.debug("Rules for %s set", profile_id) _stats.increment("Rules created/updated") rules = parse_profile(profile_id, response.value) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_rules_update(profile_id, rules) def on_rules_delete(self, response, profile_id): """Handler for rules deletes, passes the update to the splitter.""" _log.debug("Rules for %s deleted", profile_id) _stats.increment("Rules deleted") self.splitter.on_rules_update(profile_id, None) def on_tags_set(self, response, profile_id): """Handler for tags updates, passes the update to the splitter.""" _log.debug("Tags for %s set", profile_id) _stats.increment("Tags created/updated") rules = parse_tags(profile_id, response.value) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_tags_update(profile_id, rules) def on_tags_delete(self, response, profile_id): """Handler for tags deletes, passes the update to the splitter.""" _log.debug("Tags for %s deleted", profile_id) _stats.increment("Tags deleted") self.splitter.on_tags_update(profile_id, None) def on_prof_labels_set(self, response, profile_id): """Handler for profile labels, passes update to the splitter.""" _log.debug("Labels for profile %s created/updated", profile_id) labels = parse_labels(profile_id, response.value) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_prof_labels_set(profile_id, labels) def on_prof_labels_delete(self, response, profile_id): """Handler for profile label deletion passed update to the splitter.""" _log.debug("Labels for profile %s deleted", profile_id) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_prof_labels_set(profile_id, None) def on_tier_data_set(self, response, tier): _log.debug("Tier data set for tier 
%s", tier) _stats.increment("Tier data created/updated") data = parse_tier_data(tier, response.value) self.splitter.on_tier_data_update(tier, data) def on_tier_data_delete(self, response, tier): _log.debug("Tier data deleted for tier %s", tier) _stats.increment("Tier data deleted") self.splitter.on_tier_data_update(tier, None) def on_tiered_policy_set(self, response, tier, policy_id): _log.debug("Rules for %s/%s set", tier, policy_id) _stats.increment("Tiered rules created/updated") policy_id = TieredPolicyId(tier, policy_id) rules = parse_policy(policy_id, response.value) if rules is not None: selector = rules.pop("selector") order = rules.pop("order") self.splitter.on_rules_update(policy_id, rules) self.splitter.on_policy_selector_update(policy_id, selector, order) else: self.splitter.on_rules_update(policy_id, None) self.splitter.on_policy_selector_update(policy_id, None, None) def on_tiered_policy_delete(self, response, tier, policy_id): """Handler for tiered rules deletes, passes update to the splitter.""" _log.debug("Rules for %s/%s deleted", tier, policy_id) _stats.increment("tiered rules deleted") policy_id = TieredPolicyId(tier, policy_id) self.splitter.on_rules_update(policy_id, None) self.splitter.on_policy_selector_update(policy_id, None, None) def on_host_ip_set(self, response, hostname): if not self._config.IP_IN_IP_ENABLED: _log.debug("Ignoring update to %s because IP-in-IP is disabled", response.key) return _stats.increment("Host IP created/updated") ip = parse_host_ip(hostname, response.value) if ip: self.ipv4_by_hostname[hostname] = ip else: _log.warning( "Invalid IP for hostname %s: %s, treating as " "deletion", hostname, response.value) self.ipv4_by_hostname.pop(hostname, None) self._update_hosts_ipset() def on_host_ip_delete(self, response, hostname): if not self._config.IP_IN_IP_ENABLED: _log.debug("Ignoring update to %s because IP-in-IP is disabled", response.key) return _stats.increment("Host IP deleted") if 
self.ipv4_by_hostname.pop(hostname, None): self._update_hosts_ipset() def _update_hosts_ipset(self): if not self._been_in_sync: _log.debug("Deferring update to hosts ipset until we're in-sync") return self.hosts_ipset.replace_members(frozenset( self.ipv4_by_hostname.values()), async=True) def _on_config_updated(self, response, config_param): new_value = response.value if self.last_global_config.get(config_param) != new_value: _log.critical( "Global config value %s updated. Felix must be " "restarted.", config_param) die_and_restart() _stats.increment("Global config (non) updates") def _on_host_config_updated(self, response, hostname, config_param): if hostname != self._config.HOSTNAME: _log.debug("Ignoring config update for host %s", hostname) return _stats.increment("Per-host config created/updated") new_value = response.value if self.last_host_config.get(config_param) != new_value: _log.critical( "Global config value %s updated. Felix must be " "restarted.", config_param) die_and_restart() def on_ipam_v4_pool_set(self, response, pool_id): _stats.increment("IPAM pool created/updated") pool = parse_ipam_pool(pool_id, response.value) self.splitter.on_ipam_pool_updated(pool_id, pool) def on_ipam_v4_pool_delete(self, response, pool_id): _stats.increment("IPAM pool deleted") self.splitter.on_ipam_pool_updated(pool_id, None)
class MeekSession(object):
    """State of one relay session bridged to a SOCKS proxy.

    On construction the session stores itself in ``sessionmap`` under
    ``sessionid``; :meth:`clean` removes it again.  Payload handed to
    :meth:`process` is queued in ``in_queue`` and written to the SOCKS side
    by a writer greenlet, while data read from the SOCKS side is queued in
    ``out_queue`` and returned by :meth:`fetch_resp`.
    """

    def __init__(self, sessionid, socksip, socksport, timeout, sessionmap):
        # Identity and upstream SOCKS endpoint.
        self.sessionid = sessionid
        self.socksip = socksip
        self.socksport = socksport
        self.timeout = timeout

        # Register under our id so the session can be looked up later.
        self.sessionmap = sessionmap
        sessionmap[sessionid] = self

        # Sockets are created lazily by initialize() / cmd_udp_associate().
        self.udpsock = None
        self.udp_associate = None
        self.socksconn = None
        self.allsocks = []

        self.status = SESSION_WAIT_INIT
        self.initialized = False

        # in_*: data headed to the SOCKS side; out_queue: data read from it.
        self.in_queue = Queue()
        self.in_notifier = Event()
        self.in_notifier.clear()
        self.out_queue = Queue()

        # Idle timer used by both relay loops; `finish` stops all greenlets.
        self.timer = SharedTimer(self.timeout)
        self.finish = Event()
        self.finish.clear()
        self.threads = []

    def _data_headers(self):
        """Return the header list attached to every data response."""
        return [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_MSGTYPE, MSGTYPE_DATA),
        ]

    def _start_relay_threads(self):
        """Spawn the writer, reader and cleanup greenlets."""
        for loop in (self.meeks_write_to_socks_thread,
                     self.meeks_read_from_socks_thread):
            self.threads.append(gevent.spawn(loop))
        # The cleaner joins the two relay greenlets, then frees resources.
        gevent.spawn(self.meeks_clean_thread)

    def meeks_clean_thread(self):
        """Wait for ``finish``, join the relay greenlets, then clean up."""
        while not self.finish.is_set():
            gevent.sleep(SERVER_TURNAROUND_MAX)
        for worker in self.threads:
            worker.join()
        self.clean()

    def write_to_socks(self, data):
        """Send ``data`` over the UDP socket if one exists, else over TCP."""
        if not self.udpsock:
            self.socksconn.sendall(data)
            return
        self.udpsock.sendto(data, self.udp_associate)

    def meeks_write_to_socks_thread(self):
        """Drain ``in_queue`` to the SOCKS side until finished or timed out."""
        while not self.finish.is_set():
            try:
                notified = self.in_notifier.wait(timeout=CLIENT_MAX_POLL_INTERVAL)
                self.in_notifier.clear()
                if not notified:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                # NOTE(review): this reset runs on every pass that did not
                # break, idle ones included -- confirm that is intended.
                self.timer.reset()
                while not self.in_queue.empty():
                    chunk = self.in_queue.get()
                    log.debug("%s: RELAY-UP %d bytes" % (self.sessionid, len(chunk)))
                    self.write_to_socks(chunk)
            except Exception as ex:
                log.error("[Exception][meeks_write_to_socks_thread] %s: %s" % (self.sessionid, str(ex)))
                break
        self.finish.set()

    def meeks_read_from_socks_thread(self):
        """Move data from the SOCKS sockets into ``out_queue``."""
        while not self.finish.is_set():
            try:
                ready, _, _ = select.select(self.allsocks, [], [], CLIENT_MAX_POLL_INTERVAL)
                if ready:
                    self.timer.reset()
                else:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break

                if self.socksconn in ready:
                    # Once a UDP socket exists the TCP socket must stay quiet.
                    if self.udpsock:
                        raise RelaySessionError("unexcepted read-event from tcp socket in UDP session")
                    chunk = self.socksconn.recv(MAX_PAYLOAD_LENGTH)
                    if not chunk:
                        raise RelaySessionError("peer closed")
                    self.out_queue.put(chunk)
                    continue

                if self.udpsock and self.udpsock in ready:
                    datagram, _ = self.udpsock.recvfrom(MAX_PAYLOAD_LENGTH)
                    if datagram:
                        self.out_queue.put(datagram)
            except Exception as ex:
                log.error("[Exception][meeks_read_from_socks_thread] %s:%s" % (self.sessionid, str(ex)))
                break
        self.finish.set()

    def initialize(self):
        """Connect to the SOCKS server and perform the init exchange."""
        endpoint = (self.socksip, self.socksport)
        self.socksconn = socket.create_connection(endpoint, self.timeout)
        self.allsocks = [self.socksconn]
        self.socksconn.sendall(InitRequest().pack())
        read_init_reply(self.socksconn)
        self.status = SESSION_WAIT_REQUEST
        self.initialized = True

    def cmd_connect(self, req):
        """Forward a CONNECT request and start relaying in TCP mode.

        Returns the packed SOCKS reply and the response headers.
        """
        self.socksconn.sendall(req.pack())
        resp = read_reply(self.socksconn).pack()
        self._start_relay_threads()
        self.status = SESSION_TCP
        return resp, self._data_headers()

    def cmd_udp_associate(self, req):
        """Bind a local UDP socket, request UDP ASSOCIATE and start relaying.

        Returns the packed SOCKS reply and the response headers.
        """
        self.udpsock = bind_local_udp(self.socksconn)
        self.allsocks.append(self.udpsock)
        addrtype, ip, port = sock_addr_info(self.udpsock)
        associate = Request(cmd=UDP_ASSOCIATE, addrtype=addrtype,
                            dstaddr=ip, dstport=port)
        self.socksconn.sendall(associate.pack())
        reply = read_reply(self.socksconn)
        resp = reply.pack()
        # Datagrams are sent to the address the SOCKS server bound for us.
        self.udp_associate = (reply.bndaddr, reply.bndport)
        self._start_relay_threads()
        self.status = SESSION_UDP
        return resp, self._data_headers()

    def cmd_bind(self, req):
        """Reject BIND: return an empty body and a "Not Supported" header."""
        headers = [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_ERROR, "Not Supported"),
        ]
        return "", headers

    def sync_socks_request(self, data, env):
        """Unpack a SOCKS request from ``data`` and dispatch on its command.

        Raises ``KeyError`` when the command is none of CONNECT, BIND or
        UDP_ASSOCIATE.
        """
        req = Request()
        req.unpack(data)
        dispatch = {
            CONNECT: self.cmd_connect,
            BIND: self.cmd_bind,
            UDP_ASSOCIATE: self.cmd_udp_associate,
        }
        command = dispatch[req.cmd]
        return command(req)

    def _fetch_resp(self):
        """Collect queued outbound packets.

        Returns ``(packets, total_size)``.  When nothing is queued, waits up
        to SERVER_TURNAROUND_TIMEOUT for data before giving up.
        """
        packets = []
        size = 0
        while True:
            while not self.out_queue.empty() and size < MAX_PAYLOAD_LENGTH:
                packet = self.out_queue.get()
                packets.append(packet)
                size += len(packet)
            if packets:
                return packets, size
            try:
                self.out_queue.peek(block=True, timeout=SERVER_TURNAROUND_TIMEOUT)
            except Empty:
                break
        return packets, size

    def fetch_resp(self):
        """Return the joined outbound payload and its response headers."""
        packets, _ = self._fetch_resp()
        headers = self._data_headers()
        if self.status == SESSION_UDP and packets:
            # In UDP mode the header lists each datagram's length.
            sizes = ",".join(str(len(packet)) for packet in packets)
            headers.append((HEADER_UDP_PKTS, sizes))
        return "".join(packets), headers

    def process_tcp(self, data, env):
        """Queue ``data`` for the SOCKS side and return pending output."""
        if data:
            self.in_queue.put(data)
            self.in_notifier.set()
        return self.fetch_resp()

    def process_udp(self, data, env):
        """Split ``data`` into datagrams, queue them, return pending output.

        Datagram lengths are read from the HEADER_UDP_PKTS entry of ``env``.
        """
        if data:
            sizes = env[header_to_env(HEADER_UDP_PKTS)].split(",")
            start = 0
            for size in sizes:
                end = start + int(size)
                self.in_queue.put(data[start:end])
                start = end
            self.in_notifier.set()
        return self.fetch_resp()

    def process(self, data, env):
        """Initialize on first use, then handle ``data`` per session status."""
        if not self.initialized:
            self.initialize()
        handlers = {
            SESSION_WAIT_REQUEST: self.sync_socks_request,
            SESSION_TCP: self.process_tcp,
            SESSION_UDP: self.process_udp,
        }
        handle = handlers[self.status]
        return handle(data, env)

    def alive(self):
        """Return True while the session has not been marked finished."""
        return not self.finish.is_set()

    def clean(self):
        """Mark the session finished and release sockets, queues and map slot."""
        self.finish.set()
        for sock in self.allsocks:
            sock.close()
        for pending in (self.in_queue, self.out_queue):
            pending.queue.clear()
        if self.sessionid in self.sessionmap:
            del self.sessionmap[self.sessionid]
        log.info("%s: quit, %d sessions left" % (self.sessionid, len(self.sessionmap.keys())))
class MsgGenerator(gevent.Greenlet):
    """Greenlet that emits heartbeat events and tracks spawned jobs.

    Events are delivered to every registered instance whose ``subscribed``
    count is positive.
    """

    def __init__(self):
        super(MsgGenerator, self).__init__()
        self._instances = []
        self._jobs = {}
        self._complete = Event()

        # FIXME: the whole world is monkey patched here because the python
        # side of librados uses threading.Thread.  rados itself will still
        # block on e.g. connect(), so librados probably needs to live in its
        # own non-gevent python process that we talk to over RPC.
        from gevent import monkey
        monkey.patch_all()
        monkey.patch_subprocess()

    def register(self, instance):
        """Add ``instance`` to the receivers unless it is already present."""
        if instance in self._instances:
            return
        self._instances.append(instance)

    def _emit(self, msg_event):
        """Hand ``msg_event`` to every receiver that has subscribers."""
        for receiver in self._instances:
            # GMENO theory about memory leak: skip unsubscribed receivers.
            if receiver.subscribed > 0:
                receiver.put(msg_event)

    def complete(self, jid, event):
        """Forget job ``jid`` and emit its completion ``event``.

        Raises ``KeyError`` if ``jid`` is not a known job.
        """
        del self._jobs[jid]
        self._emit(event)

    def running_jobs(self):
        """Emit a RUNNING_JOBS event listing the ids of all tracked jobs."""
        listing = [{'jid': job_id} for job_id in self._jobs]
        self._emit(MsgEvent(RUNNING_JOBS, listing))

    def run_job(self, fqdn, cmd, args):
        """Spawn ``cmd`` with ``args`` as a job and return its new job id.

        Raises ``Unavailable`` when ``fqdn`` is not this host's FQDN.
        """
        if fqdn != socket.getfqdn():
            raise Unavailable()

        jid = str(uuid.uuid4())

        def _job():
            return run_job_thread(self, jid, cmd, args)

        self._jobs[jid] = gevent.spawn(_job)
        return jid

    def _run(self):
        """Emit heartbeats every HEARTBEAT_PERIOD until ``_complete`` is set."""
        try:
            while not self._complete.is_set():
                server_beat, cluster_beat = get_heartbeats()
                log.debug("server_heartbeat: %s" % server_beat)
                log.debug("cluster_heartbeat: %s" % cluster_beat)

                if server_beat:
                    self._emit(MsgEvent(SERVER_HEARTBEAT, server_beat))
                if cluster_beat:
                    self._emit(MsgEvent(HEARTBEAT, cluster_beat))

                # Doubles as the sleep between rounds and the stop signal.
                self._complete.wait(HEARTBEAT_PERIOD)
        except:
            # Log whatever ended the loop, then let it propagate unchanged.
            log.error(traceback.format_exc())
            raise
class MultiChannelSocket(MultiSocketHandler):
    """
    Use this class to implement virtual channels over web socket.

    To use it, inherit class from this and override init_channel function,
    where you can register all channel handlers by register_channel function

    Example:

    class MyWebSocket(MultiChannelWS):

        def init_channels(self):
            self.register_channel(0, NullChannelHandler)
            self.register_channel(1, FirstChannelHandler)
            ...

    NOTE(review): the text above says ``init_channel`` while the example
    defines ``init_channels`` and subclasses ``MultiChannelWS``; neither name
    is defined in this class -- confirm which spelling is current.
    """

    def __init__(self, request, transport):
        """Set up an empty session, handler registry and close flag."""
        super(MultiChannelSocket, self).__init__(request, transport)
        self.session = WSSession()
        # Maps channel id -> instantiated channel handler.
        self.channel_handlers = {}
        self.permissions = None
        # Looked up by channel id in open(); expected to be a mapping by then.
        self.allowed_channels = None
        self.access_token = None
        # Set when an empty receive signals that the socket is gone.
        self.close_event = Event()

    def clear_test_data(self):
        """Remove every truthy TEST_DATA_KEYS entry from channel_history."""
        for key in TEST_DATA_KEYS:
            if self.channel_history.get(key):
                del self.channel_history[key]

    def write_test_data(self, test_data):
        """Copy every TEST_DATA_KEYS entry of ``test_data`` into channel_history.

        Raises KeyError if ``test_data`` lacks one of the keys.
        """
        for key in TEST_DATA_KEYS:
            self.channel_history[key] = test_data[key]

    # noinspection PyUnusedLocal
    def __call__(self, env, start_response):
        """Serve one web socket: receive messages in a loop and dispatch them.

        Messages without a channel but with a ``pkg`` are treated as global
        actions and dispatched to the method named by ``pkg['action']``;
        all other messages go through the ``message`` callback.  The
        ``close`` callback runs once the loop ends.
        """
        websocket = env.get('wsgi.websocket')
        if not websocket:
            # NOTE(review): execution continues after bad_request() unless
            # that helper raises -- confirm.
            self.bad_request()
        self.ws = websocket
        # Endless event loop
        while 1:
            try:
                data = self.ws.receive()
                self.clear_test_data()
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)
                break
            except Exception:
                f = Formatter()
                traceback = f.formatException(sys.exc_info())
                log.error('Servlet fault: \n%s' % traceback,
                          extra=self.channel_history)
                break
            if data:
                jd = json.loads(data)
                # Move optional test data out of the package and into the
                # history that is attached to log records.
                if jd.get('pkg') \
                        and jd['pkg'].get('data') \
                        and isinstance(jd['pkg']['data'], dict)\
                        and jd['pkg']['data'].get('testData'):
                    self.write_test_data(jd['pkg']['data']['testData'])
                    del jd['pkg']['data']['testData']
                self.channel_history['messages'].append(jd)
                if hasattr(self.session, 'sess') and self.session.sess:
                    self.channel_history['session_id'] = self.session.sess.id
                    self.channel_history['user_id'] = self.session.sess.user_id
                if not jd.get('channel') and jd.get('pkg'):
                    # Global (channel-less) action: call the method named by
                    # the client.
                    act = jd['pkg'].get('action')
                    # NOTE(review): these asserts guard client input and are
                    # stripped under ``python -O``; ``act`` may also be None.
                    assert not act.startswith('_'), "security violation"
                    try:
                        handler = getattr(self, act)
                    except WebSocketError as e:
                        if is_ws_error_abnormal(e):
                            f = Formatter()
                            traceback = f.formatException(sys.exc_info())
                            log.error('Global channel action error: \n%s' % traceback,
                                      extra=self.channel_history)
                        break
                    # Only methods wrapped by the ``action`` decorator may be
                    # called from outside.
                    assert handler.__name__ == "action_wrapper", \
                        "%s is not allowed to be executed externally." % act
                    handler(jd['pkg']['data'])
                    continue
                if self.check_permissions \
                        and not self.validate_send(jd.get('channel')):
                    # Sending is not permitted: echo the message back with a
                    # 403 result instead of dispatching it.
                    jd['result'] = 403
                    if not self.ws.closed:
                        try:
                            self.ws.send(json.dumps(jd))
                        except WebSocketError as e:
                            if is_ws_error_abnormal(e):
                                log.error('WebSocket fault: %s' % e.message,
                                          extra=self.channel_history)
                    continue
                else:
                    self.run_callback('message', ApiSocketMessage(data))
            else:
                log.debug('Web Socket is disconnected')
                self.close_event.set()
            if self.close_event.is_set():
                break
        self.run_callback('close')

    @action
    def open(self, data):
        """Global action: open the channel named by ``data['channel']``.

        Requires ``token`` and ``channel`` in ``data``.  On success the
        handler is registered and an ``open`` package with result 200 is
        sent; otherwise a bad-request, 404 or 403 response is produced.
        """
        self.propagate_greenlet_data(data)
        if not data.get('token'):
            self.bad_request(message='No access token, exit')
            return
        if not data.get('channel'):
            self.bad_request(message='No channel name')
            return
        self.access_token = data.get('token')
        self.reopen = data.get('reopen', False)
        self.pre_open()
        handler = self.allowed_channels.get(data.get('channel'))
        if not handler:
            return self.channel_404(data['channel'])
        if self.check_permissions \
                and not self.validate_open(data.get('channel')):
            return self.send_error_code(403, 'open', data.get('channel'))
        if not self.is_auth:
            return self.send_error_code(403, 'open', data.get('channel'))
        # From here on ``handler`` is the handler instance, not the class.
        handler = self.register_channel(data.get('channel'), handler)
        self.run_callback('open')
        pkg = {
            'action': 'open',
            'data': {
                'closable': handler.closable,
                'result': 200
            },
        }
        package_to_send = {
            'channel': data.get('channel'),
            'pkg': pkg,
            'session_params': self.session.params
        }
        raw_data = json.dumps(package_to_send, default=datahandler)
        self.after_open()
        try:
            self.ws.send(raw_data)
        except WebSocketError as e:
            if is_ws_error_abnormal(e):
                log.error('WebSocket fault: %s' % e.message,
                          extra=self.channel_history)

    @action
    def close(self, data):
        """Global action: close the channel named by ``data['channel']``.

        Replies with result 501 when the handler is not closable, otherwise
        calls the handler's ``onclose``, drops it and replies with 200.
        Raises Exception when no channel name is given.
        """
        if not data.get('channel'):
            raise Exception('No channel name, exit')
        handler = self.channel_handlers.get(data['channel'])
        if not handler:
            return self.channel_404(data['channel'])
        if self.check_permissions and not self.validate_close(data['channel']):
            # NOTE(review): the action reported here is 'open' although this
            # is the close path -- possibly a copy/paste slip; confirm.
            return self.send_error_code(403, 'open', data['channel'])
        if not handler.closable and not self.ws.closed:
            try:
                self.ws.send(
                    json.dumps({
                        'pkg': {
                            'action': 'close',
                            'data': {
                                'channel': data['channel'],
                                'result': 501
                            },
                        },
                    }))
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)
            return
        handler.onclose()
        del self.channel_handlers[data['channel']]
        if not self.ws.closed:
            try:
                self.ws.send(
                    json.dumps({
                        'pkg': {
                            'action': 'close',
                            'data': {
                                'channel': data['channel'],
                                'result': 200
                            },
                        },
                    }))
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)

    def onopen(self):
        """Forward the open event to every registered channel handler."""
        for channel_handler in self.channel_handlers.values():
            channel_handler.onopen()

    def onclose(self):
        """Forward the close event to every registered channel handler."""
        for channel_handler in self.channel_handlers.values():
            channel_handler.onclose()

    def onmessage(self, msg):
        """Route ``msg`` to the handler of the channel named in ``msg.data``.

        Raises MultiChannelWSError when the message carries no channel.
        When RIEMANN_USE is set, the handler's run time is also pushed to
        RIEMANN_QUEUE under ``ws.<channel>.<action>``.
        """
        channel = msg.data.get('channel')
        if channel is None:
            raise MultiChannelWSError('No such channel ID in request')
        channel_handler = self.channel_handlers.get(channel)
        if not channel_handler:
            return self.channel_404(channel)
        if RIEMANN_USE:
            start = time.time()
            # The action name is read from either 'action' or
            # 'Action' -> 'name'; anything else is reported as 'unknown'.
            act = 'unknown'
            if 'action' in msg.data['pkg']:
                act = msg.data['pkg']['action']
            elif 'Action' in msg.data['pkg']:
                if 'name' in msg.data['pkg']['Action']:
                    act = msg.data['pkg']['Action']['name']
            channel_handler.onmessage(msg.data)
            RIEMANN_QUEUE.put(
                ("ws.%s.%s" % (channel, act), time.time() - start))
        else:
            channel_handler.onmessage(msg.data)

    def validate(self, permission_name):
        """Return whether ``permission_name`` is granted.

        Always False when no permissions object is set.
        """
        if self.permissions:
            return bool(self.permissions.get_perm(permission_name))
        return False

    def validate_open(self, channel):
        """Check the ``<channel>.ws.open`` permission."""
        return self.validate('%s.ws.open' % channel)

    def validate_close(self, channel):
        """Check the ``<channel>.ws.close`` permission."""
        return self.validate('%s.ws.close' % channel)

    def validate_send(self, channel):
        """Check the ``<channel>.ws.send`` permission."""
        return self.validate('%s.ws.send' % channel)

    def register_channel(self, channel, channel_handler_class):
        """Register a new channel under the id ``channel``.

        Instantiates ``channel_handler_class`` with this socket's request,
        web socket, session, permissions and channel history, stores the
        instance in ``channel_handlers`` and returns it.
        """
        channel_handler = channel_handler_class(self.request, channel,
                                                self.ws, self.session,
                                                self.permissions,
                                                self.channel_history)
        self.channel_handlers[channel] = channel_handler
        return channel_handler
class GPing:
    """
    Send ICMP echo requests and listen for the replies using gevent.

    Instantiating the class opens a raw ICMP socket and spawns two
    greenlets: one receiving replies and one expiring outstanding pings.
    Call :meth:`send` to issue pings; the callback passed to it is invoked
    with the ping's bookkeeping dict on success and on failure.
    """

    def __init__(self, timeout=2, max_outstanding=100):
        """
        :timeout - amount of time an ICMP echo request can be outstanding
        :max_outstanding - maximum number of outstanding ICMP echo requests
            without responses (limits traffic)

        Raises socket.error when the raw socket cannot be created; the
        "operation not permitted" case gets an explanatory message.
        """
        self.timeout = timeout
        self.max_outstanding = max_outstanding
        # id we will increment with each ping
        self.id = 0
        # outstanding pings, keyed by packet id
        self.pings = {}
        # bookkeeping dicts of pings that failed
        self.failures = []
        # event to fire when we want to shut down
        self.die_event = Event()

        # setup socket
        icmp = socket.getprotobyname("icmp")
        try:
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
        except socket.error as e:
            if e.errno == 1:
                # Operation not permitted
                message = str(e) + (
                    " - Note that ICMP messages can only be sent from processes"
                    " running as root."
                )
                raise socket.error(message)
            raise  # raise the original error

        self.receive_glet = gevent.spawn(self.__receive__)
        self.processto_glet = gevent.spawn(self.__process_timeouts__)

    def die(self):
        """
        try to shut everything down gracefully
        """
        print("shutting down")
        self.die_event.set()
        # NOTE(review): gevent's cancel_wait is normally given the watcher
        # to cancel; calling it without arguments looks suspicious -- confirm.
        socket.cancel_wait()
        gevent.joinall([self.receive_glet, self.processto_glet])

    def join(self):
        """
        yield to other greenlets until self.pings is empty
        """
        while self.pings:
            gevent.sleep()

    def send(self, dest_addr, callback, idx, current_data, data, datapsize=64):
        """
        Send ICMP echo requests.

        :dest_addr - where to send it
        :callback - what to call when the ping succeeds or fails
        :idx - stored under 'idx' for the callback
        :current_data - sequence; item 1 is the number of packets to send
            and item 4 is stored as 'application_id'
        :data - stored under 'data_to_write_to' for the callback
        :datapsize - total ICMP packet size in bytes, 8-byte header included
        """
        number_of_packages = current_data[1]

        # make sure we dont have too many outstanding requests
        while len(self.pings) >= self.max_outstanding:
            gevent.sleep()

        # figure out our id, then increment it, wrapping at the USHORT max
        packet_id = self.id
        self.id = (self.id + 1) % 2 ** 16

        # make a spot for this ping in self.pings
        ping = {
            'sent': False,
            'success': False,
            'error': False,
            'dest_addr': dest_addr,
            'dest_ip': None,
            'callback': callback,
            'idx': idx,
            'current_data': current_data,
            'data_to_write_to': data,
            'dtime': time.time(),
            'packages_received': 0,
        }
        self.pings[packet_id] = ping

        # Resolve hostname; on failure the timeout greenlet reports the error
        try:
            dest_ip = socket.gethostbyname(dest_addr)
            ping['dest_ip'] = dest_ip
        except socket.gaierror as ex:
            ping['error'] = True
            ping['message'] = str(ex)
            return

        # Remove header size from packet size
        psize = datapsize - 8

        # Header is type (8), code (8), checksum (16), id (16), sequence (16)
        # Make a dummy header with a 0 checksum.
        header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, 0, packet_id, 1)
        # Payload: the send timestamp followed by filler up to psize bytes.
        filler = (psize - struct.calcsize("d")) * "Q"
        payload = struct.pack("d", time.time()) + bytes(filler, "utf-8")

        # Calculate the checksum on the data and the dummy header, then build
        # the real header with it.
        my_checksum = checksum(header + payload)
        header = struct.pack(
            "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), packet_id, 1
        )
        packet = header + payload

        ping['data'] = payload
        ping['application_id'] = current_data[4]
        # note the send_time for checking for timeouts
        ping['send_time'] = time.time()

        # send the packet
        for _ in range(number_of_packages):
            self.socket.sendto(packet, (dest_ip, 1))  # Don't know about the 1

        # mark the packet as sent
        ping['sent'] = True

    def __process_timeouts__(self):
        """
        check to see if any of our pings have timed out
        """
        while not self.die_event.is_set():
            for i in self.pings:
                ping = self.pings[i]

                # Detect timeout
                if ping['sent'] and time.time() - ping['send_time'] > self.timeout:
                    ping['error'] = True
                    ping['message'] = 'Timeout after {} seconds'.format(self.timeout)

                # Handle all failures
                if ping['error']:
                    ping['callback'](ping)
                    self.failures.append(ping)
                    del self.pings[i]
                    # The dict changed size, so leave this pass immediately.
                    break
            gevent.sleep()

    def __receive__(self):
        """
        receive response packets
        """
        while 1:
            # wait till we can recv
            try:
                socket.wait_read(self.socket.fileno())
            except socket.error as e:
                if e.errno == socket.EBADF:
                    print("interrupting wait_read")
                    return
                # reraise original exceptions
                print("re-throwing socket exception on wait_read()")
                raise

            time_received = time.time()
            received_packet, addr = self.socket.recvfrom(64)

            # The ICMP header follows the first 20 bytes of the packet.
            icmp_header = received_packet[20:28]
            _type, _code, _checksum, packet_id, _sequence = struct.unpack(
                "bbHHh", icmp_header
            )

            if packet_id not in self.pings:
                continue

            ping = self.pings[packet_id]
            stamp_size = struct.calcsize("d")
            time_sent = struct.unpack("d", received_packet[28:28 + stamp_size])[0]

            ping['packages_received'] = ping['packages_received'] + 1
            # Success is reported once every packet sent has been answered.
            if ping['packages_received'] == ping['current_data'][1]:
                ping['delay'] = time_received - time_sent
                ping['success'] = True
                ping['callback'](ping)
                del self.pings[packet_id]

    def print_failures(self):
        """
        print one line per recorded failure: destination, then message
        """
        template = '{hostname:45}{message}'
        for failure in self.failures:
            message = template.format(hostname=failure['dest_addr'],
                                      message=failure.get('message', 'unknown error'))
            print(message)
class TreeHolderCleaner(object):
    """Track tree holder usage in redis and release holders unused for long.

    Usage is recorded in the sorted set ``REDIS_KEY`` with the time of the
    last ``track`` call as score.  Cleaning only happens while both feature
    switches are on and the configured resource condition holds.
    """

    def __init__(self, tree_hub):
        seconds_per_day = 60 * 60 * 24
        self._tree_hub = tree_hub
        # The setting is multiplied out to seconds here.
        self._old_offset = seconds_per_day * settings.TREE_HOLDER_CLEANER_OLD_OFFSET
        self._period = settings.TREE_HOLDER_CLEANER_PERIOD
        self._stopped = Event()

    def track(self, application_name, type_name):
        """Record that the given holder was used just now (best effort)."""
        if not switch.is_switched_on(SWITCH_ENABLE_TREE_HOLDER_CLEANER_TRACK):
            return

        member = '{}:{}'.format(application_name, type_name)
        now = time.time()
        try:
            redis_client.zadd(REDIS_KEY, **{member: now})
        except Exception as e:
            logger.warning('tree holder cleaner track item failed: %s', e)

    def clean(self):
        """Run one cleaning pass if the switches and the condition allow it."""
        if not switch.is_switched_on(SWITCH_ENABLE_TREE_HOLDER_CLEANER_TRACK):
            return
        if not switch.is_switched_on(SWITCH_ENABLE_TREE_HOLDER_CLEANER_CLEAN, False):
            return
        if self._is_time_to_clean():
            self._clean()

    def spawn_cleaning_thread(self):
        """Start the periodic cleaning loop in a greenlet."""
        gevent.spawn(self._worker)

    def _worker(self):
        """Call clean() every ``_period`` seconds until stopped."""
        while not self._stopped.is_set():
            self.clean()
            gevent.sleep(self._period)

    def _clean(self):
        """Release holders last tracked more than ``_old_offset`` seconds ago."""
        threshold = time.time() - self._old_offset
        try:
            stale_keys = redis_client.zrangebyscore(REDIS_KEY, 0, threshold)
        except Exception as e:
            logger.warning('get tree holder cleaner data failed: %s', e)
            return

        for key in stale_keys:
            application_name, type_name = key.split(':')
            released = self._tree_hub.release_tree_holder(
                application_name, type_name)
            if released is None:
                continue
            logger.info('release unused tree holder: %s %s',
                        application_name, type_name)
            tags = {
                'application_name': application_name,
                'appid': application_name,
                'type_name': type_name,
            }
            monitor_client.increment('tree_holder.release_unused', tags=tags)

        self._clean_old_redis_data()

    def _clean_old_redis_data(self):
        """Drop entries older than three times ``_old_offset`` (best effort)."""
        threshold = time.time() - self._old_offset * 3
        try:
            redis_client.zremrangebyscore(REDIS_KEY, 0, threshold)
        except Exception as e:
            logger.warning('clean tree holder cleaner old data failed: %s', e)

    def _is_time_to_clean(self):
        """Evaluate the configured condition against current CPU/memory use.

        Returns False when no condition is configured or it cannot be
        evaluated.
        """
        template = settings.TREE_HOLDER_CLEANER_CONDITION
        if not template:
            return False

        cpu = self._get_cpu_percent()
        memory = self._get_virtual_memory_percent()

        # e.g. 'cpu < 50 and memory > 90'
        condition = template.replace('cpu', str(cpu))
        condition = condition.replace('memory', str(memory))
        try:
            # NOTE(review): eval() runs the settings-supplied expression as
            # Python code; the setting must only come from a trusted source.
            return eval(condition, {}, {})
        except BaseException as e:
            logger.error('invalid tree holder cleaner condition: %r %s',
                         condition, e)
            capture_exception('invalid tree holder cleaner condition')
            return False

    def _get_cpu_percent(self):
        """Return the CPU utilisation reported by psutil."""
        return psutil.cpu_percent()

    def _get_virtual_memory_percent(self):
        """Return the virtual memory utilisation reported by psutil."""
        return psutil.virtual_memory().percent
class RaidenService: """ A Raiden node. """ # pylint: disable=too-many-instance-attributes,too-many-public-methods def __init__(self, chain, default_registry, private_key_bin, transport, discovery, config): if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32: raise ValueError('invalid private_key') invalid_timeout = ( config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX) if invalid_timeout: raise ValueError('settle_timeout must be in range [{}, {}]'.format( NETTINGCHANNEL_SETTLE_TIMEOUT_MIN, NETTINGCHANNEL_SETTLE_TIMEOUT_MAX)) self.token_to_channelgraph = dict() self.tokens_to_connectionmanagers = dict() self.manager_to_token = dict() self.swapkey_to_tokenswap = dict() self.swapkey_to_greenlettask = dict() self.identifier_to_statemanagers = defaultdict(list) self.identifier_to_results = defaultdict(list) # This is a map from a hashlock to a list of channels, the same # hashlock can be used in more than one token (for tokenswaps), a # channel should be removed from this list only when the lock is # released/withdrawn but not when the secret is registered. 
self.token_to_hashlock_to_channels = defaultdict( lambda: defaultdict(list)) self.chain = chain self.default_registry = default_registry self.config = config self.privkey = private_key_bin self.address = privatekey_to_address(private_key_bin) endpoint_registration_event = gevent.spawn( discovery.register, self.address, config['external_ip'], config['external_port'], ) endpoint_registration_event.link_exception( endpoint_registry_exception_handler) self.private_key = PrivateKey(private_key_bin) self.pubkey = self.private_key.public_key.format(compressed=False) self.protocol = RaidenProtocol( transport, discovery, self, config['protocol']['retry_interval'], config['protocol']['retries_before_backoff'], config['protocol']['nat_keepalive_retries'], config['protocol']['nat_keepalive_timeout'], config['protocol']['nat_invitation_timeout'], ) # TODO: remove this cyclic dependency transport.protocol = self.protocol self.message_handler = RaidenMessageHandler(self) self.state_machine_event_handler = StateMachineEventHandler(self) self.blockchain_events = BlockchainEvents() self.greenlet_task_dispatcher = GreenletTasksDispatcher() self.on_message = self.message_handler.on_message self.alarm = AlarmTask(chain) self.shutdown_timeout = config['shutdown_timeout'] self._block_number = None self.stop_event = Event() self.start_event = Event() self.chain.client.inject_stop_event(self.stop_event) self.transaction_log = StateChangeLog( storage_instance=StateChangeLogSQLiteBackend( database_path=config['database_path'])) if config['database_path'] != ':memory:': self.database_dir = os.path.dirname(config['database_path']) self.lock_file = os.path.join(self.database_dir, '.lock') self.snapshot_dir = os.path.join(self.database_dir, 'snapshots') self.serialization_file = os.path.join(self.snapshot_dir, 'data.pickle') if not os.path.exists(self.snapshot_dir): os.makedirs(self.snapshot_dir) # Prevent concurrent acces to the same db self.db_lock = filelock.FileLock(self.lock_file) else: 
self.database_dir = None self.lock_file = None self.snapshot_dir = None self.serialization_file = None self.db_lock = None # If the endpoint registration fails the node will quit, this must # finish before starting the protocol endpoint_registration_event.join() self.start() def start(self): """ Start the node. """ # XXX Should this really be here? Or will start() never be called again # after stop() in the lifetime of Raiden apart from the tests? This is # at least at the moment prompted by tests/integration/test_transer.py if self.stop_event and self.stop_event.is_set(): self.stop_event.clear() self.alarm.start() # Prime the block number cache and set the callbacks self._block_number = self.alarm.last_block_number self.alarm.register_callback(self.poll_blockchain_events) self.alarm.register_callback(self.set_block_number) # Registry registration must start *after* the alarm task, this avoid # corner cases were the registry is queried in block A, a new block B # is mined, and the alarm starts polling at block C. self.register_registry(self.default_registry.address) # Restore from snapshot must come after registering the registry as we # need to know the registered tokens to populate `token_to_channelgraph` if self.database_dir is not None: self.db_lock.acquire(timeout=0) assert self.db_lock.is_locked self.restore_from_snapshots() # Start the protocol after the registry is queried to avoid warning # about unknown channels. self.protocol.start() # Health check needs the protocol layer self.start_neighbours_healthcheck() self.start_event.set() def start_neighbours_healthcheck(self): for graph in self.token_to_channelgraph.values(): for neighbour in graph.get_neighbours(): if neighbour != ConnectionManager.BOOTSTRAP_ADDR: self.start_health_check_for(neighbour) def stop(self): """ Stop the node. 
""" # Needs to come before any greenlets joining self.stop_event.set() self.protocol.stop_and_wait() self.alarm.stop_async() wait_for = [self.alarm] wait_for.extend(self.protocol.greenlets) wait_for.extend(self.greenlet_task_dispatcher.stop()) # We need a timeout to prevent an endless loop from trying to # contact the disconnected client gevent.wait(wait_for, timeout=self.shutdown_timeout) # Filters must be uninstalled after the alarm task has stopped. Since # the events are polled by an alarm task callback, if the filters are # uninstalled before the alarm task is fully stopped the callback # `poll_blockchain_events` will fail. # # We need a timeout to prevent an endless loop from trying to # contact the disconnected client try: with gevent.Timeout(self.shutdown_timeout): self.blockchain_events.uninstall_all_event_listeners() except (gevent.timeout.Timeout, RaidenShuttingDown): pass # save the state after all tasks are done if self.serialization_file: save_snapshot(self.serialization_file, self) if self.db_lock is not None: self.db_lock.release() def __repr__(self): return '<{} {}>'.format(self.__class__.__name__, pex(self.address)) def restore_from_snapshots(self): data = load_snapshot(self.serialization_file) data_exists_and_is_recent = (data is not None and 'registry_address' in data and data['registry_address'] == ROPSTEN_REGISTRY_ADDRESS) if data_exists_and_is_recent: first_channel = True for channel in data['channels']: try: self.restore_channel(channel) first_channel = False except AddressWithoutCode as e: log.warn( 'Channel without code while restoring. Must have been ' 'already settled while we were offline.', error=str(e)) except AttributeError as e: if first_channel: log.warn( 'AttributeError during channel restoring. If code has changed' ' then this is fine. 
If not then please report a bug.', error=str(e)) break else: raise for restored_queue in data['queues']: self.restore_queue(restored_queue) self.protocol.receivedhashes_to_acks = data[ 'receivedhashes_to_acks'] self.protocol.nodeaddresses_to_nonces = data[ 'nodeaddresses_to_nonces'] self.restore_transfer_states(data['transfers']) def set_block_number(self, block_number): state_change = Block(block_number) self.state_machine_event_handler.log_and_dispatch_to_all_tasks( state_change) for graph in self.token_to_channelgraph.values(): for channel in graph.address_to_channel.values(): channel.state_transition(state_change) # To avoid races, only update the internal cache after all the state # tasks have been updated. self._block_number = block_number def set_node_network_state(self, node_address, network_state): for graph in self.token_to_channelgraph.values(): channel = graph.partneraddress_to_channel.get(node_address) if channel: channel.network_state = network_state def start_health_check_for(self, node_address): self.protocol.start_health_check(node_address) def get_block_number(self): return self._block_number def poll_blockchain_events(self, current_block=None): # pylint: disable=unused-argument on_statechange = self.state_machine_event_handler.on_blockchain_statechange for state_change in self.blockchain_events.poll_state_change( self._block_number): on_statechange(state_change) def find_channel_by_address(self, netting_channel_address_bin): for graph in self.token_to_channelgraph.values(): channel = graph.address_to_channel.get(netting_channel_address_bin) if channel is not None: return channel raise ValueError('unknown channel {}'.format( encode_hex(netting_channel_address_bin))) def sign(self, message): """ Sign message inplace. 
""" if not isinstance(message, SignedMessage): raise ValueError('{} is not signable.'.format(repr(message))) message.sign(self.private_key, self.address) def send_async(self, recipient, message): """ Send `message` to `recipient` using the raiden protocol. The protocol will take care of resending the message on a given interval until an Acknowledgment is received or a given number of tries. """ if not isaddress(recipient): raise ValueError('recipient is not a valid address.') if recipient == self.address: raise ValueError('programming error, sending message to itself') return self.protocol.send_async(recipient, message) def send_and_wait(self, recipient, message, timeout): """ Send `message` to `recipient` and wait for the response or `timeout`. Args: recipient (address): The address of the node that will receive the message. message: The transfer message. timeout (float): How long should we wait for a response from `recipient`. Returns: None: If the wait timed out object: The result from the event """ if not isaddress(recipient): raise ValueError('recipient is not a valid address.') self.protocol.send_and_wait(recipient, message, timeout) def register_secret(self, secret: bytes): """ Register the secret with any channel that has a hashlock on it. This must search through all channels registered for a given hashlock and ignoring the tokens. Useful for refund transfer, split transfer, and token swaps. Raises: TypeError: If secret is unicode data. """ if not isinstance(secret, bytes): raise TypeError('secret must be bytes') hashlock = sha3(secret) revealsecret_message = RevealSecret(secret) self.sign(revealsecret_message) for hash_channel in self.token_to_hashlock_to_channels.values(): for channel in hash_channel[hashlock]: channel.register_secret(secret) # The protocol ignores duplicated messages. 
self.send_async( channel.partner_state.address, revealsecret_message, ) def register_channel_for_hashlock(self, token_address, channel, hashlock): channels_registered = self.token_to_hashlock_to_channels[ token_address][hashlock] if channel not in channels_registered: channels_registered.append(channel) def handle_secret( # pylint: disable=too-many-arguments self, identifier, token_address, secret, partner_secret_message, hashlock): """ Unlock/Witdraws locks, register the secret, and send Secret messages as necessary. This function will: - Unlock the locks created by this node and send a Secret message to the corresponding partner so that she can withdraw the token. - Withdraw the lock from sender. - Register the secret for the locks received and reveal the secret to the senders Note: The channel needs to be registered with `raiden.register_channel_for_hashlock`. """ # handling the secret needs to: # - unlock the token for all `forward_channel` (the current one # and the ones that failed with a refund) # - send a message to each of the forward nodes allowing them # to withdraw the token # - register the secret for the `originating_channel` so that a # proof can be made, if necessary # - reveal the secret to the `sender` node (otherwise we # cannot withdraw the token) channels_list = self.token_to_hashlock_to_channels[token_address][ hashlock] channels_to_remove = list() revealsecret_message = RevealSecret(secret) self.sign(revealsecret_message) messages_to_send = [] for channel in channels_list: # unlock a pending lock if channel.our_state.is_known(hashlock): secret = channel.create_secret(identifier, secret) self.sign(secret) channel.register_transfer( self.get_block_number(), secret, ) messages_to_send.append(( channel.partner_state.address, secret, )) channels_to_remove.append(channel) # withdraw a pending lock elif channel.partner_state.is_known(hashlock): if partner_secret_message: is_balance_proof = (partner_secret_message.sender == 
channel.partner_state.address and partner_secret_message.channel == channel.channel_address) if is_balance_proof: channel.register_transfer( self.get_block_number(), partner_secret_message, ) channels_to_remove.append(channel) else: channel.register_secret(secret) messages_to_send.append(( channel.partner_state.address, revealsecret_message, )) else: channel.register_secret(secret) messages_to_send.append(( channel.partner_state.address, revealsecret_message, )) else: log.error( 'Channel is registered for a given lock but the lock is not contained in it.' ) for channel in channels_to_remove: channels_list.remove(channel) if not channels_list: del self.token_to_hashlock_to_channels[token_address][hashlock] # send the messages last to avoid races for recipient, message in messages_to_send: self.send_async( recipient, message, ) def get_channel_details(self, token_address, netting_channel): channel_details = netting_channel.detail() our_state = ChannelEndState( channel_details['our_address'], channel_details['our_balance'], None, EMPTY_MERKLE_TREE, ) partner_state = ChannelEndState( channel_details['partner_address'], channel_details['partner_balance'], None, EMPTY_MERKLE_TREE, ) def register_channel_for_hashlock(channel, hashlock): self.register_channel_for_hashlock( token_address, channel, hashlock, ) channel_address = netting_channel.address reveal_timeout = self.config['reveal_timeout'] settle_timeout = channel_details['settle_timeout'] external_state = ChannelExternalState( register_channel_for_hashlock, netting_channel, ) channel_detail = ChannelDetails( channel_address, our_state, partner_state, external_state, reveal_timeout, settle_timeout, ) return channel_detail def restore_channel(self, serialized_channel): token_address = serialized_channel.token_address netting_channel = self.chain.netting_channel( serialized_channel.channel_address, ) # restoring balances from the blockchain since the serialized # value could be falling behind. 
channel_details = netting_channel.detail() # our_address is checked by detail assert channel_details[ 'partner_address'] == serialized_channel.partner_address if serialized_channel.our_leaves: our_layers = compute_layers(serialized_channel.our_leaves) our_tree = MerkleTreeState(our_layers) else: our_tree = EMPTY_MERKLE_TREE our_state = ChannelEndState( channel_details['our_address'], channel_details['our_balance'], serialized_channel.our_balance_proof, our_tree, ) if serialized_channel.partner_leaves: partner_layers = compute_layers(serialized_channel.partner_leaves) partner_tree = MerkleTreeState(partner_layers) else: partner_tree = EMPTY_MERKLE_TREE partner_state = ChannelEndState( channel_details['partner_address'], channel_details['partner_balance'], serialized_channel.partner_balance_proof, partner_tree, ) def register_channel_for_hashlock(channel, hashlock): self.register_channel_for_hashlock( token_address, channel, hashlock, ) external_state = ChannelExternalState( register_channel_for_hashlock, netting_channel, ) details = ChannelDetails( serialized_channel.channel_address, our_state, partner_state, external_state, serialized_channel.reveal_timeout, channel_details['settle_timeout'], ) graph = self.token_to_channelgraph[token_address] graph.add_channel(details) channel = graph.address_to_channel.get( serialized_channel.channel_address, ) channel.our_state.balance_proof = serialized_channel.our_balance_proof channel.partner_state.balance_proof = serialized_channel.partner_balance_proof def restore_queue(self, serialized_queue): receiver_address = serialized_queue['receiver_address'] token_address = serialized_queue['token_address'] queue = self.protocol.get_channel_queue( receiver_address, token_address, ) for messagedata in serialized_queue['messages']: queue.put(messagedata) def restore_transfer_states(self, transfer_states): self.identifier_to_statemanagers = transfer_states def register_registry(self, registry_address): proxies = get_relevant_proxies( 
self.chain, self.address, registry_address, ) # Install the filters first to avoid missing changes, as a consequence # some events might be applied twice. self.blockchain_events.add_proxies_listeners(proxies) for manager in proxies.channel_managers: token_address = manager.token_address() manager_address = manager.address channels_detail = list() netting_channels = proxies.channelmanager_nettingchannels[ manager_address] for channel in netting_channels: detail = self.get_channel_details(token_address, channel) channels_detail.append(detail) edge_list = manager.channels_addresses() graph = ChannelGraph( self.address, manager_address, token_address, edge_list, channels_detail, ) self.manager_to_token[manager_address] = token_address self.token_to_channelgraph[token_address] = graph self.tokens_to_connectionmanagers[ token_address] = ConnectionManager(self, token_address, graph) def channel_manager_is_registered(self, manager_address): return manager_address in self.manager_to_token def register_channel_manager(self, manager_address): manager = self.default_registry.manager(manager_address) netting_channels = [ self.chain.netting_channel(channel_address) for channel_address in manager.channels_by_participant(self.address) ] # Install the filters first to avoid missing changes, as a consequence # some events might be applied twice. 
self.blockchain_events.add_channel_manager_listener(manager) for channel in netting_channels: self.blockchain_events.add_netting_channel_listener(channel) token_address = manager.token_address() edge_list = manager.channels_addresses() channels_detail = [ self.get_channel_details(token_address, channel) for channel in netting_channels ] graph = ChannelGraph( self.address, manager_address, token_address, edge_list, channels_detail, ) self.manager_to_token[manager_address] = token_address self.token_to_channelgraph[token_address] = graph self.tokens_to_connectionmanagers[token_address] = ConnectionManager( self, token_address, graph) def register_netting_channel(self, token_address, channel_address): netting_channel = self.chain.netting_channel(channel_address) self.blockchain_events.add_netting_channel_listener(netting_channel) detail = self.get_channel_details(token_address, netting_channel) graph = self.token_to_channelgraph[token_address] graph.add_channel(detail) def connection_manager_for_token(self, token_address): if not isaddress(token_address): raise InvalidAddress('token address is not valid.') if token_address in self.tokens_to_connectionmanagers.keys(): manager = self.tokens_to_connectionmanagers[token_address] else: raise InvalidAddress('token is not registered.') return manager def leave_all_token_networks_async(self): leave_results = [] for token_address in self.token_to_channelgraph.keys(): try: connection_manager = self.connection_manager_for_token( token_address) leave_results.append(connection_manager.leave_async()) except InvalidAddress: pass combined_result = AsyncResult() gevent.spawn(gevent.wait, leave_results).link(combined_result) return combined_result def close_and_settle(self): log.info('raiden will close and settle all channels now') connection_managers = [ self.connection_manager_for_token(token_address) for token_address in self.token_to_channelgraph ] def blocks_to_wait(): return max(connection_manager.min_settle_blocks for 
connection_manager in connection_managers) all_channels = list( itertools.chain.from_iterable([ connection_manager.open_channels for connection_manager in connection_managers ])) leaving_greenlet = self.leave_all_token_networks_async() # using the un-cached block number here last_block = self.chain.block_number() earliest_settlement = last_block + blocks_to_wait() # TODO: estimate and set a `timeout` parameter in seconds # based on connection_manager.min_settle_blocks and an average # blocktime from the past current_block = last_block while current_block < earliest_settlement: gevent.sleep(self.alarm.wait_time) last_block = self.chain.block_number() if last_block != current_block: current_block = last_block avg_block_time = self.chain.estimate_blocktime() wait_blocks_left = blocks_to_wait() not_settled = sum( 1 for channel in all_channels if not channel.state == CHANNEL_STATE_SETTLED) if not_settled == 0: log.debug('nothing left to settle') break log.info( 'waiting at least %s more blocks (~%s sec) for settlement' '(%s channels not yet settled)' % (wait_blocks_left, wait_blocks_left * avg_block_time, not_settled)) leaving_greenlet.wait(timeout=blocks_to_wait() * self.chain.estimate_blocktime() * 1.5) if any(channel.state != CHANNEL_STATE_SETTLED for channel in all_channels): log.error('Some channels were not settled!', channels=[ pex(channel.channel_address) for channel in all_channels if channel.state != CHANNEL_STATE_SETTLED ]) def mediated_transfer_async(self, token_address, amount, target, identifier): """ Transfer `amount` between this node and `target`. This method will start an asyncronous transfer, the transfer might fail or succeed depending on a couple of factors: - Existence of a path that can be used, through the usage of direct or intermediary channels. - Network speed, making the transfer sufficiently fast so it doesn't expire. 
""" async_result = self.start_mediated_transfer( token_address, amount, identifier, target, ) return async_result def direct_transfer_async(self, token_address, amount, target, identifier): """ Do a direct tranfer with target. Direct transfers are non cancellable and non expirable, since these transfers are a signed balance proof with the transferred amount incremented. Because the transfer is non cancellable, there is a level of trust with the target. After the message is sent the target is effectively paid and then it is not possible to revert. The async result will be set to False iff there is no direct channel with the target or the payer does not have balance to complete the transfer, otherwise because the transfer is non expirable the async result *will never be set to False* and if the message is sent it will hang until the target node acknowledge the message. This transfer should be used as an optimization, since only two packets are required to complete the transfer (from the payer's perspective), whereas the mediated transfer requires 6 messages. 
""" graph = self.token_to_channelgraph[token_address] direct_channel = graph.partneraddress_to_channel.get(target) direct_channel_with_capacity = (direct_channel and direct_channel.can_transfer and amount <= direct_channel.distributable) if direct_channel_with_capacity: direct_transfer = direct_channel.create_directtransfer( amount, identifier) self.sign(direct_transfer) direct_channel.register_transfer( self.get_block_number(), direct_transfer, ) direct_transfer_state_change = ActionTransferDirect( identifier, amount, token_address, direct_channel.partner_state.address, ) # TODO: add the transfer sent event state_change_id = self.transaction_log.log( direct_transfer_state_change) # TODO: This should be set once the direct transfer is acknowledged transfer_success = EventTransferSentSuccess( identifier, amount, target, ) self.transaction_log.log_events(state_change_id, [transfer_success], self.get_block_number()) async_result = self.protocol.send_async( direct_channel.partner_state.address, direct_transfer, ) else: async_result = AsyncResult() async_result.set(False) return async_result def start_mediated_transfer(self, token_address, amount, identifier, target): # pylint: disable=too-many-locals async_result = AsyncResult() graph = self.token_to_channelgraph[token_address] available_routes = get_best_routes( graph, self.protocol.nodeaddresses_networkstatuses, self.address, target, amount, None, ) if not available_routes: async_result.set(False) return async_result self.protocol.start_health_check(target) if identifier is None: identifier = create_default_identifier() route_state = RoutesState(available_routes) our_address = self.address block_number = self.get_block_number() transfer_state = LockedTransferState( identifier=identifier, amount=amount, token=token_address, initiator=self.address, target=target, expiration=None, hashlock=None, secret=None, ) # Issue #489 # # Raiden may fail after a state change using the random generator is # handled but right before 
the snapshot is taken. If that happens on # the next initialization when raiden is recovering and applying the # pending state changes a new secret will be generated and the # resulting events won't match, this breaks the architecture model, # since it's assumed the re-execution of a state change will always # produce the same events. # # TODO: Removed the secret generator from the InitiatorState and add # the secret into all state changes that require one, this way the # secret will be serialized with the state change and the recovery will # use the same /random/ secret. random_generator = RandomSecretGenerator() init_initiator = ActionInitInitiator( our_address=our_address, transfer=transfer_state, routes=route_state, random_generator=random_generator, block_number=block_number, ) state_manager = StateManager(initiator.state_transition, None) self.state_machine_event_handler.log_and_dispatch( state_manager, init_initiator) # TODO: implement the network timeout raiden.config['msg_timeout'] and # cancel the current transfer if it hapens (issue #374) self.identifier_to_statemanagers[identifier].append(state_manager) self.identifier_to_results[identifier].append(async_result) return async_result def mediate_mediated_transfer(self, message): # pylint: disable=too-many-locals identifier = message.identifier amount = message.lock.amount target = message.target token = message.token graph = self.token_to_channelgraph[token] available_routes = get_best_routes( graph, self.protocol.nodeaddresses_networkstatuses, self.address, target, amount, message.sender, ) from_channel = graph.partneraddress_to_channel[message.sender] from_route = channel_to_routestate(from_channel, message.sender) our_address = self.address from_transfer = lockedtransfer_from_message(message) route_state = RoutesState(available_routes) block_number = self.get_block_number() init_mediator = ActionInitMediator( our_address, from_transfer, route_state, from_route, block_number, ) state_manager = 
StateManager(mediator.state_transition, None) self.state_machine_event_handler.log_and_dispatch( state_manager, init_mediator) self.identifier_to_statemanagers[identifier].append(state_manager) def target_mediated_transfer(self, message): graph = self.token_to_channelgraph[message.token] from_channel = graph.partneraddress_to_channel[message.sender] from_route = channel_to_routestate(from_channel, message.sender) from_transfer = lockedtransfer_from_message(message) our_address = self.address block_number = self.get_block_number() init_target = ActionInitTarget( our_address, from_route, from_transfer, block_number, ) state_manager = StateManager(target_task.state_transition, None) self.state_machine_event_handler.log_and_dispatch( state_manager, init_target) identifier = message.identifier self.identifier_to_statemanagers[identifier].append(state_manager)
class RaidenService:
    """ A Raiden node. """

    def __init__(
            self,
            chain: BlockChainService,
            default_registry: Registry,
            default_secret_registry: SecretRegistry,
            private_key_bin,
            transport,
            config,
            discovery=None,
    ):
        """ Validate the configuration, set up the node and start it.

        For the 'udp' transport type the endpoint registration is spawned
        early and joined before `start` is called. The constructor ends by
        calling `self.start()`.

        Raises:
            ValueError: If `private_key_bin` is not 32 bytes or if
                `config['settle_timeout']` is out of range.
        """
        if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        invalid_timeout = (
            config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or
            config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX
        )
        if invalid_timeout:
            raise ValueError('settle_timeout must be in range [{}, {}]'.format(
                NETTINGCHANNEL_SETTLE_TIMEOUT_MIN,
                NETTINGCHANNEL_SETTLE_TIMEOUT_MAX,
            ))

        self.tokens_to_connectionmanagers = dict()
        self.identifier_to_results = defaultdict(list)

        self.chain: BlockChainService = chain
        self.default_registry = default_registry
        self.default_secret_registry = default_secret_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)
        self.discovery = discovery

        if config['transport_type'] == 'udp':
            # Register the endpoint in the background, it is joined further
            # down, right before the node is started.
            endpoint_registration_event = gevent.spawn(
                discovery.register,
                self.address,
                config['external_ip'],
                config['external_port'],
            )
            endpoint_registration_event.link_exception(endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.transport = transport

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        self.stop_event = Event()
        self.start_event = Event()
        self.chain.client.inject_stop_event(self.stop_event)

        # The write-ahead log is created by `start`.
        self.wal = None

        self.database_path = config['database_path']
        if self.database_path != ':memory:':
            database_dir = os.path.dirname(config['database_path'])
            os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir
            # Prevent concurrent access to the same db
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            # In-memory database: there is no directory and no lock file.
            self.database_path = ':memory:'
            self.database_dir = None
            self.lock_file = None
            self.serialization_file = None
            self.db_lock = None

        if config['transport_type'] == 'udp':
            # If the endpoint registration fails the node will quit, this must
            # finish before starting the transport
            endpoint_registration_event.join()

        # Serializes the polling of blockchain events, see
        # `_callback_new_block`.
        self.event_poll_lock = gevent.lock.Semaphore()

        self.start()

    def start(self):
        """ Start the node.

        Restores the state from the latest snapshot (or initializes it on
        the first run), installs and queries the blockchain filters, then
        starts the alarm task, the transport and the neighbours health
        checks, and finally applies the events returned by the restore. The
        order of these steps matters, see the comments below.
        """
        if self.stop_event and self.stop_event.is_set():
            self.stop_event.clear()

        if self.database_dir is not None:
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # The database may be :memory:
        storage = sqlite.SQLiteStorage(self.database_path, serialize.PickleSerializer())
        self.wal, unapplied_events = wal.restore_from_latest_snapshot(
            node.state_transition,
            storage,
        )

        if self.wal.state_manager.current_state is None:
            block_number = self.chain.block_number()

            state_change = ActionInitNode(
                random.Random(),
                block_number,
            )
            self.wal.log_and_dispatch(state_change, block_number)
            payment_network = PaymentNetworkState(
                self.default_registry.address,
                [],  # empty list of token network states as it's the node's startup
            )
            state_change = ContractReceiveNewPaymentNetwork(payment_network)
            self.handle_state_change(state_change)

            # On first run Raiden needs to fetch all events for the payment
            # network, to reconstruct all token network graphs and find opened
            # channels
            last_log_block_number = 0
        else:
            # The `Block` state change is dispatched only after all the events
            # for that given block have been processed, filters can be safely
            # installed starting from this position without losing events.
            last_log_block_number = views.block_number(self.wal.state_manager.current_state)

        self.install_and_query_payment_network_filters(
            self.default_registry.address,
            last_log_block_number,
        )

        # Regarding the timing of starting the alarm task it is important to:
        # - Install the filters which will be polled by poll_blockchain_events
        #   after the state has been primed, otherwise the state changes won't
        #   have effect.
        # - Install the filters using the correct from_block value, otherwise
        #   blockchain logs can be lost.
        self.alarm.register_callback(self._callback_new_block)
        self.alarm.start()

        # Start the transport after the registry is queried to avoid warning
        # about unknown channels.
        queueids_to_queues = views.get_all_messagequeues(views.state_from_raiden(self))
        self.transport.start(self, queueids_to_queues)

        # Health check needs the transport layer
        self.start_neighbours_healthcheck()

        for event in unapplied_events:
            on_raiden_event(self, event)

        self.start_event.set()

    def start_neighbours_healthcheck(self):
        """ Start a health check for every neighbour node in the current
        state, except for `ConnectionManager.BOOTSTRAP_ADDR`.
        """
        for neighbour in views.all_neighbour_nodes(self.wal.state_manager.current_state):
            if neighbour != ConnectionManager.BOOTSTRAP_ADDR:
                self.start_health_check_for(neighbour)

    def stop(self):
        """ Stop the node.

        Sets the stop event, stops the transport and the alarm task, waits
        for them up to `shutdown_timeout`, uninstalls the blockchain event
        listeners and releases the database lock, if there is one.
        """
        # Needs to come before any greenlets joining
        self.stop_event.set()
        self.transport.stop_and_wait()
        self.alarm.stop_async()

        wait_for = [self.alarm]
        wait_for.extend(getattr(self.transport, 'greenlets', []))
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        gevent.wait(wait_for, timeout=self.shutdown_timeout)

        # Filters must be uninstalled after the alarm task has stopped. Since
        # the events are polled by an alarm task callback, if the filters are
        # uninstalled before the alarm task is fully stopped the callback
        # `poll_blockchain_events` will fail.
        #
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        try:
            with gevent.Timeout(self.shutdown_timeout):
                self.blockchain_events.uninstall_all_event_listeners()
        except (gevent.timeout.Timeout, RaidenShuttingDown):
            # Best effort: the listeners are reset below either way.
            pass

        self.blockchain_events.reset()

        if self.db_lock is not None:
            self.db_lock.release()

    def __repr__(self):
        """ Return the class name followed by the `pex` of the node address. """
        return '<{} {}>'.format(self.__class__.__name__, pex(self.address))

    def get_block_number(self):
        """ Return the block number stored in the current node state. """
        return views.block_number(self.wal.state_manager.current_state)

    def handle_state_change(self, state_change, block_number=None):
        """ Log and dispatch `state_change`, then handle the produced events.

        Args:
            state_change: The state change to log and dispatch to the WAL.
            block_number: Block number passed to the WAL. Defaults to the
                block number of the current node state.

        Returns:
            The list of events returned by the WAL, each of which was already
            passed to `on_raiden_event`.
        """
        log.debug('STATE CHANGE', node=pex(self.address), state_change=state_change)

        if block_number is None:
            block_number = self.get_block_number()

        event_list = self.wal.log_and_dispatch(state_change, block_number)

        for event in event_list:
            log.debug('EVENT', node=pex(self.address), raiden_event=event)

            on_raiden_event(self, event)

        return event_list

    def set_node_network_state(self, node_address, network_state):
        """ Log and dispatch an `ActionChangeNodeNetworkState` for the node.

        The events returned by the WAL are not processed here.
        """
        state_change = ActionChangeNodeNetworkState(node_address, network_state)
        self.wal.log_and_dispatch(state_change, self.get_block_number())

    def start_health_check_for(self, node_address):
        """ Ask the transport to start a health check for `node_address`. """
        self.transport.start_health_check(node_address)

    def _callback_new_block(self, current_block_number):
        """Called once a new block is detected by the alarm task.

        Note:
            This should be called only once per block, otherwise there will be
            duplicated `Block` state changes in the log.

            Therefore this method should be called only once a new block is
            mined with the appropriate block_number argument from the
            AlarmTask.
        """
        # Raiden relies on blockchain events to update its off-chain state,
        # therefore some APIs /used/ to forcefully poll for events.
        #
        # This was done for APIs which have on-chain side-effects, e.g.
        # opening a channel, where polling the event is required to update
        # off-chain state to providing a consistent view to the caller, e.g.
        # the channel exists after the API call returns.
        #
        # That pattern introduced a race, because the events are returned only
        # once per filter, and this method would be called concurrently by the
        # API and the AlarmTask. The following lock is necessary, to ensure the
        # expected side-effects are properly applied (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d)
        with self.event_poll_lock:
            for event in self.blockchain_events.poll_blockchain_events():
                # These state changes will be processed with a block_number
                # which is /larger/ than the NodeState's block_number.
                on_blockchain_event(self, event, current_block_number)

            # On restart the Raiden node will re-create the filters with the
            # ethereum node. These filters will have the from_block set to the
            # value of the latest Block state change. To avoid missing events
            # the Block state change is dispatched only after all of the events
            # have been processed.
            #
            # This means on some corner cases a few events may be applied
            # twice, this will happen if the node crashed and some events have
            # been processed but the Block state change has not been
            # dispatched.
            state_change = Block(current_block_number)
            self.handle_state_change(state_change, current_block_number)

    def sign(self, message):
        """ Sign message inplace.

        Raises:
            ValueError: If `message` is not a `SignedMessage`.
        """
        if not isinstance(message, SignedMessage):
            raise ValueError('{} is not signable.'.format(repr(message)))

        message.sign(self.private_key)

    def install_and_query_payment_network_filters(self, payment_network_id, from_block=0):
        """ Install the filters for the payment network and process the
        events they return.

        Args:
            payment_network_id: Passed to `get_relevant_proxies` to find the
                proxies to listen to.
            from_block: Passed to `add_proxies_listeners` together with the
                proxies.
        """
        proxies = get_relevant_proxies(
            self.chain,
            self.address,
            payment_network_id,
        )

        # Install the filters and then poll them and dispatch the events to the WAL
        with self.event_poll_lock:
            self.blockchain_events.add_proxies_listeners(proxies, from_block)
            for event in self.blockchain_events.poll_blockchain_events():
                # Each event is dispatched with its own block number.
                on_blockchain_event(self, event, event.event_data['block_number'])

    def connection_manager_for_token(self, registry_address, token_address):
        """ Return the connection manager for the token, creating and caching
        it on first use.

        Raises:
            InvalidAddress: If `token_address` is not a binary address or is
                not among the token networks known for `registry_address`.
        """
        if not is_binary_address(token_address):
            raise InvalidAddress('token address is not valid.')

        known_token_networks = views.get_token_network_addresses_for(
            self.wal.state_manager.current_state,
            registry_address,
        )

        if token_address not in known_token_networks:
            raise InvalidAddress('token is not registered.')

        manager = self.tokens_to_connectionmanagers.get(token_address)

        if manager is None:
            manager = ConnectionManager(self, registry_address, token_address)
            self.tokens_to_connectionmanagers[token_address] = manager

        return manager

    def leave_all_token_networks(self):
        """ Log and dispatch an `ActionLeaveAllNetworks` state change.

        The events returned by the WAL are not processed here.
        """
        state_change = ActionLeaveAllNetworks()
        self.wal.log_and_dispatch(state_change, self.get_block_number())

    def close_and_settle(self):
        """ Leave all token networks and, if any connection manager exists,
        wait for all channels to be settled.
        """
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        connection_managers = [
            self.tokens_to_connectionmanagers[token_address]
            for token_address in self.tokens_to_connectionmanagers
        ]

        # The list is only used to decide whether there is anything to wait
        # for.
        if connection_managers:
            waiting.wait_for_settle_all_channels(
                self,
                self.alarm.wait_time,
            )

    def mediated_transfer_async(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """ Transfer `amount` between this node and `target`.

        This method will start an asynchronous transfer, the transfer might
        fail or succeed depending on a couple of factors:

            - Existence of a path that can be used, through the usage of direct
              or intermediary channels.
            - Network speed, making the transfer sufficiently fast so it doesn't
              expire.

        Returns:
            The async result created by `start_mediated_transfer`.
        """
        async_result = self.start_mediated_transfer(
            token_network_identifier,
            amount,
            target,
            identifier,
        )

        return async_result

    def direct_transfer_async(self, token_network_identifier, amount, target, identifier):
        """ Do a direct transfer with target.

        Direct transfers are non cancellable and non expirable, since these
        transfers are a signed balance proof with the transferred amount
        incremented.

        Because the transfer is non cancellable, there is a level of trust with
        the target. After the message is sent the target is effectively paid
        and then it is not possible to revert.

        The async result will be set to False iff there is no direct channel
        with the target or the payer does not have balance to complete the
        transfer, otherwise because the transfer is non expirable the async
        result *will never be set to False* and if the message is sent it will
        hang until the target node acknowledge the message.

        This transfer should be used as an optimization, since only two packets
        are required to complete the transfer (from the payers perspective),
        whereas the mediated transfer requires 6 messages.
        """
        # NOTE(review): this method has no return statement, so callers get
        # None; the paragraph about the async result above does not match
        # the code below - confirm which one is intended.
        self.transport.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        direct_transfer = ActionTransferDirect(
            token_network_identifier,
            target,
            identifier,
            amount,
        )

        self.handle_state_change(direct_transfer)

    def start_mediated_transfer(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """ Create the initiator state change for a transfer and dispatch it.

        A default identifier is created when `identifier` is None, and a
        fresh random secret is used for the transfer.

        Returns:
            AsyncResult: Registered under `identifier` in
            `identifier_to_results`.
        """
        self.transport.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        # An identifier may only be used for a single transfer.
        assert identifier not in self.identifier_to_results

        async_result = AsyncResult()
        self.identifier_to_results[identifier].append(async_result)

        secret = random_secret()
        init_initiator_statechange = initiator_init(
            self,
            identifier,
            amount,
            secret,
            token_network_identifier,
            target,
        )

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        #
        # Dispatch the state change even if there are no routes to create the
        # wal entry.
        self.handle_state_change(init_initiator_statechange)

        return async_result

    def mediate_mediated_transfer(self, transfer: LockedTransfer):
        """ Dispatch the state change built by `mediator_init` for `transfer`. """
        init_mediator_statechange = mediator_init(self, transfer)
        self.handle_state_change(init_mediator_statechange)

    def target_mediated_transfer(self, transfer: LockedTransfer):
        """ Dispatch the state change built by `target_init` for `transfer`. """
        init_target_statechange = target_init(transfer)
        self.handle_state_change(init_target_statechange)
class _Poller: def __init__( self, polled_call, polled_call_args=(), polling_period=1000, value_changed_callback=None, error_callback=None, compare=True, ): self.polled_call_ref = saferef.safe_ref(polled_call) self.args = polled_call_args self.polling_period = polling_period self.value_changed_callback_ref = saferef.safe_ref( value_changed_callback) self.error_callback_ref = saferef.safe_ref(error_callback) self.compare = compare self.old_res = NotInitializedValue self.queue = _threading.Queue() # Queue.Queue() self.delay = 0 self.stop_event = Event() self.async_watcher = gevent.get_hub().loop.async_() def start_delayed(self, delay): self.delay = delay _threading.start_new_thread(self.run, ()) # self.start() def stop(self): self.stop_event.set() del POLLERS[self.get_id()] def is_stopped(self): return self.stop_event.is_set() def get_id(self): return id(self) def get_polling_period(self): return self.polling_period def set_polling_period(self, polling_period): # logging.info(">>>>> CHANGIG POLLING PERIOD TO %d", polling_period) self.polling_period = polling_period def restart(self, delay=0): self.stop() polled_call = self.polled_call_ref() value_changed_cb = self.value_changed_callback_ref() error_cb = self.error_callback_ref() if polled_call is not None: return poll( polled_call, self.args, self.polling_period, value_changed_cb, error_cb, self.compare, delay, start_value=self.old_res, ) def new_event(self): while True: try: res = Queue().get_nowait() except Empty: break if isinstance(res, PollingException): cb = self.error_callback_ref() if cb is not None: gevent.spawn(cb, res.original_exception, res.poller_id) else: cb = self.value_changed_callback_ref() if cb is not None: gevent.spawn(cb, res) def run(self): sleep = gevent.monkey._get_original("time", ["sleep"])[0] self.async_watcher.start(self.new_event) err_callback_args = None error_cb = None first_run = True while not self.stop_event.is_set(): if first_run and self.delay: sleep(self.delay / 1000.0) first_run 
= False if self.stop_event.is_set(): break polled_call = self.polled_call_ref() if polled_call is None: break try: res = polled_call(*self.args) except Exception as e: if self.stop_event.is_set(): break error_cb = self.error_callback_ref() if error_cb is not None: self.queue.put(PollingException(e, self.get_id())) break del polled_call if self.stop_event.is_set(): break if isinstance(res, numpy.ndarray): # for arrays comparison = res == self.old_res if isinstance(comparison, bool): is_equal = comparison else: is_equal = all(comparison) else: is_equal = res == self.old_res if self.compare and is_equal: # do nothing: previous value is the same as "new" value pass else: new_value = True if self.compare: new_value = not is_equal if new_value: self.old_res = res self.queue.put(res) self.async_watcher.send() sleep(self.polling_period / 1000.0) if error_cb is not None: self.async_watcher.send()
class Channel(object):
    # pylint: disable=too-many-instance-attributes,too-many-arguments
    """ Off-chain view of a payment channel between this node and a partner.

    Keeps the state of both ends (`our_state`, `partner_state`), validates and
    registers transfers, and creates the transfer messages sent to the
    partner. On-chain interaction goes through `external_state`.
    """

    def __init__(self, our_state, partner_state, external_state,
                 asset_address, reveal_timeout, settle_timeout):
        """ Validate the timeouts and hook the channel into `external_state`.

        Args:
            our_state: State of this node's end of the channel.
            partner_state: State of the partner's end of the channel.
            external_state: Object giving access to the on-chain channel.
            asset_address: Address of the asset transferred in this channel.
            reveal_timeout (int): Number of blocks reserved to learn a secret,
                must be at least 3 and smaller than `settle_timeout`.
            settle_timeout (int): Number of blocks of the settlement window.

        Raises:
            ValueError: If the timeouts are inconsistent or not integral.
        """
        if settle_timeout <= reveal_timeout:
            # reveal_timeout must be a fraction of the settle_timeout
            raise ValueError(
                'reveal_timeout can not be larger-or-equal to settle_timeout')

        if reveal_timeout < 3:
            # To guarantee that assets won't be lost the expiration needs to
            # decrease at each hop, this is what forces the next hop to reveal
            # the secret with enough time for this node to unlock the lock with
            # the previous.
            #
            # This /should be/ at least:
            #
            #   reveal_timeout = blocks_to_learn + blocks_to_mine * 2
            #
            # Where:
            #
            # - `blocks_to_learn` is the estimated worst case for a given block
            # to propagate to the full network. This is the time to learn a
            # secret revealed through the blockchain.
            # - `blocks_to_mine * 2` is the estimated worst case for a given
            # transfer to be included in a block. This is the time to close a
            # channel and then to unlock a lock on chain.
            #
            # The message states the minimum that is actually enforced.
            raise ValueError('reveal_timeout must be at least 3')

        if not isinstance(settle_timeout, (int, long)):
            raise ValueError('settle_timeout must be integral')

        if not isinstance(reveal_timeout, (int, long)):
            raise ValueError('reveal_timeout must be integral')

        self.our_state = our_state
        self.partner_state = partner_state

        self.asset_address = asset_address
        self.reveal_timeout = reveal_timeout
        self.settle_timeout = settle_timeout
        self.external_state = external_state

        self.open_event = Event()
        self.close_event = Event()
        self.settle_event = Event()

        external_state.callback_on_opened(lambda _: self.open_event.set())
        external_state.callback_on_closed(lambda _: self.close_event.set())
        external_state.callback_on_settled(lambda _: self.settle_event.set())

        external_state.callback_on_closed(self.channel_closed)

        self.received_transfers = []
        self.sent_transfers = []  #: transfers that were sent, required for settling
        self.on_withdrawable_callbacks = list()  # mapping of transfer to callback list
        self.on_task_completed_callbacks = list()  # XXX naming

    @property
    def isopen(self):
        """ Return whether `external_state` reports the channel as open. """
        return self.external_state.isopen()

    @property
    def contract_balance(self):
        """ Return the amount of asset used to open the channel. """
        return self.our_state.contract_balance

    @property
    def transferred_amount(self):
        """ Return how much we transferred to partner. """
        return self.our_state.transferred_amount

    @property
    def balance(self):
        """ Return our current balance.

        Balance is equal to `initial_deposit + received_amount - sent_amount`,
        where both `received_amount` and `sent_amount` are unlocked.
        """
        return self.our_state.balance(self.partner_state)

    @property
    def distributable(self):
        """ Return the available amount of the asset that our end of the
        channel can transfer to the partner.
        """
        return self.our_state.distributable(self.partner_state)

    @property
    def locked(self):
        """ Return the current amount of our asset that is locked waiting for a
        secret.

        The locked value is equal to locked transfers that have been
        initialized but the secret has not been revealed.
        """
        return self.partner_state.locked()

    @property
    def outstanding(self):
        """ Return the locked amount reported by `our_state`. """
        return self.our_state.locked()

    def register_withdrawable_callback(self, callback):
        """ Add a callback spawned for each incoming direct transfer. """
        self.on_withdrawable_callbacks.append(callback)

    def channel_closed(self, block_number):
        """ Callback for the channel close: register the settle alarm and
        submit the partner's latest transfer and the known unlocks.
        """
        self.external_state.register_block_alarm(self.blockalarm_for_settle)

        balance_proof = self.partner_state.balance_proof

        transfer = balance_proof.transfer
        unlock_proofs = balance_proof.get_known_unlocks()

        self.external_state.update_transfer(self.our_state.address, transfer)
        self.external_state.unlock(self.our_state.address, unlock_proofs)

    def blockalarm_for_settle(self, block_number):
        """ Block alarm callback that spawns the settle attempts.

        Returns:
            REMOVE_CALLBACK once the settle greenlet was spawned, None
            otherwise.
        """
        def _settle():
            for _ in range(3):
                try:
                    self.external_state.settle()
                except:
                    # NOTE(review): deliberately broad. gevent's Timeout
                    # derives from BaseException, so narrowing this to
                    # `Exception` could stop catching timeouts.
                    log.exception('Timedout while calling settle')

                # wait for the settle event, it could be our transaction or our
                # partner's
                self.settle_event.wait(0.5)

                if self.settle_event.is_set():
                    log.info('channel automatically settled')
                    return

        if self.external_state.closed_block + self.settle_timeout >= block_number:
            gevent.spawn(_settle)  # don't block the alarm
            return REMOVE_CALLBACK

    def get_state_for(self, node_address_bin):
        """ Return the end state whose address is `node_address_bin`.

        Raises:
            Exception: If the address is neither ours nor the partner's.
        """
        if self.our_state.address == node_address_bin:
            return self.our_state

        if self.partner_state.address == node_address_bin:
            return self.partner_state

        raise Exception('Unknow address {}'.format(
            encode_hex(node_address_bin)))

    def register_secret(self, secret):
        """ Register a secret.

        This wont claim the lock (update the transferred_amount), it will only
        save the secret in case that a proof needs to be created. This method
        can be used for any of the ends of the channel.

        Note:
            When a secret is revealed a message could be in-transit containing
            the older lockroot, for this reason the recipient cannot update
            its locksroot at the moment a secret was revealed.

            The protocol is to register the secret so that it can compute a
            proof of balance, if necessary, forward the secret to the sender
            and wait for the update from it. It's the sender's duty to order
            the current in-transit (and possibly the transfers in queue)
            transfers and the secret/locksroot update.

            The channel and its queue must be changed in sync, a transfer must
            not be created while we update the balance_proof.

        Args:
            secret: The secret that releases a locked transfer.

        Raises:
            ValueError: If the hashlock of `secret` is known by neither end.
        """
        hashlock = sha3(secret)

        our_known = self.our_state.balance_proof.is_known(hashlock)
        partner_known = self.partner_state.balance_proof.is_known(hashlock)

        if not our_known and not partner_known:
            msg = 'Secret doesnt correspond to a registered hashlock. hashlock:{} asset:{}'.format(
                pex(hashlock),
                pex(self.asset_address),
            )

            raise ValueError(msg)

        if our_known:
            lock = self.our_state.balance_proof.get_lock_by_hashlock(hashlock)

            if log.isEnabledFor(logging.DEBUG):
                log.debug(
                    'SECRET REGISTERED node:%s %s > %s asset:%s hashlock:%s amount:%s',
                    pex(self.our_state.address),
                    pex(self.our_state.address),
                    pex(self.partner_state.address),
                    pex(self.asset_address),
                    pex(hashlock),
                    lock.amount,
                )

            self.our_state.register_secret(secret)

        if partner_known:
            lock = self.partner_state.balance_proof.get_lock_by_hashlock(
                hashlock)

            if log.isEnabledFor(logging.DEBUG):
                log.debug(
                    'SECRET REGISTERED node:%s %s > %s asset:%s hashlock:%s amount:%s',
                    pex(self.our_state.address),
                    pex(self.partner_state.address),
                    pex(self.our_state.address),
                    pex(self.asset_address),
                    pex(hashlock),
                    lock.amount,
                )

            self.partner_state.register_secret(secret)

    def release_lock(self, secret):
        """ Release a lock for a transfer that was initiated from this node.

        Only the sender of the mediated transfer can release a lock, the
        receiver might know the secret but it needs to wait for a message from
        the initiator. This is because the sender needs to coordinate states
        updates (the hashlock for the transfers that are in transit and/or in
        queue need to be in sync with the state known by the partner).

        Note:
            Releasing a lock should always be accompanied by at least one
            Secret message to the partner node.

            The node should also release the locks for the refund transfer.

        Raises:
            ValueError: If the partner's balance proof does not know the
                hashlock of `secret`.
        """
        hashlock = sha3(secret)

        if not self.partner_state.balance_proof.is_known(hashlock):
            raise ValueError(
                'The secret doesnt unlock any hashlock. hashlock:{} asset:{}'.
                format(
                    pex(hashlock),
                    pex(self.asset_address),
                ))

        lock = self.partner_state.balance_proof.get_lock_by_hashlock(hashlock)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'ASSET UNLOCKED %s > %s asset:%s hashlock:%s lockhash:%s amount:%s',
                pex(self.our_state.address),
                pex(self.partner_state.address),
                pex(self.asset_address),
                pex(hashlock),
                pex(sha3(lock.as_bytes)),
                lock.amount,
            )

        self.partner_state.release_lock(self.our_state, secret)

    def withdraw_lock(self, secret):
        """ A lock was released by the sender, withdraw its funds and update
        the state.

        Raises:
            ValueError: If our balance proof does not know the hashlock of
                `secret`.
        """
        hashlock = sha3(secret)

        if not self.our_state.balance_proof.is_known(hashlock):
            msg = 'The secret doesnt withdraw any hashlock. hashlock:{} asset:{}'.format(
                pex(hashlock),
                pex(self.asset_address),
            )
            raise ValueError(msg)

        lock = self.our_state.balance_proof.get_lock_by_hashlock(hashlock)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'ASSET WITHDRAWED %s < %s asset:%s hashlock:%s lockhash:%s amount:%s',
                pex(self.our_state.address),
                pex(self.partner_state.address),
                pex(self.asset_address),
                pex(hashlock),
                pex(sha3(lock.as_bytes)),
                lock.amount,
            )

        self.our_state.release_lock(self.partner_state, secret)

    def register_transfer(self, transfer):
        """ Register a signed transfer, updating the channel's state
        accordingly.

        Raises:
            ValueError: If the recipient is neither end of this channel.
        """
        if transfer.recipient == self.partner_state.address:
            self.register_transfer_from_to(
                transfer,
                from_state=self.our_state,
                to_state=self.partner_state,
            )

            self.sent_transfers.append(transfer)

        elif transfer.recipient == self.our_state.address:
            self.register_transfer_from_to(
                transfer,
                from_state=self.partner_state,
                to_state=self.our_state,
            )
            self.received_transfers.append(transfer)

        else:
            raise ValueError('Invalid address')

    def register_transfer_from_to(self, transfer, from_state, to_state):  # noqa pylint: disable=too-many-branches
        """ Validates and register a signed transfer, updating the channel's
        state accordingly.

        Note:
            The transfer must be registered before it is sent, not on
            acknowledgement. That is necessary for two reasons:

            - Guarantee that the transfer is valid.
            - Avoiding sending a new transaction without funds.

        Raises:
            InsufficientBalance: If the transfer is above the distributable
                amount.
            InvalidLocksRoot: If locksroot check fails.
            InvalidNonce: If the expected nonce does not match.
            ValueError: If there is an address mismatch (asset or node
                address), the hashlock is already registered, the lock
                expiration is out of bounds or the transfer is negative.
        """
        if transfer.asset != self.asset_address:
            raise ValueError('Asset address mismatch')

        if transfer.recipient != to_state.address:
            raise ValueError('Unknow recipient')

        if transfer.sender != from_state.address:
            raise ValueError('Unsigned transfer')

        # nonce is changed only when a transfer is un/registered, if the test
        # fail either we are out of sync, a message out of order, or it's an
        # forged transfer
        if transfer.nonce < 1 or transfer.nonce != from_state.nonce:
            raise InvalidNonce(transfer)

        # if the locksroot is out-of-sync (because a transfer was created while
        # a Secret was in trafic) the balance _will_ be wrong, so first check
        # the locksroot and then the balance
        if isinstance(transfer, LockedTransfer):
            block_number = self.external_state.get_block_number()

            if to_state.balance_proof.is_pending(transfer.lock.hashlock):
                raise ValueError('hashlock is already registered')

            # As a receiver: Check that all locked transfers are registered in
            # the locksroot, if any hashlock is missing there is no way to
            # claim it while the channel is closing
            expected_locksroot = to_state.compute_merkleroot_with(
                transfer.lock)

            if expected_locksroot != transfer.locksroot:
                if log.isEnabledFor(logging.ERROR):
                    log.error(
                        'LOCKSROOT MISMATCH node:%s %s > %s lockhash:%s lockhashes:%s',
                        pex(self.our_state.address),
                        pex(from_state.address),
                        pex(to_state.address),
                        pex(sha3(transfer.lock.as_bytes)),
                        lpex(to_state.balance_proof.unclaimed_merkletree()),
                        expected_locksroot=pex(expected_locksroot),
                        received_locksroot=pex(transfer.locksroot),
                    )

                raise InvalidLocksRoot(transfer)

            # As a receiver: If the lock expiration is larger than the settling
            # time a secret could be revealed after the channel is settled and
            # we won't be able to claim the asset
            if not transfer.lock.expiration - block_number < self.settle_timeout:
                log.error(
                    "Transfer expiration doesn't allow for correct settlement.",
                    lock_expiration=transfer.lock.expiration,
                    current_block=block_number,
                    settle_timeout=self.settle_timeout,
                )

                raise ValueError(
                    "Transfer expiration doesn't allow for correct settlement."
                )

            if not transfer.lock.expiration - block_number > self.reveal_timeout:
                log.error(
                    'Expiration smaller than the minimum required.',
                    lock_expiration=transfer.lock.expiration,
                    current_block=block_number,
                    reveal_timeout=self.reveal_timeout,
                )

                raise ValueError(
                    'Expiration smaller than the minimum required.')

        # only check the balance if the locksroot matched
        if transfer.transferred_amount < from_state.transferred_amount:
            if log.isEnabledFor(logging.ERROR):
                log.error(
                    'NEGATIVE TRANSFER node:%s %s > %s %s',
                    pex(self.our_state.address),
                    pex(from_state.address),
                    pex(to_state.address),
                    transfer,
                )

            raise ValueError('Negative transfer')

        amount = transfer.transferred_amount - from_state.transferred_amount
        distributable = from_state.distributable(to_state)

        if amount > distributable:
            raise InsufficientBalance(transfer)

        if isinstance(transfer, LockedTransfer):
            if amount + transfer.lock.amount > distributable:
                raise InsufficientBalance(transfer)

        # all checks need to be done before the internal state of the channel
        # is changed, otherwise if a check fails and state was changed the
        # channel will be left trashed
        if isinstance(transfer, LockedTransfer):
            if log.isEnabledFor(logging.DEBUG):
                log.debug(
                    'REGISTERED LOCK node:%s %s > %s currentlocksroot:%s lockhashes:%s',
                    pex(self.our_state.address),
                    pex(from_state.address),
                    pex(to_state.address),
                    pex(to_state.balance_proof.merkleroot_for_unclaimed()),
                    lpex(to_state.balance_proof.unclaimed_merkletree()),
                    lock_amount=transfer.lock.amount,
                    lock_expiration=transfer.lock.expiration,
                    lock_hashlock=pex(transfer.lock.hashlock),
                    lockhash=pex(sha3(transfer.lock.as_bytes)),
                )

            to_state.register_locked_transfer(transfer)

            # register this channel as waiting for the secret (the secret can
            # be revealed through a message or an blockchain log)
            self.external_state.register_channel_for_hashlock(
                self,
                transfer.lock.hashlock,
            )

        if isinstance(transfer, DirectTransfer):
            to_state.register_direct_transfer(transfer)

        from_state.transferred_amount = transfer.transferred_amount
        from_state.nonce += 1

        if isinstance(transfer, DirectTransfer):
            # if we are the recipient, spawn callback for incoming transfers
            if transfer.recipient == self.our_state.address:
                for callback in self.on_withdrawable_callbacks:
                    gevent.spawn(
                        callback,
                        transfer.asset,
                        transfer.recipient,
                        transfer.sender,  # 'initiator' is sender here
                        transfer.transferred_amount,
                        None  # no hashlock in DirectTransfer
                    )

            # if we are the sender, call the 'success' callback
            elif from_state.address == self.our_state.address:
                callbacks_to_remove = list()
                for callback in self.on_task_completed_callbacks:
                    result = callback(
                        task=None, success=True)  # XXX maybe use gevent.spawn()

                    if result is True:
                        callbacks_to_remove.append(callback)

                for callback in callbacks_to_remove:
                    self.on_task_completed_callbacks.remove(callback)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'REGISTERED TRANSFER node:%s %s > %s '
                'transfer:%s transferred_amount:%s nonce:%s '
                'current_locksroot:%s',
                pex(self.our_state.address),
                pex(from_state.address),
                pex(to_state.address),
                repr(transfer),
                from_state.transferred_amount,
                from_state.nonce,
                pex(to_state.balance_proof.merkleroot_for_unclaimed()),
            )

    def create_directtransfer(self, amount, identifier):
        """ Return a DirectTransfer message.

        This message needs to be signed and registered with the channel before
        sent.

        Raises:
            ValueError: If the channel is closed, or `amount` is not positive
                or is above the distributable amount.
        """
        if not self.isopen:
            raise ValueError('The channel is closed')

        from_ = self.our_state
        to_ = self.partner_state

        distributable = from_.distributable(to_)

        if amount <= 0 or amount > distributable:
            log.debug(
                'Insufficient funds',
                amount=amount,
                distributable=distributable,
            )

            raise ValueError('Insufficient funds')

        transferred_amount = from_.transferred_amount + amount
        current_locksroot = to_.balance_proof.merkleroot_for_unclaimed()

        return DirectTransfer(
            identifier=identifier,
            nonce=from_.nonce,
            asset=self.asset_address,
            transferred_amount=transferred_amount,
            recipient=to_.address,
            locksroot=current_locksroot,
        )

    def create_lockedtransfer(self, amount, identifier, expiration, hashlock):
        """ Return a LockedTransfer message.

        This message needs to be signed and registered with the channel before
        sent.

        Raises:
            ValueError: If the channel is closed, the expiration is not
                strictly between the reveal and settle timeouts counted from
                the current block, or the funds are insufficient.
        """
        if not self.isopen:
            raise ValueError('The channel is closed.')

        block_number = self.external_state.get_block_number()
        timeout = expiration - block_number

        # the lock timeout cannot be larger than the settle timeout (otherwise
        # the smart contract cannot check the locks)
        if timeout >= self.settle_timeout:
            log.debug(
                'Lock expiration is larger than settle timeout.',
                expiration=expiration,
                block_number=block_number,
                settle_timeout=self.settle_timeout,
            )

            raise ValueError('Invalid expiration.')

        # the expiration cannot be lower than the reveal timeout (otherwise we
        # dont have enough time to listen for the ChannelSecretRevealed event)
        if timeout <= self.reveal_timeout:
            log.debug(
                'Lock expiration is lower than reveal timeout.',
                expiration=expiration,
                block_number=block_number,
                reveal_timeout=self.reveal_timeout,
            )

            raise ValueError('Invalid expiration.')

        from_ = self.our_state
        to_ = self.partner_state

        distributable = from_.distributable(to_)

        if amount <= 0 or amount > distributable:
            log.debug(
                'Insufficient funds',
                amount=amount,
                distributable=distributable,
            )
            raise ValueError('Insufficient funds')

        lock = Lock(amount, expiration, hashlock)

        updated_locksroot = to_.compute_merkleroot_with(include=lock)
        transferred_amount = from_.transferred_amount

        return LockedTransfer(
            identifier=identifier,
            nonce=from_.nonce,
            asset=self.asset_address,
            transferred_amount=transferred_amount,
            recipient=to_.address,
            locksroot=updated_locksroot,
            lock=lock,
        )

    def create_mediatedtransfer(self, transfer_initiator, transfer_target, fee,
                                amount, identifier, expiration, hashlock):
        """ Return a MediatedTransfer message.

        This message needs to be signed and registered with the channel before
        sent.

        Args:
            transfer_initiator (address): The node that requested the transfer.
            transfer_target (address): The final destination node of the transfer
            amount (float): How much of an asset is being transferred.
            expiration (int): The maximum block number until the transfer
                message can be received.
        """
        locked_transfer = self.create_lockedtransfer(
            amount,
            identifier,
            expiration,
            hashlock,
        )

        mediated_transfer = locked_transfer.to_mediatedtransfer(
            transfer_target,
            transfer_initiator,
            fee,
        )
        return mediated_transfer

    def create_refundtransfer_for(self, transfer):
        """ Return RefundTransfer for `transfer`.

        Raises:
            ValueError: If the lock of `transfer` is not pending in our
                balance proof.
        """
        lock = transfer.lock

        if not self.our_state.balance_proof.is_pending(lock.hashlock):
            raise ValueError('Unknow hashlock')

        locked_transfer = self.create_lockedtransfer(
            lock.amount,
            1,  # TODO: Perhaps add identifier in the refund transfer too?
            lock.expiration,
            lock.hashlock,
        )

        cancel_transfer = locked_transfer.to_refundtransfer()

        return cancel_transfer

    def create_timeouttransfer_for(self, transfer):
        """ Return a TransferTimeout for `transfer`.

        Raises:
            ValueError: If the lock of `transfer` is not pending in our
                balance proof.
        """
        lock = transfer.lock

        if not self.our_state.balance_proof.is_pending(lock.hashlock):
            raise ValueError('Unknow hashlock')

        return TransferTimeout(
            transfer.hash,
            lock.hashlock,
        )
class RaidenService: """ A Raiden node. """ def __init__( self, chain: BlockChainService, query_start_block: typing.BlockNumber, default_registry: TokenNetworkRegistry, default_secret_registry: SecretRegistry, private_key_bin, transport, config, discovery=None, ): if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32: raise ValueError('invalid private_key') self.tokennetworkids_to_connectionmanagers = dict() self.identifier_to_results = defaultdict(list) self.chain: BlockChainService = chain self.default_registry = default_registry self.query_start_block = query_start_block self.default_secret_registry = default_secret_registry self.config = config self.privkey = private_key_bin self.address = privatekey_to_address(private_key_bin) self.discovery = discovery if config['transport_type'] == 'udp': endpoint_registration_event = gevent.spawn( discovery.register, self.address, config['external_ip'], config['external_port'], ) endpoint_registration_event.link_exception( endpoint_registry_exception_handler) self.private_key = PrivateKey(private_key_bin) self.pubkey = self.private_key.public_key.format(compressed=False) self.transport = transport self.blockchain_events = BlockchainEvents() self.alarm = AlarmTask(chain) self.shutdown_timeout = config['shutdown_timeout'] self.stop_event = Event() self.start_event = Event() self.chain.client.inject_stop_event(self.stop_event) self.wal = None self.database_path = config['database_path'] if self.database_path != ':memory:': database_dir = os.path.dirname(config['database_path']) os.makedirs(database_dir, exist_ok=True) self.database_dir = database_dir # Prevent concurrent access to the same db self.lock_file = os.path.join(self.database_dir, '.lock') self.db_lock = filelock.FileLock(self.lock_file) else: self.database_path = ':memory:' self.database_dir = None self.lock_file = None self.serialization_file = None self.db_lock = None if config['transport_type'] == 'udp': # If the endpoint registration fails the 
node will quit, this must # finish before starting the transport endpoint_registration_event.join() self.event_poll_lock = gevent.lock.Semaphore() self.start() def start(self): """ Start the node. """ if self.stop_event and self.stop_event.is_set(): self.stop_event.clear() if self.database_dir is not None: self.db_lock.acquire(timeout=0) assert self.db_lock.is_locked # The database may be :memory: storage = sqlite.SQLiteStorage(self.database_path, serialize.PickleSerializer()) self.wal, unapplied_events = wal.restore_from_latest_snapshot( node.state_transition, storage, ) if self.wal.state_manager.current_state is None: block_number = self.chain.block_number() state_change = ActionInitChain( random.Random(), block_number, self.chain.network_id, ) self.wal.log_and_dispatch(state_change, block_number) payment_network = PaymentNetworkState( self.default_registry.address, [], # empty list of token network states as it's the node's startup ) state_change = ContractReceiveNewPaymentNetwork(payment_network) self.handle_state_change(state_change) # On first run Raiden needs to fetch all events for the payment # network, to reconstruct all token network graphs and find opened # channels last_log_block_number = 0 else: # The `Block` state change is dispatched only after all the events # for that given block have been processed, filters can be safely # installed starting from this position without losing events. last_log_block_number = views.block_number( self.wal.state_manager.current_state) # Install the filters using the correct from_block value, otherwise # blockchain logs can be lost. self.install_all_blockchain_filters( self.default_registry, self.default_secret_registry, last_log_block_number, ) # Complete the first_run of the alarm task and synchronize with the # blockchain since the last run. # # Notes about setup order: # - The filters must be polled after the node state has been primed, # otherwise the state changes won't have effect. 
# - The alarm must complete its first run before the transport is started, # to avoid rejecting messages for unknown channels. self.alarm.register_callback(self._callback_new_block) self.alarm.first_run() self.alarm.start() queueids_to_queues = views.get_all_messagequeues( views.state_from_raiden(self)) self.transport.start(self, queueids_to_queues) # Health check needs the transport layer self.start_neighbours_healthcheck() for event in unapplied_events: on_raiden_event(self, event) self.start_event.set() def start_neighbours_healthcheck(self): for neighbour in views.all_neighbour_nodes( self.wal.state_manager.current_state): if neighbour != ConnectionManager.BOOTSTRAP_ADDR: self.start_health_check_for(neighbour) def stop(self): """ Stop the node. """ # Needs to come before any greenlets joining self.stop_event.set() self.transport.stop_and_wait() self.alarm.stop_async() wait_for = [self.alarm] wait_for.extend(getattr(self.transport, 'greenlets', [])) # We need a timeout to prevent an endless loop from trying to # contact the disconnected client gevent.wait(wait_for, timeout=self.shutdown_timeout) # Filters must be uninstalled after the alarm task has stopped. Since # the events are polled by an alarm task callback, if the filters are # uninstalled before the alarm task is fully stopped the callback # `poll_blockchain_events` will fail. 
# # We need a timeout to prevent an endless loop from trying to # contact the disconnected client try: with gevent.Timeout(self.shutdown_timeout): self.blockchain_events.uninstall_all_event_listeners() except (gevent.timeout.Timeout, RaidenShuttingDown): pass self.blockchain_events.reset() if self.db_lock is not None: self.db_lock.release() def __repr__(self): return '<{} {}>'.format(self.__class__.__name__, pex(self.address)) def get_block_number(self): return views.block_number(self.wal.state_manager.current_state) def handle_state_change(self, state_change, block_number=None): log.debug('STATE CHANGE', node=pex(self.address), state_change=state_change) if block_number is None: block_number = self.get_block_number() event_list = self.wal.log_and_dispatch(state_change, block_number) for event in event_list: log.debug('RAIDEN EVENT', node=pex(self.address), raiden_event=event) on_raiden_event(self, event) return event_list def set_node_network_state(self, node_address, network_state): state_change = ActionChangeNodeNetworkState(node_address, network_state) self.wal.log_and_dispatch(state_change, self.get_block_number()) def start_health_check_for(self, node_address): self.transport.start_health_check(node_address) def _callback_new_block(self, current_block_number, chain_id): """Called once a new block is detected by the alarm task. Note: This should be called only once per block, otherwise there will be duplicated `Block` state changes in the log. Therefore this method should be called only once a new block is mined with the appropriate block_number argument from the AlarmTask. """ # Raiden relies on blockchain events to update its off-chain state, # therefore some APIs /used/ to forcefully poll for events. # # This was done for APIs which have on-chain side-effects, e.g. # openning a channel, where polling the event is required to update # off-chain state to providing a consistent view to the caller, e.g. # the channel exists after the API call returns. 
#
        # That pattern introduced a race, because the events are returned only
        # once per filter, and this method would be called concurrently by the
        # API and the AlarmTask. The following lock is necessary, to ensure the
        # expected side-effects are properly applied (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d)
        with self.event_poll_lock:
            for event in self.blockchain_events.poll_blockchain_events(
                    current_block_number):
                # These state changes will be processed with a block_number
                # which is /larger/ than the ChainState's block_number.
                on_blockchain_event(self, event, current_block_number, chain_id)

            # On restart the Raiden node will re-create the filters with the
            # ethereum node. These filters will have the from_block set to the
            # value of the latest Block state change. To avoid missing events
            # the Block state change is dispatched only after all of the events
            # have been processed.
            #
            # This means on some corner cases a few events may be applied
            # twice, this will happen if the node crashed and some events have
            # been processed but the Block state change has not been
            # dispatched.
            state_change = Block(current_block_number)
            self.handle_state_change(state_change, current_block_number)

    def sign(self, message):
        """ Sign `message` in place using this node's private key.

        Raises:
            ValueError: If `message` is not a `SignedMessage` instance.
        """
        if not isinstance(message, SignedMessage):
            raise ValueError('{} is not signable.'.format(repr(message)))

        message.sign(self.private_key)

    def install_all_blockchain_filters(
            self,
            token_network_registry_proxy,
            secret_registry_proxy,
            from_block,
    ):
        """ Register blockchain event listeners starting at `from_block`.

        Listeners are added for the token network registry, the secret
        registry, every token network known for that registry and every
        channel in the current node state. The whole operation runs while
        holding `event_poll_lock`.
        """
        with self.event_poll_lock:
            node_state = views.state_from_raiden(self)
            channels = views.list_all_channelstate(node_state)
            token_networks = views.get_token_network_identifiers(
                node_state,
                token_network_registry_proxy.address,
            )

            self.blockchain_events.add_token_network_registry_listener(
                token_network_registry_proxy,
                from_block,
            )
            self.blockchain_events.add_secret_registry_listener(
                secret_registry_proxy,
                from_block,
            )

            for token_network in token_networks:
                token_network_proxy = self.chain.token_network(token_network)
                self.blockchain_events.add_token_network_listener(
                    token_network_proxy,
                    from_block,
                )

            for channel_state in channels:
                channel_proxy = self.chain.payment_channel(
                    channel_state.token_network_identifier,
                    channel_state.identifier,
                )
                self.blockchain_events.add_payment_channel_listener(
                    channel_proxy,
                    from_block,
                )

    def connection_manager_for_token_network(self, token_network_identifier):
        """ Return the ConnectionManager for `token_network_identifier`.

        The manager is created on first use and cached in
        `tokennetworkids_to_connectionmanagers`.

        Raises:
            InvalidAddress: If the identifier is not a binary address or is
                not among the token networks known for `default_registry`.
        """
        if not is_binary_address(token_network_identifier):
            raise InvalidAddress('token address is not valid.')

        known_token_networks = views.get_token_network_identifiers(
            views.state_from_raiden(self),
            self.default_registry.address,
        )

        if token_network_identifier not in known_token_networks:
            raise InvalidAddress('token is not registered.')

        manager = self.tokennetworkids_to_connectionmanagers.get(
            token_network_identifier)

        if manager is None:
            manager = ConnectionManager(self, token_network_identifier)
            self.tokennetworkids_to_connectionmanagers[
                token_network_identifier] = manager

        return manager

    def leave_all_token_networks(self):
        """ Log and dispatch an `ActionLeaveAllNetworks` state change. """
        state_change = ActionLeaveAllNetworks()
        self.wal.log_and_dispatch(state_change, self.get_block_number())

    def close_and_settle(self):
        """ Leave all token networks and wait for the channels to settle.

        The wait only happens when at least one connection manager has been
        created on this instance.
        """
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        # NOTE(review): the list is only used for its truthiness below.
        connection_managers = [
            cm for cm in self.tokennetworkids_to_connectionmanagers.values()
        ]

        if connection_managers:
            waiting.wait_for_settle_all_channels(
                self,
                self.alarm.sleep_time,
            )

    def mediated_transfer_async(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """ Transfer `amount` between this node and `target`.

        This method will start an asynchronous transfer, the transfer might
        fail or succeed depending on a couple of factors:

            - Existence of a path that can be used, through the usage of
              direct or intermediary channels.
            - Network speed, making the transfer sufficiently fast so it
              doesn't expire.

        Returns:
            The async result produced by `start_mediated_transfer`.
        """

        async_result = self.start_mediated_transfer(
            token_network_identifier,
            amount,
            target,
            identifier,
        )

        return async_result

    def direct_transfer_async(self, token_network_identifier, amount, target,
                              identifier):
        """ Do a direct transfer with target.

        Direct transfers are non cancellable and non expirable, since these
        transfers are a signed balance proof with the transferred amount
        incremented.

        Because the transfer is non cancellable, there is a level of trust
        with the target. After the message is sent the target is effectively
        paid and then it is not possible to revert.

        The async result will be set to False iff there is no direct channel
        with the target or the payer does not have balance to complete the
        transfer, otherwise because the transfer is non expirable the async
        result *will never be set to False* and if the message is sent it
        will hang until the target node acknowledge the message.

        This transfer should be used as an optimization, since only two
        packets are required to complete the transfer (from the payers
        perspective), whereas the mediated transfer requires 6 messages.

        NOTE(review): despite the wording above, this method has no return
        statement, so callers receive None rather than an async result.
        """

        self.start_health_check_for(target)

        if identifier is None:
            identifier = create_default_identifier()

        direct_transfer = ActionTransferDirect(
            token_network_identifier,
            target,
            identifier,
            amount,
        )

        self.handle_state_change(direct_transfer)

    def start_mediated_transfer(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """ Start a mediated transfer as initiator and return its async result.

        A default identifier is generated when `identifier` is None, and a
        fresh random secret is created for the transfer.
        """

        self.start_health_check_for(target)

        if identifier is None:
            identifier = create_default_identifier()

        # NOTE(review): this check disappears when Python runs with -O.
        assert identifier not in self.identifier_to_results

        async_result = AsyncResult()
        # presumably identifier_to_results creates the list on first access
        # (defaultdict-like) -- TODO confirm against its definition.
        self.identifier_to_results[identifier].append(async_result)

        secret = random_secret()
        init_initiator_statechange = initiator_init(
            self,
            identifier,
            amount,
            secret,
            token_network_identifier,
            target,
        )

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        #
        # Dispatch the state change even if there are no routes to create the
        # wal entry.
        self.handle_state_change(init_initiator_statechange)

        return async_result

    def mediate_mediated_transfer(self, transfer: LockedTransfer):
        """ Dispatch the mediator-init state change built from `transfer`. """
        init_mediator_statechange = mediator_init(self, transfer)
        self.handle_state_change(init_mediator_statechange)

    def target_mediated_transfer(self, transfer: LockedTransfer):
        """ Health-check the initiator and dispatch the target-init state change. """
        self.start_health_check_for(transfer.initiator)
        init_target_statechange = target_init(transfer)
        self.handle_state_change(init_target_statechange)

    # demo send crosstransaction
    def start_crosstransaction(self, token_network_identifier, target_address,
                               initiator_address, sendETH_amount,
                               sendBTC_amount, receiveBTC_address, cross_type,
                               identifier):
        """ Send a signed `Crosstransaction` message to `target_address`.

        When `cross_type` is 1 the transaction is first stored through the
        WAL. Returns the AsyncResult registered under the generated id.

        NOTE(review): the `identifier` argument is overwritten immediately,
        so the value passed by the caller is never used.
        """
        identifier = create_default_crossid()

        async_result = AsyncResult()
        self.identifier_to_results[identifier].append(async_result)

        self.transport.start_health_check(target_address)

        cross_id = identifier

        if (cross_type == 1):
            self.wal.create_crosstransactiontry(initiator_address,
                                                target_address,
                                                token_network_identifier,
                                                sendETH_amount,
                                                sendBTC_amount,
                                                receiveBTC_address, cross_id)
            print("write data to sqlite")
            print(self.wal.get_crosstransaction_by_identifier(cross_id))

        crosstransaction_message = Crosstransaction(
            random.randint(0, UINT64_MAX),
            initiator_address,
            target_address,
            token_network_identifier,
            sendETH_amount,
            sendBTC_amount,
            receiveBTC_address,
            cross_type,
            cross_id,
        )

        self.sign(crosstransaction_message)
        self.transport.send_async(
            target_address,
            bytes("123", 'utf-8'),
            crosstransaction_message,
        )

        return async_result

    # demo
    def start_send_crosstansfer(self, cross_id, identifier=None):
        """ Start the initiator transfer for the stored cross transaction.

        The stored row for `cross_id` is read positionally: index 2 is used
        as target, 3 as token network identifier, 4 as amount and 5 as the
        BTC amount.

        NOTE(review): `identifier` is accepted but never used.
        """
        cross_data = self.wal.get_crosstransaction_by_identifier(cross_id)
        print(cross_data)
        amount = cross_data[4]
        target = cross_data[2]
        btc_amount = cross_data[5]
        token_network_identifier = cross_data[3]

        self.transport.start_health_check(target)

        secret = random_secret()
        init_initiator_statechange = initiator_init(
            self,
            cross_id,
            amount,
            secret,
            token_network_identifier,
            target,
        )

        print("init_initiator_statechange: ", init_initiator_statechange)
        self.handle_cross_state_change(init_initiator_statechange, cross_id,
                                       secret, btc_amount)

    def get_crosstransaction_by_crossid(self, cross_id):
        """ Return the stored cross transaction as a list.

        Fields 1, 2 and 3 are converted with `to_normalized_address`.
        """
        res = self.wal.get_crosstransaction_by_identifier(cross_id)
        res = list(res)
        res[1] = to_normalized_address(res[1])
        res[2] = to_normalized_address(res[2])
        res[3] = to_normalized_address(res[3])
        return res

    def get_crosstransaction_all(self):
        """ Return whatever `wal.get_all_crosstransaction()` yields. """
        res = self.wal.get_all_crosstransaction()
        return res

    def handle_cross_state_change(self, state_change, cross_id, secret,
                                  btc_amount, block_number=None):
        """ Dispatch `state_change` and pair the transfer with an LND invoice.

        Steps, in order:
            1. Log and dispatch the state change (current block number is
               used when `block_number` is None).
            2. POST an invoice to the configured LND node, using `secret`
               (base64 encoded) as the preimage and `btc_amount` as value.
            3. For each resulting `SendLockedTransfer` event, sign the
               message, record its secrethash and the LND r_hash for
               `cross_id`, and send it wrapped in a `CrossLockedTransfer`
               carrying the payment request. All other events go to
               `on_raiden_event`.
        """
        if block_number is None:
            block_number = self.get_block_number()

        event_list = self.wal.log_and_dispatch(state_change, block_number)

        row = self.wal.storage.get_lnd(1)
        macaroon = row[4]
        lnd_url = "https://{}/v1/invoices".format(self.config['lnd_address'])
        lnd_headers = {'Grpc-Metadata-macaroon': macaroon}
        lnd_r = base64.b64encode(secret)
        lnd_data = {
            'value': btc_amount,
            'r_preimage': lnd_r.decode('utf-8'),
            'type': "CROSS_CHAIN_INVOICE"
        }
        # NOTE(review): verify=False disables TLS certificate verification,
        # and the response status is not checked before reading the JSON
        # keys below.
        res = requests.post(lnd_url,
                            headers=lnd_headers,
                            data=json.dumps(lnd_data),
                            verify=False)
        res_json = res.json()
        lnd_r_hash = res_json['r_hash']
        lnd_payment_request = res_json['payment_request']
        print('send invoice succ, lnd_r_hash:', lnd_r_hash)

        for event in event_list:
            log.debug('RAIDEN EVENT',
                      node=pex(self.address),
                      raiden_event=event)
            if type(event) == SendLockedTransfer:
                locked_transfer_message = message_from_sendevent(
                    event, self.address)
                self.sign(locked_transfer_message)
                self.wal.storage.change_crosstransaction_r(
                    cross_id,
                    encode_hex(locked_transfer_message.lock.secrethash),
                    lnd_r_hash)
                # NOTE(review): tmp_r_hash, raiden_r_hash and hex_r_hash are
                # not read anywhere else in this method.
                tmp_r_hash = base64.b64decode(lnd_r_hash)
                raiden_r_hash = locked_transfer_message.lock.secrethash
                hex_r_hash = encode_hex(tmp_r_hash)
                lnd_string = bytes(lnd_payment_request, "utf-8")
                cross_transfer_message = CrossLockedTransfer(
                    locked_transfer_message, cross_id, lnd_string)
                self.sign(cross_transfer_message)
                self.transport.send_async(cross_transfer_message.recipient,
                                          bytes("456", 'utf-8'),
                                          cross_transfer_message)
                print('corss_message send ok')
                continue

            on_raiden_event(self, event)

    def cross_handle_recieved_locked_transfer(self, transfer, cross_id):
        """ Handle a locked transfer received as target of a cross transfer.

        Dispatches the target-init state change. Each resulting
        `SendSecretRequest` is signed and sent wrapped in a
        `CrossSecretRequest` tagged with `cross_id`; all other events go to
        `on_raiden_event`.

        Returns:
            The list of events produced by the state change.
        """
        self.start_health_check_for(transfer.initiator)
        state_change = target_init(transfer)
        block_number = self.get_block_number()

        event_list = self.wal.log_and_dispatch(state_change, block_number)

        for event in event_list:
            log.debug('RAIDEN EVENT',
                      node=pex(self.address),
                      raiden_event=event)
            if type(event) == SendSecretRequest:
                secret_request_message = message_from_sendevent(
                    event, self.address)
                self.sign(secret_request_message)
                cross_secret_request_message = CrossSecretRequest(
                    secret_request_message, cross_id)
                self.sign(cross_secret_request_message)
                self.transport.send_async(
                    event.recipient,
                    event.queue_name,
                    cross_secret_request_message,
                )
                continue

            on_raiden_event(self, event)

        return event_list

    def send_payment_request(self, lnd_string):
        """ Ask the configured LND node to pay the request `lnd_string`.

        A message is printed on HTTP 200; any other status is ignored and
        nothing is returned.
        """
        row = self.wal.storage.get_lnd(1)
        macaroon = row[4]
        lnd_url = "https://{}/v1/channels/transactions".format(
            self.config['lnd_address'])
        lnd_headers = {'Grpc-Metadata-macaroon': macaroon}
        data = {'payment_request': lnd_string}
        # NOTE(review): verify=False disables TLS certificate verification.
        res = requests.post(lnd_url,
                            headers=lnd_headers,
                            data=json.dumps(data),
                            verify=False)
        if res.status_code == 200:
            print("send payment request to lnd succ")
class SubProcess(Greenlet):
    """
    Runs an external command inside a greenlet and captures its outcome.

    Once the greenlet has run, the return code, elapsed time and the
    captured stdout/stderr are available through the accessor methods.
    """

    def __init__(self, command, timeout=None):
        """
        Prepare (but do not start) the execution of `command`.

        command -- a list identifying the executable as its first entry
                   followed by the arguments to pass to it.
        timeout -- maximum number of seconds the process may run before it
                   is killed; None or zero disables the limit.
        """
        Greenlet.__init__(self, run=None)

        # we abort if this is set
        self._abort = Event()

        # this is set when an command has completed execution
        self._done = Event()

        # Tracks the PID file of item being executed
        self._pid = None

        # The return code is set after the programs execution
        self._returncode = ReturnCode.Unknown

        # The command itself should a list() identifing the executable as the
        # first entry followed by all of the arguments you wish to pass into
        # it.
        self._cmd = command

        # Since we need to poll until the execution of the process is
        # complete, we need to set a poll time.
        self._throttle = 0.5

        # Track when the execution started
        self._execution_begin = None

        # Track when the execution completed
        self._execution_finish = None

        # The number of seconds at most we will allow the execution of the
        # process to run for before we force it to abort it's operation.
        # Setting this to zero disables this timeout restriction
        self._timeout = 0.0
        if timeout:
            self._timeout = timeout

        # These are populated with the output of the stdout and
        # stderr stream.
        self._stdout = StringIO()
        self._stderr = StringIO()

    def elapsed(self):
        """
        Returns the elapsed time (as a float) of the threaded execution which
        includes the number of microseconds.
        """
        if self._execution_begin is None:
            # No elapsed time has taken place yet
            return 0.0

        if self._execution_finish is not None:
            # Execution has completed, we only want to calculate
            # the execution time.
            elapsed_time = self._execution_finish - self._execution_begin
        else:
            # Still running; measure up to now
            elapsed_time = datetime.utcnow() - self._execution_begin

        return (elapsed_time.days * 86400) \
            + elapsed_time.seconds \
            + (elapsed_time.microseconds / 1e6)

    def _kill_process(self):
        """
        Best-effort SIGKILL of the tracked process; does nothing when no
        PID is being tracked.
        """
        pid = self._pid
        if pid is None:
            return

        try:
            kill(pid, signal.SIGKILL)

        except Exception:
            # Deliberately ignored: the process may already be gone.
            pass

    def _run(self):
        """
        Launch the command, poll it until it exits, is aborted or exceeds
        the timeout, then record its return code and captured output.
        """
        # Make sure our done flag is not set
        self._done.clear()

        try:
            # Execute our Process
            p1 = subprocess.Popen(
                self._cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            # Calculate Current Time
            self._execution_begin = datetime.utcnow()

            # Store some information
            self._pid = p1.pid

            # Calculate Wait Time
            max_wait_time = self._execution_begin + \
                timedelta(seconds=self._timeout)

            # NOTE(review): output is only read after the loop below ends;
            # a child that fills a pipe buffer may stall until the
            # timeout/abort fires -- confirm for large outputs.
            timed_out = False
            while p1.poll() is None and not self._abort.is_set():
                if self._timeout and datetime.utcnow() >= max_wait_time:
                    # Process took too long
                    timed_out = True
                    break

                # CPU Throttle
                self._abort.wait(self._throttle)

            if timed_out:
                self._kill_process()
                # Force bad return code
                self._returncode = ReturnCode.Timeout

            elif p1.poll() is None or self._abort.is_set():
                # Safety
                self._kill_process()
                # Force bad return code
                self._returncode = ReturnCode.Aborted

            else:
                # Store return code
                self._returncode = p1.returncode

            # Execution Completion Time
            self._execution_finish = datetime.utcnow()

            # Retrieve stdout/stderr
            self._stdout = StringIO(p1.stdout.read())
            self._stderr = StringIO(p1.stderr.read())

        finally:
            # Always release waiters, even when the process could not be
            # launched, so is_complete() never blocks on a dead greenlet.

            # Make sure no one uses the PID anymore
            self._pid = None

            # Set our done flag
            self._done.set()

    def is_complete(self, timeout=None):
        """
        Returns True if the process has completed its execution
        if timeout is set to a time, then the function blocks up until that
        period of time elapses or the call completes.

        Times should be specified as float values (in seconds).
        """
        if timeout is not None:
            self._done.wait(timeout)

        return self._done.is_set()

    def response_code(self):
        """
        Returns the result
        """
        return self._returncode

    def successful(self):
        """
        Returns True if the calling action was successful or not.  This call
        can be subjective because it bases it's response simply on whether or
        not a zero (0) was returned by the program called. Usually a non-zero
        value means there was a failure.
        """
        # Compare by value; identity checks on integers are unreliable.
        return self._returncode == 0

    def _read_stream(self, stream, as_list):
        """
        Rewind `stream` and return its content, split on new lines when
        `as_list` is True.
        """
        # Ensure we're at the head of our buffer
        stream.seek(0, SEEK_SET)
        content = stream.read()

        if as_list:
            return NEW_LINE_RE.split(content)
        return content

    def stdout(self, as_list=True):
        """
        if as_list is set to True, then the stdout results are split on new
        lines into a list object
        """
        return self._read_stream(self._stdout, as_list)

    def stderr(self, as_list=True):
        """
        if as_list is set to True, then the stderr results are split on new
        lines into a list object
        """
        return self._read_stream(self._stderr, as_list)

    def pid(self):
        """
        returns the pid number of the running process, but returns None if
        the process is no longer running.
        """
        return self._pid

    def abort(self):
        """
        Abort the executing command
        """
        self._abort.set()
        self._kill_process()

        if self._pid:
            # Give the run loop a chance to record the outcome
            self.join(timeout=10.0)

    def __str__(self):
        """
        returns the command being executed
        """
        return ' '.join(self._cmd)

    def __repr__(self):
        """
        Return a printable version of the command being executed
        """
        return '<SubProcess cmd=%s execution_time=%ds return_code=%d />' % (
            self._cmd[0],
            self.elapsed(),
            self._returncode,
        )
class HuskarApiIOLoop(IOLoop):
    '''
    HuskarApiIOLoop is responsible for running eventloop connected to huskar
    api.

    The design is to use long polling for all requests, in disregard of
    whenever it's required.
    '''

    def initialize(self, url, token, cache_dir="/tmp/huskar",
                   max_alive_time=10 * 60, reconnect_gap=60):
        '''
        Set up the session, the state events and the watched components.

        `max_alive_time` is scaled by a random factor between 0.8 and 1.0,
        so the effective session lifetime differs per instance.
        '''
        super(HuskarApiIOLoop, self).initialize(url, token, cache_dir)
        self.url_path = join_url(self.url, '/api/data/long_poll')
        self.init_session()
        self.connected = Event()
        self.stop_loop_event = Event()
        self.stopped = Event()
        # Nothing is running yet, so the loop starts out "stopped".
        self.stopped.set()
        self.is_disconnected = Event()
        self.next_watch_completed_event = Event()
        self.greenlet = None
        self.reconnect_gap = reconnect_gap
        self.has_once_connected = False
        self.max_alive_time = (0.8 + 0.2 * random.random()) * max_alive_time

        self.watched_services = Component(self, 'services', cache_dir)
        self.watched_configs = Component(self, 'configs', cache_dir)
        self.watched_switches = Component(self, 'switches', cache_dir)

    def on_watch_list_changed(self, component_name):
        '''Force a new long-poll request if one is currently connected.'''
        if self.connected.is_set():
            self.force_reinit_session_next_round()

    def force_reinit_session_next_round(self):
        '''Mark the session as expired so the next check recreates it.'''
        # Race risks
        self.last_session_created_time = 0
        self.next_watch_completed_event.clear()

    def wait_for_next_loop(self, timeout):
        '''Block until the next watch round completes or `timeout` passes.'''
        return self.next_watch_completed_event.wait(timeout)

    def check_refresh_session(self):
        '''
        Recreate the session when it is older than `max_alive_time`.

        Returns True when a new session was created, False otherwise.
        '''
        if not self.next_watch_completed_event.is_set() and \
                self.last_session_created_time != 0:
            self.next_watch_completed_event.set()

        if time.time() - self.last_session_created_time > self.max_alive_time:
            self.init_session()
            return True
        return False

    def init_session(self):
        '''Create a fresh `requests` session carrying the auth headers.'''
        import requests
        self.session = requests.Session()
        self.session.headers['User-Agent'] = ' '.join(
            [USER_AGENT, self.session.headers.get('User-Agent', '')])
        self.session.headers['Authorization'] = self.token
        self.last_session_created_time = time.time()
        if self._soa_mode is None:
            return
        self.session.headers[SOA_MODE_HEADER] = self._soa_mode
        self.session.headers[SOA_CLUSTER_HEADER] = self._soa_cluster

    def is_running(self):
        '''Return the polling greenlet (None when `run` was never called).'''
        return self.greenlet

    def run(self):
        '''Spawn the long-poll greenlet unless one is already present.'''
        if not self.greenlet:
            self.greenlet = gevent.spawn(self.start_long_poll)

    def stop(self, timeout=None, close_components=True):
        '''
        Ask the loop to stop, optionally closing the watched components.

        When `timeout` is given, waits that long for the loop to report
        stopped and returns the wait result; otherwise returns None.
        '''
        self.stop_loop_event.set()
        if close_components:
            self.watched_configs.close()
            self.watched_services.close()
            self.watched_switches.close()
        if timeout is not None:
            return self.stopped.wait(timeout)

    def wait(self, timeout=10.0):
        '''
        Wait for the first connection, up to `timeout` seconds.

        Returns None without waiting when a connection was already made.
        '''
        if not (self.has_once_connected or self.connected.is_set()):
            return self.connected.wait(timeout=timeout)

    def is_connected(self):
        '''Return True while the long-poll connection is established.'''
        return self.connected.is_set()

    def event_loop(self):
        '''
        Run long-poll requests until the stop event is set.

        Each round posts the watch lists and feeds every streamed line to
        `handle_message`. Connection errors are reported through
        `notify('polling_error', ...)` and retried after a randomised,
        growing delay.
        '''
        try:
            from httplib import IncompleteRead  # Py2
        except ImportError:
            from http.client import IncompleteRead  # Py3
        import requests

        fail_count = Counter(0)

        def loop():
            # Use closure to jump around generator gc issue. See
            # https://groups.google.com/forum/#!topic/comp.lang.python/EhAY4ZmWaIw
            r = None
            try:
                payload = {
                    k: v
                    for k, v in iteritems({
                        'service': self.watched_services.dict,
                        'config': self.watched_configs.dict,
                        'switch': self.watched_switches.dict
                    }) if v
                }
                r = self.session.post(
                    self.url_path,
                    json=payload,
                    stream=True,
                    timeout=3,
                )

                if not r.ok:
                    logger.error('failed to watch: %d %r',
                                 r.status_code, r.text)
                r.raise_for_status()
                for i in r.iter_lines(chunk_size=4096, decode_unicode=True):
                    self.handle_message(i)
                    fail_count.reset()
                    if not self.connected.is_set():
                        self.connected.set()
                    self.is_disconnected.clear()
                    self.has_once_connected = True
                    if self.stop_loop_event.is_set():
                        return True
                    if self.check_refresh_session():
                        break
            except (socket.gaierror, socket.error, IncompleteRead,
                    requests.RequestException) as error:
                self.connected.clear()
                self.is_disconnected.set()
                if self.stop_loop_event.is_set():
                    logger.info("Stopping huskar connection event loop")
                    return True
                fail_count.incr()
                message = ''
                exc_cls = HuskarDiscoveryServerError
                if (isinstance(error, requests.RequestException) and
                        error.response is not None):
                    response = error.response
                    if response.status_code < 500:
                        exc_cls = HuskarDiscoveryUserError
                    message = 'status_code: {0}, body: {1!r}'.format(
                        response.status_code, response.content[:200])
                try:
                    reraise(exc_cls(error, self.url_path, message))
                except HuskarDiscoveryException as e:
                    self.notify('polling_error', e)

                retry_wait = (0.5 + random.random()) * fail_count.get() *\
                    self.reconnect_gap
                logger.warning('Huskar connection disconnected, '
                               'will retry in %s', retry_wait, exc_info=True)
                gevent.sleep(retry_wait)
            finally:
                # A streamed response keeps its connection checked out
                # until it is fully read or closed; every exit path above
                # (break, return, exception) may leave it unread.
                if r is not None:
                    r.close()

        while True:
            if loop():
                return

    def start_long_poll(self):
        '''Run the event loop and reset the state events when it ends.'''
        self.connected.clear()
        self.stopped.clear()
        try:
            self.event_loop()
        finally:
            self.stopped.set()
            self.stop_loop_event.clear()
            self.connected.clear()

    def update_watches(self, message, full=False):
        '''Apply an update message to the three watched components.'''
        self.watched_services.update(message.get('service'), full=full)
        self.watched_configs.update(message.get('config'), full=full)
        self.watched_switches.update(message.get('switch'), full=full)

    def delete_watches(self, message):
        '''Apply a delete message to the three watched components.'''
        self.watched_services.delete(message.get('service'))
        self.watched_configs.delete(message.get('config'))
        self.watched_switches.delete(message.get('switch'))

    def handle_message(self, message):
        '''
        Decode one streamed JSON line and dispatch it by its `message` type.

        Messages are dropped while the loop is stopped. Undecodable lines
        and handler failures are logged and never raised.
        '''
        if self.stopped.is_set():
            return

        if not self.has_once_connected:
            logger.info("Got Huskar messages. Processing...")
        try:
            message = json.loads(message)
        except Exception:
            logger.warning("Error parsing huskar message: %r", message)
            return

        try:
            if message['message'] == 'ping':
                pass
            elif message['message'] == 'update':
                self.update_watches(message['body'])
            elif message['message'] == 'delete':
                self.delete_watches(message['body'])
            elif message['message'] == 'all':
                self.update_watches(message['body'], full=True)
        except Exception as err:
            logger.exception("Error handling huskar api message: %r", err)
class BaseService(object):
    """Base class for services that push queued notifications in a greenlet.

    Subclasses implement `send_notification`. Failed notifications are put
    back on the queue and retried after a growing delay.
    """
    service_type = None

    # Last exception captured by save_err(); stays None until one occurs,
    # so get_last_error() is safe to call at any time.
    last_err = None

    def __init__(self):
        self._send_queue = Queue()
        self._send_queue_cleared = Event()
        self._send_greenlet = None
        self.timeout = INITIAL_TIMEOUT
        self._feedback_queue = Queue()

    def start(self):
        """Start the message sending loop."""
        if self._send_greenlet is None:
            self._send_greenlet = gevent.spawn(self.save_err, self._send_loop)

    def _send_loop(self):
        """Send queued notifications one by one until the greenlet is killed."""
        self._send_greenlet = gevent.getcurrent()
        try:
            logger.info("%s service started", self.service_type)
            while True:
                message = self._send_queue.get()
                try:
                    self.send_notification(message)
                except Exception:
                    self.error_sending_notification(message)
                else:
                    # A successful send resets the retry delay.
                    self.timeout = INITIAL_TIMEOUT
                finally:
                    # Wake up wait_send() once the queue has drained.
                    if self._send_queue.qsize() < 1 and \
                            not self._send_queue_cleared.is_set():
                        self._send_queue_cleared.set()
        except gevent.GreenletExit:
            pass
        finally:
            self._send_greenlet = None
            logger.info("%s service stopped", self.service_type)

    def stop(self, timeout=10.0):
        """Stop the sending loop, first waiting up to `timeout` for pending
        notifications.

        Returns True when the queue was empty at the end.
        """
        if (self._send_greenlet is not None) and \
                (self._send_queue.qsize() > 0):
            self.wait_send(timeout=timeout)

        if self._send_greenlet is not None:
            gevent.kill(self._send_greenlet)
            self._send_greenlet = None
        return self._send_queue.qsize() < 1

    def wait_send(self, timeout=None):
        """Wait until the send loop next reports an empty queue.

        Returns the result of the event wait.
        """
        self._send_queue_cleared.clear()
        return self._send_queue_cleared.wait(timeout=timeout)

    def queue_notification(self, notification):
        """Queue `notification` for sending."""
        self._send_queue.put(notification)

    def send_notification(self, notification):
        """Deliver one notification; must be implemented by subclasses."""
        raise NotImplementedError

    def save_err(self, func, *args, **kwargs):
        """Call `func`, remembering and re-raising any exception it raises."""
        try:
            func(*args, **kwargs)
        except Exception as e:
            self.last_err = e
            raise

    def get_last_error(self):
        """Return the last exception recorded by `save_err`, or None."""
        return self.last_err

    def error_sending_notification(self, notification):
        """Re-queue a failed notification and back off before the next try."""
        logger.exception("Error while pushing")
        self._send_queue.put(notification)
        gevent.sleep(self.timeout)
        # approaching Fibonacci series
        timeout = int(round(float(self.timeout) * 1.6))
        self.timeout = min(timeout, MAX_TIMEOUT)

    def check_blocking(self):
        """Return True while the retry delay is above its initial value."""
        return self.timeout != INITIAL_TIMEOUT
class BaseServer(object):
    """An abstract base class that implements some common functionality for the servers in gevent.

    *listener* can either be an address that the server should bind on or a :class:`gevent.socket.socket`
    instance that is already bound (and put into listening mode in case of TCP socket).

    *spawn*, if provided, is called to create a new greenlet to run the handler. By default, :func:`gevent.spawn` is used.

    Possible values for *spawn*:

    * a :class:`gevent.pool.Pool` instance -- *handle* will be executed
      using :meth:`Pool.spawn` method only if the pool is not full.
      While it is full, all the connection are dropped;
    * :func:`gevent.spawn_raw` -- *handle* will be executed in a raw
      greenlet which have a little less overhead then :class:`gevent.Greenlet` instances spawned by default;
    * ``None`` -- *handle* will be executed right away, in the :class:`Hub` greenlet.
      *handle* cannot use any blocking functions as it means switching to the :class:`Hub`.
    * an integer -- a shortcut for ``gevent.pool.Pool(integer)``
    """
    # the number of seconds to sleep in case there was an error in accept() call
    # for consecutive errors the delay will double until it reaches max_delay
    # when accept() finally succeeds the delay will be reset to min_delay again
    min_delay = 0.01
    max_delay = 1

    # Sets the maximum number of consecutive accepts that a process may perform on
    # a single wake up. High values give higher priority to high connection rates,
    # while lower values give higher priority to already established connections.
    # Default is 100. Note, that in case of multiple working processes on the same
    # listening value, it should be set to a lower value. (pywsgi.WSGIServer sets it
    # to 1 when environ["wsgi.multiprocess"] is true)
    max_accept = 100

    _spawn = Greenlet.spawn

    # the default timeout that we wait for the client connections to close in stop()
    stop_timeout = 1

    fatal_errors = (errno.EBADF, errno.EINVAL, errno.ENOTSOCK)

    def __init__(self, listener, handle=None, spawn='default'):
        self._stop_event = Event()
        # The server starts out stopped; start() clears the event.
        self._stop_event.set()
        self._watcher = None
        self._timer = None
        self.pool = None
        try:
            self.set_listener(listener)
            self.set_spawn(spawn)
            self.set_handle(handle)
            self.delay = self.min_delay
            self.loop = get_hub().loop
            if self.max_accept < 1:
                raise ValueError('max_accept must be positive int: %r' % (self.max_accept, ))
        except:
            # Release whatever was set up so far, then propagate.
            self.close()
            raise

    def set_listener(self, listener):
        """Record the listening socket, or parse *listener* as an address."""
        if hasattr(listener, 'accept'):
            if hasattr(listener, 'do_handshake'):
                raise TypeError('Expected a regular socket, not SSLSocket: %r' % (listener, ))
            self.family = listener.family
            self.address = listener.getsockname()
            self.socket = listener
        else:
            self.family, self.address = parse_address(listener)

    def set_spawn(self, spawn):
        """Select how handlers are started (see the class docstring)."""
        if spawn == 'default':
            self.pool = None
            # Copies the class-level default onto the instance.
            self._spawn = self._spawn
        elif hasattr(spawn, 'spawn'):
            self.pool = spawn
            self._spawn = spawn.spawn
        elif isinstance(spawn, (int, long)):
            from gevent.pool import Pool
            self.pool = Pool(spawn)
            self._spawn = self.pool.spawn
        else:
            self.pool = None
            self._spawn = spawn
        if hasattr(self.pool, 'full'):
            self.full = self.pool.full
        if self.pool is not None:
            # Resume accepting once the pool has a free slot again.
            self.pool._semaphore.rawlink(self._start_accepting_if_started)

    def set_handle(self, handle):
        """Install *handle* as the connection handler.

        Raises TypeError when no handler was given and none is defined.
        """
        if handle is not None:
            self.handle = handle
        if hasattr(self, 'handle'):
            self._handle = self.handle
        else:
            raise TypeError("'handle' must be provided")

    def _start_accepting_if_started(self, _event=None):
        if self.started:
            self.start_accepting()

    def start_accepting(self):
        """Install the read watcher on the listening socket if missing."""
        if self._watcher is None:
            # just stop watcher without creating a new one?
            self._watcher = self.loop.io(self.socket.fileno(), 1)
            self._watcher.start(self._do_read)

    def stop_accepting(self):
        """Remove the read watcher and any pending retry timer."""
        if self._watcher is not None:
            self._watcher.stop()
            self._watcher = None
        if self._timer is not None:
            self._timer.stop()
            self._timer = None

    def do_handle(self, *args):
        """Run the handler, directly or through the configured spawner."""
        spawn = self._spawn
        if spawn is None:
            self._handle(*args)
        else:
            spawn(self._handle, *args)

    def _retry_accepting_later(self):
        """Pause accepting for the current delay, then double the delay
        (capped at max_delay). Does nothing when the delay is negative."""
        if self.delay >= 0:
            self.stop_accepting()
            self._timer = self.loop.timer(self.delay)
            self._timer.start(self._start_accepting_if_started)
            self.delay = min(self.max_delay, self.delay * 2)

    def _do_read(self):
        """Accept and dispatch up to max_accept connections per wake up."""
        for _ in xrange(self.max_accept):
            if self.full():
                self.stop_accepting()
                return
            try:
                args = self.do_read()
                self.delay = self.min_delay
                if not args:
                    return
            except:
                self.loop.handle_error(self, *sys.exc_info())
                ex = sys.exc_info()[1]
                if self.is_fatal_error(ex):
                    self.close()
                    sys.stderr.write('ERROR: %s failed with %s\n' % (self, str(ex) or repr(ex)))
                    return
                self._retry_accepting_later()
                break
            else:
                try:
                    self.do_handle(*args)
                except:
                    self.loop.handle_error((args[1:], self), *sys.exc_info())
                    self._retry_accepting_later()
                    break

    def full(self):
        return False

    def __repr__(self):
        return '<%s at %s %s>' % (type(self).__name__, hex(id(self)), self._formatinfo())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._formatinfo())

    def _formatinfo(self):
        """Describe socket, address and handler for __repr__/__str__."""
        if hasattr(self, 'socket'):
            try:
                fileno = self.socket.fileno()
            except Exception as ex:
                fileno = str(ex)
            result = 'fileno=%s ' % fileno
        else:
            result = ''
        try:
            if isinstance(self.address, tuple) and len(self.address) == 2:
                result += 'address=%s:%s' % self.address
            else:
                result += 'address=%s' % (self.address, )
        except Exception as ex:
            result += str(ex) or '<error>'
        try:
            handle = getfuncname(self.__dict__['handle'])
        except Exception:
            handle = None
        if handle is not None:
            result += ' handle=' + handle
        return result

    @property
    def server_host(self):
        """IP address that the server is bound to (string)."""
        if isinstance(self.address, tuple):
            return self.address[0]

    @property
    def server_port(self):
        """Port that the server is bound to (an integer)."""
        if isinstance(self.address, tuple):
            return self.address[1]

    def init_socket(self):
        """If the user initialized the server with an address rather than socket,
        then this function will create a socket, bind it and put it into listening mode.

        It is not supposed to be called by the user, it is called by :meth:`start` before starting
        the accept loop."""
        pass

    @property
    def started(self):
        return not self._stop_event.is_set()

    def start(self):
        """Start accepting the connections.

        If an address was provided in the constructor, then also create a socket,
        bind it and put it into the listening mode.
        """
        self.init_socket()
        self._stop_event.clear()
        try:
            self.start_accepting()
        except:
            self.close()
            raise

    def close(self):
        """Close the listener socket and stop accepting."""
        self._stop_event.set()
        try:
            self.stop_accepting()
        finally:
            try:
                self.socket.close()
            except Exception:
                pass
            finally:
                # Drop per-instance state so `closed` reports True and the
                # class-level defaults apply again.
                self.__dict__.pop('socket', None)
                self.__dict__.pop('handle', None)
                self.__dict__.pop('_handle', None)
                self.__dict__.pop('_spawn', None)
                self.__dict__.pop('full', None)
                if self.pool is not None:
                    self.pool._semaphore.unlink(self._start_accepting_if_started)

    @property
    def closed(self):
        return not hasattr(self, 'socket')

    def stop(self, timeout=None):
        """Stop accepting the connections and close the listening socket.

        If the server uses a pool to spawn the requests, then :meth:`stop` also waits
        for all the handlers to exit. If there are still handlers executing after *timeout*
        has expired (default 1 second), then the currently running handlers in the pool are killed."""
        self.close()
        if timeout is None:
            timeout = self.stop_timeout
        if self.pool:
            self.pool.join(timeout=timeout)
            self.pool.kill(block=True, timeout=1)

    def serve_forever(self, stop_timeout=None):
        """Start the server if it hasn't been already started and wait until it's stopped."""
        # add test that serve_forever exists on stop()
        if not self.started:
            self.start()
        try:
            self._stop_event.wait()
        finally:
            Greenlet.spawn(self.stop, timeout=stop_timeout).join()

    def is_fatal_error(self, ex):
        """Return True when *ex* is a socket error whose first argument is
        one of ``fatal_errors``."""
        if not isinstance(ex, _socket.error):
            return False
        # Read the code through .args: indexing the exception itself only
        # works on Python 2, and an argument-less error must not raise.
        args = getattr(ex, 'args', None)
        return bool(args) and args[0] in self.fatal_errors
def single_queue_send(
        transport: 'UDPTransport',
        recipient: typing.Address,
        queue: Queue_T,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        message_retries: int,
        message_retry_timeout: int,
        message_retry_max_timeout: int,
):
    """ Deliver the messages of one queue to `recipient`, in order.

    The head of the queue is only removed after it has been acknowledged.

    Notes:
    - This task must be the only consumer of queue.
    - This task can be killed at any time, but the intended usage is to stop
      it with the event_stop.
    - If there are many queues for the same recipient, it is the caller's
      responsibility to not start them together to avoid congestion.
    - This task assumes the endpoint is never cleared after it's first known.
      If this assumption changes the code must be updated to handle unknown
      addresses.
    """

    # Only a NotifyingQueue lets the task be cancelled while it is waiting
    # for an element to be inserted in the queue.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused across iterations, so it must be cleared with care.
    wakeup = event_first_of(
        queue,
        event_stop,
    )

    # Block until the endpoint is registered or the task is told to quit.
    healthy_or_stop = event_first_of(
        event_healthy,
        event_stop,
    )
    healthy_or_stop.wait()

    while True:
        wakeup.wait()

        if event_stop.is_set():
            break

        # Being the sole consumer guarantees the queue still holds an item
        # here, so the non-blocking peek cannot raise Empty.
        (messagedata, message_id) = queue.peek(block=False)

        retry_schedule = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        try:
            delivered = retry_with_recovery(
                transport,
                messagedata,
                message_id,
                recipient,
                event_stop,
                event_healthy,
                event_unhealthy,
                retry_schedule,
            )
        except RaidenShuttingDown:
            # Leave quietly so the shutdown stays clean.
            break

        if not delivered:
            continue

        queue.get()

        # Testing the queue for emptiness does not context-switch, so its
        # length cannot change in between; a later insert sets the event
        # again.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            break
class HttpHealthCheckShareAdjuster(ShareAdjuster):
    """
    Share adjuster driven by a periodic HTTP health check.

    The endpoint's share is 1.0 while its status is one of HEALTHY_STATUSES
    and 0.0 otherwise.
    """

    def __init__(self, endpoint, signal_update_fn, route='/health', interval=5,
                 timeout=3.0, unhealthy_threshold=2, healthy_threshold=2,
                 port_name=None, http_method='GET'):
        """
        A basic http health check implementation.

        Parameters match those available on an Elastic Loadbalancer.
        A check succeeds only on a 200 response code.

        Args:
          endpoint - Endpoint to check.
          signal_update_fn - function - called (without arguments) whenever
            the status changes.
          route - str - http route to check.
          interval - int - seconds between checks.
          timeout - float - seconds before a check attempt times out.
          unhealthy_threshold - int - consecutive failures before the endpoint
            is marked unhealthy.
          healthy_threshold - int - consecutive successes before the endpoint
            is marked healthy.
          port_name - str - Optional name of port to check. EG: 'health'.
          http_method - str - Optional name of the http verb. EG: GET or HEAD

        Raises:
          Exception - if http_method is not in SUPPORTED_HEALTHCHECK_METHODS.
        """
        super(HttpHealthCheckShareAdjuster, self).__init__(endpoint,
                                                           signal_update_fn)
        self._route = route
        self._interval = int(interval)
        self._timeout = float(timeout)
        self._unhealthy_threshold = int(unhealthy_threshold)
        self._healthy_threshold = int(healthy_threshold)
        self._port_name = port_name

        # Keep just enough history to evaluate both thresholds.
        history_size = self._healthy_threshold + self._unhealthy_threshold
        self._check_results = collections.deque(maxlen=history_size)

        self._status = HealthCheckStatus.INITIALIZING
        self._stop_event = Event()

        if http_method.upper() not in SUPPORTED_HEALTHCHECK_METHODS:
            supported = ', '.join(SUPPORTED_HEALTHCHECK_METHODS)
            raise Exception('http_method only supports: {}'.format(supported))
        # Stored lower-cased because it is used as an attribute name on the
        # requests module (requests.get, requests.head, ...).
        self._http_method = http_method.lower()

    @property
    def status(self):
        """
        Current status of the endpoint.

        Returns:
          A HealthCheckStatus value.
        """
        return self._status

    def start(self):
        """
        Schedule the first healthcheck, one interval from now.
        """
        spawn_later(self._interval, self._check)
        self._record(HttpHealthCheckLogEvent.STARTED_CHECKER,
                     HttpHealthCheckLogResult.SUCCESS)

    def stop(self):
        """
        Ask the checker to stop; the next scheduled check returns immediately.
        """
        self._stop_event.set()
        self._record(HttpHealthCheckLogEvent.STOPPED_CHECKER,
                     HttpHealthCheckLogResult.SUCCESS)

    @property
    def auditable_share(self):
        """
        Current share adjustment factor together with its audit item.

        Returns:
          (1.0, AuditItem) when the status is healthy, (0.0, AuditItem)
          otherwise.
        """
        if self.status in HEALTHY_STATUSES:
            return 1.0, AuditItem('health', '1.0')
        return 0.0, AuditItem('health', '0.0')

    def _build_check_uri(self):
        """
        Builds the URI to check.

        The port is looked up by name in the endpoint's port map when a
        port_name was configured, otherwise the endpoint's own port is used.

        Returns:
          Check URI string.
        """
        if self._port_name:
            port = self._endpoint.context['port_map'][self._port_name]
        else:
            port = self._endpoint.port
        return 'http://{0}:{1}{2}'.format(self._endpoint.host,
                                          port,
                                          self._route)

    def _check(self):
        """
        Run one healthcheck, update the status and schedule the next check.

        Does nothing (and schedules nothing) once stop() has been called.
        """
        if self._stop_event.is_set():
            return

        check_uri = self._build_check_uri()
        # Set only for failures that still need to be logged and recorded
        # after the try block.
        error_log_fn = None
        try:
            self._record(HttpHealthCheckLogEvent.STARTING_CHECK,
                         HttpHealthCheckLogResult.SUCCESS,
                         log_fn=logger.debug)
            send_request = getattr(requests, self._http_method)
            response = send_request(check_uri, timeout=self._timeout)
            if response.status_code == requests.codes.ok:
                check_result = HealthCheckResult.SUCCESS
                self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                             HttpHealthCheckLogResult.SUCCESS,
                             log_fn=logger.debug)
            else:
                check_result = HealthCheckResult.ERROR_CODE
                self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                             HttpHealthCheckLogResult.FAILURE,
                             'status_code:{0}'.format(response.status_code))
        except requests.exceptions.Timeout:
            check_result = HealthCheckResult.TIMEOUT
            self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                         HttpHealthCheckLogResult.TIMEOUT)
        except requests.exceptions.ConnectionError as ex:
            # Classify the failure from the exception text.
            error_text = unicode(ex)
            if 'gaierror' in error_text:
                check_result = HealthCheckResult.KNOWN_LOCAL_ERROR
                error_log_fn = logger.error
            elif 'connection refused' in error_text.lower():
                check_result = HealthCheckResult.KNOWN_REMOTE_ERROR
                error_log_fn = logger.error
            else:
                check_result = HealthCheckResult.UNKNOWN_ERROR
                error_log_fn = logger.exception
        except Exception:
            check_result = HealthCheckResult.UNKNOWN_ERROR
            error_log_fn = logger.exception

        if error_log_fn:
            error_log_fn('Exception when executing HttpHealthCheck.')
            self._record(HttpHealthCheckLogEvent.RUNNING_CHECK, check_result)

        self._update_status(check_result)
        spawn_later(self._interval, self._check)

    def _record(self, event, result, msg='', log_fn=logger.info):
        """
        Utility to record HttpHealthCheck events and results.

        Args:
          event - HttpHealthCheckLogEvent.
          result - HttpHealthCheckLogResult.
          msg - str - Extra message.
          log_fn - function - logger function to use.
        """
        template = ('event:%(event)s result:%(result)s '
                    'check_uri:%(check_uri)s msg:%(msg)s')
        log_fn(template, {
            'event': event,
            'result': result,
            'check_uri': self._build_check_uri(),
            'msg': msg
        })

    def _update_status(self, check_result):
        """
        Fold a check result into the history and update the status if the
        configured thresholds are met.

        Results listed in UNCHANGED_RESULTS are ignored entirely. On a status
        change the signal_update_fn callback, if set, is invoked.

        Args:
          check_result - HealthCheckResult
        """
        if check_result in UNCHANGED_RESULTS:
            return

        self._check_results.append(check_result)
        history = list(self._check_results)

        calculated_status = self._status

        recent = history[-self._healthy_threshold:]
        if (len(recent) == self._healthy_threshold and
                all(res in HEALTHY_RESULTS for res in recent)):
            calculated_status = HealthCheckStatus.HEALTHY

        # Evaluated second, so an unhealthy verdict wins over a healthy one.
        recent = history[-self._unhealthy_threshold:]
        if (len(recent) == self._unhealthy_threshold and
                all(res in UNHEALTHY_RESULTS for res in recent)):
            calculated_status = HealthCheckStatus.UNHEALTHY

        if self._status != calculated_status:
            previous_status = self._status
            self._status = calculated_status
            self._record(HttpHealthCheckLogEvent.UPDATED_HEALTH_STATUS,
                         HttpHealthCheckLogResult.SUCCESS,
                         '{0} -> {1}'.format(previous_status,
                                             calculated_status))
            if self._signal_update_fn:
                try:
                    # The callback is invoked without arguments.
                    self._signal_update_fn()
                except Exception:
                    logger.exception('Exception when executing callback on '
                                     'BasicHttpHealthCheck status change.')
                    self._record(HttpHealthCheckLogEvent.RUNNING_CALLBACK,
                                 HttpHealthCheckLogResult.ERROR)
class Actor(object):
    """
    The actor class is the abstract base class for all implementing compysition actors.
    In order to be a valid 'module' and connectable with the compysition event flow, a module must be an extension of this class.

    The Actor is responsible for putting events on outbox queues, and consuming incoming events on inbound queues.
    """

    __metaclass__ = abc.ABCMeta

    DEFAULT_EVENT_SERVICE = "default"

    # Accepted type(s) for consumed / produced events. ``start`` normalises
    # both attributes to tuples.
    input = Event
    output = Event

    # Optional iterable of attribute names that must be present (and truthy)
    # on every consumed event.
    REQUIRED_EVENT_ATTRIBUTES = None

    # Sentinel letting ``send_event`` tell "no queues argument given" apart
    # from any explicitly passed value.
    __NOT_DEFINED = object()

    def __init__(self, name, size=0, blocking_consume=False, rescue=False, max_rescue=5, *args, **kwargs):
        """
        **Base class for all compysition actors**

        Parameters:

            name (str):
                | The instance name
            size (Optional[int]):
                | The max amount of events any outbound queue connected to this actor may contain. A value of 0 represents an infinite qsize
                | (Default: 0)
            blocking_consume (Optional[bool]):
                | Define if this module should spawn a greenlet for every single 'consume' execution, or if
                | it should execute 'consume' and block until that 'consume' is complete. This is usually
                | only necessary if executing work on an event in the order that it was received is critical.
                | (Default: False)
            rescue (Optional[bool]):
                | When True, an event whose 'consume' raised an unexpected exception is put back on its
                | originating queue (after a 1 second pause) instead of being sent to the error queues.
                | (Default: False)
            max_rescue (Optional[int]):
                | Maximum number of times a single event is re-queued by the rescue mechanism.
                | (Default: 5)

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.blockdiag_config = {"shape": "box"}
        self.name = name
        self.size = size
        self.pool = QueuePool(size)
        self.logger = Logger(name, self.pool.logs)
        self.__loop = True
        self.threads = RestartPool(logger=self.logger, sleep_interval=1)

        # Set by start(); consumers wait on it before processing anything.
        self.__run = GEvent()
        self.__run.clear()

        # Set by stop(); block() waits on it.
        self.__block = GEvent()
        self.__block.clear()

        self.__blocking_consume = blocking_consume
        self.rescue = rescue
        self.max_rescue = max_rescue

    def block(self):
        """Blocks the caller until stop() has been called."""
        self.__block.wait()

    def connect_error_queue(self, destination_queue_name="inbox", *args, **kwargs):
        """Connects an error queue of this actor to the 'error_<destination_queue_name>' queue of the destination."""
        self.__connect_queue(
            pool_scope=self.pool.error,
            destination_queue_name="error_{0}".format(destination_queue_name),
            *args,
            **kwargs)

    def connect_log_queue(self, destination_queue_name="inbox", *args, **kwargs):
        """Connects a log queue of this actor to the 'log_<destination_queue_name>' queue of the destination."""
        self.__connect_queue(
            pool_scope=self.pool.logs,
            destination_queue_name="log_{0}".format(destination_queue_name),
            *args,
            **kwargs)

    def connect_queue(self, *args, **kwargs):
        """Connects an outbound queue of this actor to an inbound queue of the destination."""
        self.__connect_queue(pool_scope=self.pool.outbound, *args, **kwargs)

    def __connect_queue(self,
                        source_queue_name="outbox",
                        destination=None,
                        destination_queue_name="inbox",
                        pool_scope=None,
                        check_existing=True):
        """Connects the <source_queue_name> queue to the <destination> queue.
        If the destination queue already exists, the source queue is changed to be a reference to that queue, as Many to One connections
        are supported, but One to Many is not

        Raises QueueConnected when <check_existing> is True and either side is already connected."""

        source_queue = pool_scope.get(source_queue_name, None)
        destination_queue = destination.pool.inbound.get(destination_queue_name, None)

        if check_existing:
            if source_queue:
                raise QueueConnected(
                    "Outbound queue {queue_name} on {source_name} is already connected"
                    .format(queue_name=source_queue_name, source_name=self.name))

            if destination_queue:
                raise QueueConnected(
                    "Inbound queue {queue_name} on {destination_name} is already connected"
                    .format(queue_name=destination_queue_name, destination_name=destination.name))

        if not source_queue:
            if not destination_queue:
                # Neither side exists yet: create one queue and share it.
                source_queue = pool_scope.add(source_queue_name)
                destination.register_consumer(destination_queue_name, source_queue)
            elif destination_queue:
                # Only the destination exists: make the source an alias of it.
                pool_scope.add(source_queue_name, queue=destination_queue)
        else:
            if not destination_queue:
                # Only the source exists: let the destination consume it.
                destination.register_consumer(destination_queue_name, source_queue)
            else:
                # Both exist: move pending events over and alias the destination.
                source_queue.dump(destination_queue)
                pool_scope.add(destination_queue.name, queue=destination_queue)

        self.logger.info("Connected queue '{0}' to '{1}.{2}'".format(
            source_queue_name, destination.name, destination_queue_name))

    def loop(self):
        '''The global lock for this module'''
        return self.__loop

    def is_running(self):
        '''Returns True once start() has been called.'''
        return self.__run.is_set()

    def register_consumer(self, queue_name, queue):
        '''
        Adds the passed queue under <queue_name> to the inbound pool and spawns a consumer greenthread for it.
        '''
        self.pool.inbound.add(queue_name, queue=queue)
        self.threads.spawn(self.__consumer, self.consume, queue)

    def start(self):
        '''Starts the module.'''

        # Normalise input/output to tuples so they are usable with isinstance()
        # and indexable by __do_consume / create_event.
        if not isinstance(self.input, tuple):
            if isinstance(self.input, list):
                self.input = tuple(self.input)
            else:
                self.input = (self.input, )

        if not isinstance(self.output, tuple):
            if isinstance(self.output, list):
                self.output = tuple(self.output)
            else:
                self.output = (self.output, )

        if hasattr(self, "pre_hook"):
            self.logger.debug("pre_hook() found, executing")
            self.pre_hook()

        self.__run.set()
        self.logger.debug(
            "Started with max queue size of {size} events".format(size=self.size))

    def stop(self):
        '''Clears the loop flag, releases block() and runs post_hook() if defined.
        Registered consumers are NOT joined here (see comment below).'''

        self.__loop = False
        self.__block.set()

        # This should do a self.threads.join() but currently it is blocking. This issue needs to be resolved
        # But in the meantime post_hook will execute

        if hasattr(self, "post_hook"):
            self.logger.debug("post_hook() found, executing")
            self.post_hook()

    def send_event(self, event, queues=__NOT_DEFINED, check_output=True):
        """
        Sends a deepcopy of the event to every queue in <queues>.

        :param event: The event to send
        :param queues: Queues to send to. Defaults to all registered outbound queues
        :param check_output: When True, the event type is validated against 'output' before sending
        """

        if queues is self.__NOT_DEFINED:
            queues = self.pool.outbound.values()

        # check_output used to be silently dropped here, which made
        # send_event(..., check_output=False) validate the type anyway.
        self._loop_send(event, queues, check_output=check_output)

    def send_error(self, event):
        """
        Sends the event to all registered error queues, without output type validation
        """
        queues = self.pool.error.values()
        self._loop_send(event, queues=queues, check_output=False)

    def _loop_send(self, event, queues, check_output=True):
        """
        Puts an independent deepcopy of the event on each queue, in order.

        :param event: The event to send
        :param queues: Iterable of queues (a list, or a dict values view)
        :param check_output: When True, raise InvalidActorOutput if the event is not an instance of 'output'
        :return: None
        """
        if check_output and not isinstance(event, self.output):
            raise InvalidActorOutput(
                "Event was of type '{_type}', expected '{output}'".format(
                    _type=type(event), output=self.output))

        # Iterate over a snapshot: _send yields to other greenlets, which may
        # connect new queues meanwhile. A plain loop (instead of indexing plus
        # map()) also works with dict views and with Python 3's lazy map.
        for _queue in list(queues):
            self._send(_queue, deepcopy(event))

    def _send(self, queue, event):
        """Puts the event on the queue, then yields control to other greenlets."""
        queue.put(event)
        sleep(0)

    def __consumer(self, function, queue):
        '''Greenthread which applies <function> to each element from <queue>
        '''

        self.__run.wait()

        while self.loop():
            queue.wait_until_content()
            try:
                event = queue.get(timeout=10)
            except QueueEmpty:
                pass
            else:
                if self.__blocking_consume:
                    self.__do_consume(function, event, queue)
                else:
                    self.threads.spawn(self.__do_consume, function, event, queue, restart=False)

        # The loop flag was cleared: hand off whatever is still queued.
        while True:
            if queue.qsize() > 0:
                try:
                    event = queue.get()
                except QueueEmpty:
                    break
                else:
                    self.threads.spawn(self.__do_consume, function, event, queue, restart=False)
            else:
                break

    def __do_consume(self, function, event, queue):
        """
        A function designed to be spun up in a greenlet to maximize concurrency for the __consumer method
        This function actually calls the consume function for the actor

        The event is converted to the first 'input' type when it is of an unexpected type, and is checked
        against REQUIRED_EVENT_ATTRIBUTES before <function> is invoked.
        """
        try:
            if not isinstance(event, self.input):
                new_event = event.convert(self.input[0])
                self.logger.warning(
                    "Incoming event was of type '{_type}' when type {input} was expected. Converted to {converted}"
                    .format(_type=type(event), input=self.input, converted=type(new_event)),
                    event=event)
                event = new_event

            if self.REQUIRED_EVENT_ATTRIBUTES:
                # Collect the NAMES of the absent/falsy attributes so the
                # error message says what is actually missing.
                missing = [
                    attribute for attribute in self.REQUIRED_EVENT_ATTRIBUTES
                    if not event.get(attribute, None)
                ]
                if len(missing) > 0:
                    raise InvalidActorInput(
                        "Required incoming event attributes were missing: {missing}"
                        .format(missing=missing))

            function(event, origin=queue.name, origin_queue=queue)
        except QueueFull as err:
            # Wait for room, then hand the event back to its origin queue.
            err.wait_until_free()
            queue.put(event)
        except InvalidActorInput as error:
            self.logger.error("Invalid input detected: {0}".format(error))
        except InvalidEventConversion:
            self.logger.error(
                "Event was of type '{_type}', expected '{input}'".format(
                    _type=type(event), input=self.input))
        except Exception as err:
            self.logger.warning("Event exception caught: {traceback}".format(
                traceback=traceback.format_exc()), event=event)
            # Per-actor retry counter stored on the event itself.
            rescue_tracker = "{actor}_rescue_num".format(actor=self.name)
            if self.rescue and event.get(rescue_tracker, 0) < self.max_rescue:
                setattr(event, rescue_tracker, event.get(rescue_tracker, 0) + 1)
                sleep(1)
                queue.put(event)
            else:
                event.error = err
                self.send_error(event)

    def create_event(self, *args, **kwargs):
        """
        Instantiates the single configured output event type with **kwargs.
        Positional arguments are accepted but not forwarded.

        Raises ValueError when more than one output type is defined.
        """
        if len(self.output) == 1:
            return self.output[0](**kwargs)
        raise ValueError(
            "Unable to call create_event function with multiple output types defined")

    @abc.abstractmethod
    def consume(self, event, *args, **kwargs):
        """
        Processes a single incoming event. Must be implemented by subclasses.

        Args:
            event:  The implementation of event.Event this actor is consuming
            *args:
            **kwargs: Called with 'origin' (queue name) and 'origin_queue' (the queue itself)
        """
        pass
class EchoNode:
    """ Node that echoes received payments back to their initiator. """

    def __init__(self, api, token_address):
        """ Connect to the token network if no channel is open yet, register
        with the raiden alarm task and start the echo worker.

        Raises:
            ValueError: if no channel is open and the node holds no tokens.
        """
        assert isinstance(api, RaidenAPI)
        self.ready = Event()

        self.api = api
        self.token_address = token_address

        known_channels = self.api.get_channel_list(
            api.raiden.default_registry.address,
            self.token_address,
        )
        open_channels = [
            state
            for state in known_channels
            if channel.get_status(state) == CHANNEL_STATE_OPENED
        ]

        if not open_channels:
            token = self.api.raiden.chain.token(self.token_address)
            if not token.balance_of(self.api.raiden.address) > 0:
                raise ValueError(
                    'not enough funds for echo node %s for token %s' % (
                        pex(self.api.raiden.address),
                        pex(self.token_address),
                    ))
            # Join the token network with the whole current balance.
            self.api.token_network_connect(
                self.api.raiden.default_registry.address,
                self.token_address,
                token.balance_of(self.api.raiden.address),
                initial_channel_target=10,
                joinable_funds_target=.5,
            )

        self.last_poll_offset = 0
        self.received_transfers = Queue()
        # Any non-None value tells the alarm callback to unregister itself
        # and the echo worker to leave its loop.
        self.stop_signal = None
        self.greenlets = []
        self.lock = BoundedSemaphore()
        self.seen_transfers = deque([], TRANSFER_MEMORY)
        self.num_handled_transfers = 0
        self.lottery_pool = Queue()

        # Hook into the raiden alarm task so we get called back per block.
        self.api.raiden.alarm.register_callback(self.echo_node_alarm_callback)
        self.echo_worker_greenlet = gevent.spawn(self.echo_worker)
        log.info('Echo node started')

    def echo_node_alarm_callback(self, block_number):
        """ Callback meant to be registered with the raiden AlarmTask.

        Spawns one poll of the received events per invocation. After
        `EchoNode.stop()` was called it returns `REMOVE_CALLBACK` instead,
        asking the AlarmTask to drop this callback.
        """
        if not self.ready.is_set():
            self.ready.set()
        log.debug('echo_node callback', block_number=block_number)

        if self.stop_signal is not None:
            return REMOVE_CALLBACK

        poller = gevent.spawn(self.poll_all_received_events)
        self.greenlets.append(poller)
        return True

    def poll_all_received_events(self):
        """ Fetch new `EventPaymentReceivedSuccess` events and enqueue them.

        Triggered once per `echo_node_alarm_callback`. New events are
        deep-copied into `self.received_transfers`, `self.last_poll_offset`
        is advanced by the number of enqueued events, and the `echo_worker`
        greenlet is respawned if it is no longer running. Polls that cannot
        get the lock immediately are skipped.
        """
        locked = False
        try:
            with Timeout(10):
                locked = self.lock.acquire(blocking=False)
                if not locked:
                    # Another poll is still in progress.
                    return

                payment_events = self.api.get_raiden_events_payment_history(
                    token_address=self.token_address,
                    offset=self.last_poll_offset,
                )

                # Only successfully received payments are echoed.
                incoming = [
                    event
                    for event in payment_events
                    if type(event) == EventPaymentReceivedSuccess
                ]

                for event in incoming:
                    self.received_transfers.put(copy.deepcopy(event))

                # Advance the offset only after everything was enqueued, so
                # a timeout above does not lose events.
                if incoming:
                    self.last_poll_offset += len(incoming)

                if not self.echo_worker_greenlet.started:
                    log.debug(
                        'restarting echo_worker_greenlet',
                        dead=self.echo_worker_greenlet.dead,
                        successful=self.echo_worker_greenlet.successful(),
                        exception=self.echo_worker_greenlet.exception,
                    )
                    self.echo_worker_greenlet = gevent.spawn(
                        self.echo_worker)
        except Timeout:
            log.info('timeout while polling for events')
        finally:
            if locked:
                self.lock.release()

    def echo_worker(self):
        """ Work through `self.received_transfers` until stopped.

        Every transfer not yet in `self.seen_transfers` is remembered and
        handled in its own `self.on_transfer` greenlet. The worker sleeps
        half a second whenever the queue is empty.
        """
        log.debug('echo worker', qsize=self.received_transfers.qsize())
        while self.stop_signal is None:
            if self.received_transfers.qsize() <= 0:
                gevent.sleep(.5)
                continue

            transfer = self.received_transfers.get()
            if transfer in self.seen_transfers:
                log.debug(
                    'duplicate transfer ignored',
                    initiator=pex(transfer.initiator),
                    amount=transfer.amount,
                    identifier=transfer.identifier,
                )
                continue

            self.seen_transfers.append(transfer)
            handler = gevent.spawn(self.on_transfer, transfer)
            self.greenlets.append(handler)

    def on_transfer(self, transfer):
        """ Apply the echo rules described in
        https://github.com/raiden-network/raiden/issues/651:

        - an amount with `amount % 3 == 0` is answered with `amount - 1`
        - the "lucky number" `amount == 7` enters a lottery and is not
          answered immediately -- once 7 different addresses have entered,
          one randomly chosen participant receives `amount = 49`
        - an address entering the lottery again receives the current pool
          size as the `echo_amount`
        - every other amount is sent back unchanged to the initiator
        """
        echo_amount = 0

        if transfer.amount % 3 == 0:
            log.info(
                'ECHO amount - 1',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
            )
            echo_amount = transfer.amount - 1

        elif transfer.amount == 7:
            log.info(
                'ECHO lucky number draw',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
                poolsize=self.lottery_pool.qsize(),
            )

            # Empty a copy of the pool to list the tickets without touching
            # the shared pool.
            snapshot = self.lottery_pool.copy()
            tickets = []
            for _ in range(snapshot.qsize()):
                tickets.append(snapshot.get())
            assert snapshot.empty()
            del snapshot

            already_entered = any(
                ticket.initiator == transfer.initiator
                for ticket in tickets
            )
            if already_entered:
                assert transfer not in tickets
                log.debug(
                    'duplicate lottery entry',
                    initiator=pex(transfer.initiator),
                    identifier=transfer.identifier,
                    poolsize=len(tickets),
                )
                # Tell the participant how full the pool currently is.
                echo_amount = len(tickets)

            elif len(tickets) == 6:
                # This entry is the seventh participant: pay out.
                log.info('payout!')
                # Start over with an empty pool.
                assert self.lottery_pool.qsize() == 6
                self.lottery_pool = Queue()
                # The newcomer takes part in the draw as well.
                tickets.append(transfer)
                # From here on `transfer` is the winning ticket.
                transfer = random.choice(tickets)
                echo_amount = 49

            else:
                self.lottery_pool.put(transfer)

        else:
            log.debug(
                'echo transfer received',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
            )
            echo_amount = transfer.amount

        if echo_amount:
            echo_identifier = transfer.identifier + echo_amount
            log.debug(
                'sending echo transfer',
                target=pex(transfer.initiator),
                amount=echo_amount,
                orig_identifier=transfer.identifier,
                echo_identifier=echo_identifier,
                token_address=pex(self.token_address),
                num_handled_transfers=self.num_handled_transfers + 1,
            )

            self.api.transfer(
                self.api.raiden.default_registry.address,
                self.token_address,
                echo_amount,
                transfer.initiator,
                identifier=echo_identifier,
            )

        # Counted for every handled transfer, echoed or not.
        self.num_handled_transfers += 1

    def stop(self):
        """ Signal all workers to stop and wait for every spawned greenlet,
        re-raising the first error any of them ended with.
        """
        self.stop_signal = True
        self.greenlets.append(self.echo_worker_greenlet)
        gevent.joinall(self.greenlets, raise_error=True)
class Manager(object): def __init__(self, name, google_key, locale, units, timezone, time_limit, max_attempts, location, cache_type, geofence_file, debug): # Set the name of the Manager self.name = str(name).lower() self._log = self._create_logger(self.name) self._rule_log = self.get_child_logger('rules') self.__debug = debug # Get the Google Maps AP# TODO: Improve error checking self._google_key = None self._gmaps_service = None if str(google_key).lower() != 'none': self._google_key = google_key self._gmaps_service = GMaps(google_key) self._gmaps_reverse_geocode = False self._gmaps_distance_matrix = set() self._language = locale self.__locale = Locale(locale) # Setup the language-specific stuff self.__units = units # type of unit used for distances self.__timezone = timezone # timezone for time calculations self.__time_limit = time_limit # Minimum time remaining # Location should be [lat, lng] (or None for no location) self.__location = None if str(location).lower() != 'none': self.set_location(location) else: self._log.warning("NO LOCATION SET - this may cause issues " "with distance related DTS.") # Create cache self.__cache = cache_factory(self, cache_type) # Load and Setup the Pokemon Filters self._mons_enabled, self._mon_filters = False, OrderedDict() self._stops_enabled, self._stop_filters = False, OrderedDict() self._gyms_enabled, self._gym_filters = False, OrderedDict() self._ignore_neutral = False self._eggs_enabled, self._egg_filters = False, OrderedDict() self._raids_enabled, self._raid_filters = False, OrderedDict() self._weather_enabled, self._weather_filters = False, OrderedDict() # Create the Geofences to filter with from given file self.geofences = None if str(geofence_file).lower() != 'none': self.geofences = load_geofence_file(get_path(geofence_file)) # Create the alarms to send notifications out with self._alarms = {} self._max_attempts = int(max_attempts) # TODO: Move to alarm level # Initialize Rules self.__mon_rules = {} self.__stop_rules = 
{} self.__gym_rules = {} self.__egg_rules = {} self.__raid_rules = {} self.__weather_rules = {} # Initialize the queue and start the process self.__queue = Queue() self.__event = Event() self.__process = None # ~~~~~~~~~~~~~~~~~~~~~~~ MAIN PROCESS CONTROL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Update the object into the queue def update(self, obj): self.__queue.put(obj) # Get the name of this Manager def get_name(self): return self.name # Tell the process to finish up and go home def stop(self): self._log.info("Manager {} shutting down... {} items in queue." "".format(self.name, self.__queue.qsize())) self.__event.set() def join(self): self.__process.join(timeout=20) if not self.__process.ready(): self._log.warning("Manager {} could not be stopped in time! " "Forcing process to stop.".format(self.name)) self.__process.kill(timeout=2, block=True) # Force stop else: self._log.info("Manager {} successfully stopped!".format( self.name)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GMAPS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def enable_gmaps_reverse_geocoding(self): """Enable GMaps Reverse Geocoding DTS for triggered Events. """ if not self._gmaps_service: raise ValueError("Unable to enable Google Maps Reverse Geocoding." "No GMaps API key has been set.") self._gmaps_reverse_geocode = True def disable_gmaps_reverse_geocoding(self): """Disable GMaps Reverse Geocoding DTS for triggered Events. """ self._gmaps_reverse_geocode = False def enable_gmaps_distance_matrix(self, mode): """Enable 'mode' Distance Matrix DTS for triggered Events. """ if not self.__location: raise ValueError("Unable to enable Google Maps Reverse Geocoding." "No Manager location has been set.") elif not self._gmaps_service: raise ValueError("Unable to enable Google Maps Reverse Geocoding." 
"No GMaps API key has been provided.") elif mode not in GMaps.TRAVEL_MODES: raise ValueError("Unable to enable distance matrix mode: " "{} is not a valid mode.".format(mode)) self._gmaps_distance_matrix.add(mode) def disable_gmaps_dm_walking(self, mode): """Disable 'mode' Distance Matrix DTS for triggered Events. """ if mode not in GMaps.TRAVEL_MODES: raise ValueError("Unable to disable distance matrix mode: " "Invalid mode specified.") self._gmaps_distance_matrix.discard(mode) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOGGING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @staticmethod def _create_logger(mgr_name): """ Internal method for initializing manager loggers. """ # Create a Filter to pass on manager name log = logging.getLogger('pokealarm.{}'.format(mgr_name)) return log def get_child_logger(self, name): """ Get a child logger of this manager. """ logger = self._log.getChild(name) logger.addFilter(ContextFilter()) return logger def set_log_level(self, log_level): if log_level == 1: self._log.setLevel(logging.WARNING) elif log_level == 2: self._log.setLevel(logging.INFO) self._log.getChild("cache").setLevel(logging.WARNING) self._log.getChild("filters").setLevel(logging.WARNING) self._log.getChild("alarms").setLevel(logging.WARNING) elif log_level == 3: self._log.setLevel(logging.INFO) self._log.getChild("cache").setLevel(logging.INFO) self._log.getChild("filters").setLevel(logging.WARNING) self._log.getChild("alarms").setLevel(logging.WARNING) elif log_level == 4: self._log.setLevel(logging.INFO) self._log.getChild("cache").setLevel(logging.INFO) self._log.getChild("filters").setLevel(logging.INFO) self._log.getChild("alarms").setLevel(logging.INFO) elif log_level == 5: self._log.setLevel(logging.DEBUG) self._log.getChild("cache").setLevel(logging.DEBUG) self._log.getChild("filters").setLevel(logging.DEBUG) self._log.getChild("alarms").setLevel(logging.DEBUG) else: raise ValueError("Unable to set 
verbosity, must be an " "integer between 1 and 5.") self._log.debug("Verbosity set to %s", log_level) def add_file_logger(self, path, max_size_mb, ct): setup_file_handler(self._log, path, max_size_mb, ct) self._log.debug("Added new file logger to %s", path) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FILTERS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Enable/Disable Monster notifications def set_monsters_enabled(self, boolean): self._mons_enabled = parse_bool(boolean) self._log.debug("Monster notifications %s", "enabled" if self._mons_enabled else "disabled") # Add new Monster Filter def add_monster_filter(self, name, settings): if name in self._mon_filters: raise ValueError("Unable to add Monster Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.MonFilter(self, name, settings) self._mon_filters[name] = f self._log.debug("Monster filter '%s' set: %s", name, f) # Enable/Disable Stops notifications def set_stops_enabled(self, boolean): self._stops_enabled = parse_bool(boolean) self._log.debug("Stops notifications %s!", "enabled" if self._stops_enabled else "disabled") # Add new Stop Filter def add_stop_filter(self, name, settings): if name in self._stop_filters: raise ValueError("Unable to add Stop Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.StopFilter(self, name, settings) self._stop_filters[name] = f self._log.debug("Stop filter '%s' set: %s", name, f) # Enable/Disable Gym notifications def set_gyms_enabled(self, boolean): self._gyms_enabled = parse_bool(boolean) self._log.debug("Gyms notifications %s!", "enabled" if self._gyms_enabled else "disabled") # Enable/Disable Stops notifications def set_ignore_neutral(self, boolean): self._ignore_neutral = parse_bool(boolean) self._log.debug("Ignore neutral set to %s!", self._ignore_neutral) # Add new Gym Filter def add_gym_filter(self, name, settings): if name in self._gym_filters: raise 
ValueError("Unable to add Gym Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.GymFilter(self, name, settings) self._gym_filters[name] = f self._log.debug("Gym filter '%s' set: %s", name, f) # Enable/Disable Egg notifications def set_eggs_enabled(self, boolean): self._eggs_enabled = parse_bool(boolean) self._log.debug("Egg notifications %s!", "enabled" if self._eggs_enabled else "disabled") # Add new Egg Filter def add_egg_filter(self, name, settings): if name in self._egg_filters: raise ValueError("Unable to add Egg Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.EggFilter(self, name, settings) self._egg_filters[name] = f self._log.debug("Egg filter '%s' set: %s", name, f) # Enable/Disable Stops notifications def set_raids_enabled(self, boolean): self._raids_enabled = parse_bool(boolean) self._log.debug("Raid notifications %s!", "enabled" if self._raids_enabled else "disabled") # Add new Raid Filter def add_raid_filter(self, name, settings): if name in self._raid_filters: raise ValueError("Unable to add Raid Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.RaidFilter(self, name, settings) self._raid_filters[name] = f self._log.debug("Raid filter '%s' set: %s", name, f) # Enable/Disable Weather notifications def set_weather_enabled(self, boolean): self._weather_enabled = parse_bool(boolean) self._log.debug("Weather notifications %s!", "enabled" if self._weather_enabled else "disabled") # Add new Weather Filter def add_weather_filter(self, name, settings): if name in self._weather_filters: raise ValueError("Unable to add Weather Filter: Filter with the " "name {} already exists!".format(name)) f = Filters.WeatherFilter(self, name, settings) self._weather_filters[name] = f self._log.debug("Weather filter '%s' set: %s", name, f) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ALARMS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def 
add_alarm(self, name, settings): if name in self._alarms: raise ValueError("Unable to add new Alarm: Alarm with the name " "{} already exists!".format(name)) alarm = Alarms.alarm_factory(self, settings, self._max_attempts, self._google_key) self._alarms[name] = alarm # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RULES API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add new Monster Rule def add_monster_rule(self, name, filters, alarms): if name in self.__mon_rules: raise ValueError("Unable to add Rule: Monster Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._mon_filters: raise ValueError("Unable to create Rule: No Monster Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__mon_rules[name] = Rule(filters, alarms) # Add new Stop Rule def add_stop_rule(self, name, filters, alarms): if name in self.__stop_rules: raise ValueError("Unable to add Rule: Stop Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._stop_filters: raise ValueError("Unable to create Rule: No Stop Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__stop_rules[name] = Rule(filters, alarms) # Add new Gym Rule def add_gym_rule(self, name, filters, alarms): if name in self.__gym_rules: raise ValueError("Unable to add Rule: Gym Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._gym_filters: raise ValueError("Unable to create Rule: No Gym Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__gym_rules[name] = Rule(filters, alarms) # Add new Egg Rule def 
add_egg_rule(self, name, filters, alarms): if name in self.__egg_rules: raise ValueError("Unable to add Rule: Egg Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._egg_filters: raise ValueError("Unable to create Rule: No Egg Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__egg_rules[name] = Rule(filters, alarms) # Add new Raid Rule def add_raid_rule(self, name, filters, alarms): if name in self.__raid_rules: raise ValueError("Unable to add Rule: Raid Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._raid_filters: raise ValueError("Unable to create Rule: No Raid Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__raid_rules[name] = Rule(filters, alarms) # Add new Weather Rule def add_weather_rule(self, name, filters, alarms): if name in self.__weather_rules: raise ValueError("Unable to add Rule: Weather Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self._weather_filters: raise ValueError("Unable to create Rule: No Weather Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self._alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__weather_rules[name] = Rule(filters, alarms) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGER LOADING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HANDLE EVENTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Start it up def start(self): self.__process = gevent.spawn(self.run) def setup_in_process(self): # Update config config['DEBUG'] = self.__debug 
config['ROOT_PATH'] = os.path.abspath("{}/..".format( os.path.dirname(__file__))) # Hush some new loggers logging.getLogger('requests').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) if config['DEBUG'] is True: logging.getLogger().setLevel(logging.DEBUG) # Conect the alarms and send the start up message for alarm in self._alarms.values(): alarm.connect() alarm.startup_message() # Main event handler loop def run(self): self.setup_in_process() last_clean = datetime.utcnow() while True: # Run forever and ever # Clean out visited every 5 minutes if datetime.utcnow() - last_clean > timedelta(minutes=5): self._log.debug("Cleaning cache...") self.__cache.clean_and_save() last_clean = datetime.utcnow() try: # Get next object to process event = self.__queue.get(block=True, timeout=5) except gevent.queue.Empty: # Check if the process should exit process if self.__event.is_set(): break # Explict context yield gevent.sleep(0) continue try: kind = type(event) self._log.debug("Processing event: %s", event.id) if kind == Events.MonEvent: self.process_monster(event) elif kind == Events.StopEvent: self.process_stop(event) elif kind == Events.GymEvent: self.process_gym(event) elif kind == Events.EggEvent: self.process_egg(event) elif kind == Events.RaidEvent: self.process_raid(event) elif kind == Events.WeatherEvent: self.process_weather(event) else: self._log.error( "!!! 
Manager does not support {} events!".format(kind)) self._log.debug("Finished event: %s", event.id) except Exception as e: self._log.error("Encountered error during processing: " "{}: {}".format(type(e).__name__, e)) self._log.error("Stack trace: \n {}" "".format(traceback.format_exc())) # Explict context yield gevent.sleep(0) # Save cache and exit self.__cache.clean_and_save() raise gevent.GreenletExit() # Set the location of the Manager def set_location(self, location): # Regex for Lat,Lng coordinate prog = re.compile("^(-?\d+\.\d+)[,\s]\s*(-?\d+\.\d+?)$") res = prog.match(location) if res: # If location is in a Lat,Lng coordinate self.__location = [float(res.group(1)), float(res.group(2))] else: # Check if key was provided if self._gmaps_service is None: raise ValueError("Unable to find location coordinates by name" " - no Google API key was provided.") # Attempt to geocode location location = self._gmaps_service.geocode(location) if location is None: raise ValueError("Unable to geocode coordinates from {}. " "Location will not be set.".format(location)) self.__location = location self._log.info("Location successfully set to '{},{}'.".format( location[0], location[1])) def _check_filters(self, event, filter_set, filter_names): """ Function for checking if an event passes any filters. """ for name in filter_names: f = filter_set.get(name) # Filter should always exist, but sanity check anyway if f: # If the Event passes, return True if f.check_event(event) and self.check_geofences(f, event): event.custom_dts = f.custom_dts return True else: self._log.critical("ERROR: No filter named %s found!", name) return False def _notify_alarms(self, event, alarm_names, func_name): """ Function for triggering notifications to alarms. 
""" # Generate the DTS for the event dts = event.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((event.lat, event.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (event.lat, event.lng), self.__location, self._language, self.__units)) # Spawn notifications in threads so they can work asynchronously threads = [] for name in alarm_names: alarm = self._alarms.get(name) if not alarm: self._log.critical("ERROR: No alarm named %s found!", name) continue func = getattr(alarm, func_name) threads.append(gevent.spawn(func, dts)) for thread in threads: # Wait for all alarms to finish thread.join() # Process new Monster data and decide if a notification needs to be sent def process_monster(self, mon): # type: (Events.MonEvent) -> None """ Process a monster event and notify alarms if it passes. """ # Make sure that monsters are enabled if self._mons_enabled is False: self._log.debug("Monster ignored: monster notifications " "are disabled.") return # Set the name for this event so we can log rejects better mon.name = self.__locale.get_pokemon_name(mon.monster_id) # Check if previously processed and update expiration #if self.__cache.monster_expiration(mon.enc_id) is not None: # self._log.debug("{} monster was skipped because it was " # "previously processed.".format(mon.name)) # return # self.__cache.monster_expiration(mon.enc_id, mon.disappear_time) # Check the time remaining seconds_left = (mon.disappear_time - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: self._log.debug("{} monster was skipped because only {} seconds " "remained".format(mon.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: mon.distance = get_earth_dist([mon.lat, mon.lng], self.__location, self.__units) mon.direction = get_cardinal_dir([mon.lat, mon.lng], 
self.__location) # Check for Rules rules = self.__mon_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._mon_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in rules.iteritems(): # For all rules passed = self._check_filters(mon, self._mon_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(mon, rule.alarm_names, 'pokemon_alert') if rule_ct > 0: self._rule_log.info( 'Monster %s passed %s rule(s) and triggered %s alarm(s).', mon.name, rule_ct, alarm_ct) else: self._rule_log.info('Monster %s rejected by all rules.', mon.name) def process_stop(self, stop): # type: (Events.StopEvent) -> None """ Process a stop event and notify alarms if it passes. """ # Make sure that stops are enabled if self._stops_enabled is False: self._log.debug("Stop ignored: stop notifications are disabled.") return # Check for lured if stop.expiration is None: self._log.debug("Stop ignored: stop was not lured") return # Check if previously processed and update expiration if self.__cache.stop_expiration(stop.stop_id) is not None: self._log.debug("Stop {} was skipped because it was " "previously processed.".format(stop.name)) return self.__cache.stop_expiration(stop.stop_id, stop.expiration) # Check the time remaining seconds_left = (stop.expiration - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: self._log.debug("Stop {} was skipped because only {} seconds " "remained".format(stop.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: stop.distance = get_earth_dist([stop.lat, stop.lng], self.__location, self.__units) stop.direction = get_cardinal_dir([stop.lat, stop.lng], self.__location) # Check for Rules rules = self.__stop_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._stop_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in 
rules.iteritems(): # For all rules passed = self._check_filters(stop, self._stop_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(stop, rule.alarm_names, 'pokestop_alert') if rule_ct > 0: self._rule_log.info( 'Stop %s passed %s rule(s) and triggered %s alarm(s).', stop.name, rule_ct, alarm_ct) else: self._rule_log.info('Stop %s rejected by all rules.', stop.name) def process_gym(self, gym): # type: (Events.GymEvent) -> None """ Process a gym event and notify alarms if it passes. """ # Update Gym details (if they exist) gym.gym_name = self.__cache.gym_name(gym.gym_id, gym.gym_name) gym.gym_description = self.__cache.gym_desc(gym.gym_id, gym.gym_description) gym.gym_image = self.__cache.gym_image(gym.gym_id, gym.gym_image) # Ignore changes to neutral if self._ignore_neutral and gym.new_team_id == 0: self._log.debug("%s gym update skipped: new team was neutral") return # Update Team Information gym.old_team_id = self.__cache.gym_team(gym.gym_id) self.__cache.gym_team(gym.gym_id, gym.new_team_id) # Check if notifications are on if self._gyms_enabled is False: self._log.debug("Gym ignored: gym notifications are disabled.") return # Doesn't look like anything to me if gym.new_team_id == gym.old_team_id: self._log.debug("%s gym update skipped: no change detected", gym.gym_id) return # Calculate distance and direction if self.__location is not None: gym.distance = get_earth_dist([gym.lat, gym.lng], self.__location, self.__units) gym.direction = get_cardinal_dir([gym.lat, gym.lng], self.__location) # Check for Rules rules = self.__gym_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._gym_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in rules.iteritems(): # For all rules passed = self._check_filters(gym, self._gym_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(gym, rule.alarm_names, 
'gym_alert') if rule_ct > 0: self._rule_log.info( 'Gym %s passed %s rule(s) and triggered %s alarm(s).', gym.name, rule_ct, alarm_ct) else: self._rule_log.info('Gym %s rejected by all rules.', gym.name) def process_egg(self, egg): # type: (Events.EggEvent) -> None """ Process a egg event and notify alarms if it passes. """ # Update Gym details (if they exist) egg.gym_name = self.__cache.gym_name(egg.gym_id, egg.gym_name) egg.gym_description = self.__cache.gym_desc(egg.gym_id, egg.gym_description) egg.gym_image = self.__cache.gym_image(egg.gym_id, egg.gym_image) # Update Team if Unknown if Unknown.is_(egg.current_team_id): egg.current_team_id = self.__cache.gym_team(egg.gym_id) # Make sure that eggs are enabled if self._eggs_enabled is False: self._log.debug("Egg ignored: egg notifications are disabled.") return # Skip if previously processed if self.__cache.egg_expiration(egg.gym_id) is not None: self._log.debug("Egg {} was skipped because it was " "previously processed.".format(egg.name)) return self.__cache.egg_expiration(egg.gym_id, egg.hatch_time) # Check the time remaining seconds_left = (egg.hatch_time - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: self._log.debug("Egg {} was skipped because only {} seconds " "remained".format(egg.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: egg.distance = get_earth_dist([egg.lat, egg.lng], self.__location, self.__units) egg.direction = get_cardinal_dir([egg.lat, egg.lng], self.__location) # Check for Rules rules = self.__egg_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._egg_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in rules.iteritems(): # For all rules passed = self._check_filters(egg, self._egg_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(egg, rule.alarm_names, 'raid_egg_alert') if rule_ct > 0: 
self._rule_log.info( 'Egg %s passed %s rule(s) and triggered %s alarm(s).', egg.name, rule_ct, alarm_ct) else: self._rule_log.info('Egg %s rejected by all rules.', egg.name) def process_raid(self, raid): # type: (Events.RaidEvent) -> None """ Process a raid event and notify alarms if it passes. """ # Update Gym details (if they exist) raid.gym_name = self.__cache.gym_name(raid.gym_id, raid.gym_name) raid.gym_description = self.__cache.gym_desc(raid.gym_id, raid.gym_description) raid.gym_image = self.__cache.gym_image(raid.gym_id, raid.gym_image) # Update Team if Unknown if Unknown.is_(raid.current_team_id): raid.current_team_id = self.__cache.gym_team(raid.gym_id) # Make sure that raids are enabled if self._raids_enabled is False: self._log.debug("Raid ignored: raid notifications are disabled.") return # Skip if previously processed if self.__cache.raid_expiration(raid.gym_id) is not None: self._log.debug("Raid {} was skipped because it was " "previously processed.".format(raid.name)) return self.__cache.raid_expiration(raid.gym_id, raid.raid_end) # Check the time remaining seconds_left = (raid.raid_end - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: self._log.debug("Raid {} was skipped because only {} seconds " "remained".format(raid.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: raid.distance = get_earth_dist([raid.lat, raid.lng], self.__location, self.__units) raid.direction = get_cardinal_dir([raid.lat, raid.lng], self.__location) # Check for Rules rules = self.__raid_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._raid_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in rules.iteritems(): # For all rules passed = self._check_filters(raid, self._raid_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(raid, rule.alarm_names, 'raid_alert') if rule_ct > 0: 
self._rule_log.info( 'Raid %s passed %s rule(s) and triggered %s alarm(s).', raid.name, rule_ct, alarm_ct) else: self._rule_log.info('Raid %s rejected by all rules.', raid.name) def process_weather(self, weather): # type: (Events.WeatherEvent) -> None """ Process a weather event and notify alarms if it passes. """ # Set the name for this event so we can log rejects better weather.name = self.__locale.get_weather_name(weather.s2_cell_id) # Make sure that weather changes are enabled if self._weather_enabled is False: self._log.debug("Weather ignored: weather change " "notifications are disabled.") return # Calculate distance and direction if self.__location is not None: weather.distance = get_earth_dist([weather.lat, weather.lng], self.__location, self.__units) weather.direction = get_cardinal_dir([weather.lat, weather.lng], self.__location) # Store copy of cache info cache_weather_id = self.__cache.cell_weather_id(weather.s2_cell_id) cache_day_or_night_id = self.__cache.day_or_night_id( weather.s2_cell_id) cache_severity_id = self.__cache.severity_id(weather.s2_cell_id) # Update cache info self.__cache.cell_weather_id(weather.s2_cell_id, weather.weather_id) self.__cache.day_or_night_id(weather.s2_cell_id, weather.day_or_night_id) self.__cache.severity_id(weather.s2_cell_id, weather.severity_id) # Check and see if the weather hasn't changed and ignore if weather.weather_id == cache_weather_id and \ weather.day_or_night_id == cache_day_or_night_id and \ weather.severity_id == cache_severity_id: self._log.debug( "weather of %s, alert of %s, and day or night of %s skipped: " "no change detected", weather.weather_id, weather.severity_id, weather.day_or_night_id) return # Check for Rules rules = self.__weather_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self._weather_filters.keys(), self._alarms.keys()) } rule_ct, alarm_ct = 0, 0 for r_name, rule in rules.iteritems(): # For all rules passed = self._check_filters(weather, 
self._weather_filters, rule.filter_names) if passed: rule_ct += 1 alarm_ct += len(rule.alarm_names) self._notify_alarms(weather, rule.alarm_names, 'weather_alert') if rule_ct > 0: self._rule_log.info( 'Weather %s passed %s rule(s) and triggered %s alarm(s).', weather.name, rule_ct, alarm_ct) else: self._rule_log.info('Weather %s rejected by all rules.', weather.name) # Check to see if a notification is within the given range # TODO: Move this into filters and add unit tests def check_geofences(self, f, e): """ Returns true if the event passes the filter's geofences. """ if self.geofences is None or f.geofences is None: # No geofences set return True targets = f.geofences if len(targets) == 1 and "all" in targets: targets = self.geofences.iterkeys() for name in targets: gf = self.geofences.get(name) if not gf: # gf doesn't exist self._log.error("Cannot check geofence %s: " "does not exist!", name) elif gf.contains(e.lat, e.lng): # e in gf self._log.debug("{} is in geofence {}!".format( e.name, gf.get_name())) e.geofence = name # Set the geofence for dts return True else: # e not in gf self._log.debug("%s not in %s.", e.name, name) self._log.debug("%s rejected from filter by geofences.", e.name) return False
class Container(object):
    """Supervises one docker container for a formation/service/instance.

    `start` spawns a greenlet that pulls the image, creates and starts the
    container, optionally announces it in the service registry and waits
    for it to exit. Unless `restart` is false the cycle repeats, with a
    growing delay, until `dispose` is called.

    `state` moves through 'init', 'pulling', 'starting', 'running',
    'error' and 'done'; `reason` holds the last error text and
    `status_code` the last value returned by `docker.wait`.
    """

    def __init__(self, docker, runtime, registry, host, image,
                 command, env, ports, options, formation, service, instance,
                 restart=True, tty=False):
        self.docker = docker
        # Callable producing a runtime object for this container
        # (invoked as `self.runtime(self)` when the container is created).
        self.runtime = runtime
        self.registry = registry
        self.host = host
        self.id = shortuuid.uuid()
        cmd = ' '.join(command) if isinstance(command, list) else command
        self.log = logging.getLogger(
            'container[{0}/{1}.{2} (image={3}, command="{4}")]'.format(
                formation, service, instance, image, cmd))
        self.image = image
        self.command = command
        self.env = env
        self.ports = ports
        self.options = options
        self.formation = formation
        self.service = service
        self.instance = instance
        self.state = 'init'
        self.tty = tty
        self.reason = None
        self.status_code = None
        self._stopped = Event()
        self._cont_id = None
        self._registration = None
        self._runtime = None
        self._restart = restart
        self._reset()

    def start(self):
        """Start supervising in a background greenlet; returns self."""
        self.log.info("start called")
        gevent.spawn(self._provision_and_start)
        return self

    def _reset(self):
        """Reset the restart back-off delay and the pending wait handle."""
        self._delay = 1
        self._waiting = None

    def restart(self, image, command, env, ports):
        """Replace the container's definition and trigger a restart.

        A live container is stopped; if the supervisor is currently waiting
        out the back-off delay, the wait is cut short instead.
        """
        self.image = image
        self.command = command
        self.env = env
        self.ports = ports
        self.status_code = None
        if self._cont_id:
            self.docker.stop(self._cont_id)
        elif self._waiting:
            self._waiting.set()

    def dispose(self):
        """Dispose of the container."""
        if not self._stopped.is_set():
            self._stopped.set()
            if self._cont_id is not None:
                self.docker.stop(self._cont_id)

    def commit(self, repository, tag):
        """Commit the container as `repository:tag`, reusing its config."""
        data = self.docker.inspect_container(self._cont_id)
        self.docker.commit(self._cont_id, repository=repository,
                           tag=tag, conf=data['Config'])

    def attach(self, stdin=True, stdout=True, stderr=True,
               stream=True, logs=False):
        """Attach to container."""
        params = {
            'stdin': 1 if stdin else 0,
            'stdout': 1 if stdout else 0,
            'stderr': 1 if stderr else 0,
            'stream': 1 if stream else 0,
            'logs': 1 if logs else 0,
        }
        return self.docker.attach_websocket(self._cont_id, params)

    def resize(self, w, h):
        """Resize the container's TTY to `w` x `h`."""
        return self.docker.resize_tty(self._cont_id, w, h)

    def _register_with_service_registry(self):
        """Announce the running container in the service registry."""
        data = self.docker.inspect_container(self._cont_id)
        announcement = self.registry.build_announcement(
            self.formation, self.service, self.instance,
            dict(_port_mappings_from_inspect_data(data)),
            host=self.host)
        self._registration = self.registry.register(
            self.formation, self.service, self.instance, announcement)

    def _provision_and_start(self):
        """Supervisor loop: pull, create, run, wait and optionally restart."""
        while not self._stopped.is_set():
            with self._update_state('pulling'):
                self.log.debug("start pulling %r" % (self.image,))
                self.docker.pull(self.image)

            with self._update_state('starting'):
                self._create_container()

            self._set_state('running')
            if self.registry is not None:
                self._register_with_service_registry()

            self.status_code = self.docker.wait(self._cont_id)

            if self._registration is not None:
                self._registration.stop(timeout=5)
                self._registration = None

            if not self._restart:
                break
            elif not self._stopped.is_set():
                self._set_error(
                    "container stopped unexpectedly: exit code {0}".format(
                        self.status_code))
                self._pause()

        # kill the container completely and invalidate our handle.
        # NOTE(review): assumed to run once after the loop — confirm against
        # the original indentation.
        #cont_id, self._cont_id = self._cont_id, None
        with self._update_state('done'):
            # dispose() may have been called before a container or runtime
            # was ever created; there is nothing to tear down in that case.
            if self._cont_id is not None:
                self.docker.kill(self._cont_id)
            if self._runtime is not None:
                self._runtime.dispose()

    def _pause(self):
        """Wait before restarting, growing the delay up to 180 seconds.

        The wait can be interrupted through `self._waiting` (see `restart`).
        """
        self._delay = min(180, self._delay * 2.71828)
        self.log.info("will wait for {0:.1f} seconds before restarting".format(
            self._delay))
        with self._update_state('error'):
            try:
                self._waiting = Event()
                self._waiting.wait(self._delay)
            finally:
                self._waiting = None

    def _create_container(self):
        """Create container."""
        self._runtime = self.runtime(self)
        result = self.docker.create_container_from_config(
            self._runtime.make_config())
        self._cont_id = result['Id']
        self.docker.start(
            self._cont_id,
            port_bindings=_convert_ports_to_port_bindings(self.ports))

    def _set_state(self, state):
        """Record and log a state transition."""
        self.log.info('change state to %s from %s' % (state, self.state))
        self.state = state

    def _set_error(self, reason):
        """Store `reason` and switch to the 'error' state."""
        self.reason = reason
        self._set_state('error')
        self.log.warning('error: {0}'.format(self.reason))

    @contextmanager
    def _update_state(self, state):
        """Enter `state`; on an exception record the error and re-raise."""
        self._set_state(state)
        try:
            yield
        except Exception as err:
            self._set_error(str(err))
            raise
class DaemonWatchdog(Greenlet):
    """
    DaemonWatchdog::

    Watch Ceph daemons for failures. If an extended failure is detected (i.e.
    not intentional), then the watchdog will unmount file systems and send
    SIGTERM to all daemons. The duration of an extended failure is configurable
    with watchdog_daemon_timeout.

    watchdog_daemon_timeout [default: 300]: number of seconds a daemon
        is allowed to be failed before the watchdog will bark.
    """

    def __init__(self, ctx, manager, config, thrashers):
        Greenlet.__init__(self)
        self.ctx = ctx
        self.config = config
        self.e = None  # exception raised by watch(), if any
        self.logger = log.getChild('daemon_watchdog')
        self.manager = manager
        self.name = 'watchdog'
        self.stopping = Event()
        self.thrashers = thrashers

    def _run(self):
        """Greenlet entry point: run watch() and record any exception."""
        try:
            self.watch()
        except Exception as e:
            # See _run exception comment for MDSThrasher
            self.e = e
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...

    def log(self, x):
        """Write data to logger"""
        self.logger.info(x)

    def stop(self):
        """Ask the watch loop to finish."""
        self.stopping.set()

    def bark(self):
        """Force-unmount all mounts and SIGTERM the live mds/mon daemons.

        Failures of individual unmounts or signals are logged and ignored so
        the remaining cleanup still runs.
        """
        self.log("BARK! unmounting mounts and killing all daemons")
        for mount in self.ctx.mounts.values():
            try:
                mount.umount_wait(force=True)
            except Exception:
                self.logger.exception("ignoring exception:")

        daemons = []
        for role in ('mds', 'mon'):
            daemons.extend(
                daemon for daemon in self.ctx.daemons.iter_daemons_of_role(
                    role, cluster=self.manager.cluster)
                if daemon.running() and not daemon.proc.finished)

        for daemon in daemons:
            try:
                daemon.signal(signal.SIGTERM)
            except Exception:
                self.logger.exception("ignoring exception:")

    def watch(self):
        """Poll every 5 seconds until stopped or until the watchdog barks.

        A daemon counts as failed while `running()` is true but its process
        has finished. The watchdog barks (and returns) when a daemon has been
        failed for longer than watchdog_daemon_timeout seconds, or when any
        thrasher has recorded an exception.
        """
        self.log("watchdog starting")
        daemon_timeout = int(self.config.get('watchdog_daemon_timeout', 300))
        # "<role>.<id>" -> (daemon, time the failure was first seen)
        daemon_failure_time = {}
        while not self.stopping.is_set():
            bark = False
            now = time.time()

            mons = self.ctx.daemons.iter_daemons_of_role(
                'mon', cluster=self.manager.cluster)
            mdss = self.ctx.daemons.iter_daemons_of_role(
                'mds', cluster=self.manager.cluster)
            # NOTE(review): `clients` is never read; the call is kept in case
            # it has side effects — confirm and drop.
            clients = self.ctx.daemons.iter_daemons_of_role(
                'client', cluster=self.manager.cluster)

            daemon_failures = []
            daemon_failures.extend(
                d for d in mons if d.running() and d.proc.finished)
            daemon_failures.extend(
                d for d in mdss if d.running() and d.proc.finished)

            for daemon in daemon_failures:
                name = daemon.role + '.' + daemon.id_
                dt = daemon_failure_time.setdefault(name, (daemon, now))
                assert dt[0] is daemon
                delta = now - dt[1]
                self.log("daemon {name} is failed for ~{t:.0f}s".format(
                    name=name, t=delta))
                if delta > daemon_timeout:
                    bark = True

            # If a daemon is no longer failed, remove it from tracking.
            # Iterate over a snapshot because entries are deleted in the loop.
            failed_names = set(d.role + '.' + d.id_ for d in daemon_failures)
            for name in list(daemon_failure_time):
                if name not in failed_names:
                    self.log("daemon {name} has been restored".format(
                        name=name))
                    del daemon_failure_time[name]

            for thrasher in self.thrashers:
                if thrasher.e is not None:
                    self.log("thrasher on fs.{name} failed".format(
                        name=thrasher.fs.name))
                    bark = True

            if bark:
                self.bark()
                return

            sleep(5)

        self.log("watchdog finished")
class MDSThrasher(Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  To always use the maximum
    value, set randomize to false.  The config is a dict containing some or all of:

    seed: [no default] seed the random number generator

    randomize: [default: true] enables randomization and use the max/min values

    max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
      any given time.

    max_thrash_delay: [default: 30] maximum number of seconds to delay before
      thrashing again.  (The legacy key thrash_delay is still accepted.)

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay.  Value should be between 0.0 and 1.0

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.  This option
      overrides anything specified by max_thrash.  This option is a dict containing
      mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0].  Each weight
      is a value from 0.0 to 1.0.  Any MDSs not specified will be automatically
      given a weight of 0.0.  For a given MDS, by default the thrasher delays for up
      to max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.  If a non-zero
      weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
      during that iteration based on a random value [0-1] not exceeding the weight of that MDS.

    Examples::

      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            - mds.a: 0.8
            - mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4

    """

    def __init__(self, ctx, manager, mds_cluster, config, logger,
                 failure_group, weight):
        super(MDSThrasher, self).__init__()

        self.ctx = ctx
        self.manager = manager
        assert self.manager.is_clean()
        self.mds_cluster = mds_cluster

        self.stopping = Event()
        self.logger = logger
        self.config = config

        self.randomize = bool(self.config.get('randomize', True))
        # The documented key is max_thrash_delay; thrash_delay is the key
        # this class historically read, so it remains a fallback.
        self.max_thrash_delay = float(self.config.get(
            'max_thrash_delay', self.config.get('thrash_delay', 30.0)))
        self.thrash_in_replay = float(
            self.config.get('thrash_in_replay', False))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, \
            'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
                v=self.thrash_in_replay)

        self.max_replay_thrash_delay = float(
            self.config.get('max_replay_thrash_delay', 4.0))

        self.max_revive_delay = float(
            self.config.get('max_revive_delay', 10.0))

        self.failure_group = failure_group
        self.weight = weight

        # TODO support multiple filesystems: will require behavioural change to select
        # which filesystem to act on when doing rank-ish things
        self.fs = Filesystem(self.ctx)

    def _run(self):
        """Greenlet entry point: run do_thrash, logging any exception."""
        try:
            self.do_thrash()
        except:
            # Log exceptions here so we get the full backtrace (it's lost
            # by the time someone does a .get() on this greenlet)
            self.logger.exception("Exception in do_thrash:")
            raise

    def log(self, x):
        """Write data to logger assigned to this MDThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask the thrash loop to finish."""
        self.stopping.set()

    def _delay_for(self, maximum):
        """Return `maximum`, or a uniform random value in [0, maximum] when
        randomization is enabled."""
        if self.randomize:
            return random.uniform(0.0, maximum)
        return maximum

    def _active_statuses(self):
        """Return the info dicts of failure-group MDSs in state up:active."""
        statuses = [self.mds_cluster.get_mds_info(m)
                    for m in self.failure_group]
        return [s for s in statuses if s and s['state'] == 'up:active']

    def kill_mds(self, mds):
        """Stop the MDS daemon, or power its host off if `powercycle` is
        configured."""
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only(
                'mds.{m}'.format(m=mds)).remotes.keys())
            self.log('kill_mds on mds.{m} doing powercycle of {s}'.format(
                m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_off()
        else:
            self.ctx.daemons.get_daemon('mds', mds).stop()

    @staticmethod
    def _assert_ipmi(remote):
        assert remote.console.has_ipmi_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized.  Check ipmi config.")

    def kill_mds_by_rank(self, rank):
        """
        kill_mds wrapper to kill based on rank passed.
        """
        status = self.mds_cluster.get_mds_info_by_rank(rank)
        self.kill_mds(status['name'])

    def revive_mds(self, mds, standby_for_rank=None):
        """
        Revive mds -- do an ipmi powercycle (if indicated by the config)
        and then restart (using --hot-standby if specified).
        """
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only(
                'mds.{m}'.format(m=mds)).remotes.keys())
            self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(
                m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)
        args = []
        # NOTE(review): a truthiness test skips --hot-standby for rank 0;
        # left unchanged because the intended behaviour is unclear — confirm.
        if standby_for_rank:
            args.extend(['--hot-standby', standby_for_rank])
        self.ctx.daemons.get_daemon('mds', mds).restart(*args)

    def revive_mds_by_rank(self, rank, standby_for_rank=None):
        """
        revive_mds wrapper to revive based on rank passed.
        """
        status = self.mds_cluster.get_mds_info_by_rank(rank)
        self.revive_mds(status['name'], standby_for_rank)

    def get_mds_status_all(self):
        """Return the MDS map of the filesystem."""
        return self.fs.get_mds_map()

    def do_thrash(self):
        """
        Perform the random thrashing action
        """
        self.log('starting mds_do_thrash for failure group: ' + ', '.join(
            ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
        while not self.stopping.is_set():
            delay = self._delay_for(self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(
                    delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            # random.random() yields a float in [0.0, 1.0), so the weight
            # actually controls how often an iteration is skipped.
            skip = random.random()
            if self.weight < 1.0 and skip > self.weight:
                self.log(
                    'skipping thrash iteration with skip ({skip}) > weight ({weight})'
                    .format(skip=skip, weight=self.weight))
                continue

            # find the active mds in the failure group
            actives = self._active_statuses()
            assert len(actives) == 1, \
                'Can only have one active in a failure group'

            active_mds = actives[0]['name']
            active_rank = actives[0]['rank']

            self.log('kill mds.{id} (rank={r})'.format(
                id=active_mds, r=active_rank))
            self.kill_mds_by_rank(active_rank)

            # wait for mon to report killed mds as crashed
            last_laggy_since = None
            itercount = 0
            while True:
                failed = self.fs.get_mds_map()['failed']
                status = self.mds_cluster.get_mds_info(active_mds)
                if not status:
                    break
                if 'laggy_since' in status:
                    last_laggy_since = status['laggy_since']
                    break
                if any([(f == active_mds) for f in failed]):
                    break
                self.log(
                    'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'
                    .format(_id=active_mds))
                itercount = itercount + 1
                if itercount > 10:
                    self.log('mds map: {status}'.format(
                        status=self.mds_cluster.get_fs_map()))
                time.sleep(2)

            if last_laggy_since:
                self.log(
                    'mds.{_id} reported laggy/crashed since: {since}'.format(
                        _id=active_mds, since=last_laggy_since))
            else:
                self.log('mds.{_id} down, removed from mdsmap'.format(
                    _id=active_mds, since=last_laggy_since))

            # wait for a standby mds to takeover and become active
            takeover_mds = None
            takeover_rank = None
            itercount = 0
            while True:
                actives = self._active_statuses()
                if len(actives) > 0:
                    assert len(actives) == 1, \
                        'Can only have one active in failure group'
                    takeover_mds = actives[0]['name']
                    takeover_rank = actives[0]['rank']
                    break
                itercount = itercount + 1
                if itercount > 10:
                    self.log('mds map: {status}'.format(
                        status=self.mds_cluster.get_fs_map()))
                # Pace the polling like the other wait loops in this method.
                time.sleep(2)

            self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))

            # wait for a while before restarting old active to become new
            # standby
            delay = self._delay_for(self.max_revive_delay)

            self.log(
                'waiting for {delay} secs before reviving mds.{id}'.format(
                    delay=delay, id=active_mds))
            time.sleep(delay)

            self.log('reviving mds.{id}'.format(id=active_mds))
            self.revive_mds(active_mds, standby_for_rank=takeover_rank)

            status = {}
            while True:
                status = self.mds_cluster.get_mds_info(active_mds)
                if status and (status['state'] == 'up:standby' or
                               status['state'] == 'up:standby-replay'):
                    break
                self.log(
                    'waiting till mds map indicates mds.{_id} is in standby or standby-replay'
                    .format(_id=active_mds))
                time.sleep(2)
            self.log('mds.{_id} reported in {state} state'.format(
                _id=active_mds, state=status['state']))

            # don't do replay thrashing right now
            continue

            # NOTE(review): everything below is unreachable because of the
            # `continue` above, and it reads self.to_kill, which this class
            # never sets. Kept only as a record of the disabled feature.
            # this might race with replay -> active transition...
            if status['state'] == 'up:replay' and \
                    random.random() < self.thrash_in_replay:

                delay = self._delay_for(self.max_replay_thrash_delay)
                time.sleep(delay)
                self.log('kill replaying mds.{id}'.format(id=self.to_kill))
                self.kill_mds(self.to_kill)

                delay = self._delay_for(self.max_revive_delay)

                self.log(
                    'waiting for {delay} secs before reviving mds.{id}'.format(
                        delay=delay, id=self.to_kill))
                time.sleep(delay)

                self.log('revive mds.{id}'.format(id=self.to_kill))
                self.revive_mds(self.to_kill)
class BaseServer(object):
    """
    An abstract base class that implements some common functionality for the servers in gevent.

    :param listener: Either be an address that the server should bind
        on or a :class:`gevent.socket.socket` instance that is already
        bound (and put into listening mode in case of TCP socket).

    :keyword handle: If given, the request handler. The request
        handler can be defined in a few ways. Most commonly,
        subclasses will implement a ``handle`` method as an
        instance method. Alternatively, a function can be passed
        as the ``handle`` argument to the constructor. In either
        case, the handler can later be changed by calling
        :meth:`set_handle`.

        When the request handler returns, the socket used for the
        request will be closed.

    :keyword spawn: If provided, is called to create a new
        greenlet to run the handler. By default,
        :func:`gevent.spawn` is used (meaning there is no
        artificial limit on the number of concurrent requests).
        Possible values for *spawn*:

        - a :class:`gevent.pool.Pool` instance -- ``handle`` will be executed
          using :meth:`gevent.pool.Pool.spawn` only if the pool is not full.
          While it is full, no new connections are accepted;
        - :func:`gevent.spawn_raw` -- ``handle`` will be executed in a raw
          greenlet which has a little less overhead than :class:`gevent.Greenlet` instances spawned by default;
        - ``None`` -- ``handle`` will be executed right away, in the :class:`Hub` greenlet.
          ``handle`` cannot use any blocking functions as it would mean switching to the :class:`Hub`.
        - an integer -- a shortcut for ``gevent.pool.Pool(integer)``

    .. versionchanged:: 1.1a1
       When the *handle* function returns from processing a connection,
       the client socket will be closed. This resolves the non-deterministic
       closing of the socket, fixing ResourceWarnings under Python 3 and PyPy.
    """

    #: the number of seconds to sleep in case there was an error in accept() call
    #: for consecutive errors the delay will double until it reaches max_delay
    #: when accept() finally succeeds the delay will be reset to min_delay again
    min_delay = 0.01
    max_delay = 1

    #: Sets the maximum number of consecutive accepts that a process may perform on
    #: a single wake up. High values give higher priority to high connection rates,
    #: while lower values give higher priority to already established connections.
    #: Default is 100. Note, that in case of multiple working processes on the same
    #: listening value, it should be set to a lower value. (pywsgi.WSGIServer sets it
    #: to 1 when environ["wsgi.multiprocess"] is true)
    max_accept = 100

    # Class-level default spawner; set_spawn() stores the chosen spawner on
    # the instance under the same name.
    _spawn = Greenlet.spawn

    #: the default timeout that we wait for the client connections to close in stop()
    stop_timeout = 1

    # errno values that is_fatal_error() treats as unrecoverable.
    fatal_errors = (errno.EBADF, errno.EINVAL, errno.ENOTSOCK)

    def __init__(self, listener, handle=None, spawn='default'):
        """
        Configure the listener, spawner and handler; the server is not started.

        If any configuration step raises, :meth:`close` is called before the
        exception is re-raised.

        :raises ValueError: if ``max_accept`` is smaller than 1.
        :raises TypeError: propagated from :meth:`set_listener` /
            :meth:`set_handle`.
        """
        # The stop event being *set* means "not started" (see ``started``).
        self._stop_event = Event()
        self._stop_event.set()
        self._watcher = None
        self._timer = None
        self.pool = None
        try:
            self.set_listener(listener)
            self.set_spawn(spawn)
            self.set_handle(handle)
            self.delay = self.min_delay
            self.loop = get_hub().loop
            if self.max_accept < 1:
                raise ValueError('max_accept must be positive int: %r' % (self.max_accept, ))
        except:
            # Release whatever was set up so far, then propagate the error.
            self.close()
            raise

    def set_listener(self, listener):
        """
        Record the listening socket or the address to bind later.

        An object with an ``accept`` attribute is treated as an already
        created socket and stored as ``self.socket``; anything else is passed
        to ``parse_address`` and only ``family``/``address`` are stored.

        :raises TypeError: if the socket-like object also has a
            ``do_handshake`` attribute (i.e. looks like an SSL socket).
        """
        if hasattr(listener, 'accept'):
            if hasattr(listener, 'do_handshake'):
                raise TypeError('Expected a regular socket, not SSLSocket: %r' % (listener, ))
            self.family = listener.family
            self.address = listener.getsockname()
            self.socket = listener
        else:
            self.family, self.address = parse_address(listener)

    def set_spawn(self, spawn):
        """
        Select how handlers are run; see the *spawn* keyword in the class docs.

        Sets ``self.pool`` (or ``None``) and ``self._spawn``. When a pool is in
        use its ``full`` method replaces :meth:`full`, and the server is linked
        to the pool's semaphore so accepting can resume.
        """
        if spawn == 'default':
            self.pool = None
            # Copies the class-level default into the instance ``__dict__``
            # (close() later pops the instance entry).
            self._spawn = self._spawn
        elif hasattr(spawn, 'spawn'):
            # Pool-like object.
            self.pool = spawn
            self._spawn = spawn.spawn
        elif isinstance(spawn, integer_types):
            from gevent.pool import Pool
            self.pool = Pool(spawn)
            self._spawn = self.pool.spawn
        else:
            # A plain callable, or None to run the handler inline.
            self.pool = None
            self._spawn = spawn
        if hasattr(self.pool, 'full'):
            self.full = self.pool.full
        if self.pool is not None:
            # NOTE(review): presumably fires when a pool slot is released so
            # that accepting resumes after the pool was full -- confirm.
            self.pool._semaphore.rawlink(self._start_accepting_if_started)

    def set_handle(self, handle):
        """
        Install the request handler.

        If *handle* is ``None`` an existing ``handle`` attribute (for example
        a method defined by a subclass) is used instead.

        :raises TypeError: if no handler is available at all.
        """
        if handle is not None:
            self.handle = handle
        if hasattr(self, 'handle'):
            self._handle = self.handle
        else:
            raise TypeError("'handle' must be provided")

    def _start_accepting_if_started(self, _event=None):
        """Callback: resume accepting, but only while the server is started."""
        if self.started:
            self.start_accepting()

    def start_accepting(self):
        """Create and start the I/O watcher on the listening socket, if none exists."""
        if self._watcher is None:
            # just stop watcher without creating a new one?
            # NOTE(review): the literal 1 is presumably the "read" event
            # mask of the loop implementation -- confirm.
            self._watcher = self.loop.io(self.socket.fileno(), 1)
            self._watcher.start(self._do_read)

    def stop_accepting(self):
        """Stop and drop the I/O watcher and any pending retry timer."""
        if self._watcher is not None:
            self._watcher.stop()
            self._watcher = None
        if self._timer is not None:
            self._timer.stop()
            self._timer = None

    def do_handle(self, *args):
        """
        Run the handler for one accepted connection.

        *args* is whatever :meth:`do_read` returned. With no spawner the
        handler runs inline; otherwise it is handed to the spawner. If
        dispatching itself raises, :meth:`do_close` is called with *args* and
        the exception is re-raised.
        """
        spawn = self._spawn
        handle = self._handle
        close = self.do_close

        try:
            if spawn is None:
                _handle_and_close_when_done(handle, close, args)
            else:
                spawn(_handle_and_close_when_done, handle, close, args)
        except:
            close(*args)
            raise

    def do_close(self, *args):
        """Hook for releasing per-connection resources; a no-op here."""
        pass

    def _do_read(self):
        """
        Watcher callback: accept and dispatch up to ``max_accept`` connections.

        Stops accepting when :meth:`full` reports true. On an error from
        :meth:`do_read` or :meth:`do_handle` the error is reported to the
        loop and, unless it is fatal, accepting is paused for ``self.delay``
        seconds with the delay doubling up to ``max_delay``.
        """
        for _ in xrange(self.max_accept):
            if self.full():
                self.stop_accepting()
                return
            try:
                args = self.do_read()
                # A successful read resets the back-off delay.
                self.delay = self.min_delay
                if not args:
                    # Nothing to handle on this wake-up.
                    return
            except:
                self.loop.handle_error(self, *sys.exc_info())
                ex = sys.exc_info()[1]
                if self.is_fatal_error(ex):
                    self.close()
                    sys.stderr.write('ERROR: %s failed with %s\n' % (self, str(ex) or repr(ex)))
                    return
                if self.delay >= 0:
                    # Pause accepting and retry after an increasing delay.
                    self.stop_accepting()
                    self._timer = self.loop.timer(self.delay)
                    self._timer.start(self._start_accepting_if_started)
                    self.delay = min(self.max_delay, self.delay * 2)
                break
            else:
                try:
                    self.do_handle(*args)
                except:
                    # args[0] is left out of the error context; only the
                    # remaining items are reported.
                    self.loop.handle_error((args[1:], self), *sys.exc_info())
                    if self.delay >= 0:
                        self.stop_accepting()
                        self._timer = self.loop.timer(self.delay)
                        self._timer.start(self._start_accepting_if_started)
                        self.delay = min(self.max_delay, self.delay * 2)
                    break

    def full(self):
        """Return whether no more connections may be accepted; always False
        here (replaced by the pool's ``full`` when a pool is used)."""
        return False

    def __repr__(self):
        return '<%s at %s %s>' % (type(self).__name__, hex(id(self)), self._formatinfo())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._formatinfo())

    def _formatinfo(self):
        """
        Build the descriptive part of ``repr``/``str``: file number, address
        and handler. Errors while collecting a piece are rendered as text
        instead of being raised.
        """
        if hasattr(self, 'socket'):
            try:
                fileno = self.socket.fileno()
            except Exception as ex:
                fileno = str(ex)
            result = 'fileno=%s ' % fileno
        else:
            result = ''
        try:
            if isinstance(self.address, tuple) and len(self.address) == 2:
                result += 'address=%s:%s' % self.address
            else:
                result += 'address=%s' % (self.address, )
        except Exception as ex:
            result += str(ex) or '<error>'

        # Only a handler stored on the instance is shown (a ``handle``
        # method defined on the class is not in ``__dict__``).
        handle = self.__dict__.get('handle')
        if handle is not None:
            fself = getattr(handle, '__self__', None)
            try:
                if fself is self:
                    # Checks the __self__ of the handle in case it is a bound
                    # method of self to prevent recursively defined reprs.
                    handle_repr = '<bound method %s.%s of self>' % (
                        self.__class__.__name__,
                        handle.__name__,
                    )
                else:
                    handle_repr = repr(handle)

                result += ' handle=' + handle_repr
            except Exception as ex:
                result += str(ex) or '<error>'

        return result

    @property
    def server_host(self):
        """IP address that the server is bound to (string).

        ``None`` when the address is not a tuple."""
        if isinstance(self.address, tuple):
            return self.address[0]

    @property
    def server_port(self):
        """Port that the server is bound to (an integer).

        ``None`` when the address is not a tuple."""
        if isinstance(self.address, tuple):
            return self.address[1]

    def init_socket(self):
        """If the user initialized the server with an address rather than socket,
        then this function will create a socket, bind it and put it into listening mode.

        It is not supposed to be called by the user, it is called by :meth:`start` before starting
        the accept loop.

        The implementation in this base class does nothing."""
        pass

    @property
    def started(self):
        """True while the stop event is cleared, i.e. between :meth:`start`
        and :meth:`close`."""
        return not self._stop_event.is_set()

    def start(self):
        """Start accepting the connections.

        If an address was provided in the constructor, then also create a socket,
        bind it and put it into the listening mode.

        If starting the accept watcher fails the server is closed and the
        exception is re-raised.
        """
        self.init_socket()
        self._stop_event.clear()
        try:
            self.start_accepting()
        except:
            self.close()
            raise

    def close(self):
        """Close the listener socket and stop accepting.

        Also removes the per-instance ``socket``, ``handle``, ``_handle``,
        ``_spawn`` and ``full`` attributes and unlinks from the pool's
        semaphore. Errors from closing the socket (including the socket
        attribute being absent) are ignored.
        """
        self._stop_event.set()
        try:
            self.stop_accepting()
        finally:
            try:
                self.socket.close()
            except Exception:
                pass
            finally:
                self.__dict__.pop('socket', None)
                self.__dict__.pop('handle', None)
                self.__dict__.pop('_handle', None)
                self.__dict__.pop('_spawn', None)
                self.__dict__.pop('full', None)
                if self.pool is not None:
                    self.pool._semaphore.unlink(self._start_accepting_if_started)

    @property
    def closed(self):
        """True once the instance no longer has a ``socket`` attribute."""
        return not hasattr(self, 'socket')

    def stop(self, timeout=None):
        """
        Stop accepting the connections and close the listening socket.

        If the server uses a pool to spawn the requests, then
        :meth:`stop` also waits for all the handlers to exit. If there
        are still handlers executing after *timeout* has expired
        (default 1 second, :attr:`stop_timeout`), then the currently
        running handlers in the pool are killed.

        If the server does not use a pool, then this merely stops accepting connections;
        any spawned greenlets that are handling requests continue running until
        they naturally complete.
        """
        self.close()
        if timeout is None:
            timeout = self.stop_timeout
        if self.pool:
            self.pool.join(timeout=timeout)
            self.pool.kill(block=True, timeout=1)

    def serve_forever(self, stop_timeout=None):
        """Start the server if it hasn't been already started and wait until it's stopped.

        When the wait ends (normally or through an exception), :meth:`stop`
        is run in a new greenlet with *stop_timeout* and joined."""
        # add test that serve_forever exists on stop()
        if not self.started:
            self.start()
        try:
            self._stop_event.wait()
        finally:
            Greenlet.spawn(self.stop, timeout=stop_timeout).join()

    def is_fatal_error(self, ex):
        """Return True if *ex* is a socket error whose first argument is one
        of :attr:`fatal_errors`."""
        return isinstance(ex, _socket.error) and ex.args[0] in self.fatal_errors
class Thread(object):
    """
    An enhanced replacement for the Python :class:`threading.Thread` class.

    This isn't actually a true thread, instead it uses Gevent to implement
    co-routines. Using :func:`gevent.monkey.patch_all`, all Python blocking
    functions are replaced with non-blocking Gevent alternatives which allow
    the co-routines to yield to each other whenever a blocking operation is
    called.

    Instances can be used as context managers: leaving the ``with`` block
    stops the thread if it is still running.
    """

    # Class-level default so that methods can detect a subclass that forgot
    # to call ``Thread.__init__``.
    __initialized = False

    def __init__(self, group=None, name=None):
        """
        Thread constructor

        :param group: should be ``None``; reserved for future extension when
                      a :class:`ThreadGroup` class is implemented.

        :param name: the thread name. By default, a unique name is
                     constructed of the form "Thread-*N*" where *N* is a
                     small decimal number.

        If the subclass overrides the constructor, it must make sure to
        invoke the base class constructor (``Thread.__init__()``) before
        doing anything else to the thread.
        """
        # WARNING: Not sure about the side-effects of this...
        # Monkeypatch a bunch of blocking and thread-related
        # constructs to use gevent alternatives. Threads are now
        # co-routines which yield to each other when a Gevent
        # blocking operation is called.
        from gevent import monkey
        monkey.patch_all()

        self.__name = str(name or _newname())
        self.__ident = None
        self.__started = Event()
        self.__stopped = False
        self.__initialized = True

    def start(self):
        """
        Start the thread's activity.

        It must be called at most once per thread object. It arranges for the
        object's :meth:`run` method to be invoked in a separate thread of
        control.

        :raises RuntimeError: if the constructor was not called or the thread
                              was already started.
        """
        if not self.__initialized:
            raise RuntimeError("thread.__init__() not called")
        if self.__started.is_set():
            raise RuntimeError("thread already started")
        self._bootstrap()

    def _bootstrap(self):
        """Assign an identifier, mark the thread started and spawn :meth:`run`."""
        self.__ident = uuid.uuid4()
        self.__started.set()
        self._g_main = gevent.spawn(self.run)

    def stop(self, blocking=False):
        """
        Stop the thread's activity.

        Kills the greenlet running :meth:`run`, then calls :meth:`shutdown`.

        :param blocking: block until thread has stopped completely.

        :raises RuntimeError: if the thread was already stopped, or was never
                              started.
        """
        if self.__stopped:
            raise RuntimeError("threads can only be stopped once")
        # Validate before touching any state: there is no greenlet to kill
        # until start() ran, and a failed stop() must not leave the thread
        # flagged as stopped.
        if not self.__started.is_set():
            raise RuntimeError("cannot stop thread before it is started")
        self.__stopped = True
        self._g_main.kill()
        self.shutdown()
        if blocking:
            self._g_main.join()

    def run(self):
        """
        Method representing the thread's activity.

        You may override this method in a subclass.
        """
        pass

    def join(self, timeout=None):
        r"""
        Wait until the thread terminates.

        This blocks the calling thread until the thread whose :meth:`join`
        method is called terminates -- either normally or through an
        unhandled exception -- or until the optional timeout occurs.

        When the *timeout* argument is present and not ``None``, it should be
        a floating point number specifying a timeout for the operation in
        seconds (or fractions thereof). As :meth:`join` always returns
        ``None``, you must call :meth:`isAlive` after :meth:`join` to decide
        whether a timeout happened -- if the thread is still alive, the
        :meth:`join` call timed out.

        When the *timeout* argument is not present or ``None``, the operation
        will block until the thread terminates.

        A thread can be :meth:`join`\ ed many times.

        :raises RuntimeError: if the constructor was not called or the thread
                              has not been started yet.
        """
        if not self.__initialized:
            raise RuntimeError("Thread.__init__() not called")
        if not self.__started.is_set():
            raise RuntimeError("cannot join thread before it is started")
        self._g_main.join(timeout)

    def shutdown(self):
        """
        Cleanup method called when thread is stopping.

        This method is run when the thread is stopped. Any resources used by
        the thread (sockets and such) should be safely closed here.

        You may override this method in a subclass.
        """
        pass

    def __repr__(self):
        assert self.__initialized, "Thread.__init__() was not called"
        status = "initial"
        if self.__started.is_set():
            status = "started"
        if self.__stopped:
            status = "stopped"
        if self.__ident is not None:
            status += " %s" % self.__ident
        return "<%s(%s, %s)>" % (self.__class__.__name__, self.__name, status)

    def __enter__(self):
        """Return the thread itself; the thread is *not* started implicitly."""
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """
        Stop the thread when leaving a ``with`` block.

        Accepts the three exception arguments the context-manager protocol
        passes. Nothing is done if the thread was never started or has
        already been stopped, so leaving the block never raises on its own.
        Exceptions raised inside the block are not suppressed.
        """
        if self.__started.is_set() and not self.__stopped:
            self.stop()

    @property
    def name(self):
        """The thread name, always a string."""
        assert self.__initialized, "Thread.__init__() not called"
        return self.__name

    @name.setter
    def name(self, name):
        assert self.__initialized, "Thread.__init__() not called"
        self.__name = str(name)

    @property
    def ident(self):
        """Identifier assigned when the thread is started; ``None`` before."""
        assert self.__initialized, "Thread.__init__() not called"
        return self.__ident

    def isAlive(self):
        """
        Return whether the thread has been started and not yet stopped.

        NOTE: this only reflects :meth:`start`/:meth:`stop` calls; it does not
        look at whether :meth:`run` has already returned on its own.
        """
        assert self.__initialized, "Thread.__init__() not called"
        return self.__started.is_set() and not self.__stopped

    is_alive = isAlive

    def getName(self):
        """Return the thread name (legacy accessor for :attr:`name`)."""
        return self.name

    def setName(self, name):
        """Set the thread name (legacy accessor for :attr:`name`)."""
        self.name = name
class CustomDaemon(Daemon):
    """
    Custom daemon for test.

    Counts every lifecycle callback and dumps its state to
    :attr:`DAEMON_LAST_ACTION_FILE` after each one.
    """

    # File receiving the state dump written by _write_state().
    DAEMON_LAST_ACTION_FILE = "/tmp/daemon_last_action.txt"

    def _internal_init(self,
                       pidfile,
                       stdin, stdout, stderr, logfile, loglevel,
                       on_start_exit_zero,
                       max_open_files,
                       change_dir,
                       timeout_ms,
                       logtosyslog=True,
                       logtosyslog_facility=SysLogHandler.LOG_LOCAL0,
                       logtoconsole=True,
                       app_name="Test"):
        """
        Initialize the test counters, then delegate to ``Daemon._internal_init``.

        All parameters are forwarded unchanged to the base implementation.
        """

        # Us
        self.is_running = True
        self.start_count = 0
        self.stop_count = 0
        self.reload_count = 0
        self.status_count = 0
        self.start_loop_exited = Event()
        self.last_action = "noaction"

        # Base
        Daemon._internal_init(self, pidfile, stdin, stdout, stderr, logfile, loglevel,
                              on_start_exit_zero, max_open_files, change_dir, timeout_ms,
                              logtosyslog, logtosyslog_facility, logtoconsole, app_name)

        # Log
        logger.debug("Done, self.class=%s", SolBase.get_classname(self))

    @classmethod
    def get_daemon_instance(cls):
        """
        Get a new Daemon instance
        :return CustomDaemon
        :rtype CustomDaemon
        """
        return CustomDaemon()

    def _write_state(self):
        """
        Write the current counters and flags to DAEMON_LAST_ACTION_FILE,
        one ``key=value`` pair per line, replacing any previous content.
        """
        buf = "" \
              "pid={0}\nppid={1}\nis_running={2}\nstart_count={3}\nstop_count={4}\n" \
              "reload_count={5}\nstatus_count={6}\nlast_action={7}\nstart_loop_exited={8}\n" \
            .format(os.getpid(), os.getppid(),
                    self.is_running,
                    self.start_count, self.stop_count,
                    self.reload_count, self.status_count,
                    self.last_action,
                    self.start_loop_exited.is_set(),
                    )

        # The context manager closes the file even if the write fails.
        with open(CustomDaemon.DAEMON_LAST_ACTION_FILE, "w") as f:
            f.write(buf)

    def _on_stop(self):
        """
        Stop callback: flag the running loop to exit and record the action.
        """
        logger.info("Called")

        # Signal the loop in _on_start to exit.
        # As described in https://github.com/gevent/gevent/issues/799
        # - signals run into the main thread
        # - we cannot wait or switch here => direct exit
        self.is_running = False
        self.stop_count += 1
        self.last_action = "stop"
        self._write_state()

    def _on_reload(self, *args, **kwargs):
        """
        Reload callback: count the call and record the action.
        """
        logger.info("Called")
        self.reload_count += 1
        self.last_action = "reload"
        self._write_state()

    def _on_start(self):
        """
        Start callback: record the action, then loop until ``is_running`` is
        cleared; the state is written again and ``start_loop_exited`` is set
        once the loop has exited.
        """
        logger.info("Called")
        self.start_count += 1
        self.last_action = "start"
        self._write_state()

        logger.info("Engaging running loop")
        while self.is_running:
            SolBase.sleep(10)
        logger.info("Exited running loop")

        self._write_state()
        self.start_loop_exited.set()
        logger.debug("Exited")

    def _on_status(self, *argv, **kwargs):
        """
        Status callback: count the call and record the action.
        """
        logger.info("Called")
        self.status_count += 1
        self.last_action = "status"
        self._write_state()
class Manager(object): def __init__(self, name, google_key, locale, units, timezone, time_limit, max_attempts, location, quiet, cache_type, filter_file, geofence_file, alarm_file, debug): # Set the name of the Manager self.__name = str(name).lower() log.info("----------- Manager '{}' ".format(self.__name) + " is being created.") self.__debug = debug # Get the Google Maps API self._google_key = google_key self._gmaps_service = GMaps(google_key) self._gmaps_reverse_geocode = False self._gmaps_distance_matrix = set() self._language = locale self.__locale = Locale(locale) # Setup the language-specific stuff self.__units = units # type of unit used for distances self.__timezone = timezone # timezone for time calculations self.__time_limit = time_limit # Minimum time remaining # Location should be [lat, lng] (or None for no location) self.__location = None if str(location).lower() != 'none': self.set_location(location) else: log.warning("NO LOCATION SET - " + " this may cause issues with distance related DTS.") # Quiet mode self.__quiet = quiet # Create cache self.__cache = cache_factory(cache_type, self.__name) # Load and Setup the Pokemon Filters self.__mons_enabled, self.__mon_filters = False, OrderedDict() self.__stops_enabled, self.__stop_filters = False, OrderedDict() self.__gyms_enabled, self.__gym_filters = False, OrderedDict() self.__ignore_neutral = False self.__eggs_enabled, self.__egg_filters = False, OrderedDict() self.__raids_enabled, self.__raid_filters = False, OrderedDict() self.__weather_enabled, self.__weather_filters = False, OrderedDict() self.__quest_enabled, self.__quest_filters = False, OrderedDict() self.load_filter_file(get_path(filter_file)) # Create the Geofences to filter with from given file self.geofences = None if str(geofence_file).lower() != 'none': self.geofences = load_geofence_file(get_path(geofence_file)) # Create the alarms to send notifications out with self.__alarms = {} self.load_alarms_file(get_path(alarm_file), 
int(max_attempts)) # Initialize Rules self.__mon_rules = {} self.__stop_rules = {} self.__gym_rules = {} self.__egg_rules = {} self.__raid_rules = {} self.__weather_rules = {} self.__quest_rules = {} # Initialize the queue and start the process self.__queue = Queue() self.__event = Event() self.__process = None log.info("----------- Manager '{}' ".format(self.__name) + " successfully created.") # ~~~~~~~~~~~~~~~~~~~~~~~ MAIN PROCESS CONTROL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Update the object into the queue def update(self, obj): self.__queue.put(obj) # Get the name of this Manager def get_name(self): return self.__name # Tell the process to finish up and go home def stop(self): log.info("Manager {} shutting down... ".format(self.__name) + "{} items in queue.".format(self.__queue.qsize())) self.__event.set() def join(self): self.__process.join(timeout=20) if not self.__process.ready(): log.warning("Manager {} could not be stopped in time!" " Forcing process to stop.".format(self.__name)) self.__process.kill(timeout=2, block=True) # Force stop else: log.info("Manager {} successfully stopped!".format(self.__name)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GMAPS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def enable_gmaps_reverse_geocoding(self): """Enable GMaps Reverse Geocoding DTS for triggered Events. """ if not self._gmaps_service: raise ValueError("Unable to enable Google Maps Reverse Geocoding." "No GMaps API key has been set.") self._gmaps_reverse_geocode = True def disable_gmaps_reverse_geocoding(self): """Disable GMaps Reverse Geocoding DTS for triggered Events. """ self._gmaps_reverse_geocode = False def enable_gmaps_distance_matrix(self, mode): """Enable 'mode' Distance Matrix DTS for triggered Events. """ if not self.__location: raise ValueError("Unable to enable Google Maps Reverse Geocoding." 
"No Manager location has been set.") elif not self._gmaps_service: raise ValueError("Unable to enable Google Maps Reverse Geocoding." "No GMaps API key has been provided.") elif mode not in GMaps.TRAVEL_MODES: raise ValueError("Unable to enable distance matrix mode: " "{} is not a valid mode.".format(mode)) self._gmaps_distance_matrix.add(mode) def disable_gmaps_dm_walking(self, mode): """Disable 'mode' Distance Matrix DTS for triggered Events. """ if mode not in GMaps.TRAVEL_MODES: raise ValueError("Unable to disable distance matrix mode: " "Invalid mode specified.") self._gmaps_distance_matrix.discard(mode) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RULES API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add new Monster Rule def add_monster_rule(self, name, filters, alarms): if name in self.__mon_rules: raise ValueError("Unable to add Rule: Monster Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__mon_filters: raise ValueError("Unable to create Rule: No Monster Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__mon_rules[name] = Rule(filters, alarms) # Add new Stop Rule def add_stop_rule(self, name, filters, alarms): if name in self.__stop_rules: raise ValueError("Unable to add Rule: Stop Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__stop_filters: raise ValueError("Unable to create Rule: No Stop Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__stop_rules[name] = Rule(filters, alarms) # Add new Gym Rule def add_gym_rule(self, name, filters, alarms): if name in self.__gym_rules: raise ValueError("Unable to add Rule: Gym Rule with the name " "{} already 
exists!".format(name)) for filt in filters: if filt not in self.__gym_filters: raise ValueError("Unable to create Rule: No Gym Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__gym_rules[name] = Rule(filters, alarms) # Add new Egg Rule def add_egg_rule(self, name, filters, alarms): if name in self.__egg_rules: raise ValueError("Unable to add Rule: Egg Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__egg_filters: raise ValueError("Unable to create Rule: No Egg Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__egg_rules[name] = Rule(filters, alarms) # Add new Raid Rule def add_raid_rule(self, name, filters, alarms): if name in self.__raid_rules: raise ValueError("Unable to add Rule: Raid Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__raid_filters: raise ValueError("Unable to create Rule: No Raid Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__raid_rules[name] = Rule(filters, alarms) # Add new Weather Rule def add_weather_rule(self, name, filters, alarms): if name in self.__weather_rules: raise ValueError("Unable to add Rule: Weather Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__weather_filters: raise ValueError("Unable to create Rule: No weather Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__weather_rules[name] = Rule(filters, alarms) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add new Quest 
Rule def add_quest_rule(self, name, filters, alarms): if name in self.__quest_rules: raise ValueError("Unable to add Rule: Quest Rule with the name " "{} already exists!".format(name)) for filt in filters: if filt not in self.__quest_filters: raise ValueError("Unable to create Rule: No quest Filter " "named {}!".format(filt)) for alarm in alarms: if alarm not in self.__alarms: raise ValueError("Unable to create Rule: No Alarm " "named {}!".format(alarm)) self.__quest_rules[name] = Rule(filters, alarms) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGER LOADING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @staticmethod def load_filter_section(section, sect_name, filter_type): defaults = section.pop('defaults', {}) default_dts = defaults.pop('custom_dts', {}) filter_set = OrderedDict() for name, settings in section.pop('filters', {}).iteritems(): settings = dict(defaults.items() + settings.items()) try: local_dts = dict(default_dts.items() + settings.pop('custom_dts', {}).items()) if len(local_dts) > 0: settings['custom_dts'] = local_dts filter_set[name] = filter_type(name, settings) log.debug("Filter '%s' set as the following: %s", name, filter_set[name].to_dict()) except Exception as e: log.error("Encountered error inside filter named '%s'.", name) raise e # Pass the error up for key in section: # Reject leftover parameters raise ValueError("'{}' is not a recognized parameter for the " "'{}' section.".format(key, sect_name)) return filter_set # Load in a new filters file def load_filter_file(self, file_path): try: log.info("Loading Filters from file at {}".format(file_path)) with open(file_path, 'r') as f: filters = json.load(f, object_pairs_hook=OrderedDict) if type(filters) is not OrderedDict: log.critical("Filters files must be a JSON object:" " { \"monsters\":{...},... 
}") raise ValueError("Filter file did not contain a dict.") except ValueError as e: log.error("Encountered error while loading Filters:" " {}: {}".format(type(e).__name__, e)) log.error( "PokeAlarm has encountered a 'ValueError' while loading the " "Filters file. This typically means the file isn't in the " "correct json format. Try loading the file contents into a " "json validator.") log.debug("Stack trace: \n {}".format(traceback.format_exc())) sys.exit(1) except IOError as e: log.error("Encountered error while loading Filters: " "{}: {}".format(type(e).__name__, e)) log.error("PokeAlarm was unable to find a filters file " "at {}. Please check that this file exists " "and that PA has read permissions.".format(file_path)) log.debug("Stack trace: \n {}".format(traceback.format_exc())) sys.exit(1) try: # Load Monsters Section log.info("Parsing 'monsters' section.") section = filters.pop('monsters', {}) self.__mons_enabled = bool(section.pop('enabled', False)) self.__mon_filters = self.load_filter_section( section, 'monsters', Filters.MonFilter) # Load Stops Section log.info("Parsing 'stops' section.") section = filters.pop('stops', {}) self.__stops_enabled = bool(section.pop('enabled', False)) self.__stop_filters = self.load_filter_section( section, 'stops', Filters.StopFilter) # Load Gyms Section log.info("Parsing 'gyms' section.") section = filters.pop('gyms', {}) self.__gyms_enabled = bool(section.pop('enabled', False)) self.__ignore_neutral = bool(section.pop('ignore_neutral', False)) self.__gym_filters = self.load_filter_section( section, 'gyms', Filters.GymFilter) # Load Eggs Section log.info("Parsing 'eggs' section.") section = filters.pop('eggs', {}) self.__eggs_enabled = bool(section.pop('enabled', False)) self.__egg_filters = self.load_filter_section( section, 'eggs', Filters.EggFilter) # Load Raids Section log.info("Parsing 'raids' section.") section = filters.pop('raids', {}) self.__raids_enabled = bool(section.pop('enabled', False)) self.__raid_filters 
= self.load_filter_section( section, 'raids', Filters.RaidFilter) # Load Weather Section log.info("Parsing 'weather' section.") section = filters.pop('weather', {}) self.__weather_enabled = bool(section.pop('enabled', True)) self.__weather_filters = self.load_filter_section( section, 'weather', Filters.WeatherFilter) # Load Quest Section log.info("Parsing 'quest' section.") section = filters.pop('quest', {}) self.__quest_enabled = bool(section.pop('enabled', True)) self.__quest_filters = self.load_filter_section( section, 'quest', Filters.QuestFilter) return # exit function except Exception as e: log.error("Encountered error while parsing Filters. " "This is because of a mistake in your Filters file.") log.error("{}: {}".format(type(e).__name__, e)) log.debug("Stack trace: \n {}".format(traceback.format_exc())) sys.exit(1) def load_alarms_file(self, file_path, max_attempts): log.info("Loading Alarms from the file at {}".format(file_path)) try: with open(file_path, 'r') as f: alarm_settings = json.load(f) if type(alarm_settings) is not dict: log.critical( "Alarms file must be an object of Alarms objects " + "- { 'alarm1': {...}, ... 'alarm5': {...} }") sys.exit(1) self.__alarms = {} for name, alarm in alarm_settings.iteritems(): if parse_boolean( require_and_remove_key( 'active', alarm, "Alarm objects in file.")) is True: self.__alarms[name] = Alarms.alarm_factory( alarm, max_attempts, self._google_key) else: log.debug("Alarm not activated: {}".format(alarm['type']) + " because value not set to \"True\"") log.info("{} active alarms found.".format(len(self.__alarms))) return # all done except ValueError as e: log.error("Encountered error while loading Alarms file: " + "{}: {}".format(type(e).__name__, e)) log.error( "PokeAlarm has encountered a 'ValueError' while loading the " + " Alarms file. This typically means your file isn't in the " + "correct json format. 
Try loading your file contents into" + " a json validator.") except IOError as e: log.error("Encountered error while loading Alarms: " + "{}: {}".format(type(e).__name__, e)) log.error("PokeAlarm was unable to find a filters file " + "at {}. Please check that this file".format(file_path) + " exists and PA has read permissions.") except Exception as e: log.error("Encountered error while loading Alarms: " + "{}: {}".format(type(e).__name__, e)) log.debug("Stack trace: \n {}".format(traceback.format_exc())) sys.exit(1) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HANDLE EVENTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Start it up def start(self): self.__process = gevent.spawn(self.run) def setup_in_process(self): # Update config config['DEBUG'] = self.__debug config['ROOT_PATH'] = os.path.abspath("{}/..".format( os.path.dirname(__file__))) # Hush some new loggers logging.getLogger('requests').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) if config['DEBUG'] is True: logging.getLogger().setLevel(logging.DEBUG) # Conect the alarms and send the start up message for alarm in self.__alarms.values(): alarm.connect() alarm.startup_message() # Main event handler loop def run(self): self.setup_in_process() last_clean = datetime.utcnow() while True: # Run forever and ever # Clean out visited every 5 minutes if datetime.utcnow() - last_clean > timedelta(minutes=5): log.debug("Cleaning cache...") self.__cache.clean_and_save() last_clean = datetime.utcnow() try: # Get next object to process event = self.__queue.get(block=True, timeout=5) except gevent.queue.Empty: # Check if the process should exit process if self.__event.is_set(): break # Explict context yield gevent.sleep(0) continue try: kind = type(event) log.debug("Processing event: %s", event.id) if kind == Events.MonEvent: self.process_monster(event) elif kind == Events.StopEvent: self.process_stop(event) elif kind == Events.GymEvent: 
self.process_gym(event) elif kind == Events.EggEvent: self.process_egg(event) elif kind == Events.RaidEvent: self.process_raid(event) elif kind == Events.WeatherEvent: self.process_weather(event) elif kind == Events.QuestEvent: self.process_quest(event) else: log.error("!!! Manager does not support " + "{} events!".format(kind)) log.debug("Finished event: %s", event.id) except Exception as e: log.error("Encountered error during processing: " + "{}: {}".format(type(e).__name__, e)) log.debug("Stack trace: \n {}".format(traceback.format_exc())) # Explict context yield gevent.sleep(0) # Save cache and exit self.__cache.clean_and_save() raise gevent.GreenletExit() # Set the location of the Manager def set_location(self, location): # Regex for Lat,Lng coordinate prog = re.compile("^(-?\d+\.\d+)[,\s]\s*(-?\d+\.\d+?)$") res = prog.match(location) if res: # If location is in a Lat,Lng coordinate self.__location = [float(res.group(1)), float(res.group(2))] else: # Check if key was provided if self._gmaps_service is None: raise ValueError("Unable to find location coordinates by name" " - no Google API key was provided.") # Attempt to geocode location location = self._gmaps_service.geocode(location) if location is None: raise ValueError("Unable to geocode coordinates from {}. " "Location will not be set.".format(location)) self.__location = location log.info("Location successfully set to '{},{}'.".format( location[0], location[1])) # Process new Monster data and decide if a notification needs to be sent def process_monster(self, mon): # type: (Events.MonEvent) -> None """ Process a monster event and notify alarms if it passes. 
""" # Make sure that monsters are enabled if self.__mons_enabled is False: log.debug("Monster ignored: monster notifications are disabled.") return # Set the name for this event so we can log rejects better mon.name = self.__locale.get_pokemon_name(mon.monster_id) # Check if previously processed and update expiration if self.__cache.monster_expiration(mon.enc_id) is not None: log.debug("{} monster was skipped because it was previously " "processed.".format(mon.name)) return self.__cache.monster_expiration(mon.enc_id, mon.disappear_time) # Check the time remaining seconds_left = (mon.disappear_time - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: log.debug("{} monster was skipped because only {} seconds remained" "".format(mon.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: mon.distance = get_earth_dist([mon.lat, mon.lng], self.__location, self.__units) mon.direction = get_cardinal_dir([mon.lat, mon.lng], self.__location) # Check for Rules rules = self.__mon_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__mon_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__mon_filters.get(f_name) passed = f.check_event(mon) and self.check_geofences(f, mon) if not passed: continue # go to next filter mon.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} monster notification" " has been triggered in rule '{}'!" 
"".format(mon.name, r_name)) self._trigger_mon(mon, rule.alarm_names) break # Next rule def _trigger_mon(self, mon, alarms): # Generate the DTS for the event dts = mon.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((mon.lat, mon.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (mon.lat, mon.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.pokemon_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: # Wait for all alarms to finish thread.join() def process_stop(self, stop): # type: (Events.StopEvent) -> None """ Process a stop event and notify alarms if it passes. """ # Make sure that stops are enabled if self.__stops_enabled is False: log.debug("Stop ignored: stop notifications are disabled.") return # Check for lured if stop.expiration is None: log.debug("Stop ignored: stop was not lured") return # Check if previously processed and update expiration if self.__cache.stop_expiration(stop.stop_id) is not None: log.debug("Stop {} was skipped because it was previously " "processed.".format(stop.name)) return self.__cache.stop_expiration(stop.stop_id, stop.expiration) # Check the time remaining seconds_left = (stop.expiration - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: log.debug("Stop {} was skipped because only {} seconds remained" "".format(stop.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: stop.distance = get_earth_dist([stop.lat, stop.lng], self.__location, self.__units) stop.direction = get_cardinal_dir([stop.lat, stop.lng], self.__location) # Check for Rules rules = self.__stop_rules if 
len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__stop_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__stop_filters.get(f_name) passed = f.check_event(stop) and self.check_geofences(f, stop) if not passed: continue # go to next filter stop.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} stop notification" " has been triggered in rule '{}'!" "".format(stop.name, r_name)) self._trigger_stop(stop, rule.alarm_names) break # Next rule def _trigger_stop(self, stop, alarms): # Generate the DTS for the event dts = stop.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((stop.lat, stop.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (stop.lat, stop.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.pokestop_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: thread.join() def process_gym(self, gym): # type: (Events.GymEvent) -> None """ Process a gym event and notify alarms if it passes. 
""" # Update Gym details (if they exist) gym.gym_name = self.__cache.gym_name(gym.gym_id, gym.gym_name) gym.gym_description = self.__cache.gym_desc(gym.gym_id, gym.gym_description) gym.gym_image = self.__cache.gym_image(gym.gym_id, gym.gym_image) # Ignore changes to neutral if self.__ignore_neutral and gym.new_team_id == 0: log.debug("%s gym update skipped: new team was neutral") return # Update Team Information gym.old_team_id = self.__cache.gym_team(gym.gym_id) self.__cache.gym_team(gym.gym_id, gym.new_team_id) # Check if notifications are on if self.__gyms_enabled is False: log.debug("Gym ignored: gym notifications are disabled.") return # Doesn't look like anything to me if gym.new_team_id == gym.old_team_id: log.debug("%s gym update skipped: no change detected", gym.gym_id) return # Calculate distance and direction if self.__location is not None: gym.distance = get_earth_dist([gym.lat, gym.lng], self.__location, self.__units) gym.direction = get_cardinal_dir([gym.lat, gym.lng], self.__location) # Check for Rules rules = self.__gym_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__gym_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__gym_filters.get(f_name) passed = f.check_event(gym) and self.check_geofences(f, gym) if not passed: continue # go to next filter gym.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} gym notification" " has been triggered in rule '{}'!" 
"".format(gym.name, r_name)) self._trigger_gym(gym, rule.alarm_names) break # Next rule def _trigger_gym(self, gym, alarms): # Generate the DTS for the event dts = gym.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((gym.lat, gym.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (gym.lat, gym.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.gym_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: # Wait for all alarms to finish thread.join() def process_egg(self, egg): # type: (Events.EggEvent) -> None """ Process a egg event and notify alarms if it passes. """ # Update Gym details (if they exist) egg.gym_name = self.__cache.gym_name(egg.gym_id, egg.gym_name) egg.gym_description = self.__cache.gym_desc(egg.gym_id, egg.gym_description) egg.gym_image = self.__cache.gym_image(egg.gym_id, egg.gym_image) # Update Team if Unknown if Unknown.is_(egg.current_team_id): egg.current_team_id = self.__cache.gym_team(egg.gym_id) # Make sure that eggs are enabled if self.__eggs_enabled is False: log.debug("Egg ignored: egg notifications are disabled.") return # Skip if previously processed if self.__cache.egg_expiration(egg.gym_id) is not None: log.debug("Egg {} was skipped because it was previously " "processed.".format(egg.name)) return self.__cache.egg_expiration(egg.gym_id, egg.hatch_time) # Check the time remaining seconds_left = (egg.hatch_time - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: log.debug("Egg {} was skipped because only {} seconds remained" "".format(egg.name, seconds_left)) return # Calculate distance and direction if self.__location is not 
None: egg.distance = get_earth_dist([egg.lat, egg.lng], self.__location, self.__units) egg.direction = get_cardinal_dir([egg.lat, egg.lng], self.__location) # Check for Rules rules = self.__egg_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__egg_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__egg_filters.get(f_name) passed = f.check_event(egg) and self.check_geofences(f, egg) if not passed: continue # go to next filter egg.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} egg notification" " has been triggered in rule '{}'!" "".format(egg.name, r_name)) self._trigger_egg(egg, rule.alarm_names) break # Next rule def _trigger_egg(self, egg, alarms): # Generate the DTS for the event dts = egg.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((egg.lat, egg.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (egg.lat, egg.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.raid_egg_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: # Wait for all alarms to finish thread.join() def process_raid(self, raid): # type: (Events.RaidEvent) -> None """ Process a raid event and notify alarms if it passes. 
""" # Update Gym details (if they exist) raid.gym_name = self.__cache.gym_name(raid.gym_id, raid.gym_name) raid.gym_description = self.__cache.gym_desc(raid.gym_id, raid.gym_description) raid.gym_image = self.__cache.gym_image(raid.gym_id, raid.gym_image) # Update Team if Unknown if Unknown.is_(raid.current_team_id): raid.current_team_id = self.__cache.gym_team(raid.gym_id) # Make sure that raids are enabled if self.__raids_enabled is False: log.debug("Raid ignored: raid notifications are disabled.") return # Skip if previously processed if self.__cache.raid_expiration(raid.gym_id) is not None: log.debug("Raid {} was skipped because it was previously " "processed.".format(raid.name)) return self.__cache.raid_expiration(raid.gym_id, raid.raid_end) # Check the time remaining seconds_left = (raid.raid_end - datetime.utcnow()).total_seconds() if seconds_left < self.__time_limit: log.debug("Raid {} was skipped because only {} seconds remained" "".format(raid.name, seconds_left)) return # Calculate distance and direction if self.__location is not None: raid.distance = get_earth_dist([raid.lat, raid.lng], self.__location, self.__units) raid.direction = get_cardinal_dir([raid.lat, raid.lng], self.__location) # Check for Rules rules = self.__raid_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__raid_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__raid_filters.get(f_name) passed = f.check_event(raid) and self.check_geofences(f, raid) if not passed: continue # go to next filter raid.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} raid notification" " has been triggered in rule '{}'!" 
"".format(raid.name, r_name)) self._trigger_raid(raid, rule.alarm_names) break # Next rule def _trigger_raid(self, raid, alarms): # Generate the DTS for the event dts = raid.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((raid.lat, raid.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (raid.lat, raid.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.raid_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: # Wait for all alarms to finish thread.join() def process_weather(self, weather): # type: (Events.WeatherEvent) -> None """ Process a weather event and notify alarms if it passes. """ # Make sure that weather is enabled if self.__weather_enabled is False: log.debug("Weather ignored: weather notifications are disabled.") return # Skip if previously processed if self.__cache.get_cell_weather( weather.weather_cell_id) == weather.condition: log.debug("Weather alert for cell {} was skipped " "because it was already {} weather.".format( weather.weather_cell_id, weather.condition)) return self.__cache.update_cell_weather(weather.weather_cell_id, weather.condition) # Check for Rules rules = self.__weather_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__weather_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__weather_filters.get(f_name) passed = f.check_event(weather) and \ self.check_weather_geofences(f, weather) if not passed: continue # go to next filter weather.custom_dts = f.custom_dts if self.__quiet is False: 
log.info("{} weather notification" " has been triggered in rule '{}'!" "".format(weather.weather_cell_id, r_name)) self._trigger_weather(weather, rule.alarm_names) break # Next rule def _trigger_weather(self, weather, alarms): dts = weather.generate_dts(self.__locale, self.__timezone, self.__units) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.weather_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: # Wait for all alarms to finish thread.join() def process_quest(self, quest): # type: (Events.QuestEvent) -> None """ Process a quest event and notify alarms if it passes. """ # Make sure that stops are enabled if self.__quest_enabled is False: log.debug("Quest ignored: quest notifications are disabled.") return # Check if previously processed and update expiration if self.__cache.quest_reward(quest.stop_id) is not None: log.debug("Quest {} was skipped because it was previously " "processed.".format(quest.stop_name)) return self.__cache.quest_reward(quest.stop_id, quest.reward) # Calculate distance and direction if self.__location is not None: quest.distance = get_earth_dist([quest.lat, quest.lng], self.__location, self.__units) quest.direction = get_cardinal_dir([quest.lat, quest.lng], self.__location) # Check for Rules rules = self.__quest_rules if len(rules) == 0: # If no rules, default to all rules = { "default": Rule(self.__quest_filters.keys(), self.__alarms.keys()) } for r_name, rule in rules.iteritems(): # For all rules for f_name in rule.filter_names: # Check Filters in Rules f = self.__quest_filters.get(f_name) passed = f.check_event(quest) and self.check_geofences( f, quest) if not passed: continue # go to next filter quest.custom_dts = f.custom_dts if self.__quiet is False: log.info("{} quest notification" " has been triggered in rule '{}'!" 
"".format(quest.stop_name, r_name)) self._trigger_quest(quest, rule.alarm_names) break # Next rule def _trigger_quest(self, quest, alarms): # Generate the DTS for the event dts = quest.generate_dts(self.__locale, self.__timezone, self.__units) # Get GMaps Triggers if self._gmaps_reverse_geocode: dts.update( self._gmaps_service.reverse_geocode((quest.lat, quest.lng), self._language)) for mode in self._gmaps_distance_matrix: dts.update( self._gmaps_service.distance_matrix(mode, (quest.lat, quest.lng), self.__location, self._language, self.__units)) threads = [] # Spawn notifications in threads so they can work in background for name in alarms: alarm = self.__alarms.get(name) if alarm: threads.append(gevent.spawn(alarm.quest_alert, dts)) else: log.critical("Alarm '{}' not found!".format(name)) for thread in threads: thread.join() # Check to see if a notification is within the given range def check_geofences(self, f, e): """ Returns true if the event passes the filter's geofences. """ if self.geofences is None or f.geofences is None: # No geofences set return True targets = f.geofences if len(targets) == 1 and "all" in targets: targets = self.geofences.iterkeys() for name in targets: gf = self.geofences.get(name) if not gf: # gf doesn't exist log.error("Cannot check geofence %s: does not exist!", name) elif gf.contains(e.lat, e.lng): # e in gf log.debug("{} is in geofence {}!".format( e.name, gf.get_name())) e.geofence = name # Set the geofence for dts return True else: # e not in gf log.debug("%s not in %s.", e.name, name) f.reject(e, "not in geofences") return False # Check to see if a weather notification s2 cell # overlaps with a given range (geofence) def check_weather_geofences(self, f, weather): """ Returns true if the event passes the filter's geofences. 
""" if self.geofences is None or f.geofences is None: # No geofences set return True targets = f.geofences if len(targets) == 1 and "all" in targets: targets = self.geofences.iterkeys() for name in targets: gf = self.geofences.get(name) if not gf: # gf doesn't exist log.error("Cannot check geofence %s: does not exist!", name) elif gf.check_overlap(weather): # weather cell overlaps gf log.debug("{} is in geofence {}!".format( weather.weather_cell_id, gf.get_name())) weather.geofence = name # Set the geofence for dts return True else: # weather not in gf log.debug("%s not in %s.", weather.weather_cell_id, name) f.reject(weather, "not in geofences") return False