from gevent.lock import Semaphore  # assumed import: .locked() implies gevent's Semaphore


class JobSharedLock(object):
    """
    Shared lock for jobs.

    Each job method can specify a lock which is shared among all calls
    for that job; only one job can run at a time for a given lock.
    """

    def __init__(self, queue, name):
        self.queue = queue
        self.name = name
        self.jobs = []
        self.semaphore = Semaphore()

    def add_job(self, job):
        self.jobs.append(job)

    def get_jobs(self):
        return self.jobs

    def remove_job(self, job):
        self.jobs.remove(job)

    def locked(self):
        return self.semaphore.locked()

    def acquire(self):
        return self.semaphore.acquire()

    def release(self):
        return self.semaphore.release()
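# Hypothetical usage sketch (not from the original source): two greenlets
# contend for the same JobSharedLock, so their jobs run strictly one at a time.
import gevent

def _run_job(lock, job_id):
    lock.add_job(job_id)
    lock.acquire()
    try:
        gevent.sleep(0.01)  # stand-in for the real job body
    finally:
        lock.release()
        lock.remove_job(job_id)

shared = JobSharedLock(queue=None, name='example')  # queue is unused in this sketch
gevent.joinall([gevent.spawn(_run_job, shared, i) for i in range(2)])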
from collections import deque

from gevent.lock import Semaphore  # assumed import: gevent's Semaphore exposes locked()


class BlockingDeque(deque):
    # The semaphore counter mirrors the number of items in the deque, so
    # pop()/popleft() block while the deque is empty.

    def __init__(self, *args, **kwargs):
        super(BlockingDeque, self).__init__(*args, **kwargs)
        self.sema = Semaphore(len(self))

    def append(self, *args, **kwargs):
        ret = super(BlockingDeque, self).append(*args, **kwargs)
        self.sema.release()
        return ret

    def appendleft(self, *args, **kwargs):
        ret = super(BlockingDeque, self).appendleft(*args, **kwargs)
        self.sema.release()
        return ret

    def clear(self, *args, **kwargs):
        ret = super(BlockingDeque, self).clear(*args, **kwargs)
        # drain the semaphore back to zero to match the now-empty deque
        while not self.sema.locked():
            self.sema.acquire(blocking=False)
        return ret

    def extend(self, *args, **kwargs):
        pre_n = len(self)
        ret = super(BlockingDeque, self).extend(*args, **kwargs)
        post_n = len(self)
        for _ in range(pre_n, post_n):
            self.sema.release()
        return ret

    def extendleft(self, *args, **kwargs):
        pre_n = len(self)
        ret = super(BlockingDeque, self).extendleft(*args, **kwargs)
        post_n = len(self)
        for _ in range(pre_n, post_n):
            self.sema.release()
        return ret

    def pop(self, *args, **kwargs):
        self.sema.acquire()
        return super(BlockingDeque, self).pop(*args, **kwargs)

    def popleft(self, *args, **kwargs):
        self.sema.acquire()
        return super(BlockingDeque, self).popleft(*args, **kwargs)

    def remove(self, *args, **kwargs):
        ret = super(BlockingDeque, self).remove(*args, **kwargs)
        self.sema.acquire()
        return ret
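# Hypothetical usage sketch (not from the original source): popleft() blocks
# until a producer appends, because the semaphore mirrors the deque length.
import gevent

dq = BlockingDeque()

def consumer():
    return dq.popleft()  # blocks while the deque is empty

g = gevent.spawn(consumer)
gevent.sleep(0)      # let the consumer block first
dq.append('item')
print(g.get())       # -> 'item'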
def test_subscription(self):
    org_id, user_id = self.create_test_data()
    channel_id = create_channel(Channel.Kind.Notice, user_id)

    # define a test handler to run on notification; it acquires run_lock and
    # never releases it, so the semaphore stays locked once the handler ran
    run_lock = Semaphore()

    def handler(subscribe_id, message):
        self.assertTrue(run_lock.acquire())
        notify = handle_notify(subscribe_id, message)
        self.assertEqual(notify["title"], "fish")

    subscribe_id = subscribe_channel(channel_id, handler)

    notify_id = create_notify("fish", channel_id)
    time.sleep(0.0001)  # simulate blocking operation
    self.assertTrue(run_lock.locked())

    notify_id = create_notify("fish", channel_id)
    time.sleep(0.0001)  # simulate blocking operation

    unsubscribe_channel(subscribe_id)
    delete_org(org_id)
class Account(Table, ErrorFunctions, InputFunctions, GreenletObject):
    """on ErrorFunctions member functions the variable need_reconnect is ignored"""

    _table_name = 'account'
    _table_created_event = True
    _table_deleted_event = True

    _private_account = False

    id = Column(('api', 'db'))
    name = Column(('api', 'db'))
    enabled = Column(('api', 'db'), fire_event=True, read_only=False)
    last_error = Column(('api', 'db'))
    last_error_type = Column(('api', 'db'))
    next_try = Column(
        'api',
        lambda self, value: value if value is None else int(value.eta * 1000),
        fire_event=True)
    multi_account = Column('api')

    hoster = None  # must be set before hoster.register()
    greenlet = Column(None, change_affects=['working'])
    working = Column('api', always_use_getter=True, getter_cached=True)

    # None means "use the default from the hoster class"; otherwise the
    # effective value is capped by the hoster's value (see the on_get_* getters)
    max_check_tasks = Column(always_use_getter=True)
    max_download_tasks = Column(always_use_getter=True)
    max_chunks = Column(always_use_getter=True)
    can_resume = Column(always_use_getter=True)

    def __init__(self, **kwargs):
        GreenletObject.__init__(self)

        self.account = self  # needed for InputFunctions.solve_* functions

        self.multi_account = False

        self.lock = Semaphore()
        self.check_pool = VariableSizePool(size=self.max_check_tasks)
        self.download_pool = VariableSizePool(size=self.max_download_tasks)
        self.search_pool = VariableSizePool(size=10)
        self.reset()

        for k, v in kwargs.iteritems():
            setattr(self, k, v)

    def __eq__(self, other):
        return isinstance(other, Account) and self.name == other.name

    def stop(self):
        with transaction:
            if self.working:
                self.greenlet.kill()
                self.greenlet = None

    def delete(self):
        with transaction:
            self.stop()
            self.table_delete()

    def get_login_data(self):
        """returns a dict with data for sync (clone accounts on other clients)"""
        return dict()

    def match(self, file):
        return True

    def reset(self):
        """reset (logout ...) account"""
        with transaction:
            self.on_reset()

    def on_reset(self):
        self.max_check_tasks = None
        self.max_download_tasks = None
        self.max_chunks = None
        self.can_resume = None
        self.check_pool.set(self.max_check_tasks)
        self.download_pool.set(self.max_download_tasks)
        self.enabled = True
        self.reset_retry()
        self._initialized = False
        self._last_check = None

    def on_changed_next_try(self, old):
        if self.next_try is None:
            gevent.spawn(self.boot)

    def on_get_next_try(self, value):
        return None if value is None else value.eta

    def on_get_working(self, value):
        if not self.greenlet:
            return False
        #if isinstance(self.greenlet, gevent.Greenlet) and self.greenlet.dead:
        #    return False
        return True

    def boot(self, return_when_locked=False):
        if return_when_locked and self.lock.locked():
            return
        with self.lock:
            if self.working:
                return
            if not self.enabled:
                # TODO: raise GreenletExit ?
                return
            if self.next_try is not None:
                # TODO: raise GreenletExit ?
                return
            with transaction:
                self.last_error = None
                self.last_error_type = None
            if not self._initialized or self._last_check is None \
                    or self._last_check + config['recheck_interval'] < time.time():
                self.spawn(self.initialize).get()
            self.check_pool.set(self.max_check_tasks)
            self.download_pool.set(self.max_download_tasks)

    def reboot(self):
        self.reset()
        self.boot()

    def initialize(self):
        transaction.acquire()
        try:
            ctx_error_handler(self, self.on_initialize)
            self._initialized = True
            event.fire('account:initialized', self)
        except:
            event.fire('account:initialize_error', self)
            raise
        finally:
            self._last_check = time.time()
            transaction.release()

    def on_initialize(self):
        raise NotImplementedError()

    @property
    def log(self):
        if not hasattr(self, '_log'):
            self._log = logger.get("account {}".format(self.id))
        return self._log

    @property
    def weight(self):
        try:
            self.boot()
            if not self.enabled:
                return None
            if self.next_try:
                return None
            return self.on_weight()
        except gevent.GreenletExit:
            return None

    _captcha_values = {True: 0, None: 1, False: 2}

    def on_weight(self):
        """returns None when not usable"""
        return [
            1,
            self._captcha_values[self.has_captcha],
            1000000 if self.max_download_speed is None else int(self.max_download_speed / 50),
            None if self.waiting_time is None else int(self.waiting_time / 60)]

    def fatal(self, msg, type='fatal', abort_greenlet=True):
        with transaction:
            self.last_error = msg
            self.last_error_type = type
            self.enabled = False
            self.log.error(msg)
            if abort_greenlet:
                self.kill()

    def login_failed(self, msg=None):
        """default error message for failed logins"""
        if msg:
            self.fatal('login failed: {}'.format(msg))
        else:
            self.fatal('login failed')

    def retry(self, msg, seconds, _=False):
        # _ is a placeholder to make ErrorFunctions work; we have no need_reconnect
        with transaction:
            self.next_try = gevent.spawn_later(seconds, self.reset_retry)
            self.next_try.eta = time.time() + seconds
            self.fatal(msg, type='retry', abort_greenlet=False)
            self.log.info('retry in {} seconds: {}'.format(seconds, msg))
            self.kill()

    def reset_retry(self):
        with transaction:
            self.next_try = None
            self.last_error = None
            self.last_error_type = None

    def get_task_pool(self, task):
        if task == 'check':
            return self.check_pool
        elif task == 'download':
            return self.download_pool
        elif task == 'search':
            return self.search_pool
        else:
            raise RuntimeError('unknown task pool: {}'.format(task))

    # preferences from hoster.this

    # max_check_tasks
    def on_get_max_check_tasks(self, value):
        self.boot(True)
        return self.hoster.max_check_tasks if value is None else min(value, self.hoster.max_check_tasks)

    # max_download_tasks
    def on_get_max_download_tasks(self, value):
        self.boot(True)
        return self.hoster.max_download_tasks if value is None else min(value, self.hoster.max_download_tasks)

    # max_chunks
    def on_get_max_chunks(self, value):
        self.boot(True)
        return self.hoster.max_chunks if value is None else min(value, self.hoster.max_chunks)

    def on_get_can_resume(self, value):
        self.boot(True)
        return self.hoster.can_resume if value is None else value and self.hoster.can_resume

    @property
    def max_filesize(self):
        return self.hoster.max_filesize

    @property
    def max_download_speed(self):
        return self.hoster.max_download_speed

    @property
    def has_captcha(self):
        return self.hoster.has_captcha

    @property
    def waiting_time(self):
        return self.hoster.waiting_time

    def on_check_decorator(self, func, *args, **kwargs):
        return func(*args, **kwargs)

    def on_download_decorator(self, func, *args, **kwargs):
        return func(*args, **kwargs)

    on_download_next_decorator = on_download_decorator
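# Hypothetical subclass sketch (assumed class and field names, not from the
# original source): a concrete hoster account only needs on_initialize(),
# which initialize() runs through ctx_error_handler under the account lock.
class ExampleHosterAccount(Account):
    def on_initialize(self):
        # perform the actual login / session setup here; report failures via
        # the inherited helpers so the account is disabled with a message
        if self.name is None:
            self.login_failed('missing account name')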
class VncKombuClientBase(object):
    def _update_sandesh_status(self, status, msg=''):
        ConnectionState.update(conn_type=ConnType.DATABASE,
                               name='RabbitMQ', status=status, message=msg,
                               server_addrs=self._server_addrs)
    # end _update_sandesh_status

    def publish(self, message):
        self._publish_queue.put(message)
    # end publish

    def sigterm_handler(self):
        self.shutdown()
        exit()

    def __init__(self, rabbit_ip, rabbit_port, rabbit_user, rabbit_password,
                 rabbit_vhost, rabbit_ha_mode, q_name, subscribe_cb, logger,
                 heartbeat_seconds=0, register_handler=True, **kwargs):
        self._rabbit_ip = rabbit_ip
        self._rabbit_port = rabbit_port
        self._rabbit_user = rabbit_user
        self._rabbit_password = rabbit_password
        self._rabbit_vhost = rabbit_vhost
        self._subscribe_cb = subscribe_cb
        self._logger = logger
        self._publish_queue = Queue()
        self._conn_lock = Semaphore()
        self._heartbeat_seconds = heartbeat_seconds

        self.obj_upd_exchange = kombu.Exchange('vnc_config.object-update', 'fanout',
                                               durable=False)

        self._ssl_params = self._fetch_ssl_params(**kwargs)

        # Register a handler for SIGTERM so that we can release the lock.
        # Without it, it can take several minutes before a new master is
        # elected. If an app using this wants to register its own SIGTERM
        # handler, this function will have to be modified, perhaps to take
        # an argument.
        if register_handler:
            gevent.signal(signal.SIGTERM, self.sigterm_handler)

    def num_pending_messages(self):
        return self._publish_queue.qsize()
    # end num_pending_messages

    def prepare_to_consume(self):
        # override this method
        return

    def _reconnect(self, delete_old_q=False):
        if self._conn_lock.locked():
            # Either the connection monitor or the publisher has taken the
            # lock. Whoever acquired it re-establishes the connection and
            # releases the lock, so the other one can simply wait on the
            # lock until it gets released.
            self._conn_lock.wait()
            if self._conn_state == ConnectionStatus.UP:
                return

        with self._conn_lock:
            msg = "RabbitMQ connection down"
            self._logger(msg, level=SandeshLevel.SYS_NOTICE)
            self._update_sandesh_status(ConnectionStatus.DOWN)
            self._conn_state = ConnectionStatus.DOWN

            self._conn.close()
            self._conn.ensure_connection()
            self._conn.connect()

            self._update_sandesh_status(ConnectionStatus.UP)
            self._conn_state = ConnectionStatus.UP
            msg = 'RabbitMQ connection ESTABLISHED %s' % repr(self._conn)
            self._logger(msg, level=SandeshLevel.SYS_NOTICE)

            self._channel = self._conn.channel()
            if self._subscribe_cb is not None:
                if delete_old_q:
                    # delete the old queue in first-connect context
                    # as db-resync would have caught up with history.
                    try:
                        bound_q = self._update_queue_obj(self._channel)
                        bound_q.delete()
                    except Exception as e:
                        msg = 'Unable to delete the old amqp queue: %s' % (str(e))
                        self._logger(msg, level=SandeshLevel.SYS_ERR)

                self._consumer = kombu.Consumer(self._channel,
                                                queues=self._update_queue_obj,
                                                callbacks=[self._subscribe])
            else:  # only a producer
                self._consumer = None

            self._producer = kombu.Producer(self._channel,
                                            exchange=self.obj_upd_exchange)
    # end _reconnect

    def _delete_queue(self):
        # delete the queue
        try:
            bound_q = self._update_queue_obj(self._channel)
            if bound_q:
                bound_q.delete()
        except Exception as e:
            msg = 'Unable to delete the old amqp queue: %s' % (str(e))
            self._logger(msg, level=SandeshLevel.SYS_ERR)
    # end _delete_queue

    def _connection_watch(self, connected):
        if not connected:
            self._reconnect()

        self.prepare_to_consume()
        while True:
            try:
                self._consumer.consume()
                self._conn.drain_events()
            except self._conn.connection_errors + self._conn.channel_errors as e:
                self._reconnect()
    # end _connection_watch

    def _connection_watch_forever(self):
        connected = True
        while True:
            try:
                self._connection_watch(connected)
            except Exception as e:
                msg = 'Error in rabbitmq drainer greenlet: %s' % (str(e))
                self._logger(msg, level=SandeshLevel.SYS_ERR)
                # avoid 'reconnect()' here as that itself might cause an exception
                connected = False
    # end _connection_watch_forever

    def _connection_heartbeat(self):
        while True:
            try:
                if self._conn.connected:
                    self._conn.heartbeat_check()
            except Exception as e:
                msg = 'Error in rabbitmq heartbeat greenlet: %s' % (str(e))
                self._logger(msg, level=SandeshLevel.SYS_ERR)
            finally:
                gevent.sleep(float(self._heartbeat_seconds) / 2)
    # end _connection_heartbeat

    def _publisher(self):
        message = None
        connected = True
        while True:
            try:
                if not connected:
                    self._reconnect()
                    connected = True

                if not message:
                    # earlier message was sent fine, dequeue one more
                    message = self._publish_queue.get()

                while True:
                    try:
                        self._producer.publish(message)
                        message = None
                        break
                    except self._conn.connection_errors + self._conn.channel_errors as e:
                        self._reconnect()
            except Exception as e:
                log_str = "Error in rabbitmq publisher greenlet: %s" % (str(e))
                self._logger(log_str, level=SandeshLevel.SYS_ERR)
                # avoid 'reconnect()' here as that itself might cause an exception
                connected = False
    # end _publisher

    def _subscribe(self, body, message):
        try:
            self._subscribe_cb(body)
        finally:
            message.ack()

    def _start(self, client_name):
        self._reconnect(delete_old_q=True)

        self._publisher_greenlet = vnc_greenlets.VncGreenlet(
            'Kombu ' + client_name, self._publisher)
        self._connection_monitor_greenlet = vnc_greenlets.VncGreenlet(
            'Kombu ' + client_name + '_ConnMon', self._connection_watch_forever)
        if self._heartbeat_seconds:
            self._connection_heartbeat_greenlet = vnc_greenlets.VncGreenlet(
                'Kombu ' + client_name + '_ConnHeartBeat', self._connection_heartbeat)
        else:
            self._connection_heartbeat_greenlet = None

    def greenlets(self):
        ret = [self._publisher_greenlet, self._connection_monitor_greenlet]
        if self._connection_heartbeat_greenlet:
            ret.append(self._connection_heartbeat_greenlet)
        return ret

    def shutdown(self):
        self._publisher_greenlet.kill()
        self._connection_monitor_greenlet.kill()
        if self._connection_heartbeat_greenlet:
            self._connection_heartbeat_greenlet.kill()
        if self._consumer:
            self._delete_queue()
        self._conn.close()

    def reset(self):
        self._publish_queue = Queue()

    _SSL_PROTOCOLS = {
        "tlsv1": ssl.PROTOCOL_TLSv1,
        "sslv23": ssl.PROTOCOL_SSLv23
    }

    @classmethod
    def validate_ssl_version(cls, version):
        version = version.lower()
        try:
            return cls._SSL_PROTOCOLS[version]
        except KeyError:
            raise RuntimeError('Invalid SSL version: {}'.format(version))

    def _fetch_ssl_params(self, **kwargs):
        if strtobool(str(kwargs.get('rabbit_use_ssl', False))):
            ssl_params = dict()
            ssl_version = kwargs.get('kombu_ssl_version', '')
            keyfile = kwargs.get('kombu_ssl_keyfile', '')
            certfile = kwargs.get('kombu_ssl_certfile', '')
            ca_certs = kwargs.get('kombu_ssl_ca_certs', '')

            if ssl_version:
                ssl_params.update({'ssl_version':
                                   self.validate_ssl_version(ssl_version)})
            if keyfile:
                ssl_params.update({'keyfile': keyfile})
            if certfile:
                ssl_params.update({'certfile': certfile})
            if ca_certs:
                ssl_params.update({'ca_certs': ca_certs})
                ssl_params.update({'cert_reqs': ssl.CERT_REQUIRED})
            return ssl_params or True
        return False
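# Hypothetical illustration (not from the original source; `client` stands in
# for any VncKombuClientBase instance): with SSL enabled, _fetch_ssl_params
# assembles the transport options kombu expects.
params = client._fetch_ssl_params(
    rabbit_use_ssl='True',
    kombu_ssl_version='tlsv1',
    kombu_ssl_ca_certs='/etc/ssl/ca.pem',
)
# -> {'ssl_version': ssl.PROTOCOL_TLSv1, 'ca_certs': '/etc/ssl/ca.pem',
#     'cert_reqs': ssl.CERT_REQUIRED}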
class RaidenService(Runnable):
    """ A Raiden node. """

    def __init__(
            self,
            chain: BlockChainService,
            query_start_block: typing.BlockNumber,
            default_registry: TokenNetworkRegistry,
            default_secret_registry: SecretRegistry,
            private_key_bin,
            transport,
            raiden_event_handler,
            config,
            discovery=None,
    ):
        super().__init__()
        if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        self.tokennetworkids_to_connectionmanagers = dict()
        self.identifier_to_results: typing.Dict[typing.PaymentID, AsyncResult] = dict()

        self.chain: BlockChainService = chain
        self.default_registry = default_registry
        self.query_start_block = query_start_block
        self.default_secret_registry = default_secret_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)
        self.discovery = discovery

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.transport = transport

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.raiden_event_handler = raiden_event_handler

        self.stop_event = Event()
        self.stop_event.set()  # inits as stopped

        self.wal = None
        self.snapshot_group = 0

        # This flag will be used to prevent the service from processing
        # state change events until we know that pending transactions
        # have been dispatched.
        self.dispatch_events_lock = Semaphore(1)

        self.database_path = config['database_path']
        if self.database_path != ':memory:':
            database_dir = os.path.dirname(config['database_path'])
            os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir
            # Prevent concurrent access to the same db
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            self.database_path = ':memory:'
            self.database_dir = None
            self.lock_file = None
            self.serialization_file = None
            self.db_lock = None

        self.event_poll_lock = gevent.lock.Semaphore()

    def start(self):
        """ Start the node synchronously. Raises directly if anything went
        wrong on startup. """
        if not self.stop_event.ready():
            raise RuntimeError(f'{self!r} already started')
        self.stop_event.clear()

        if self.database_dir is not None:
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # start the registration early to speed up the start
        if self.config['transport_type'] == 'udp':
            endpoint_registration_greenlet = gevent.spawn(
                self.discovery.register,
                self.address,
                self.config['transport']['udp']['external_ip'],
                self.config['transport']['udp']['external_port'],
            )

        storage = sqlite.SQLiteStorage(self.database_path, serialize.JSONSerializer())
        self.wal = wal.restore_to_state_change(
            transition_function=node.state_transition,
            storage=storage,
            state_change_identifier='latest',
        )

        if self.wal.state_manager.current_state is None:
            log.debug(
                'No recoverable state available, created initial state',
                node=pex(self.address),
            )
            block_number = self.chain.block_number()

            state_change = ActionInitChain(
                random.Random(),
                block_number,
                self.chain.node_address,
                self.chain.network_id,
            )
            self.wal.log_and_dispatch(state_change)
            payment_network = PaymentNetworkState(
                self.default_registry.address,
                [],  # empty list of token network states as it's the node's startup
            )
            state_change = ContractReceiveNewPaymentNetwork(
                constants.EMPTY_HASH,
                payment_network,
            )
            self.handle_state_change(state_change)

            # On first run Raiden needs to fetch all events for the payment
            # network, to reconstruct all token network graphs and find opened
            # channels
            last_log_block_number = 0
        else:
            # The `Block` state change is dispatched only after all the events
            # for that given block have been processed, filters can be safely
            # installed starting from this position without losing events.
            last_log_block_number = views.block_number(self.wal.state_manager.current_state)
            log.debug(
                'Restored state from WAL',
                last_restored_block=last_log_block_number,
                node=pex(self.address),
            )

            known_networks = views.get_payment_network_identifiers(views.state_from_raiden(self))
            if known_networks and self.default_registry.address not in known_networks:
                configured_registry = pex(self.default_registry.address)
                known_registries = lpex(known_networks)
                raise RuntimeError(
                    f'Token network address mismatch.\n'
                    f'Raiden is configured to use the smart contract '
                    f'{configured_registry}, which conflicts with the current known '
                    f'smart contracts {known_registries}',
                )

        # Clear ref cache & disable caching
        serialize.RaidenJSONDecoder.ref_cache.clear()
        serialize.RaidenJSONDecoder.cache_object_references = False

        # Restore the current snapshot group
        state_change_qty = self.wal.storage.count_state_changes()
        self.snapshot_group = state_change_qty // SNAPSHOT_STATE_CHANGES_COUNT

        # Install the filters using the correct from_block value, otherwise
        # blockchain logs can be lost.
        self.install_all_blockchain_filters(
            self.default_registry,
            self.default_secret_registry,
            last_log_block_number,
        )

        # Complete the first_run of the alarm task and synchronize with the
        # blockchain since the last run.
        #
        # Notes about setup order:
        # - The filters must be polled after the node state has been primed,
        #   otherwise the state changes won't have effect.
        # - The alarm must complete its first run before the transport is started,
        #   to avoid rejecting messages for unknown channels.
        self.alarm.register_callback(self._callback_new_block)

        # alarm.first_run may process some new channel, which would
        # start_health_check_for a partner; that's why the transport needs to
        # be already started at this point
        self.transport.start(self)
        self.alarm.first_run()

        chain_state = views.state_from_raiden(self)
        # Dispatch pending transactions
        pending_transactions = views.get_pending_transactions(chain_state)
        log.debug(
            'Processing pending transactions',
            num_pending_transactions=len(pending_transactions),
            node=pex(self.address),
        )
        with self.dispatch_events_lock:
            for transaction in pending_transactions:
                try:
                    self.raiden_event_handler.on_raiden_event(self, transaction)
                except RaidenRecoverableError as e:
                    log.error(str(e))
                except RaidenUnrecoverableError as e:
                    if self.config['network_type'] == NetworkType.MAIN:
                        if isinstance(e, InvalidDBData):
                            raise
                        log.error(str(e))
                    else:
                        raise

        self.alarm.start()

        # after transport and alarm are started, send queued messages
        events_queues = views.get_all_messagequeues(chain_state)
        for queue_identifier, event_queue in events_queues.items():
            self.start_health_check_for(queue_identifier.recipient)

            # repopulate identifier_to_results for pending transfers
            for event in event_queue:
                if type(event) == SendDirectTransfer:
                    self.identifier_to_results[event.payment_identifier] = AsyncResult()

                message = message_from_sendevent(event, self.address)
                self.sign(message)
                self.transport.send_async(queue_identifier, message)

        # exceptions on these subtasks should crash the app and bubble up
        self.alarm.link_exception(self.on_error)
        self.transport.link_exception(self.on_error)

        # Health check needs the transport layer
        self.start_neighbours_healthcheck()

        if self.config['transport_type'] == 'udp':
            endpoint_registration_greenlet.get()  # re-raise if exception occurred

        super().start()

    def _run(self):
        """ Busy-wait on long-lived subtasks/greenlets, re-raise if any error occurs """
        try:
            self.stop_event.wait()
        except gevent.GreenletExit:  # killed without exception
            self.stop_event.set()
            gevent.killall([self.alarm, self.transport])  # kill children
            raise  # re-raise to keep killed status
        except Exception:
            self.stop()
            raise

    def stop(self):
        """ Stop the node gracefully. Raise if any stop-time error occurred on any subtask """
        if self.stop_event.ready():  # not started
            return

        # Needs to come before any greenlets joining
        self.stop_event.set()

        # Filters must be uninstalled after the alarm task has stopped. Since
        # the events are polled by an alarm task callback, if the filters are
        # uninstalled before the alarm task is fully stopped the callback
        # `poll_blockchain_events` will fail.
        #
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        try:
            self.transport.stop()
            self.alarm.stop()
            self.transport.get()
            self.alarm.get()
            self.blockchain_events.uninstall_all_event_listeners()
        except gevent.Timeout:
            pass

        self.blockchain_events.reset()

        if self.db_lock is not None:
            self.db_lock.release()

    def add_pending_greenlet(self, greenlet: gevent.Greenlet):
        greenlet.link_exception(self.on_error)

    def __repr__(self):
        return '<{} {}>'.format(self.__class__.__name__, pex(self.address))

    def start_neighbours_healthcheck(self):
        for neighbour in views.all_neighbour_nodes(self.wal.state_manager.current_state):
            if neighbour != ConnectionManager.BOOTSTRAP_ADDR:
                self.start_health_check_for(neighbour)

    def get_block_number(self):
        return views.block_number(self.wal.state_manager.current_state)

    def handle_state_change(self, state_change):
        log.debug('STATE CHANGE', node=pex(self.address), state_change=state_change)

        event_list = self.wal.log_and_dispatch(state_change)

        if self.dispatch_events_lock.locked():
            return []

        for event in event_list:
            log.debug('RAIDEN EVENT', node=pex(self.address), raiden_event=event)

            try:
                self.raiden_event_handler.on_raiden_event(
                    raiden=self,
                    event=event,
                )
            except RaidenRecoverableError as e:
                log.error(str(e))
            except RaidenUnrecoverableError as e:
                if self.config['network_type'] == NetworkType.MAIN:
                    if isinstance(e, InvalidDBData):
                        raise
                    log.error(str(e))
                else:
                    raise

        # Take a snapshot every SNAPSHOT_STATE_CHANGES_COUNT
        # TODO: Gather more data about storage requirements
        # and update the value to specify how often taking
        # a snapshot should take place
        new_snapshot_group = self.wal.storage.count_state_changes() // SNAPSHOT_STATE_CHANGES_COUNT
        if new_snapshot_group > self.snapshot_group:
            log.debug(f'Storing snapshot: {new_snapshot_group}')
            self.wal.snapshot()
            self.snapshot_group = new_snapshot_group

        return event_list

    def set_node_network_state(self, node_address, network_state):
        state_change = ActionChangeNodeNetworkState(node_address, network_state)
        self.wal.log_and_dispatch(state_change)

    def start_health_check_for(self, node_address):
        self.transport.start_health_check(node_address)

    def _callback_new_block(self, latest_block):
        """Called once a new block is detected by the alarm task.

        Note:
            This should be called only once per block, otherwise there will
            be duplicated `Block` state changes in the log.

            Therefore this method should be called only once a new block is
            mined with the appropriate block_number argument from the
            AlarmTask.
        """
        # Raiden relies on blockchain events to update its off-chain state,
        # therefore some APIs /used/ to forcefully poll for events.
        #
        # This was done for APIs which have on-chain side-effects, e.g.
        # opening a channel, where polling the event is required to update
        # off-chain state to provide a consistent view to the caller, e.g.
        # the channel exists after the API call returns.
        #
        # That pattern introduced a race, because the events are returned only
        # once per filter, and this method would be called concurrently by the
        # API and the AlarmTask. The following lock is necessary, to ensure the
        # expected side-effects are properly applied (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d)
        latest_block_number = latest_block['number']
        with self.event_poll_lock:
            for event in self.blockchain_events.poll_blockchain_events(latest_block_number):
                # These state changes will be processed with a block_number
                # which is /larger/ than the ChainState's block_number.
                on_blockchain_event(self, event)

            # On restart the Raiden node will re-create the filters with the
            # ethereum node. These filters will have the from_block set to the
            # value of the latest Block state change. To avoid missing events
            # the Block state change is dispatched only after all of the events
            # have been processed.
            #
            # This means on some corner cases a few events may be applied
            # twice, this will happen if the node crashed and some events have
            # been processed but the Block state change has not been
            # dispatched.
            state_change = Block(
                block_number=latest_block_number,
                gas_limit=latest_block['gasLimit'],
                block_hash=bytes(latest_block['hash']),
            )
            self.handle_state_change(state_change)

    def sign(self, message):
        """ Sign message inplace. """
        if not isinstance(message, SignedMessage):
            raise ValueError('{} is not signable.'.format(repr(message)))

        message.sign(self.private_key)

    def install_all_blockchain_filters(
            self,
            token_network_registry_proxy: TokenNetworkRegistry,
            secret_registry_proxy: SecretRegistry,
            from_block: typing.BlockNumber,
    ):
        with self.event_poll_lock:
            node_state = views.state_from_raiden(self)
            token_networks = views.get_token_network_identifiers(
                node_state,
                token_network_registry_proxy.address,
            )

            self.blockchain_events.add_token_network_registry_listener(
                token_network_registry_proxy,
                from_block,
            )
            self.blockchain_events.add_secret_registry_listener(
                secret_registry_proxy,
                from_block,
            )

            for token_network in token_networks:
                token_network_proxy = self.chain.token_network(token_network)
                self.blockchain_events.add_token_network_listener(
                    token_network_proxy,
                    from_block,
                )

    def connection_manager_for_token_network(self, token_network_identifier):
        if not is_binary_address(token_network_identifier):
            raise InvalidAddress('token address is not valid.')

        known_token_networks = views.get_token_network_identifiers(
            views.state_from_raiden(self),
            self.default_registry.address,
        )

        if token_network_identifier not in known_token_networks:
            raise InvalidAddress('token is not registered.')

        manager = self.tokennetworkids_to_connectionmanagers.get(token_network_identifier)

        if manager is None:
            manager = ConnectionManager(self, token_network_identifier)
            self.tokennetworkids_to_connectionmanagers[token_network_identifier] = manager

        return manager

    def leave_all_token_networks(self):
        state_change = ActionLeaveAllNetworks()
        self.wal.log_and_dispatch(state_change)

    def close_and_settle(self):
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        connection_managers = [cm for cm in self.tokennetworkids_to_connectionmanagers.values()]

        if connection_managers:
            waiting.wait_for_settle_all_channels(
                self,
                self.alarm.sleep_time,
            )

    def mediated_transfer_async(
            self,
            token_network_identifier: typing.TokenNetworkID,
            amount: typing.TokenAmount,
            target: typing.Address,
            identifier: typing.PaymentID,
    ):
        """ Transfer `amount` between this node and `target`.

        This method will start an asynchronous transfer; the transfer might
        fail or succeed depending on a couple of factors:

        - Existence of a path that can be used, through the usage of direct
          or intermediary channels.
        - Network speed, making the transfer sufficiently fast so it doesn't
          expire.
        """
        async_result = self.start_mediated_transfer(
            token_network_identifier,
            amount,
            target,
            identifier,
        )

        return async_result

    def direct_transfer_async(self, token_network_identifier, amount, target, identifier):
        """ Do a direct transfer with target.

        Direct transfers are non-cancellable and non-expirable, since these
        transfers are a signed balance proof with the transferred amount
        incremented.

        Because the transfer is non-cancellable, there is a level of trust
        with the target. After the message is sent the target is effectively
        paid and it is not possible to revert.

        The async result will be set to False iff there is no direct channel
        with the target or the payer does not have balance to complete the
        transfer; otherwise, because the transfer is non-expirable, the async
        result *will never be set to False* and if the message is sent it
        will hang until the target node acknowledges the message.

        This transfer should be used as an optimization, since only two
        packets are required to complete the transfer (from the payer's
        perspective), whereas the mediated transfer requires 6 messages.
        """
        self.start_health_check_for(target)

        if identifier is None:
            identifier = create_default_identifier()

        direct_transfer = ActionTransferDirect(
            token_network_identifier,
            target,
            identifier,
            amount,
        )

        async_result = AsyncResult()
        self.identifier_to_results[identifier] = async_result

        self.handle_state_change(direct_transfer)

    def start_mediated_transfer(
            self,
            token_network_identifier: typing.TokenNetworkID,
            amount: typing.TokenAmount,
            target: typing.Address,
            identifier: typing.PaymentID,
    ):
        self.start_health_check_for(target)

        if identifier is None:
            identifier = create_default_identifier()

        if identifier in self.identifier_to_results:
            return self.identifier_to_results[identifier]

        async_result = AsyncResult()
        self.identifier_to_results[identifier] = async_result

        secret = random_secret()
        init_initiator_statechange = initiator_init(
            self,
            identifier,
            amount,
            secret,
            token_network_identifier,
            target,
        )

        # Dispatch the state change even if there are no routes to create the
        # wal entry.
        self.handle_state_change(init_initiator_statechange)

        return async_result

    def mediate_mediated_transfer(self, transfer: LockedTransfer):
        init_mediator_statechange = mediator_init(self, transfer)
        self.handle_state_change(init_mediator_statechange)

    def target_mediated_transfer(self, transfer: LockedTransfer):
        self.start_health_check_for(transfer.initiator)
        init_target_statechange = target_init(transfer)
        self.handle_state_change(init_target_statechange)
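# Hypothetical illustration (not from the original source; `raiden` stands in
# for a started RaidenService and `some_state_change` for a prepared state
# change): while dispatch_events_lock is held, handle_state_change() still
# writes to the WAL but returns [] before invoking raiden_event_handler,
# which is how the pending-transaction replay in start() suppresses side
# effects.
with raiden.dispatch_events_lock:
    assert raiden.dispatch_events_lock.locked()
    events = raiden.handle_state_change(some_state_change)  # -> []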
class VncKombuClientBase(object):
    def _update_sandesh_status(self, status, msg=''):
        ConnectionState.update(conn_type=ConnType.DATABASE,
                               name='RabbitMQ', status=status, message=msg,
                               server_addrs=self._server_addrs)
    # end _update_sandesh_status

    def publish(self, message):
        self._publish_queue.put(message)
    # end publish

    def sigterm_handler(self):
        self.shutdown()
        exit()

    def __init__(self, rabbit_ip, rabbit_port, rabbit_user, rabbit_password,
                 rabbit_vhost, rabbit_ha_mode, q_name, subscribe_cb, logger,
                 heartbeat_seconds=0, **kwargs):
        self._rabbit_ip = rabbit_ip
        self._rabbit_port = rabbit_port
        self._rabbit_user = rabbit_user
        self._rabbit_password = rabbit_password
        self._rabbit_vhost = rabbit_vhost
        self._subscribe_cb = subscribe_cb
        self._logger = logger
        self._publish_queue = Queue()
        self._conn_lock = Semaphore()
        self._heartbeat_seconds = heartbeat_seconds

        self.obj_upd_exchange = kombu.Exchange('vnc_config.object-update', 'fanout',
                                               durable=False)

        self._ssl_params = self._fetch_ssl_params(**kwargs)

        # Register a handler for SIGTERM so that we can release the lock.
        # Without it, it can take several minutes before a new master is
        # elected. If an app using this wants to register its own SIGTERM
        # handler, this function will have to be modified, perhaps to take
        # an argument.
        gevent.signal(signal.SIGTERM, self.sigterm_handler)

    def num_pending_messages(self):
        return self._publish_queue.qsize()
    # end num_pending_messages

    def prepare_to_consume(self):
        # override this method
        return

    def _reconnect(self, delete_old_q=False):
        if self._conn_lock.locked():
            # Either the connection monitor or the publisher has taken the
            # lock. Whoever acquired it re-establishes the connection and
            # releases the lock, so the other one can simply wait on the
            # lock until it gets released.
            self._conn_lock.wait()
            if self._conn_state == ConnectionStatus.UP:
                return

        with self._conn_lock:
            msg = "RabbitMQ connection down"
            self._logger(msg, level=SandeshLevel.SYS_NOTICE)
            self._update_sandesh_status(ConnectionStatus.DOWN)
            self._conn_state = ConnectionStatus.DOWN

            self._conn.close()
            self._conn.ensure_connection()
            self._conn.connect()

            self._update_sandesh_status(ConnectionStatus.UP)
            self._conn_state = ConnectionStatus.UP
            msg = 'RabbitMQ connection ESTABLISHED %s' % repr(self._conn)
            self._logger(msg, level=SandeshLevel.SYS_NOTICE)

            self._channel = self._conn.channel()
            if self._subscribe_cb is not None:
                if delete_old_q:
                    # delete the old queue in first-connect context
                    # as db-resync would have caught up with history.
                    try:
                        bound_q = self._update_queue_obj(self._channel)
                        bound_q.delete()
                    except Exception as e:
                        msg = 'Unable to delete the old amqp queue: %s' % (str(e))
                        self._logger(msg, level=SandeshLevel.SYS_ERR)

                self._consumer = kombu.Consumer(self._channel,
                                                queues=self._update_queue_obj,
                                                callbacks=[self._subscribe])
            else:  # only a producer
                self._consumer = None

            self._producer = kombu.Producer(self._channel,
                                            exchange=self.obj_upd_exchange)
    # end _reconnect

    def _delete_queue(self):
        # delete the queue
        try:
            bound_q = self._update_queue_obj(self._channel)
            if bound_q:
                bound_q.delete()
        except Exception as e:
            msg = 'Unable to delete the old amqp queue: %s' % (str(e))
            self._logger(msg, level=SandeshLevel.SYS_ERR)
    # end _delete_queue

    def _connection_watch(self, connected):
        if not connected:
            self._reconnect()

        self.prepare_to_consume()
        while True:
            try:
                self._consumer.consume()
                self._conn.drain_events()
            except self._conn.connection_errors + self._conn.channel_errors as e:
                self._reconnect()
    # end _connection_watch

    def _connection_watch_forever(self):
        connected = True
        while True:
            try:
                self._connection_watch(connected)
            except Exception as e:
                msg = 'Error in rabbitmq drainer greenlet: %s' % (str(e))
                self._logger(msg, level=SandeshLevel.SYS_ERR)
                # avoid 'reconnect()' here as that itself might cause an exception
                connected = False
    # end _connection_watch_forever

    def _connection_heartbeat(self):
        while True:
            try:
                if self._conn.connected:
                    self._conn.heartbeat_check()
            except Exception as e:
                msg = 'Error in rabbitmq heartbeat greenlet: %s' % (str(e))
                self._logger(msg, level=SandeshLevel.SYS_ERR)
            finally:
                gevent.sleep(float(self._heartbeat_seconds) / 2)
    # end _connection_heartbeat

    def _publisher(self):
        message = None
        connected = True
        while True:
            try:
                if not connected:
                    self._reconnect()
                    connected = True

                if not message:
                    # earlier message was sent fine, dequeue one more
                    message = self._publish_queue.get()

                while True:
                    try:
                        self._producer.publish(message)
                        message = None
                        break
                    except self._conn.connection_errors + self._conn.channel_errors as e:
                        self._reconnect()
            except Exception as e:
                log_str = "Error in rabbitmq publisher greenlet: %s" % (str(e))
                self._logger(log_str, level=SandeshLevel.SYS_ERR)
                # avoid 'reconnect()' here as that itself might cause an exception
                connected = False
    # end _publisher

    def _subscribe(self, body, message):
        try:
            self._subscribe_cb(body)
        finally:
            message.ack()

    def _start(self, client_name):
        self._reconnect(delete_old_q=True)

        self._publisher_greenlet = vnc_greenlets.VncGreenlet(
            'Kombu ' + client_name, self._publisher)
        self._connection_monitor_greenlet = vnc_greenlets.VncGreenlet(
            'Kombu ' + client_name + '_ConnMon', self._connection_watch_forever)
        if self._heartbeat_seconds:
            self._connection_heartbeat_greenlet = vnc_greenlets.VncGreenlet(
                'Kombu ' + client_name + '_ConnHeartBeat', self._connection_heartbeat)
        else:
            self._connection_heartbeat_greenlet = None

    def greenlets(self):
        ret = [self._publisher_greenlet, self._connection_monitor_greenlet]
        if self._connection_heartbeat_greenlet:
            ret.append(self._connection_heartbeat_greenlet)
        return ret

    def shutdown(self):
        self._publisher_greenlet.kill()
        self._connection_monitor_greenlet.kill()
        if self._connection_heartbeat_greenlet:
            self._connection_heartbeat_greenlet.kill()
        self._producer.close()
        if self._consumer:
            self._consumer.close()
            self._delete_queue()
        self._conn.close()

    def reset(self):
        self._publish_queue = Queue()

    _SSL_PROTOCOLS = {
        "tlsv1": ssl.PROTOCOL_TLSv1,
        "sslv23": ssl.PROTOCOL_SSLv23
    }

    @classmethod
    def validate_ssl_version(cls, version):
        version = version.lower()
        try:
            return cls._SSL_PROTOCOLS[version]
        except KeyError:
            raise RuntimeError('Invalid SSL version: {}'.format(version))

    def _fetch_ssl_params(self, **kwargs):
        if strtobool(str(kwargs.get('rabbit_use_ssl', False))):
            ssl_params = dict()
            ssl_version = kwargs.get('kombu_ssl_version', '')
            keyfile = kwargs.get('kombu_ssl_keyfile', '')
            certfile = kwargs.get('kombu_ssl_certfile', '')
            ca_certs = kwargs.get('kombu_ssl_ca_certs', '')

            if ssl_version:
                ssl_params.update(
                    {'ssl_version': self.validate_ssl_version(ssl_version)})
            if keyfile:
                ssl_params.update({'keyfile': keyfile})
            if certfile:
                ssl_params.update({'certfile': certfile})
            if ca_certs:
                ssl_params.update({'ca_certs': ca_certs})
                ssl_params.update({'cert_reqs': ssl.CERT_REQUIRED})
            return ssl_params or True
        return False
class RaidenService(Runnable):
    """ A Raiden node. """

    def __init__(
            self,
            chain: BlockChainService,
            query_start_block: BlockNumber,
            default_registry: TokenNetworkRegistry,
            default_secret_registry: SecretRegistry,
            transport,
            raiden_event_handler,
            message_handler,
            config,
            discovery=None,
    ):
        super().__init__()
        self.tokennetworkids_to_connectionmanagers: ConnectionManagerDict = dict()
        self.targets_to_identifiers_to_statuses: StatusesDict = defaultdict(dict)

        self.chain: BlockChainService = chain
        self.default_registry = default_registry
        self.query_start_block = query_start_block
        self.default_secret_registry = default_secret_registry
        self.config = config

        self.signer: Signer = LocalSigner(self.chain.client.privkey)
        self.address = self.signer.address
        self.discovery = discovery
        self.transport = transport

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.raiden_event_handler = raiden_event_handler
        self.message_handler = message_handler

        self.stop_event = Event()
        self.stop_event.set()  # inits as stopped

        self.wal: Optional[wal.WriteAheadLog] = None
        self.snapshot_group = 0

        # This flag will be used to prevent the service from processing
        # state change events until we know that pending transactions
        # have been dispatched.
        self.dispatch_events_lock = Semaphore(1)

        self.contract_manager = ContractManager(config['contracts_path'])
        self.database_path = config['database_path']
        if self.database_path != ':memory:':
            database_dir = os.path.dirname(config['database_path'])
            os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir

            # Two raiden processes must not write to the same database, even
            # though the database itself may be consistent. If more than one
            # node writes state changes to the same WAL there are no
            # guarantees about recovery; this happens because during recovery
            # the WAL replay can not be deterministic.
            lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(lock_file)
        else:
            self.database_path = ':memory:'
            self.database_dir = None
            self.serialization_file = None
            self.db_lock = None

        self.event_poll_lock = gevent.lock.Semaphore()
        self.gas_reserve_lock = gevent.lock.Semaphore()
        self.payment_identifier_lock = gevent.lock.Semaphore()

    def start(self):
        """ Start the node synchronously. Raises directly if anything went
        wrong on startup. """
        if not self.stop_event.ready():
            raise RuntimeError(f'{self!r} already started')
        self.stop_event.clear()

        if self.database_dir is not None:
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # start the registration early to speed up the start
        if self.config['transport_type'] == 'udp':
            endpoint_registration_greenlet = gevent.spawn(
                self.discovery.register,
                self.address,
                self.config['transport']['udp']['external_ip'],
                self.config['transport']['udp']['external_port'],
            )

        self.maybe_upgrade_db()

        storage = sqlite.SerializedSQLiteStorage(
            database_path=self.database_path,
            serializer=serialize.JSONSerializer(),
        )
        storage.log_run()
        self.wal = wal.restore_to_state_change(
            transition_function=node.state_transition,
            storage=storage,
            state_change_identifier='latest',
        )

        if self.wal.state_manager.current_state is None:
            log.debug(
                'No recoverable state available, created initial state',
                node=pex(self.address),
            )
            # On first run Raiden needs to fetch all events for the payment
            # network, to reconstruct all token network graphs and find opened
            # channels
            last_log_block_number = self.query_start_block

            state_change = ActionInitChain(
                random.Random(),
                last_log_block_number,
                self.chain.node_address,
                self.chain.network_id,
            )
            self.handle_state_change(state_change)

            payment_network = PaymentNetworkState(
                self.default_registry.address,
                [],  # empty list of token network states as it's the node's startup
            )
            state_change = ContractReceiveNewPaymentNetwork(
                constants.EMPTY_HASH,
                payment_network,
                last_log_block_number,
            )
            self.handle_state_change(state_change)
        else:
            # The `Block` state change is dispatched only after all the events
            # for that given block have been processed, filters can be safely
            # installed starting from this position without losing events.
            last_log_block_number = views.block_number(self.wal.state_manager.current_state)
            log.debug(
                'Restored state from WAL',
                last_restored_block=last_log_block_number,
                node=pex(self.address),
            )

            known_networks = views.get_payment_network_identifiers(views.state_from_raiden(self))
            if known_networks and self.default_registry.address not in known_networks:
                configured_registry = pex(self.default_registry.address)
                known_registries = lpex(known_networks)
                raise RuntimeError(
                    f'Token network address mismatch.\n'
                    f'Raiden is configured to use the smart contract '
                    f'{configured_registry}, which conflicts with the current known '
                    f'smart contracts {known_registries}',
                )

        # Restore the current snapshot group
        state_change_qty = self.wal.storage.count_state_changes()
        self.snapshot_group = state_change_qty // SNAPSHOT_STATE_CHANGES_COUNT

        # Install the filters using the correct from_block value, otherwise
        # blockchain logs can be lost.
        self.install_all_blockchain_filters(
            self.default_registry,
            self.default_secret_registry,
            last_log_block_number,
        )

        # Complete the first_run of the alarm task and synchronize with the
        # blockchain since the last run.
        #
        # Notes about setup order:
        # - The filters must be polled after the node state has been primed,
        #   otherwise the state changes won't have effect.
        # - The alarm must complete its first run before the transport is started,
        #   to reject messages for closed/settled channels.
        self.alarm.register_callback(self._callback_new_block)
        with self.dispatch_events_lock:
            self.alarm.first_run(last_log_block_number)

        chain_state = views.state_from_raiden(self)
        self._initialize_transactions_queues(chain_state)
        self._initialize_whitelists(chain_state)
        self._initialize_payment_statuses(chain_state)
        # send messages in queue before starting transport,
        # this is necessary to avoid a race where, if the transport is started
        # before the messages are queued, actions triggered by it can cause new
        # messages to be enqueued before these older ones
        self._initialize_messages_queues(chain_state)

        # The transport must not ever be started before the alarm task's
        # `first_run()` has been, because it's this method which synchronizes the
        # node with the blockchain, including the channel's state (if the channel
        # is closed on-chain new messages must be rejected, which will not be the
        # case if the node is not synchronized)
        self.transport.start(
            raiden_service=self,
            message_handler=self.message_handler,
            prev_auth_data=chain_state.last_transport_authdata,
        )

        # First run has been called above!
        self.alarm.start()

        # exceptions on these subtasks should crash the app and bubble up
        self.alarm.link_exception(self.on_error)
        self.transport.link_exception(self.on_error)

        # Health check needs the transport layer
        self.start_neighbours_healthcheck(chain_state)

        if self.config['transport_type'] == 'udp':
            endpoint_registration_greenlet.get()  # re-raise if exception occurred

        log.debug('Raiden Service started', node=pex(self.address))
        super().start()

    def _run(self, *args, **kwargs):  # pylint: disable=method-hidden
        """ Busy-wait on long-lived subtasks/greenlets, re-raise if any error occurs """
        try:
            self.stop_event.wait()
        except gevent.GreenletExit:  # killed without exception
            self.stop_event.set()
            gevent.killall([self.alarm, self.transport])  # kill children
            raise  # re-raise to keep killed status
        except Exception:
            self.stop()
            raise

    def stop(self):
        """ Stop the node gracefully. Raise if any stop-time error occurred on any subtask """
        if self.stop_event.ready():  # not started
            return

        # Needs to come before any greenlets joining
        self.stop_event.set()

        # Filters must be uninstalled after the alarm task has stopped. Since
        # the events are polled by an alarm task callback, if the filters are
        # uninstalled before the alarm task is fully stopped the callback
        # `poll_blockchain_events` will fail.
        #
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        self.transport.stop()
        self.alarm.stop()

        self.transport.join()
        self.alarm.join()

        self.blockchain_events.uninstall_all_event_listeners()

        # Close storage DB to release internal DB lock
        self.wal.storage.conn.close()

        if self.db_lock is not None:
            self.db_lock.release()

        log.debug('Raiden Service stopped', node=pex(self.address))

    def add_pending_greenlet(self, greenlet: gevent.Greenlet):
        greenlet.link_exception(self.on_error)

    def __repr__(self):
        return '<{} {}>'.format(self.__class__.__name__, pex(self.address))

    def start_neighbours_healthcheck(self, chain_state: ChainState):
        for neighbour in views.all_neighbour_nodes(chain_state):
            if neighbour != ConnectionManager.BOOTSTRAP_ADDR:
                self.start_health_check_for(neighbour)

    def get_block_number(self) -> BlockNumber:
        assert self.wal
        return views.block_number(self.wal.state_manager.current_state)

    def on_message(self, message: Message):
        self.message_handler.on_message(self, message)

    def handle_state_change(self, state_change: StateChange):
        assert self.wal

        log.debug(
            'State change',
            node=pex(self.address),
            state_change=_redact_secret(serialize.JSONSerializer.serialize(state_change)),
        )

        old_state = views.state_from_raiden(self)

        event_list = self.wal.log_and_dispatch(state_change)

        current_state = views.state_from_raiden(self)
        for balance_proof in views.detect_balance_proof_change(old_state, current_state):
            event_list.append(EventNewBalanceProofReceived(balance_proof))

        if self.dispatch_events_lock.locked():
            return []

        for event in event_list:
            log.debug(
                'Raiden event',
                node=pex(self.address),
                raiden_event=_redact_secret(serialize.JSONSerializer.serialize(event)),
            )

            try:
                self.raiden_event_handler.on_raiden_event(
                    raiden=self,
                    event=event,
                )
            except RaidenRecoverableError as e:
                log.error(str(e))
            except InvalidDBData:
                raise
            except RaidenUnrecoverableError as e:
                log_unrecoverable = (
                    self.config['environment_type'] == Environment.PRODUCTION and
                    not self.config['unrecoverable_error_should_crash']
                )
                if log_unrecoverable:
                    log.error(str(e))
                else:
                    raise

        # Take a snapshot every SNAPSHOT_STATE_CHANGES_COUNT
        # TODO: Gather more data about storage requirements
        # and update the value to specify how often taking
        # a snapshot should take place
        new_snapshot_group = self.wal.storage.count_state_changes() // SNAPSHOT_STATE_CHANGES_COUNT
        if new_snapshot_group > self.snapshot_group:
            log.debug('Storing snapshot', snapshot_id=new_snapshot_group)
            self.wal.snapshot()
            self.snapshot_group = new_snapshot_group

        return event_list

    def set_node_network_state(self, node_address: Address, network_state: str):
        state_change = ActionChangeNodeNetworkState(node_address, network_state)
        self.handle_state_change(state_change)

    def start_health_check_for(self, node_address: Address):
        # This function is a noop during initialization. It can be called
        # through the alarm task while polling for new channel events. The
        # healthcheck will be started by self.start_neighbours_healthcheck()
        if self.transport:
            self.transport.start_health_check(node_address)

    def _callback_new_block(self, latest_block: Dict):
        """Called once a new block is detected by the alarm task.

        Note:
            This should be called only once per block, otherwise there will
            be duplicated `Block` state changes in the log.

            Therefore this method should be called only once a new block is
            mined with the corresponding block data from the AlarmTask.
        """
        # User facing APIs, which have on-chain side-effects, force polled the
        # blockchain to update the node's state. This force poll is used to
        # provide a consistent view to the user, e.g. a channel open call waits
        # for the transaction to be mined and force polled the event to update
        # the node's state. This pattern introduced a race with the alarm task
        # and the task which served the user request, because the events are
        # returned only once per filter. The lock below is to protect against
        # these races (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d)
        with self.event_poll_lock:
            latest_block_number = latest_block['number']
            confirmation_blocks = self.config['blockchain']['confirmation_blocks']
            confirmed_block_number = latest_block_number - confirmation_blocks
            confirmed_block = self.chain.client.web3.eth.getBlock(confirmed_block_number)

            # handle testing private chains
            confirmed_block_number = max(GENESIS_BLOCK_NUMBER, confirmed_block_number)

            for event in self.blockchain_events.poll_blockchain_events(confirmed_block_number):
                # These state changes will be processed with a block_number
                # which is /larger/ than the ChainState's block_number.
                on_blockchain_event(self, event)

            # On restart the Raiden node will re-create the filters with the
            # ethereum node. These filters will have the from_block set to the
            # value of the latest Block state change. To avoid missing events
            # the Block state change is dispatched only after all of the events
            # have been processed.
            #
            # This means on some corner cases a few events may be applied
            # twice, this will happen if the node crashed and some events have
            # been processed but the Block state change has not been
            # dispatched.
            state_change = Block(
                block_number=confirmed_block_number,
                gas_limit=confirmed_block['gasLimit'],
                block_hash=BlockHash(bytes(confirmed_block['hash'])),
            )
            self.handle_state_change(state_change)

    def _initialize_transactions_queues(self, chain_state: ChainState):
        pending_transactions = views.get_pending_transactions(chain_state)

        log.debug(
            'Processing pending transactions',
            num_pending_transactions=len(pending_transactions),
            node=pex(self.address),
        )

        with self.dispatch_events_lock:
            for transaction in pending_transactions:
                try:
                    self.raiden_event_handler.on_raiden_event(self, transaction)
                except RaidenRecoverableError as e:
                    log.error(str(e))
                except InvalidDBData:
                    raise
                except RaidenUnrecoverableError as e:
                    log_unrecoverable = (
                        self.config['environment_type'] == Environment.PRODUCTION and
                        not self.config['unrecoverable_error_should_crash']
                    )
                    if log_unrecoverable:
                        log.error(str(e))
                    else:
                        raise

    def _initialize_payment_statuses(self, chain_state: ChainState):
        """ Re-initialize targets_to_identifiers_to_statuses. """
        with self.payment_identifier_lock:
            for task in chain_state.payment_mapping.secrethashes_to_task.values():
                if not isinstance(task, InitiatorTask):
                    continue

                # Every transfer in the transfers_list must have the same target
                # and payment_identifier, so using the first transfer is
                # sufficient.
                initiator = next(iter(task.manager_state.initiator_transfers.values()))
                transfer = initiator.transfer
                target = transfer.target
                identifier = transfer.payment_identifier
                balance_proof = transfer.balance_proof
                self.targets_to_identifiers_to_statuses[target][identifier] = PaymentStatus(
                    payment_identifier=identifier,
                    amount=transfer.lock.amount,
                    token_network_identifier=balance_proof.token_network_identifier,
                    payment_done=AsyncResult(),
                )

    def _initialize_messages_queues(self, chain_state: ChainState):
        """ Push the message queues to the transport. """
        events_queues = views.get_all_messagequeues(chain_state)

        for queue_identifier, event_queue in events_queues.items():
            self.start_health_check_for(queue_identifier.recipient)

            for event in event_queue:
                message = message_from_sendevent(event, self.address)
                self.sign(message)
                self.transport.send_async(queue_identifier, message)

    def _initialize_whitelists(self, chain_state: ChainState):
        """ Whitelist neighbors and mediated transfer targets on transport """
        for neighbour in views.all_neighbour_nodes(chain_state):
            if neighbour == ConnectionManager.BOOTSTRAP_ADDR:
                continue
            self.transport.whitelist(neighbour)

        events_queues = views.get_all_messagequeues(chain_state)
        for event_queue in events_queues.values():
            for event in event_queue:
                if isinstance(event, SendLockedTransfer):
                    transfer = event.transfer
                    if transfer.initiator == self.address:
                        self.transport.whitelist(address=transfer.target)

    def sign(self, message: Message):
        """ Sign message inplace. """
        if not isinstance(message, SignedMessage):
            raise ValueError('{} is not signable.'.format(repr(message)))

        message.sign(self.signer)

    def install_all_blockchain_filters(
            self,
            token_network_registry_proxy: TokenNetworkRegistry,
            secret_registry_proxy: SecretRegistry,
            from_block: BlockNumber,
    ):
        with self.event_poll_lock:
            node_state = views.state_from_raiden(self)
            token_networks = views.get_token_network_identifiers(
                node_state,
                token_network_registry_proxy.address,
            )

            self.blockchain_events.add_token_network_registry_listener(
                token_network_registry_proxy=token_network_registry_proxy,
                contract_manager=self.contract_manager,
                from_block=from_block,
            )
            self.blockchain_events.add_secret_registry_listener(
                secret_registry_proxy=secret_registry_proxy,
                contract_manager=self.contract_manager,
                from_block=from_block,
            )

            for token_network in token_networks:
                token_network_proxy = self.chain.token_network(
                    TokenNetworkAddress(token_network),
                )
                self.blockchain_events.add_token_network_listener(
                    token_network_proxy=token_network_proxy,
                    contract_manager=self.contract_manager,
                    from_block=from_block,
                )

    def connection_manager_for_token_network(
            self,
            token_network_identifier: TokenNetworkID,
    ) -> ConnectionManager:
        if not is_binary_address(token_network_identifier):
            raise InvalidAddress('token address is not valid.')

        known_token_networks = views.get_token_network_identifiers(
            views.state_from_raiden(self),
            self.default_registry.address,
        )

        if token_network_identifier not in known_token_networks:
            raise InvalidAddress('token is not registered.')

        manager = self.tokennetworkids_to_connectionmanagers.get(token_network_identifier)

        if manager is None:
            manager = ConnectionManager(self, token_network_identifier)
            self.tokennetworkids_to_connectionmanagers[token_network_identifier] = manager

        return manager

    def mediated_transfer_async(
            self,
            token_network_identifier: TokenNetworkID,
            amount: PaymentAmount,
            target: TargetAddress,
            identifier: PaymentID,
            secret: Secret = None,
            secret_hash: SecretHash = None,
    ) -> PaymentStatus:
        """ Transfer `amount`
between this node and `target`. This method will start an asynchronous transfer, the transfer might fail or succeed depending on a couple of factors: - Existence of a path that can be used, through the usage of direct or intermediary channels. - Network speed, making the transfer sufficiently fast so it doesn't expire. """ if secret is None: secret = random_secret() payment_status = self.start_mediated_transfer_with_secret( token_network_identifier, amount, target, identifier, secret, secret_hash, ) return payment_status def start_mediated_transfer_with_secret( self, token_network_identifier: TokenNetworkID, amount: PaymentAmount, target: TargetAddress, identifier: PaymentID, secret: Secret, secret_hash: SecretHash = None, ) -> PaymentStatus: if secret_hash is None: secret_hash = sha3(secret) # LEFTODO: Supply a proper block id secret_registered = self.default_secret_registry.check_registered( secrethash=secret_hash, block_identifier='latest', ) if secret_registered: raise RaidenUnrecoverableError( f'Attempted to initiate a locked transfer with secrethash {pex(secret_hash)}.' f' That secret is already registered onchain.', ) self.start_health_check_for(Address(target)) if identifier is None: identifier = create_default_identifier() with self.payment_identifier_lock: payment_status = self.targets_to_identifiers_to_statuses[target].get(identifier) if payment_status: payment_status_matches = payment_status.matches( token_network_identifier, amount, ) if not payment_status_matches: raise PaymentConflict( 'Another payment with the same id is in flight', ) return payment_status payment_status = PaymentStatus( payment_identifier=identifier, amount=amount, token_network_identifier=token_network_identifier, payment_done=AsyncResult(), secret=secret, secret_hash=secret_hash, ) self.targets_to_identifiers_to_statuses[target][identifier] = payment_status init_initiator_statechange = initiator_init( raiden=self, transfer_identifier=identifier, transfer_amount=amount, transfer_secret=secret, token_network_identifier=token_network_identifier, target_address=target, ) # Dispatch the state change even if there are no routes to create the # wal entry. self.handle_state_change(init_initiator_statechange) return payment_status def mediate_mediated_transfer(self, transfer: LockedTransfer): init_mediator_statechange = mediator_init(self, transfer) self.handle_state_change(init_mediator_statechange) def target_mediated_transfer(self, transfer: LockedTransfer): self.start_health_check_for(transfer.initiator) init_target_statechange = target_init(transfer) self.handle_state_change(init_target_statechange) def maybe_upgrade_db(self): manager = UpgradeManager(db_filename=self.database_path) manager.run()
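# --- Example: suppressing event dispatch while replaying persisted work ---
# A minimal, self-contained sketch (not the Raiden API) of the
# `dispatch_events_lock` idiom used above: the Semaphore is held while
# previously persisted transactions are replayed, and the normal dispatch
# path checks `locked()` and returns early, so the replay cannot enqueue the
# same events twice. `Node`, `handle` and `replay_pending` are illustrative
# names under that assumption.
from gevent.lock import Semaphore


class Node(object):
    def __init__(self):
        self.dispatch_events_lock = Semaphore(1)
        self.dispatched = []

    def handle(self, event):
        # Mirrors handle_state_change(): while the lock is held,
        # return early instead of dispatching.
        if self.dispatch_events_lock.locked():
            return
        self.dispatched.append(event)

    def replay_pending(self, pending):
        with self.dispatch_events_lock:
            for event in pending:
                self.handle(event)  # suppressed: the lock is held


node = Node()
node.replay_pending(['pending-tx-1', 'pending-tx-2'])
node.handle('fresh-event')
assert node.dispatched == ['fresh-event']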
class VncKombuClientBase(object): def _update_sandesh_status(self, status, msg=''): ConnectionState.update(conn_type=ConnectionType.DATABASE, name='RabbitMQ', status=status, message=msg, server_addrs=["%s:%s" % (self._rabbit_ip, self._rabbit_port)]) # end _update_sandesh_status def publish(self, message): self._publish_queue.put(message) # end publish def __init__(self, rabbit_ip, rabbit_port, rabbit_user, rabbit_password, rabbit_vhost, rabbit_ha_mode, q_name, subscribe_cb, logger): self._rabbit_ip = rabbit_ip self._rabbit_port = rabbit_port self._rabbit_user = rabbit_user self._rabbit_password = rabbit_password self._rabbit_vhost = rabbit_vhost self._subscribe_cb = subscribe_cb self._logger = logger self._publish_queue = Queue() self._conn_lock = Semaphore() self.obj_upd_exchange = kombu.Exchange('vnc_config.object-update', 'fanout', durable=False) def num_pending_messages(self): return self._publish_queue.qsize() # end num_pending_messages def prepare_to_consume(self): # override this method return def _reconnect(self, delete_old_q=False): if self._conn_lock.locked(): # either the connection-monitor or the publisher should have taken # the lock. Whichever greenlet acquired the lock will re-establish # the connection and release the lock, so the other one can # simply wait on the lock until it gets released self._conn_lock.wait() if self._conn_state == ConnectionStatus.UP: return with self._conn_lock: msg = "RabbitMQ connection down" self._logger(msg, level=SandeshLevel.SYS_ERR) self._update_sandesh_status(ConnectionStatus.DOWN) self._conn_state = ConnectionStatus.DOWN self._conn.close() self._conn.ensure_connection() self._conn.connect() self._update_sandesh_status(ConnectionStatus.UP) self._conn_state = ConnectionStatus.UP msg = 'RabbitMQ connection ESTABLISHED %s' % repr(self._conn) self._logger(msg, level=SandeshLevel.SYS_NOTICE) self._channel = self._conn.channel() if delete_old_q: # delete the old queue in first-connect context # as db-resync would have caught up with history. 
try: bound_q = self._update_queue_obj(self._channel) bound_q.delete() except Exception as e: msg = 'Unable to delete the old amqp queue: %s' %(str(e)) self._logger(msg, level=SandeshLevel.SYS_ERR) self._consumer = kombu.Consumer(self._channel, queues=self._update_queue_obj, callbacks=[self._subscribe]) self._producer = kombu.Producer(self._channel, exchange=self.obj_upd_exchange) # end _reconnect def _connection_watch(self, connected): if not connected: self._reconnect() self.prepare_to_consume() while True: try: self._consumer.consume() self._conn.drain_events() except self._conn.connection_errors + self._conn.channel_errors as e: self._reconnect() # end _connection_watch def _connection_watch_forever(self): connected = True while True: try: self._connection_watch(connected) except Exception as e: msg = 'Error in rabbitmq drainer greenlet: %s' %(str(e)) self._logger(msg, level=SandeshLevel.SYS_ERR) # avoid 'reconnect()' here as that itself might cause an exception connected = False # end _connection_watch_forever def _publisher(self): message = None connected = True while True: try: if not connected: self._reconnect() connected = True if not message: # the previous message was sent fine, dequeue the next one message = self._publish_queue.get() while True: try: self._producer.publish(message) message = None break except self._conn.connection_errors + self._conn.channel_errors as e: self._reconnect() except Exception as e: log_str = "Error in rabbitmq publisher greenlet: %s" %(str(e)) self._logger(log_str, level=SandeshLevel.SYS_ERR) # avoid 'reconnect()' here as that itself might cause an exception connected = False # end _publisher def _subscribe(self, body, message): try: self._subscribe_cb(body) finally: message.ack() def _start(self): self._reconnect(delete_old_q=True) self._publisher_greenlet = gevent.spawn(self._publisher) self._connection_monitor_greenlet = gevent.spawn(self._connection_watch_forever) def shutdown(self): self._publisher_greenlet.kill() self._connection_monitor_greenlet.kill() self._producer.close() self._consumer.close() self._conn.close()
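# --- Example: single-reconnector coordination with a Semaphore ---
# A sketch of the `_reconnect()` idiom above, under the assumption that
# exactly one of the consumer/publisher greenlets should rebuild the
# connection while the other just waits for it to finish. `FakeConnection`
# stands in for the kombu connection; the status strings replace the
# Sandesh ConnectionStatus values.
import gevent
from gevent.lock import Semaphore


class FakeConnection(object):
    def close(self):
        pass

    def connect(self):
        pass


class Reconnector(object):
    def __init__(self, conn):
        self._conn = conn
        self._conn_lock = Semaphore()
        self._conn_state = 'DOWN'

    def reconnect(self):
        if self._conn_lock.locked():
            # The other greenlet is already reconnecting; wait until it
            # releases the lock, then skip the work if it succeeded.
            self._conn_lock.wait()
            if self._conn_state == 'UP':
                return
        with self._conn_lock:
            self._conn_state = 'DOWN'
            self._conn.close()
            self._conn.connect()
            self._conn_state = 'UP'


r = Reconnector(FakeConnection())
gevent.joinall([gevent.spawn(r.reconnect) for _ in range(2)])
assert r._conn_state == 'UP'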
class RaidenService(Runnable): """ A Raiden node. """ def __init__( self, chain: BlockChainService, query_start_block: BlockNumber, default_registry: TokenNetworkRegistry, default_secret_registry: SecretRegistry, private_key_bin, transport, raiden_event_handler, message_handler, config, discovery=None, ): super().__init__() if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32: raise ValueError('invalid private_key') self.tokennetworkids_to_connectionmanagers = dict() self.targets_to_identifiers_to_statuses: StatusesDict = defaultdict(dict) self.chain: BlockChainService = chain self.default_registry = default_registry self.query_start_block = query_start_block self.default_secret_registry = default_secret_registry self.config = config self.privkey = private_key_bin self.address = privatekey_to_address(private_key_bin) self.discovery = discovery self.private_key = PrivateKey(private_key_bin) self.pubkey = self.private_key.public_key.format(compressed=False) self.transport = transport self.blockchain_events = BlockchainEvents() self.alarm = AlarmTask(chain) self.raiden_event_handler = raiden_event_handler self.message_handler = message_handler self.stop_event = Event() self.stop_event.set() # inits as stopped self.wal = None self.snapshot_group = 0 # This flag will be used to prevent the service from processing # state changes events until we know that pending transactions # have been dispatched. self.dispatch_events_lock = Semaphore(1) self.contract_manager = ContractManager(config['contracts_path']) self.database_path = config['database_path'] if self.database_path != ':memory:': database_dir = os.path.dirname(config['database_path']) os.makedirs(database_dir, exist_ok=True) self.database_dir = database_dir # Prevent concurrent access to the same db self.lock_file = os.path.join(self.database_dir, '.lock') self.db_lock = filelock.FileLock(self.lock_file) else: self.database_path = ':memory:' self.database_dir = None self.lock_file = None self.serialization_file = None self.db_lock = None self.event_poll_lock = gevent.lock.Semaphore() self.gas_reserve_lock = gevent.lock.Semaphore() self.payment_identifier_lock = gevent.lock.Semaphore() def start(self): """ Start the node synchronously. 
Raises directly if anything goes wrong on startup """ if not self.stop_event.ready(): raise RuntimeError(f'{self!r} already started') self.stop_event.clear() if self.database_dir is not None: self.db_lock.acquire(timeout=0) assert self.db_lock.is_locked # start the registration early to speed up the start if self.config['transport_type'] == 'udp': endpoint_registration_greenlet = gevent.spawn( self.discovery.register, self.address, self.config['transport']['udp']['external_ip'], self.config['transport']['udp']['external_port'], ) storage = sqlite.SQLiteStorage(self.database_path, serialize.JSONSerializer()) self.wal = wal.restore_to_state_change( transition_function=node.state_transition, storage=storage, state_change_identifier='latest', ) if self.wal.state_manager.current_state is None: log.debug( 'No recoverable state available, created initial state', node=pex(self.address), ) # On first run Raiden needs to fetch all events for the payment # network, to reconstruct all token network graphs and find opened # channels last_log_block_number = self.query_start_block state_change = ActionInitChain( random.Random(), last_log_block_number, self.chain.node_address, self.chain.network_id, ) self.handle_state_change(state_change) payment_network = PaymentNetworkState( self.default_registry.address, [], # empty list of token network states as it's the node's startup ) state_change = ContractReceiveNewPaymentNetwork( constants.EMPTY_HASH, payment_network, last_log_block_number, ) self.handle_state_change(state_change) else: # The `Block` state change is dispatched only after all the events # for that given block have been processed, so filters can be safely # installed starting from this position without losing events. last_log_block_number = views.block_number(self.wal.state_manager.current_state) log.debug( 'Restored state from WAL', last_restored_block=last_log_block_number, node=pex(self.address), ) known_networks = views.get_payment_network_identifiers(views.state_from_raiden(self)) if known_networks and self.default_registry.address not in known_networks: configured_registry = pex(self.default_registry.address) known_registries = lpex(known_networks) raise RuntimeError( f'Token network address mismatch.\n' f'Raiden is configured to use the smart contract ' f'{configured_registry}, which conflicts with the current known ' f'smart contracts {known_registries}', ) # Restore the current snapshot group state_change_qty = self.wal.storage.count_state_changes() self.snapshot_group = state_change_qty // SNAPSHOT_STATE_CHANGES_COUNT # Install the filters using the correct from_block value, otherwise # blockchain logs can be lost. self.install_all_blockchain_filters( self.default_registry, self.default_secret_registry, last_log_block_number, ) # Complete the first_run of the alarm task and synchronize with the # blockchain since the last run. # # Notes about setup order: # - The filters must be polled after the node state has been primed, # otherwise the state changes won't have effect. # - The alarm must complete its first run before the transport is started, # to reject messages for closed/settled channels. 
self.alarm.register_callback(self._callback_new_block) with self.dispatch_events_lock: self.alarm.first_run(last_log_block_number) chain_state = views.state_from_raiden(self) self._initialize_transactions_queues(chain_state) self._initialize_whitelists(chain_state) # send messages in queue before starting transport, # this is necessary to avoid a race where, if the transport is started # before the messages are queued, actions triggered by it can cause new # messages to be enqueued before these older ones self._initialize_messages_queues(chain_state) # The transport must not ever be started before the alarm task's # `first_run()` has been, because it's this method which synchronizes the # node with the blockchain, including the channel's state (if the channel # is closed on-chain new messages must be rejected, which will not be the # case if the node is not synchronized) self.transport.start( raiden_service=self, message_handler=self.message_handler, prev_auth_data=chain_state.last_transport_authdata, ) # First run has been called above! self.alarm.start() # exceptions on these subtasks should crash the app and bubble up self.alarm.link_exception(self.on_error) self.transport.link_exception(self.on_error) # Health check needs the transport layer self.start_neighbours_healthcheck(chain_state) if self.config['transport_type'] == 'udp': endpoint_registration_greenlet.get() # re-raise if exception occurred log.debug('Raiden Service started', node=pex(self.address)) super().start() def _run(self, *args, **kwargs): # pylint: disable=method-hidden """ Busy-wait on long-lived subtasks/greenlets, re-raise if any error occurs """ try: self.stop_event.wait() except gevent.GreenletExit: # killed without exception self.stop_event.set() gevent.killall([self.alarm, self.transport]) # kill children raise # re-raise to keep killed status except Exception: self.stop() raise def stop(self): """ Stop the node gracefully. Raise if any stop-time error occurred on any subtask """ if self.stop_event.ready(): # not started return # Needs to come before any greenlets joining self.stop_event.set() # Filters must be uninstalled after the alarm task has stopped. Since # the events are polled by an alarm task callback, if the filters are # uninstalled before the alarm task is fully stopped the callback # `poll_blockchain_events` will fail. 
# # We need a timeout to prevent an endless loop from trying to # contact the disconnected client self.transport.stop() self.alarm.stop() self.transport.join() self.alarm.join() self.blockchain_events.uninstall_all_event_listeners() if self.db_lock is not None: self.db_lock.release() log.debug('Raiden Service stopped', node=pex(self.address)) def add_pending_greenlet(self, greenlet: gevent.Greenlet): greenlet.link_exception(self.on_error) def __repr__(self): return '<{} {}>'.format(self.__class__.__name__, pex(self.address)) def start_neighbours_healthcheck(self, chain_state: ChainState): for neighbour in views.all_neighbour_nodes(chain_state): if neighbour != ConnectionManager.BOOTSTRAP_ADDR: self.start_health_check_for(neighbour) def get_block_number(self) -> BlockNumber: return views.block_number(self.wal.state_manager.current_state) def on_message(self, message: Message): self.message_handler.on_message(self, message) def handle_state_change(self, state_change: StateChange): log.debug( 'State change', node=pex(self.address), state_change=_redact_secret(serialize.JSONSerializer.serialize(state_change)), ) event_list = self.wal.log_and_dispatch(state_change) if self.dispatch_events_lock.locked(): return [] for event in event_list: log.debug( 'Raiden event', node=pex(self.address), raiden_event=_redact_secret(serialize.JSONSerializer.serialize(event)), ) try: self.raiden_event_handler.on_raiden_event( raiden=self, event=event, ) except RaidenRecoverableError as e: log.error(str(e)) except InvalidDBData: raise except RaidenUnrecoverableError as e: log_unrecoverable = ( self.config['environment_type'] == Environment.PRODUCTION and not self.config['unrecoverable_error_should_crash'] ) if log_unrecoverable: log.error(str(e)) else: raise # Take a snapshot every SNAPSHOT_STATE_CHANGES_COUNT state changes # TODO: gather more data about storage requirements and # update this value to specify how often a snapshot # should be taken new_snapshot_group = self.wal.storage.count_state_changes() // SNAPSHOT_STATE_CHANGES_COUNT if new_snapshot_group > self.snapshot_group: log.debug('Storing snapshot', snapshot_id=new_snapshot_group) self.wal.snapshot() self.snapshot_group = new_snapshot_group return event_list def set_node_network_state(self, node_address: Address, network_state: str): state_change = ActionChangeNodeNetworkState(node_address, network_state) self.handle_state_change(state_change) def start_health_check_for(self, node_address: Address): # This function is a noop during initialization. It can be called # through the alarm task while polling for new channel events. The # healthcheck will be started by self.start_neighbours_healthcheck() if self.transport: self.transport.start_health_check(node_address) def _callback_new_block(self, latest_block: Dict): """Called once a new block is detected by the alarm task. Note: This should be called only once per block, otherwise there will be duplicated `Block` state changes in the log. Therefore this method should be called only once a new block is mined with the corresponding block data from the AlarmTask. """ # User facing APIs, which have on-chain side-effects, force polled the # blockchain to update the node's state. This force poll is used to # provide a consistent view to the user, e.g. a channel open call waits # for the transaction to be mined and force polled the event to update # the node's state. 
This pattern introduced a race with the alarm task # and the task which served the user request, because the events are # returned only once per filter. The lock below is to protect against # these races (introduced by the commit # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d) with self.event_poll_lock: latest_block_number = latest_block['number'] confirmation_blocks = self.config['blockchain']['confirmation_blocks'] confirmed_block_number = latest_block_number - confirmation_blocks confirmed_block = self.chain.client.web3.eth.getBlock(confirmed_block_number) # handle testing private chains confirmed_block_number = max(GENESIS_BLOCK_NUMBER, confirmed_block_number) for event in self.blockchain_events.poll_blockchain_events(confirmed_block_number): # These state changes will be processed with a block_number # which is /larger/ than the ChainState's block_number. on_blockchain_event(self, event) # On restart the Raiden node will re-create the filters with the # ethereum node. These filters will have the from_block set to the # value of the latest Block state change. To avoid missing events # the Block state change is dispatched only after all of the events # have been processed. # # This means in some corner cases a few events may be applied # twice, this will happen if the node crashed and some events have # been processed but the Block state change has not been # dispatched. state_change = Block( block_number=confirmed_block_number, gas_limit=confirmed_block['gasLimit'], block_hash=bytes(confirmed_block['hash']), ) self.handle_state_change(state_change) def _register_payment_status( self, target: TargetAddress, identifier: PaymentID, balance_proof: BalanceProofUnsignedState, ): with self.payment_identifier_lock: self.targets_to_identifiers_to_statuses[target][identifier] = PaymentStatus( payment_identifier=identifier, amount=balance_proof.transferred_amount, token_network_identifier=balance_proof.token_network_identifier, payment_done=AsyncResult(), ) def _initialize_transactions_queues(self, chain_state: ChainState): pending_transactions = views.get_pending_transactions(chain_state) log.debug( 'Processing pending transactions', num_pending_transactions=len(pending_transactions), node=pex(self.address), ) with self.dispatch_events_lock: for transaction in pending_transactions: try: self.raiden_event_handler.on_raiden_event(self, transaction) except RaidenRecoverableError as e: log.error(str(e)) except InvalidDBData: raise except RaidenUnrecoverableError as e: log_unrecoverable = ( self.config['environment_type'] == Environment.PRODUCTION and not self.config['unrecoverable_error_should_crash'] ) if log_unrecoverable: log.error(str(e)) else: raise def _initialize_messages_queues(self, chain_state: ChainState): """ Push the queues to the transport and populate targets_to_identifiers_to_statuses. 
""" events_queues = views.get_all_messagequeues(chain_state) for queue_identifier, event_queue in events_queues.items(): self.start_health_check_for(queue_identifier.recipient) for event in event_queue: is_initiator = ( type(event) == SendLockedTransfer and event.transfer.initiator == self.address ) if is_initiator: self._register_payment_status( target=event.transfer.target, identifier=event.transfer.payment_identifier, balance_proof=event.transfer.balance_proof, ) message = message_from_sendevent(event, self.address) self.sign(message) self.transport.send_async(queue_identifier, message) def _initialize_whitelists(self, chain_state: ChainState): """ Whitelist neighbors and mediated transfer targets on transport """ for neighbour in views.all_neighbour_nodes(chain_state): if neighbour == ConnectionManager.BOOTSTRAP_ADDR: continue self.transport.whitelist(neighbour) events_queues = views.get_all_messagequeues(chain_state) for event_queue in events_queues.values(): for event in event_queue: is_initiator = ( type(event) == SendLockedTransfer and event.transfer.initiator == self.address ) if is_initiator: self.transport.whitelist(address=event.transfer.target) def sign(self, message: Message): """ Sign message inplace. """ if not isinstance(message, SignedMessage): raise ValueError('{} is not signable.'.format(repr(message))) message.sign(self.private_key) def install_all_blockchain_filters( self, token_network_registry_proxy: TokenNetworkRegistry, secret_registry_proxy: SecretRegistry, from_block: BlockNumber, ): with self.event_poll_lock: node_state = views.state_from_raiden(self) token_networks = views.get_token_network_identifiers( node_state, token_network_registry_proxy.address, ) self.blockchain_events.add_token_network_registry_listener( token_network_registry_proxy=token_network_registry_proxy, contract_manager=self.contract_manager, from_block=from_block, ) self.blockchain_events.add_secret_registry_listener( secret_registry_proxy=secret_registry_proxy, contract_manager=self.contract_manager, from_block=from_block, ) for token_network in token_networks: token_network_proxy = self.chain.token_network( TokenNetworkAddress(token_network), ) self.blockchain_events.add_token_network_listener( token_network_proxy=token_network_proxy, contract_manager=self.contract_manager, from_block=from_block, ) def connection_manager_for_token_network( self, token_network_identifier: TokenNetworkID, ) -> ConnectionManager: if not is_binary_address(token_network_identifier): raise InvalidAddress('token address is not valid.') known_token_networks = views.get_token_network_identifiers( views.state_from_raiden(self), self.default_registry.address, ) if token_network_identifier not in known_token_networks: raise InvalidAddress('token is not registered.') manager = self.tokennetworkids_to_connectionmanagers.get(token_network_identifier) if manager is None: manager = ConnectionManager(self, token_network_identifier) self.tokennetworkids_to_connectionmanagers[token_network_identifier] = manager return manager def mediated_transfer_async( self, token_network_identifier: TokenNetworkID, amount: TokenAmount, target: TargetAddress, identifier: PaymentID, ) -> AsyncResult: """ Transfer `amount` between this node and `target`. This method will start an asynchronous transfer, the transfer might fail or succeed depending on a couple of factors: - Existence of a path that can be used, through the usage of direct or intermediary channels. - Network speed, making the transfer sufficiently fast so it doesn't expire. 
""" secret = random_secret() async_result = self.start_mediated_transfer_with_secret( token_network_identifier, amount, target, identifier, secret, ) return async_result def start_mediated_transfer_with_secret( self, token_network_identifier: TokenNetworkID, amount: TokenAmount, target: TargetAddress, identifier: PaymentID, secret: Secret, ) -> AsyncResult: secret_hash = sha3(secret) if self.default_secret_registry.check_registered(secret_hash): raise RaidenUnrecoverableError( f'Attempted to initiate a locked transfer with secrethash {pex(secret_hash)}.' f' That secret is already registered onchain.', ) self.start_health_check_for(Address(target)) if identifier is None: identifier = create_default_identifier() with self.payment_identifier_lock: payment_status = self.targets_to_identifiers_to_statuses[target].get(identifier) if payment_status: payment_status_matches = payment_status.matches( token_network_identifier, amount, ) if not payment_status_matches: raise PaymentConflict( 'Another payment with the same id is in flight', ) return payment_status.payment_done payment_status = PaymentStatus( payment_identifier=identifier, amount=amount, token_network_identifier=token_network_identifier, payment_done=AsyncResult(), ) self.targets_to_identifiers_to_statuses[target][identifier] = payment_status init_initiator_statechange = initiator_init( raiden=self, transfer_identifier=identifier, transfer_amount=amount, transfer_secret=secret, token_network_identifier=token_network_identifier, target_address=target, ) # Dispatch the state change even if there are no routes to create the # wal entry. self.handle_state_change(init_initiator_statechange) return payment_status.payment_done def mediate_mediated_transfer(self, transfer: LockedTransfer): init_mediator_statechange = mediator_init(self, transfer) self.handle_state_change(init_mediator_statechange) def target_mediated_transfer(self, transfer: LockedTransfer): self.start_health_check_for(transfer.initiator) init_target_statechange = target_init(transfer) self.handle_state_change(init_target_statechange)
class KombuAmqpClient(object): _SSL_PROTOCOLS = { "tlsv1": ssl.PROTOCOL_TLSv1, "sslv23": ssl.PROTOCOL_SSLv23 } def __init__(self, logger, config, heartbeat=0): self._logger = logger servers = re.compile(r'[,\s]+').split(config.servers) urls = self._parse_servers(servers, config) ssl_params = self._fetch_ssl_params(config) self._queue_args = {"x-ha-policy": "all"} if config.ha_mode else None self._heartbeat = float(heartbeat) self._connection_lock = Semaphore() self._consumer_lock = Semaphore() self._consumer_event = Event() self._consumers_created_event = Event() self._publisher_queue = Queue() self._connection = kombu.Connection( urls, ssl=ssl_params, heartbeat=heartbeat, transport_options={'confirm_publish': True}) self._connected = False self._exchanges = {} self._consumers = {} self._removed_consumers = [] self._running = False self._consumers_changed = True self._consumer_gl = None self._publisher_gl = None self._heartbeat_gl = None # end __init__ def get_exchange(self, name): return self._exchanges.get(name) # end get_exchange def add_exchange(self, name, type='direct', durable=False, **kwargs): if name in self._exchanges: raise ValueError("Exchange with name '%s' already exists" % name) exchange = kombu.Exchange(name, type=type, durable=durable, **kwargs) self._exchanges[name] = exchange return exchange # end add_exchange def add_consumer(self, name, exchange, routing_key='', callback=None, durable=False, wait=False, **kwargs): if name in self._consumers: raise ValueError("Consumer with name '%s' already exists" % name) exchange_obj = self.get_exchange(exchange) queue = kombu.Queue(name, exchange_obj, routing_key=routing_key, durable=durable, **kwargs) consumer = AttrDict(queue=queue, callback=callback) self._consumers[name] = consumer self._consumers_created_event.clear() self._consumer_event.set() self._consumers_changed = True if wait: self._consumers_created_event.wait() msg = 'KombuAmqpClient: Added consumer: %s' % name self._logger(msg, level=SandeshLevel.SYS_DEBUG) return consumer # end add_consumer def remove_consumer(self, name): if name not in self._consumers: raise ValueError("Consumer with name '%s' does not exist" % name) consumer = self._consumers.pop(name) self._removed_consumers.append(consumer) self._consumer_event.set() self._consumers_changed = True msg = 'KombuAmqpClient: Removed consumer: %s' % name self._logger(msg, level=SandeshLevel.SYS_DEBUG) # end remove_consumer def publish(self, message, exchange, routing_key=None, **kwargs): if message is not None and isinstance(message, basestring) and \ len(message) == 0: message = None msg = 'KombuAmqpClient: Publishing message to exchange %s, routing_key %s' % ( exchange, routing_key) self._logger(msg, level=SandeshLevel.SYS_DEBUG) self._publisher_queue.put( AttrDict(message=message, exchange=exchange, routing_key=routing_key, kwargs=kwargs)) # end publish def run(self): self._running = True self._consumer_gl = gevent.spawn(self._start_consuming) self._publisher_gl = gevent.spawn(self._start_publishing) if self._heartbeat: self._heartbeat_gl = gevent.spawn(self._heartbeat_check) # end run def stop(self): self._running = False if self._heartbeat_gl is not None: self._heartbeat_gl.kill() if self._publisher_gl is not None: self._publisher_gl.kill() if self._consumer_gl is not None: self._consumer_gl.kill() for consumer in (self._removed_consumers + list(self._consumers.values())): self._delete_consumer(consumer) self._connection.close() # end stop def _delete_consumer(self, consumer): msg = 'KombuAmqpClient: Removing 
queue %s' % consumer.queue.name self._logger(msg, level=SandeshLevel.SYS_DEBUG) consumer.queue.maybe_bind(self._connection) try: consumer.queue.delete(if_unused=True, nowait=False) except self._connection.channel_errors: pass # end _delete_consumer def _create_consumer_list(self): valid = False consumer_list = [] while not valid: consumer_candidate_list = list(self._consumers.keys()) # This code can yield the CPU to another greenlet consumer_list = [ kombu.Consumer(self._connection, queues=c.queue, callbacks=[c.callback] if c.callback else None) for c in list(self._consumers.values()) ] # Other greenlets can add more entries to self._consumers here # so check to see if the self._consumers has changed. # If the self._consumers has changed, recreate consumer list valid = True for c_key in list(self._consumers.keys()): if c_key not in consumer_candidate_list: valid = False break return consumer_list # end _create_consumer_list def _start_consuming(self): errors = (self._connection.connection_errors + self._connection.channel_errors) removed_consumer = None msg = 'KombuAmqpClient: Starting consumer greenlet' self._logger(msg, level=SandeshLevel.SYS_DEBUG) while self._running: try: self._ensure_connection(self._connection, "Consumer") self._connected = True while len(self._removed_consumers) > 0 or removed_consumer: if removed_consumer is None: removed_consumer = self._removed_consumers.pop(0) self._delete_consumer(removed_consumer) removed_consumer = None if len(list(self._consumers.values())) == 0: msg = 'KombuAmqpClient: Waiting for consumer' self._logger(msg, level=SandeshLevel.SYS_DEBUG) self._consumer_event.wait() self._consumer_event.clear() continue consumers = self._create_consumer_list() msg = 'KombuAmqpClient: Created consumers %s' % str( list(self._consumers.keys())) self._logger(msg, level=SandeshLevel.SYS_DEBUG) with nested(*consumers): self._consumers_created_event.set() if self._consumer_lock.locked(): self._consumer_lock.release() while self._running and not self._consumers_changed: try: self._connection.drain_events(timeout=1) except socket.timeout: pass self._consumers_changed = False self._consumer_lock.acquire() except errors as e: msg = 'KombuAmqpClient: Connection error in Kombu amqp consumer greenlet: %s' % str( e) self._logger(msg, level=SandeshLevel.SYS_WARN) self._connected = False gevent.sleep(0.1) except Exception as e: msg = 'KombuAmqpClient: Error in Kombu amqp consumer greenlet: %s' % str( e) self._logger(msg, level=SandeshLevel.SYS_ERR) self._connected = False gevent.sleep(0.1) msg = 'KombuAmqpClient: Exited consumer greenlet' self._logger(msg, level=SandeshLevel.SYS_DEBUG) # end _start_consuming def _start_publishing(self): errors = (self._connection.connection_errors + self._connection.channel_errors) payload = None connection = self._connection.clone() msg = 'KombuAmqpClient: Starting publisher greenlet' self._logger(msg, level=SandeshLevel.SYS_DEBUG) while self._running: try: self._ensure_connection(connection, "Publisher") producer = kombu.Producer(connection) while self._running: if payload is None: payload = self._publisher_queue.get() exchange = self.get_exchange(payload.exchange) with self._consumer_lock: msg = 'KombuAmqpClient: Producer publish: {}'.format( payload.routing_key) self._logger(msg, level=SandeshLevel.SYS_DEBUG) producer.publish(payload.message, exchange=exchange, routing_key=payload.routing_key, **payload.kwargs) payload = None except errors as e: msg = 'KombuAmqpClient: Connection error in Kombu amqp publisher greenlet: %s' % str( e) 
self._logger(msg, level=SandeshLevel.SYS_WARN) except Exception as e: msg = 'KombuAmqpClient: Error in Kombu amqp publisher greenlet: %s' % str( e) self._logger(msg, level=SandeshLevel.SYS_ERR) msg = 'KombuAmqpClient: Exiting publisher greenlet' self._logger(msg, level=SandeshLevel.SYS_DEBUG) # end _start_publishing def _heartbeat_check(self): while self._running: try: if self._connected and len(list(self._consumers.values())) > 0: self._connection.heartbeat_check() except Exception as e: msg = 'KombuAmqpClient: Error in Kombu amqp heartbeat greenlet: %s' % str( e) self._logger(msg, level=SandeshLevel.SYS_DEBUG) finally: gevent.sleep(float(self._heartbeat) / 2) # end _heartbeat_check def _ensure_connection(self, connection, name): msg = 'KombuAmqpClient: Ensuring %s connection' % name self._logger(msg, level=SandeshLevel.SYS_DEBUG) connection.close() connection.ensure_connection() connection.connect() msg = 'KombuAmqpClient: %s connection established %s' %\ (name, str(self._connection)) self._logger(msg, level=SandeshLevel.SYS_INFO) # end _ensure_connection @staticmethod def _parse_servers(servers, config): required_keys = ['user', 'password', 'port', 'vhost'] urls = [] for server in servers: match = re.match( r"(?:(?P<user>.*?)(?::(?P<password>.*?))*@)*(?P<host>.*?)(?::(?P<port>\d+))*$", server) if match: host = match.groupdict().copy() for key in required_keys: if key not in host or host[key] is None: host[key] = config[key] url = "pyamqp://%(user)s:%(password)s@%(host)s:%(port)s/%(vhost)s" % host urls.append(url) return urls # end _parse_servers @classmethod def _fetch_ssl_params(cls, config): if not config.use_ssl: return False ssl_params = dict() if config.ssl_version: ssl_params['ssl_version'] = cls._validate_ssl_version( config.ssl_version) if config.ssl_keyfile: ssl_params['keyfile'] = config.ssl_keyfile if config.ssl_certfile: ssl_params['certfile'] = config.ssl_certfile if config.ssl_ca_certs: ssl_params['ca_certs'] = config.ssl_ca_certs ssl_params['cert_reqs'] = ssl.CERT_REQUIRED return ssl_params or True # end _fetch_ssl_params @classmethod def _validate_ssl_version(cls, version): version = version.lower() try: return cls._SSL_PROTOCOLS[version] except KeyError: raise RuntimeError('Invalid SSL version: {}'.format(version))
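# --- Example: pausing publishes while consumers are rebuilt ---
# A sketch of the `_consumer_lock` handoff in KombuAmqpClient above: the
# consumer greenlet holds the lock while its consumers are being (re)created
# and releases it once they are in place, and the publisher wraps every
# publish in the lock so nothing is sent during a rebuild. The queue payloads
# and the sleep standing in for consumer setup are illustrative.
import gevent
from gevent.lock import Semaphore
from gevent.queue import Queue


class Handoff(object):
    def __init__(self):
        self._consumer_lock = Semaphore()
        self._publish_queue = Queue()
        self.sent = []

    def consumer(self):
        self._consumer_lock.acquire()  # block publishers during setup
        gevent.sleep(0.01)             # stand-in for creating consumers
        self._consumer_lock.release()  # consumers ready, publishes may flow

    def publisher(self):
        while not self._publish_queue.empty():
            payload = self._publish_queue.get()
            with self._consumer_lock:
                self.sent.append(payload)  # stand-in for producer.publish()


h = Handoff()
h._publish_queue.put('m1')
h._publish_queue.put('m2')
gevent.joinall([gevent.spawn(h.consumer), gevent.spawn(h.publisher)])
assert h.sent == ['m1', 'm2']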
class IronicKombuClient(object): def __init__(self, rabbit_server, rabbit_port, rabbit_user, rabbit_password, notification_level, ironic_notif_mgr_obj, **kwargs): self._rabbit_port = rabbit_port self._rabbit_user = rabbit_user self._rabbit_password = rabbit_password self._rabbit_hosts = self._parse_rabbit_hosts(rabbit_server) self._rabbit_ip = self._rabbit_hosts[0]["host"] self._notification_level = notification_level self._ironic_notification_manager = ironic_notif_mgr_obj self._conn_lock = Semaphore() # Register a handler for SIGTERM so that we can release the lock # Without it, it can take several minutes before a new master is elected # If any app using this wants to register their own sigterm handler, # then we will have to modify this function to perhaps take an argument # gevent.signal(signal.SIGTERM, self.sigterm_handler) self._url = "amqp://%s:%s@%s:%s/" % (self._rabbit_user, self._rabbit_password, self._rabbit_ip, self._rabbit_port) msg = "Initializing RabbitMQ connection, urls %s" % self._url # self._conn_state = ConnectionStatus.INIT self._conn = kombu.Connection(self._url) self._exchange = self._set_up_exchange() self._queues = [] self._queues = self._set_up_queues(self._notification_level) if not self._queues: exit() def _parse_rabbit_hosts(self, rabbit_servers): default_dict = {'user': self._rabbit_user, 'password': self._rabbit_password, 'port': self._rabbit_port} ret = [] rabbit_hosts = re.compile('[,\s]+').split(rabbit_servers) for s in rabbit_hosts: match = re.match("(?:(?P<user>.*?)" "(?::(?P<password>.*?))" "*@)*(?P<host>.*?)(?::(?P<port>\d+))*$", s) if match: mdict = match.groupdict().copy() for key in ['user', 'password', 'port']: if not mdict[key]: mdict[key] = default_dict[key] ret.append(mdict) return ret def _set_up_exchange(self, exchange_name=None): if exchange_name: exchange = kombu.Exchange(str(exchange_name), type="topic", durable=False) else: exchange = kombu.Exchange("ironic", type="topic", durable=False) return exchange def _set_up_queues(self, notification_level): if notification_level not in ['info', 'debug', 'warning', 'error']: msg = "Unrecognized notification level: " + \ str(notification_level) + \ "\nPlease enter a valid notification level from: " \ "'info', 'debug', 'warning', 'error'" return 0 sub_queue_names = [] sub_queues = [] log_levels = [] if notification_level == "debug": log_levels = ['debug', 'info', 'warning', 'error'] elif notification_level == "info": log_levels = ['info', 'warning', 'error'] elif notification_level == "warning": log_levels = ['warning', 'error'] elif notification_level == "error": log_levels = ['error'] for level in log_levels: sub_queue_names.append('ironic_versioned_notifications.' + str(level)) for sub_queue_name in sub_queue_names: sub_queues.append(kombu.Queue(str(sub_queue_name), durable=False, exchange=self._exchange, routing_key=str(sub_queue_name))) return sub_queues def _reconnect(self, delete_old_q=False): if self._conn_lock.locked(): # either the connection-monitor or the publisher should have taken # the lock. Whichever greenlet acquired the lock will re-establish # the connection and release the lock, so the other one can # simply wait on the lock until it gets released self._conn_lock.wait() # if self._conn_state == ConnectionStatus.UP: # return with self._conn_lock: msg = "RabbitMQ connection down" # self._logger(msg, level=SandeshLevel.SYS_NOTICE) # self._update_sandesh_status(ConnectionStatus.DOWN) # self._conn_state = ConnectionStatus.DOWN self._conn.close() self._conn.ensure_connection() self._conn.connect() # self._update_sandesh_status(ConnectionStatus.UP) # self._conn_state = ConnectionStatus.UP msg = 'RabbitMQ connection ESTABLISHED %s' % repr(self._conn) # self._logger(msg, level=SandeshLevel.SYS_NOTICE) self._channel = self._conn.channel() self._consumer = kombu.Consumer(self._conn, queues=self._queues, callbacks=[self._subscriber], accept=["application/json"]) # end _reconnect def _connection_watch(self, connected, timeout=10000): if not connected: self._reconnect() while True: try: self._consumer.consume() self._conn.drain_events() except self._conn.connection_errors + self._conn.channel_errors: self._reconnect() # end _connection_watch def _connection_watch_forever(self, timeout=10000): connected = True while True: try: self._connection_watch(connected, timeout) except Exception as e: msg = 'Error in rabbitmq drainer greenlet: %s' % (str(e)) print(msg) # avoid 'reconnect()' here as that itself might cause an exception connected = False # end _connection_watch_forever def _process_message_dict(self, message_dict): return message_dict["event_type"] def _subscribe_cb(self, body): # print("The body is {}".format(body)) message_dict = json.loads(str(body["oslo.message"])) # print("Message: \n" + str(message_dict)) message_dict_payload = message_dict.pop("payload") ironic_object_data = message_dict_payload["ironic_object.data"] for k in message_dict: ironic_object_data[k] = message_dict[k] ironic_node_list = [] ironic_node_list.append(ironic_object_data) self._ironic_notification_manager.process_ironic_node_info( ironic_node_list) def _subscriber(self, body, message): try: self._subscribe_cb(body) message.ack() except Exception as e: print("The error is " + str(e)) def _start(self): self._reconnect() self._connection_watch_forever() def shutdown(self): self._conn.close()
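# --- Example: the notification-level cascade as a slice ---
# The if/elif ladder in _set_up_queues() above maps a minimum level to all
# levels at or above it; the same mapping can be read as a slice over an
# ordered severity list. The queue-name prefix matches the code above; the
# function name is illustrative.
LEVELS = ['debug', 'info', 'warning', 'error']


def queue_names(notification_level):
    if notification_level not in LEVELS:
        raise ValueError(
            'Unrecognized notification level: %s' % notification_level)
    wanted = LEVELS[LEVELS.index(notification_level):]
    return ['ironic_versioned_notifications.%s' % level for level in wanted]


assert queue_names('warning') == [
    'ironic_versioned_notifications.warning',
    'ironic_versioned_notifications.error',
]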
class ConfigUrl(object): def __init__(self, url): self.url = url self.lock = Semaphore() self.last_update = None @property def log(self): if not hasattr(self, '_log'): self._log = logger.get('patch.config_url.{}'.format(self.url)) return self._log def update(self): locked = self.lock.locked() with self.lock: if locked: return if self.last_update is not None and time.time() - self.last_update < 60: return try: self._update() except requests.ConnectionError as e: self.log.error('update error: {}'.format(e)) finally: self.last_update = time.time() def _update(self): found_sources = list() resp = requests.get(self.url, stream=True) try: resp.raise_for_status() data = yaml.load(resp.raw) finally: resp.close() assert len(data.keys()) > 0 group = Group() def _add_source(url): try: source = add_source(url, self.url) except: self.log.warning('error adding new repo {}'.format(url)) else: found_sources.append(source) for name, url in data.iteritems(): try: Url(url) except: self.log.warning('invalid patch source entry: {}'.format(url)) try: source = sources[name] except KeyError: self.log.info('adding new repo {}'.format(url)) group.spawn(_add_source, url) else: found_sources.append(source) if source.url != url: source.log.info('changing url to {}'.format(url)) with transaction: source.url = url source.unlink() group.join() for source in sources.values(): if source.config_url == self.url and source not in found_sources: source.log.info('erasing repo') source.delete(True)
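# --- Example: coalescing concurrent updates with locked() plus a timestamp ---
# A sketch of the idiom in ConfigUrl.update() above: a caller that finds the
# lock already held waits for the holder and then returns without repeating
# the work, and completed runs are additionally rate-limited via
# `last_update`. The `runs` counter stands in for the real _update() body.
import time

import gevent
from gevent.lock import Semaphore


class Debounced(object):
    def __init__(self, min_interval=60):
        self.lock = Semaphore()
        self.last_update = None
        self.min_interval = min_interval
        self.runs = 0

    def update(self):
        was_locked = self.lock.locked()
        with self.lock:
            if was_locked:
                return  # another greenlet did the work while we waited
            if (self.last_update is not None
                    and time.time() - self.last_update < self.min_interval):
                return
            try:
                self.runs += 1  # stand-in for _update()
            finally:
                self.last_update = time.time()


d = Debounced()
gevent.joinall([gevent.spawn(d.update) for _ in range(5)])
assert d.runs == 1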
class VncKombuClientBase(object): def _update_sandesh_status(self, status, msg=''): ConnectionState.update(conn_type=ConnectionType.DATABASE, name='RabbitMQ', status=status, message=msg, server_addrs=["%s:%s" % (self._rabbit_ip, self._rabbit_port)]) # end _update_sandesh_status def publish(self, message): self._publish_queue.put(message) # end publish def __init__(self, rabbit_ip, rabbit_port, rabbit_user, rabbit_password, rabbit_vhost, rabbit_ha_mode, q_name, subscribe_cb, logger): self._rabbit_ip = rabbit_ip self._rabbit_port = rabbit_port self._rabbit_user = rabbit_user self._rabbit_password = rabbit_password self._rabbit_vhost = rabbit_vhost self._subscribe_cb = subscribe_cb self._logger = logger self._publish_queue = Queue() self._conn_lock = Semaphore() self.obj_upd_exchange = kombu.Exchange('vnc_config.object-update', 'fanout', durable=False) def num_pending_messages(self): return self._publish_queue.qsize() # end num_pending_messages def prepare_to_consume(self): # override this method return def _reconnect(self): if self._conn_lock.locked(): # either the connection-monitor or the publisher should have taken # the lock. Whichever greenlet acquired the lock will re-establish # the connection and release the lock, so the other one can # simply wait on the lock until it gets released self._conn_lock.wait() return self._conn_lock.acquire() msg = "RabbitMQ connection down" self._logger(msg, level=SandeshLevel.SYS_ERR) self._update_sandesh_status(ConnectionStatus.DOWN) self._conn_state = ConnectionStatus.DOWN self._conn.close() self._conn.ensure_connection() self._conn.connect() self._update_sandesh_status(ConnectionStatus.UP) self._conn_state = ConnectionStatus.UP msg = 'RabbitMQ connection ESTABLISHED %s' % repr(self._conn) self._logger(msg, level=SandeshLevel.SYS_NOTICE) self._channel = self._conn.channel() self._consumer = kombu.Consumer(self._channel, queues=self._update_queue_obj, callbacks=[self._subscribe]) if self._can_consume: self._consumer.consume() self._producer = kombu.Producer(self._channel, exchange=self.obj_upd_exchange) self._conn_lock.release() # end _reconnect def _connection_watch(self): self.prepare_to_consume() self._can_consume = True self._consumer.consume() while True: try: self._conn.drain_events() except self._conn.connection_errors + self._conn.channel_errors as e: self._reconnect() # end _connection_watch def _publisher(self): while True: try: message = self._publish_queue.get() while True: try: self._producer.publish(message) break except self._conn.connection_errors + self._conn.channel_errors as e: self._reconnect() except Exception as e: log_str = "Unknown exception in _publisher greenlet: " + str(e) self._logger(log_str, level=SandeshLevel.SYS_ERR) # end _publisher def _subscribe(self, body, message): try: self._subscribe_cb(body) finally: message.ack() def _start(self): self._can_consume = False self._reconnect() self._publisher_greenlet = gevent.spawn(self._publisher) self._connection_monitor_greenlet = gevent.spawn(self._connection_watch) def shutdown(self): self._publisher_greenlet.kill() self._connection_monitor_greenlet.kill() self._producer.close() self._consumer.close() self._conn.close()
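# --- Example: deferring consumption until the watcher is ready ---
# A sketch of the `_can_consume` flag in the class above: the first connect
# (made from _start(), before prepare_to_consume() has run) must not start
# delivering messages, while later reconnects from the watcher greenlet
# should resume consumption. All names below are illustrative.
from gevent.lock import Semaphore


class DeferredConsumer(object):
    def __init__(self):
        self._conn_lock = Semaphore()
        self._can_consume = False
        self.consuming = False

    def _reconnect(self):
        with self._conn_lock:
            # ... connection and channel would be rebuilt here ...
            if self._can_consume:
                self.consuming = True

    def start(self):
        self._reconnect()         # initial connect: consumption stays off
        assert not self.consuming
        self._can_consume = True  # prepare_to_consume() has run by now
        self._reconnect()         # any later reconnect resumes consumption
        assert self.consuming


DeferredConsumer().start()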