class SeleniumDownloadHandler(HTTP11DownloadHandler):

    def __init__(self, settings):
        super(SeleniumDownloadHandler, self).__init__(settings)
        self._enable_driver = settings.getbool('WEB_DRIVER_ENABLED')
        self._driver_name = settings.get('WEB_DRIVER_NAME')
        self._driver_path = settings.get('WEB_DRIVER_PATH')
        # getint() rather than get(): these settings must be numbers even
        # when they are configured as strings.
        selenium_concurrent_requests = settings.getint(
            'WEB_DRIVER_CONCURRENT_REQUESTS', 16)
        self._thread_pool = ThreadPool(
            minthreads=selenium_concurrent_requests,
            maxthreads=selenium_concurrent_requests)
        self._thread_pool.start()
        self._driver_timeout = settings.getint('WEB_DRIVER_TIMEOUT', 300)

    def download_request(self, request, spider):
        if self._enable_driver and request.meta.get('selenium_needed', False):
            agent = SeleniumAsyncAgent(
                contextFactory=self._contextFactory,
                pool=self._thread_pool,
                driverName=self._driver_name,
                driverPath=self._driver_path,
                connectTimeout=self._driver_timeout)
            return agent.download_request(request)
        else:
            return super(SeleniumDownloadHandler, self).download_request(
                request, spider)

    def close(self):
        super(SeleniumDownloadHandler, self).close()
        self._thread_pool.stop()
class AddressResolver(object):
    pool = None

    def __init__(self, minthreads=1, maxthreads=4):
        self.pool = ThreadPool(minthreads=minthreads, maxthreads=maxthreads)
        # An unclosed ThreadPool leads to reactor hangs at shutdown.
        # This is a problem in many situations, so better enforce pool stop
        # here.
        reactor.addSystemEventTrigger(
            "before", "shutdown", self.pool.stop
        )
        self.pool.start()

    def get_host_by_name(self, address):
        d = defer.Deferred()

        def func():
            try:
                reactor.callFromThread(
                    d.callback, socket.gethostbyname(address)
                )
            except Exception as e:
                reactor.callFromThread(d.errback, e)

        self.pool.callInThread(func)
        return d

    def close(self):
        self.pool.stop()
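A minimal usage sketch for the resolver above, assuming a running reactor; the callbacks are illustrative and not part of the original:

from twisted.internet import reactor

resolver = AddressResolver()

d = resolver.get_host_by_name("example.com")
d.addCallback(lambda ip: print("resolved to", ip))
# The "before shutdown" trigger registered in __init__ stops the pool,
# so stopping the reactor is all the cleanup this sketch needs.
d.addBoth(lambda _: reactor.stop())

reactor.run()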
class FakeReactor(object):
    """
    A fake reactor implementation which just supports enough reactor APIs
    for L{ThreadedResolver}.
    """
    implements(IReactorTime, IReactorThreads)

    def __init__(self):
        self._clock = Clock()
        self.callLater = self._clock.callLater

        self._threadpool = ThreadPool()
        self._threadpool.start()
        self.getThreadPool = lambda: self._threadpool

        self._threadCalls = Queue()

    def callFromThread(self, f, *args, **kwargs):
        self._threadCalls.put((f, args, kwargs))

    def _runThreadCalls(self):
        f, args, kwargs = self._threadCalls.get()
        f(*args, **kwargs)

    def _stop(self):
        self._threadpool.stop()
class ProviderServer(object):
    def __init__(self, reactor, port):
        self.reactor = reactor
        self.reactor.addSystemEventTrigger(
            'before', 'shutdown', self.before_shutdown)
        self.reactor.suggestThreadPoolSize(config.REACTOR_THREAD_POOL_MAX)

        self.app = create_app()
        self.thread_pool = ThreadPool(maxthreads=config.FLASK_THREAD_POOL_MAX)
        self.thread_pool.start()

        wsgi_resource = WSGIResource(self.reactor, self.thread_pool, self.app)
        root_resource = RootResource(wsgi_resource)
        root_resource.putChild("metrics", MetricsResource())
        site = Site(root_resource)
        self.bind = self.reactor.listenTCP(port, site)
        log.info('Provider is listening on {} ...'.format(port))

    def run(self):
        self.reactor.run()

    def stop_services(self):
        log.info("Shutting down provider...")
        self.app.cleanup()
        self.thread_pool.stop()
        return maybeDeferred(self.bind.stopListening).addCallbacks(
            callback=lambda _: log.info("Port listening was stopped"),
            errback=lambda failure: log.error(
                "Error while stopping port listening: {}".format(failure)))

    @inlineCallbacks
    def before_shutdown(self):
        self.app.stop()
        yield self.stop_services()
class FakeReactor(object):
    """
    A fake reactor implementation which just supports enough reactor APIs
    for L{ThreadedResolver}.
    """

    def __init__(self):
        self._clock = Clock()
        self.callLater = self._clock.callLater

        self._threadpool = ThreadPool()
        self._threadpool.start()
        self.getThreadPool = lambda: self._threadpool

        self._threadCalls = Queue()

    def callFromThread(self, f, *args, **kwargs):
        self._threadCalls.put((f, args, kwargs))

    def _runThreadCalls(self):
        f, args, kwargs = self._threadCalls.get()
        f(*args, **kwargs)

    def _stop(self):
        self._threadpool.stop()
class VMMasterServer(object):
    def __init__(self, reactor, port):
        self.reactor = reactor
        self.app = create_app()
        self.thread_pool = ThreadPool(maxthreads=config.THREAD_POOL_MAX)
        self.thread_pool.start()

        wsgi_resource = WSGIResource(self.reactor, self.thread_pool, self.app)
        root_resource = RootResource(wsgi_resource)
        root_resource.putChild("proxy", ProxyResource(self.app))
        site = Site(root_resource)
        site.protocol = HTTPChannelWithClient
        self.bind = self.reactor.listenTCP(port, site)
        log.info('Server is listening on %s ...' % port)

    def run(self):
        self.reactor.addSystemEventTrigger(
            'before', 'shutdown', self.before_shutdown)
        self.reactor.run()
        del self

    def __del__(self):
        log.info("Shutting down server...")
        d = self.bind.stopListening()
        _block_on(d, 20)
        self.app.cleanup()
        self.thread_pool.stop()
        log.info("Server gracefully shut down")

    def wait_for_end_active_sessions(self):
        active_sessions = self.app.sessions.active()

        def wait_for():
            while active_sessions:
                log.info("Waiting for {} sessions to complete: {}".format(
                    len(active_sessions),
                    [(i.id, i.status) for i in active_sessions]))
                # Iterate over a copy: the list is mutated inside the loop.
                for session in list(active_sessions):
                    if session.is_done:
                        log.debug("Session {} is done".format(session.id))
                        active_sessions.remove(session)
                time.sleep(1)

        log.info("Wait for end %s active session[s]:"
                 " %s" % (len(active_sessions), active_sessions))
        return deferToThread(wait_for).addCallbacks(
            callback=lambda _: log.info(
                "All active sessions have been completed"),
            errback=lambda failure: log.error(
                "Error while waiting for active_sessions: {}".format(failure)))

    @inlineCallbacks
    def before_shutdown(self):
        self.app.running = False
        yield self.wait_for_end_active_sessions()
class ThreadPoolService(Service):
    def __init__(self):
        self.threadpool = ThreadPool()

    def startService(self):
        self.threadpool.start()

    def stopService(self):
        self.threadpool.stop()
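A minimal sketch of how such a service is typically wired into a twistd Application, so the pool starts and stops with the service lifecycle (the application name is an assumption):

from twisted.application.service import Application

# twistd calls startService()/stopService() for us, which in turn starts
# and stops the wrapped ThreadPool.
application = Application("threadpool-demo")
ThreadPoolService().setServiceParent(application)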
class TestDonationAPI(TestCase):
    """
    Tests for L{bdm.resource.DonationAPI}.
    """
    def setUp(self):
        self.store = Store()
        self.threadPool = ThreadPool()
        self.threadPool.start()
        self.api = DonationAPI(self.store, 'nothing', self.threadPool)

    def tearDown(self):
        self.threadPool.stop()

    def test_serverStatsSuccess(self):
        """
        L{serverStats} returns the expected dictionary results when passed
        a valid [IP, PORT].
        """
        def _cb(result):
            expected = [{
                'server_name': 'Test Server',
                'map': 'testmap',
                'player_count': 8,
                'max_players': 16,
                'online': True,
                'location': 'ZA'
            }]
            self.assertEqual(expected, result)

        servers = [['1.1.1.1', 27015, "ZA"]]
        return self.api.serverStats(
            servers, querier=MockServerQuerier).addCallback(_cb)

    def test_serverStatsOffline(self):
        """
        No exception is raised if the server is inaccessible, and the
        online status is set to C{False}.
        """
        def _cb(result):
            expected = [{
                'server_name': '1.1.1.2',
                'online': False,
                'location': 'ZA'
            }]
            self.assertEqual(expected, result)

        servers = [['1.1.1.2', 27015, "ZA"]]
        return self.api.serverStats(
            servers, querier=MockServerQuerier).addCallback(_cb)
def main(config_path):
    cfg = ConfigParser()
    cfg.read(config_path)

    # Start Twisted logging to console.
    log.startLogging(stderr)

    # Read database configuration options.
    db_url = cfg.get('database', 'url')

    # Read website configuration options.
    http_debug = cfg.getboolean('http', 'debug', fallback=False)
    http_host = cfg.get('http', 'host', fallback='localhost')
    http_port = cfg.getint('http', 'port', fallback=5000)
    http_pool = cfg.getint('http', 'pool_size', fallback=4)

    # Default to much saner database query defaults and always
    # commit and/or flush statements explicitly.
    # factory = sessionmaker(autocommit=False, autoflush=False)

    # Prepare database connection with table reflection.
    engine = create_engine(db_url)
    session = scoped_session(sessionmaker(autocommit=False, autoflush=False))
    db = SQLSoup(engine, session=session)

    # Extract manager options, sans the pool_size we handle here.
    # pool_size = int(manager_opts.pop('pool_size', 2))
    pool_size = 2

    # Set the correct thread pool size for the manager.
    reactor.suggestThreadPoolSize(pool_size)

    # Prepare the website that will get exposed to the users.
    site = make_site(db, debug=http_debug)

    # Prepare WSGI site with a separate thread pool.
    pool = ThreadPool(http_pool, http_pool, 'http')
    site = Site(WSGIResource(reactor, pool, site))
    pool.start()

    # Bind the website to its address.
    reactor.listenTCP(http_port, site, interface=http_host)

    # Run the Twisted reactor until the user terminates us.
    reactor.run()

    # Kill the HTTP ThreadPool.
    pool.stop()
class FakeReactor:
    """
    A fake reactor implementation which just supports enough reactor APIs
    for L{ThreadedResolver}.
    """

    def __init__(self):
        self._clock = Clock()
        self.callLater = self._clock.callLater

        self._threadpool = ThreadPool()
        self._threadpool.start()
        self.getThreadPool = lambda: self._threadpool

        self._threadCalls = Queue()

    def callFromThread(self, callable: Callable[..., Any], *args, **kwargs):
        self._threadCalls.put((callable, args, kwargs))

    def _runThreadCalls(self):
        f, args, kwargs = self._threadCalls.get()
        f(*args, **kwargs)

    def _stop(self):
        self._threadpool.stop()

    def getDelayedCalls(self):
        # IReactorTime.getDelayedCalls
        pass

    def seconds(self):
        # IReactorTime.seconds
        pass

    def callInThread(self, callable: Callable[..., Any], *args, **kwargs):
        # IReactorInThreads.callInThread
        pass

    def suggestThreadPoolSize(self, size):
        # IReactorThreads.suggestThreadPoolSize
        pass
class TwistedThreadScheduler(BaseScheduler):
    """Run jobs in threads, chaperoned by Twisted."""

    def __init__(self, num_threads, install_signal_handlers=True):
        """Create a new `TwistedThreadScheduler`.

        :param num_threads: The number of threads to allocate to the
            thread pool.
        :type num_threads: int

        :param install_signal_handlers: Whether the Twisted reactor should
            install signal handlers or not. This is intended for testing -
            set to False to avoid layer violations - but may be useful in
            other situations.
        :type install_signal_handlers: bool
        """
        self._thread_pool = ThreadPool(0, num_threads)
        self._install_signal_handlers = install_signal_handlers
        self._jobs = []

    def schedule(self, func, *args, **kwargs):
        self._jobs.append(
            deferToThreadPool(
                reactor, self._thread_pool, func, *args, **kwargs))

    def run(self):
        jobs, self._jobs = self._jobs[:], []
        jobs_done = DeferredList(jobs)
        jobs_done.addBoth(lambda ignore: self._thread_pool.stop())
        jobs_done.addBoth(lambda ignore: reactor.stop())
        reactor.callWhenRunning(self._thread_pool.start)
        reactor.run(self._install_signal_handlers)
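A minimal usage sketch for the scheduler above; fetch is a hypothetical blocking job added for illustration:

import time

def fetch(n):
    # A stand-in for any blocking work that belongs off the reactor thread.
    time.sleep(0.1)
    print("job", n, "done")

scheduler = TwistedThreadScheduler(num_threads=4)
for n in range(8):
    scheduler.schedule(fetch, n)
scheduler.run()  # blocks until every job completes, then stops the reactor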
def run_by_pool():
    urls = [LINK_URL % n for n in range(1, PAGE_NUM + 1)]
    print(urls)
    # 5 * 20: at most 100 threads running
    error_log("start:" + str(time.time()))
    pool = ThreadPool(minthreads=1, maxthreads=5)
    for url in urls:
        pool.callInThread(start, url, save_path=IMAGE_SAVE_BASEPATH)
    pool.start()
    while True:
        # Check the thread pool state every 20s; if no thread is still
        # working, stop the download process.
        time.sleep(20)
        if len(pool.working) == 0:
            pool.stop()
            error_log("end:" + str(time.time()))
            break
class VMMasterServer(object):
    def __init__(self, reactor, port):
        self.reactor = reactor
        self.app = create_app()
        self.thread_pool = ThreadPool(maxthreads=config.THREAD_POOL_MAX)
        self.thread_pool.start()

        wsgi_resource = WSGIResource(self.reactor, self.thread_pool, self.app)
        root_resource = RootResource(wsgi_resource)
        root_resource.putChild("proxy", ProxyResource(self.app))
        site = Site(root_resource)
        site.protocol = HTTPChannelWithClient
        self.bind = self.reactor.listenTCP(port, site)
        log.info('Server is listening on %s ...' % port)

    def run(self):
        self.reactor.addSystemEventTrigger(
            'before', 'shutdown', self.before_shutdown)
        self.reactor.run()
        del self

    def __del__(self):
        d = self.bind.stopListening()
        _block_on(d, 20)
        self.app.cleanup()
        self.thread_pool.stop()

    def wait_for_end_active_sessions(self):
        active_sessions = self.app.sessions.active()

        def wait_for():
            while active_sessions:
                # Iterate over a copy: the list is mutated inside the loop.
                for session in list(active_sessions):
                    if session.status in ('failed', 'succeed'):
                        active_sessions.remove(session)
                time.sleep(1)

        log.info("Wait for end %s active session[s]:"
                 " %s" % (len(active_sessions), active_sessions))
        # wait_for() takes no arguments, so nothing else is passed to
        # deferToThread here.
        return deferToThread(wait_for).addBoth(
            lambda i: log.info("All active sessions have been completed"))

    @inlineCallbacks
    def before_shutdown(self):
        self.app.running = False
        yield self.wait_for_end_active_sessions()
class ThreadedRunner(SimpleRunner):
    """Run tests using a threadpool.

    Uses Twisted Python's thread pool.
    """

    def __init__(self, result_class):
        from twisted.python.threadpool import ThreadPool
        SimpleRunner.__init__(self, _threadclass(result_class))
        self._pool = ThreadPool()
        self._pool.start()

    def run(self, fixture):
        assert not self._done
        # ThreadPool.dispatch() was deprecated in Twisted 8.0 in favour of
        # callInThread(); this code targets an older Twisted.
        self._pool.dispatch(None, fixture, self._result)

    def result(self):
        self._pool.stop()
        return SimpleRunner.result(self)
def stop(self):
    """
    Flushes any remaining data, closes the underlying files, then stops
    the thread pool.

    .. warning::

        Because this method is usually called when the reactor is
        stopping, all file handling happens in the main thread.
    """
    if not self.started or self.joined:
        return

    logger.debug("Logging thread pool is shutting down.")
    self.stopped = True

    for protocol_id in list(self.logs.keys()):
        self.close_log(protocol_id)

    ThreadPool.stop(self)
def __init__(self, pool=None, minthreads=1, maxthreads=4, **kwargs):
    """Creates a twisted aware Session.

    Notes
    ~~~~~

    * If you provide both `pool` and `minthreads`/`maxthreads`, the
      latter are ignored and the provided threadpool is used as is.
    """
    requestsSession.__init__(self, **kwargs)
    self.ownPool = False
    if pool is None:
        self.ownPool = True
        pool = ThreadPool(minthreads=minthreads, maxthreads=maxthreads)
        # An unclosed ThreadPool leads to reactor hangs at shutdown.
        # This is a problem in many situations, so better enforce pool
        # stop here.
        reactor.addSystemEventTrigger(
            "after", "shutdown",
            lambda: pool.stop() if pool.started else None)
    self.pool = pool
    if self.ownPool:
        pool.start()
def __init__(self, pool=None, minthreads=1, maxthreads=4, **kwargs):
    """Creates a twisted aware Session.

    Notes
    ~~~~~

    * If you provide both `pool` and `minthreads`/`maxthreads`, the
      latter are ignored and the provided threadpool is used as is.
    """
    requestsSession.__init__(self, **kwargs)
    self.ownPool = False
    if pool is None:
        self.ownPool = True
        pool = ThreadPool(minthreads=minthreads, maxthreads=maxthreads)
        # An unclosed ThreadPool leads to reactor hangs at shutdown.
        # This is a problem in many situations, so better enforce pool
        # stop here.
        reactor.addSystemEventTrigger("before", "shutdown",
                                      lambda: pool.stop())
    self.pool = pool
    if self.ownPool:
        pool.start()
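A minimal sketch of how a session like the two variants above might be driven from the reactor thread. The class name TwistedSession is an assumption (only __init__ is shown here); session.get is the ordinary blocking requests.Session.get:

from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool

session = TwistedSession(maxthreads=4)  # hypothetical name for the class above

def show(response):
    print(response.status_code)
    reactor.stop()

# Run the blocking HTTP call on the session's own pool.
d = deferToThreadPool(reactor, session.pool, session.get,
                      "https://example.com")
d.addCallback(show)
d.addErrback(lambda failure: (print(failure), reactor.stop()))

reactor.run()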
class ReactorBase(PluggableResolverMixin):
    """
    Default base class for Reactors.

    @ivar _stopped: A flag which is true between paired calls to
        C{reactor.run} and C{reactor.stop}. This should be replaced with an
        explicit state machine.

    @ivar _justStopped: A flag which is true between the time C{reactor.stop}
        is called and the time the shutdown system event is fired. This is
        used to determine whether that event should be fired after each
        iteration through the mainloop. This should be replaced with an
        explicit state machine.

    @ivar _started: A flag which is true from the time C{reactor.run} is
        called until the time C{reactor.run} returns. This is used to
        prevent calls to C{reactor.run} on a running reactor. This should
        be replaced with an explicit state machine.

    @ivar running: See L{IReactorCore.running}

    @ivar _registerAsIOThread: A flag controlling whether the reactor will
        register the thread it is running in as the I/O thread when it
        starts. If C{True}, registration will be done, otherwise it will
        not be.

    @ivar _exitSignal: See L{_ISupportsExitSignalCapturing._exitSignal}
    """

    _registerAsIOThread = True

    _stopped = True
    installed = False
    usingThreads = False

    _exitSignal = None

    __name__ = "twisted.internet.reactor"

    def __init__(self) -> None:
        super().__init__()
        self.threadCallQueue: List[_ThreadCall] = []
        self._eventTriggers: Dict[str, _ThreePhaseEvent] = {}
        self._pendingTimedCalls: List[DelayedCall] = []
        self._newTimedCalls: List[DelayedCall] = []
        self._cancellations = 0
        self.running = False
        self._started = False
        self._justStopped = False
        self._startedBefore = False
        # reactor internal readers, e.g. the waker.
        # Using Any as the type here... unable to find a suitable defined
        # interface
        self._internalReaders: Set[Any] = set()
        self.waker: Any = None

        # Arrange for the running attribute to change to True at the right
        # time and let a subclass possibly do other things at that time
        # (eg install signal handlers).
        self.addSystemEventTrigger(
            "during", "startup", self._reallyStartRunning)
        self.addSystemEventTrigger("during", "shutdown", self.crash)
        self.addSystemEventTrigger("during", "shutdown", self.disconnectAll)

        if platform.supportsThreads():
            self._initThreads()
        self.installWaker()

    # override in subclasses

    _lock = None

    def installWaker(self) -> None:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement installWaker")

    def wakeUp(self) -> None:
        """
        Wake up the event loop.
        """
        if self.waker:
            self.waker.wakeUp()
        # if the waker isn't installed, the reactor isn't running, and
        # therefore doesn't need to be woken up

    def doIteration(self, delay: Optional[float]) -> None:
        """
        Do one iteration over the readers and writers which have been added.
        """
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement doIteration")

    def addReader(self, reader: IReadDescriptor) -> None:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement addReader")

    def addWriter(self, writer: IWriteDescriptor) -> None:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement addWriter")

    def removeReader(self, reader: IReadDescriptor) -> None:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement removeReader")

    def removeWriter(self, writer: IWriteDescriptor) -> None:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement removeWriter")

    def removeAll(self) -> List[Union[IReadDescriptor, IWriteDescriptor]]:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement removeAll")

    def getReaders(self) -> List[IReadDescriptor]:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement getReaders")

    def getWriters(self) -> List[IWriteDescriptor]:
        raise NotImplementedError(
            reflect.qual(self.__class__) + " did not implement getWriters")

    # IReactorCore
    def resolve(
        self, name: str, timeout: Sequence[int] = (1, 3, 11, 45)
    ) -> Deferred[str]:
        """
        Return a Deferred that will resolve a hostname.
        """
        if not name:
            # XXX - This is *less than* '::', and will screw up IPv6 servers
            return defer.succeed("0.0.0.0")
        if abstract.isIPAddress(name):
            return defer.succeed(name)
        return self.resolver.getHostByName(name, timeout)

    def stop(self) -> None:
        """
        See twisted.internet.interfaces.IReactorCore.stop.
        """
        if self._stopped:
            raise error.ReactorNotRunning(
                "Can't stop reactor that isn't running.")
        self._stopped = True
        self._justStopped = True
        self._startedBefore = True

    def crash(self) -> None:
        """
        See twisted.internet.interfaces.IReactorCore.crash.

        Reset reactor state tracking attributes and re-initialize certain
        state-transition helpers which were set up in C{__init__} but later
        destroyed (through use).
        """
        self._started = False
        self.running = False
        self.addSystemEventTrigger(
            "during", "startup", self._reallyStartRunning)

    def sigInt(self, number: int, frame: Optional[FrameType] = None) -> None:
        """
        Handle a SIGINT interrupt.

        @param number: See handler specification in L{signal.signal}
        @param frame: See handler specification in L{signal.signal}
        """
        log.msg("Received SIGINT, shutting down.")
        self.callFromThread(self.stop)
        self._exitSignal = number

    def sigBreak(self, number: int, frame: Optional[FrameType] = None) -> None:
        """
        Handle a SIGBREAK interrupt.

        @param number: See handler specification in L{signal.signal}
        @param frame: See handler specification in L{signal.signal}
        """
        log.msg("Received SIGBREAK, shutting down.")
        self.callFromThread(self.stop)
        self._exitSignal = number

    def sigTerm(self, number: int, frame: Optional[FrameType] = None) -> None:
        """
        Handle a SIGTERM interrupt.

        @param number: See handler specification in L{signal.signal}
        @param frame: See handler specification in L{signal.signal}
        """
        log.msg("Received SIGTERM, shutting down.")
        self.callFromThread(self.stop)
        self._exitSignal = number

    def disconnectAll(self) -> None:
        """Disconnect every reader, and writer in the system."""
        selectables = self.removeAll()
        for reader in selectables:
            log.callWithLogger(
                reader, reader.connectionLost, Failure(main.CONNECTION_LOST))

    def iterate(self, delay: float = 0.0) -> None:
        """
        See twisted.internet.interfaces.IReactorCore.iterate.
        """
        self.runUntilCurrent()
        self.doIteration(delay)

    def fireSystemEvent(self, eventType: str) -> None:
        """
        See twisted.internet.interfaces.IReactorCore.fireSystemEvent.
        """
        event = self._eventTriggers.get(eventType)
        if event is not None:
            event.fireEvent()

    def addSystemEventTrigger(
        self,
        phase: str,
        eventType: str,
        callable: Callable[..., Any],
        *args: object,
        **kwargs: object,
    ) -> _SystemEventID:
        """
        See twisted.internet.interfaces.IReactorCore.addSystemEventTrigger.
        """
        assert builtins.callable(callable), f"{callable} is not callable"
        if eventType not in self._eventTriggers:
            self._eventTriggers[eventType] = _ThreePhaseEvent()
        return _SystemEventID(
            (
                eventType,
                self._eventTriggers[eventType].addTrigger(
                    phase, callable, *args, **kwargs
                ),
            )
        )

    def removeSystemEventTrigger(self, triggerID: _SystemEventID) -> None:
        """
        See twisted.internet.interfaces.IReactorCore.removeSystemEventTrigger.
        """
        eventType, handle = triggerID
        self._eventTriggers[eventType].removeTrigger(handle)

    def callWhenRunning(
        self, callable: Callable[..., Any], *args: object, **kwargs: object
    ) -> Optional[_SystemEventID]:
        """
        See twisted.internet.interfaces.IReactorCore.callWhenRunning.
        """
        if self.running:
            callable(*args, **kwargs)
            return None
        else:
            return self.addSystemEventTrigger(
                "after", "startup", callable, *args, **kwargs)

    def startRunning(self) -> None:
        """
        Method called when reactor starts: do some initialization and fire
        startup events.

        Don't call this directly, call reactor.run() instead: it should take
        care of calling this.

        This method is somewhat misnamed.  The reactor will not necessarily
        be in the running state by the time this method returns.  The only
        guarantee is that it will be on its way to the running state.
        """
        if self._started:
            raise error.ReactorAlreadyRunning()
        if self._startedBefore:
            raise error.ReactorNotRestartable()
        self._started = True
        self._stopped = False
        if self._registerAsIOThread:
            threadable.registerAsIOThread()
        self.fireSystemEvent("startup")

    def _reallyStartRunning(self) -> None:
        """
        Method called to transition to the running state.  This should
        happen in the I{during startup} event trigger phase.
        """
        self.running = True

    def run(self) -> None:
        # IReactorCore.run
        raise NotImplementedError()

    # IReactorTime

    seconds = staticmethod(runtimeSeconds)

    def callLater(
        self, delay: float, callable: Callable[..., Any],
        *args: object, **kw: object
    ) -> DelayedCall:
        """
        See twisted.internet.interfaces.IReactorTime.callLater.
        """
        assert builtins.callable(callable), f"{callable} is not callable"
        assert delay >= 0, f"{delay} is not greater than or equal to 0 seconds"
        delayedCall = DelayedCall(
            self.seconds() + delay,
            callable,
            args,
            kw,
            self._cancelCallLater,
            self._moveCallLaterSooner,
            seconds=self.seconds,
        )
        self._newTimedCalls.append(delayedCall)
        return delayedCall

    def _moveCallLaterSooner(self, delayedCall: DelayedCall) -> None:
        # Linear time find: slow.
        heap = self._pendingTimedCalls
        try:
            pos = heap.index(delayedCall)

            # Move elt up the heap until it rests at the right place.
            elt = heap[pos]
            while pos != 0:
                parent = (pos - 1) // 2
                if heap[parent] <= elt:
                    break
                # move parent down
                heap[pos] = heap[parent]
                pos = parent
            heap[pos] = elt
        except ValueError:
            # element was not found in heap - oh well...
            pass

    def _cancelCallLater(self, delayedCall: DelayedCall) -> None:
        self._cancellations += 1

    def getDelayedCalls(self) -> Sequence[IDelayedCall]:
        """
        See L{twisted.internet.interfaces.IReactorTime.getDelayedCalls}
        """
        return [
            x
            for x in (self._pendingTimedCalls + self._newTimedCalls)
            if not x.cancelled
        ]

    def _insertNewDelayedCalls(self) -> None:
        for call in self._newTimedCalls:
            if call.cancelled:
                self._cancellations -= 1
            else:
                call.activate_delay()
                heappush(self._pendingTimedCalls, call)
        self._newTimedCalls = []

    def timeout(self) -> Optional[float]:
        """
        Determine the longest time the reactor may sleep (waiting on I/O
        notification, perhaps) before it must wake up to service a
        time-related event.

        @return: The maximum number of seconds the reactor may sleep.
        """
        # insert new delayed calls to make sure to include them in timeout
        # value
        self._insertNewDelayedCalls()

        if not self._pendingTimedCalls:
            return None

        delay = self._pendingTimedCalls[0].time - cast(float, self.seconds())

        # Pick a somewhat arbitrary maximum possible value for the timeout.
        # This value is 2 ** 31 / 1000, which is the number of seconds which
        # can be represented as an integer number of milliseconds in a
        # signed 32 bit integer.  This particular limit is imposed by the
        # epoll_wait(3) interface which accepts a timeout as a C "int" type
        # and treats it as representing a number of milliseconds.
        longest = 2147483

        # Don't let the delay be in the past (negative) or exceed a
        # plausible maximum (platform-imposed) interval.
        return max(0, min(longest, delay))

    def runUntilCurrent(self) -> None:
        """
        Run all pending timed calls.
        """
        if self.threadCallQueue:
            # Keep track of how many calls we actually make, as we're
            # making them, in case another call is added to the queue
            # while we're in this loop.
            count = 0
            total = len(self.threadCallQueue)
            for f, a, kw in self.threadCallQueue:
                try:
                    f(*a, **kw)
                except BaseException:
                    log.err()
                count += 1
                if count == total:
                    break
            del self.threadCallQueue[:count]
            if self.threadCallQueue:
                self.wakeUp()

        # insert new delayed calls now
        self._insertNewDelayedCalls()

        now = self.seconds()
        while self._pendingTimedCalls and (
            self._pendingTimedCalls[0].time <= now
        ):
            call = heappop(self._pendingTimedCalls)
            if call.cancelled:
                self._cancellations -= 1
                continue

            if call.delayed_time > 0.0:
                call.activate_delay()
                heappush(self._pendingTimedCalls, call)
                continue

            try:
                call.called = 1
                call.func(*call.args, **call.kw)
            except BaseException:
                log.deferr()
                if hasattr(call, "creator"):
                    e = "\n"
                    e += (
                        " C: previous exception occurred in "
                        + "a DelayedCall created here:\n"
                    )
                    e += " C:"
                    e += "".join(call.creator).rstrip().replace("\n", "\n C:")
                    e += "\n"
                    log.msg(e)

        if self._cancellations > 50 and self._cancellations > len(
            self._pendingTimedCalls
        ) >> 1:
            self._cancellations = 0
            self._pendingTimedCalls = [
                x for x in self._pendingTimedCalls if not x.cancelled
            ]
            heapify(self._pendingTimedCalls)

        if self._justStopped:
            self._justStopped = False
            self.fireSystemEvent("shutdown")

    # IReactorThreads
    if platform.supportsThreads():
        assert ThreadPool is not None

        threadpool = None
        # ID of the trigger starting the threadpool
        _threadpoolStartupID = None
        # ID of the trigger stopping the threadpool
        threadpoolShutdownID = None

        def _initThreads(self) -> None:
            self.installNameResolver(_GAIResolver(self, self.getThreadPool))
            self.usingThreads = True

        # `IReactorFromThreads` defines the first named argument as
        # `callable: Callable[..., Any]` but this defines it as `f`
        # really both should be defined using py3.8 positional only
        def callFromThread(  # type: ignore[override]
            self, f: Callable[..., Any], *args: object, **kwargs: object
        ) -> None:
            """
            See
            L{twisted.internet.interfaces.IReactorFromThreads.callFromThread}.
            """
            assert callable(f), f"{f} is not callable"
            # lists are thread-safe in CPython, but not in Jython
            # this is probably a bug in Jython, but until fixed this code
            # won't work in Jython.
            self.threadCallQueue.append((f, args, kwargs))
            self.wakeUp()

        def _initThreadPool(self) -> None:
            """
            Create the threadpool accessible with callFromThread.
            """
            self.threadpool = ThreadPool(0, 10, "twisted.internet.reactor")
            self._threadpoolStartupID = self.callWhenRunning(
                self.threadpool.start)
            self.threadpoolShutdownID = self.addSystemEventTrigger(
                "during", "shutdown", self._stopThreadPool)

        def _uninstallHandler(self) -> None:
            pass

        def _stopThreadPool(self) -> None:
            """
            Stop the reactor threadpool.  This method is only valid if there
            is currently a threadpool (created by L{_initThreadPool}).  It
            is not intended to be called directly; instead, it will be
            called by a shutdown trigger created in L{_initThreadPool}.
            """
            triggers = [self._threadpoolStartupID, self.threadpoolShutdownID]
            for trigger in filter(None, triggers):
                try:
                    self.removeSystemEventTrigger(trigger)
                except ValueError:
                    pass
            self._threadpoolStartupID = None
            self.threadpoolShutdownID = None
            assert self.threadpool is not None
            self.threadpool.stop()
            self.threadpool = None

        def getThreadPool(self) -> ThreadPool:
            """
            See L{twisted.internet.interfaces.IReactorThreads.getThreadPool}.
            """
            if self.threadpool is None:
                self._initThreadPool()
                assert self.threadpool is not None
            return self.threadpool

        # `IReactorInThreads` defines the first named argument as
        # `callable: Callable[..., Any]` but this defines it as `_callable`
        # really both should be defined using py3.8 positional only
        def callInThread(  # type: ignore[override]
            self, _callable: Callable[..., Any],
            *args: object, **kwargs: object
        ) -> None:
            """
            See L{twisted.internet.interfaces.IReactorInThreads.callInThread}.
            """
            self.getThreadPool().callInThread(_callable, *args, **kwargs)

        def suggestThreadPoolSize(self, size: int) -> None:
            """
            See
            L{twisted.internet.interfaces.IReactorThreads.suggestThreadPoolSize}.
            """
            self.getThreadPool().adjustPoolsize(maxthreads=size)

    else:
        # This is for signal handlers.
        def callFromThread(
            self, f: Callable[..., Any], *args: object, **kwargs: object
        ) -> None:
            assert callable(f), f"{f} is not callable"
            # See comment in the other callFromThread implementation.
            self.threadCallQueue.append((f, args, kwargs))
class pylabsTaskletRunner(TaskletRunner):
    def __init__(self, engine, threadpoolsize=10):
        self.engine = engine
        # Job queue
        self._queue = Queue.Queue()
        # Threadpool
        self._runners = list()
        self._threadpool = None

        reactor.addSystemEventTrigger('after', 'startup',
                                      self.start, threadpoolsize)
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown)

    def start(self, threadpoolsize):
        self._threadpool = ThreadPool(minthreads=threadpoolsize,
                                      maxthreads=threadpoolsize + 1)
        # Set up threadpool
        q.logger.log('[PMTASKLETS] Constructing taskletserver threadpool', 6)
        self._threadpool.start()

        for i in xrange(threadpoolsize):
            runner = TaskletRunnerThread(self._queue)
            self._runners.append(runner)
            self._threadpool.callInThread(runner.run)

        self._running = True

    def queue(self, params, author=None, name=None, tags=None, priority=-1,
              logname=None):
        author = author or '*'
        name = name or '*'
        tags = tags or list()
        priority = priority if priority > -1 else -1

        q.logger.log('[PMTASKLETS] Queue: params=%s, author=%s, name=%s, '
                     'tags=%s, priority=%d' %
                     (params, author, name, tags, priority), 4)

        # Wrap the tasklet executor methods so the appname (for logging) is
        # set correctly
        def logwrapper(func):
            @functools.wraps(func)
            def _wrapped(*args, **kwargs):
                import pylabs
                oldappname = pylabs.q.application.appname
                if logname:
                    pylabs.q.application.appname = \
                        'applicationserver:pmtasklets:%s' % logname
                else:
                    pylabs.q.application.appname = \
                        'applicationserver:pmtasklets'

                try:
                    ret = func(*args, **kwargs)
                finally:
                    pylabs.q.application.appname = oldappname

                return ret

            return _wrapped

        execute_args = {
            'author': author,
            'name': name,
            'tags': tags,
            'priority': priority,
            'params': params,
            'wrapper': logwrapper,
        }

        # Append list of tasklet methods to run to the queue
        self._queue.put((self.engine, execute_args, ))

    def shutdown(self):
        q.logger.log('Shutting down tasklet runner', 5)
        self._running = False

        # Tell all threads to stop running
        for runner in self._runners:
            runner.keep_running = False

        self._threadpool.stop()

    @classmethod
    def install(cls):
        log.msg('Installing pylabs tasklet runner')
        import applicationserver
        applicationserver.TaskletRunner = cls
class UpdateTest(unittest.TestCase):
    def setUp(self):
        self.db = MockStore()
        GSM.registerUtility(MockZStorm(self.db))
        self.tp = ThreadPool(0, 2)
        self.sm = MockServerManager(reactor, SERVERS)
        self.updater = Updater(Transactor(self.tp), self.sm)
        self.tp.start()

    def tearDown(self):
        self.tp.stop()

    def _H(self, **k):
        h = Home()
        for k, v in k.iteritems():
            setattr(h, k, v)
        self.db.add(h)
        return h

    def _S(self, **k):
        h = HomeState()
        for k, v in k.iteritems():
            setattr(h, k, v)
        self.db.add(h)
        return h

    @defer.inlineCallbacks
    def test_creation(self):
        home = self._H(
            server_name="foo",
            path="/test",
        )
        fooserv = yield self.sm.getServer("foo")
        self.assertNotIn((home.id, "foo"), self.db.objects[HomeState])
        self.assertNotIn("/data/homes/test", fooserv.known_paths)

        done = yield self.updater.updateOne(home, [])

        self.assertIn((home.id, "foo"), self.db.objects[HomeState])
        self.assertIn("/data/homes/test", self.sm.servers["foo"].known_paths)

    @defer.inlineCallbacks
    def test_move(self):
        home = self._H(
            server_name="foo",
            path="/test",
        )
        status = self._S(
            id=home.id,
            server_name="foo",
            path="/old_dir",
            status=HomeState.ACTIVE,
        )
        fooserv = yield self.sm.getServer("foo")
        fooserv.known_paths.add("/data/homes/old_dir")

        done = yield self.updater.updateOne(home, [status])

        self.assertIn((home.id, "foo"), self.db.objects[HomeState])
        status = self.db.objects[HomeState][(home.id, "foo")]
        self.assertEquals(status.path, home.path)
        self.assertIn("/data/homes/test", self.sm.servers["foo"].known_paths)
        self.assertNotIn("/data/homes/old_dir",
                         self.sm.servers["foo"].known_paths)

    @defer.inlineCallbacks
    def test_archive(self):
        home = self._H(
            server_name="foo",
            path=None,
        )
        status = self._S(
            id=home.id,
            server_name="foo",
            path="/foo",
            status=HomeState.ACTIVE,
        )
        fooserv = yield self.sm.getServer("foo")
        fooserv.known_paths.add("/data/homes/foo")
        self.assertNotIn("/data/archive/foo", fooserv.known_paths)

        done = yield self.updater.updateOne(home, [status])

        status = self.db.objects[HomeState][(home.id, "foo")]
        self.assertEquals(status.status, HomeState.ARCHIVED)
        self.assertIn("/data/archive/foo", fooserv.known_paths)
        self.assertNotIn("/data/homes/foo", fooserv.known_paths)

    @defer.inlineCallbacks
    def test_sync(self):
        home = self._H(
            server_name="foo",
            path="/foo",
        )
        status = self._S(
            id=home.id,
            server_name="bar",
            path="/bar",
            status=HomeState.ACTIVE,
        )
        fooserv = yield self.sm.getServer("foo")
        barserv = yield self.sm.getServer("bar")
        barserv.known_paths.add("/data/homes/bar")
        self.assertNotIn("/data/homes/foo", fooserv.known_paths)

        done = yield self.updater.updateOne(home, [status])

        status = self.db.objects[HomeState][(home.id, "foo")]
        self.assertEquals(status.server_name, "foo")
        self.assertEquals(status.path, "/foo")
        self.assertEquals(status.status, HomeState.ACTIVE)
        self.assertNotIn((home.id, "bar"), self.db.objects[HomeState])
        self.assertIn("/data/homes/foo", fooserv.known_paths)
        self.assertNotIn("/data/homes/bar", barserv.known_paths)

    @defer.inlineCallbacks
    def test_sync_missing_source(self):
        home = self._H(
            server_name="foo",
            path="/foo",
        )
        status = self._S(
            id=home.id,
            server_name="bar",
            path="/bar",
            status=HomeState.ACTIVE,
        )
        fooserv = yield self.sm.getServer("foo")
        barserv = yield self.sm.getServer("bar")
        self.assertNotIn("/data/homes/foo", fooserv.known_paths)

        done = yield self.updater.updateOne(home, [status])

        status = self.db.objects[HomeState][(home.id, "foo")]
        self.assertEquals(status.server_name, "foo")
        self.assertEquals(status.path, "/foo")
        self.assertEquals(status.status, HomeState.ACTIVE)
        self.assertNotIn((home.id, "bar"), self.db.objects[HomeState])
        self.assertIn("/data/homes/foo", fooserv.known_paths)
        self.assertNotIn("/data/homes/bar", barserv.known_paths)

    @defer.inlineCallbacks
    def test_load_remote_archive(self):
        home = self._H(
            server_name="foo",
            path="/foo",
        )
        status = self._S(
            id=home.id,
            server_name="bar",
            path="/baz",
            status=HomeState.ARCHIVED,
        )
        fooserv = yield self.sm.getServer("foo")
        barserv = yield self.sm.getServer("bar")
        barserv.known_paths.add("/data/archive/baz")
        self.assertNotIn("/data/homes/foo", fooserv.known_paths)

        done = yield self.updater.updateOne(home, [status])

        self.assertIn("/data/homes/foo", fooserv.known_paths)
        self.assertNotIn("/data/homes/baz", barserv.known_paths)
        self.assertNotIn((home.id, "bar"), self.db.objects[HomeState])
        newstate = self.db.objects[HomeState][(home.id, "foo")]
        self.assertEquals(newstate.path, "/foo")
        self.assertEquals(newstate.status, HomeState.ACTIVE)
        self.assertEquals(newstate.server_name, "foo")

    @defer.inlineCallbacks
    def test_load_local_archive(self):
        home = self._H(
            server_name="bar",
            path="/foo",
        )
        status = self._S(
            id=home.id,
            server_name="bar",
            path="/baz",
            status=HomeState.ARCHIVED,
        )
        barserv = yield self.sm.getServer("bar")
        barserv.known_paths.add("/data/archive/baz")
        self.assertNotIn("/data/homes/foo", barserv.known_paths)

        done = yield self.updater.updateOne(home, [status])

        self.assertIn("/data/homes/foo", barserv.known_paths)
        self.assertNotIn("/data/homes/baz", barserv.known_paths)
        newstate = self.db.objects[HomeState][(home.id, "bar")]
        self.assertEquals(newstate.path, "/foo")
        self.assertEquals(newstate.status, HomeState.ACTIVE)
        self.assertEquals(newstate.server_name, "bar")

    @defer.inlineCallbacks
    def test_clean_others(self):
        home = self._H(
            server_name="bar",
            path="/bar",
        )
        s1 = self._S(
            id=home.id,
            server_name="bar",
            path="/bar",
            status=HomeState.ACTIVE,
        )
        s2 = self._S(
            id=home.id,
            server_name="foo",
            path="/bar",
            status=HomeState.ACTIVE,
        )
        fooserv = yield self.sm.getServer("foo")
        barserv = yield self.sm.getServer("bar")
        fooserv.known_paths.add("/data/homes/bar")
        barserv.known_paths.add("/data/homes/bar")

        done = yield self.updater.updateOne(home, [s1, s2])

        self.assertIn("/data/homes/bar", barserv.known_paths)
        self.assertNotIn("/data/homes/bar", fooserv.known_paths)
        self.assertIn((home.id, "bar"), self.db.objects[HomeState])
        self.assertNotIn((home.id, "foo"), self.db.objects[HomeState])

    @defer.inlineCallbacks
    def test_unknown_server_home(self):
        home = self._H(
            server_name="baz",
            path="/bar",
        )
        s1 = self._S(
            id=home.id,
            server_name="bar",
            path="/bar",
            status=HomeState.ACTIVE,
        )
        barserv = yield self.sm.getServer("bar")
        barserv.known_paths.add("/data/homes/bar")

        done = yield self.updater.updateOne(
            home, [s1]).addErrback(lambda err: err.trap(defer.FirstError))

        self.assertIn("/data/homes/bar", barserv.known_paths)
        self.assertIn((home.id, "bar"), self.db.objects[HomeState])
        self.assertNotIn((home.id, "baz"), self.db.objects[HomeState])

    @defer.inlineCallbacks
    def test_unknown_server_source(self):
        home = self._H(
            server_name="bar",
            path="/bar",
        )
        s1 = self._S(
            id=home.id,
            server_name="baz",
            path="/bar",
            status=HomeState.ARCHIVED,
        )
        barserv = yield self.sm.getServer("bar")

        done = yield self.updater.updateOne(
            home, [s1]).addErrback(lambda err: err.trap(defer.FirstError))

        self.assertNotIn("/data/homes/bar", barserv.known_paths)
        self.assertNotIn((home.id, "bar"), self.db.objects[HomeState])
        self.assertIn((home.id, "baz"), self.db.objects[HomeState])
class DirectoryService(BaseDirectoryService):
    """
    LDAP directory service.
    """
    log = Logger()

    fieldName = ConstantsContainer((BaseFieldName, FieldName))

    recordType = ConstantsContainer((
        BaseRecordType.user, BaseRecordType.group,
    ))

    def __init__(
        self,
        url,
        baseDN,
        credentials=None,
        timeout=None,
        tlsCACertificateFile=None,
        tlsCACertificateDirectory=None,
        useTLS=False,
        fieldNameToAttributesMap=DEFAULT_FIELDNAME_ATTRIBUTE_MAP,
        recordTypeSchemas=DEFAULT_RECORDTYPE_SCHEMAS,
        extraFilters=None,
        ownThreadpool=True,
        threadPoolMax=10,
        authConnectionMax=5,
        queryConnectionMax=5,
        tries=3,
        warningThresholdSeconds=5,
        _debug=False,
    ):
        """
        @param url: The URL of the LDAP server to connect to.
        @type url: L{unicode}

        @param baseDN: The base DN for queries.
        @type baseDN: L{unicode}

        @param credentials: The credentials to use to authenticate with the
            LDAP server.
        @type credentials: L{IUsernamePassword}

        @param timeout: A timeout, in seconds, for LDAP queries.
        @type timeout: number

        @param tlsCACertificateFile: ...
        @type tlsCACertificateFile: L{FilePath}

        @param tlsCACertificateDirectory: ...
        @type tlsCACertificateDirectory: L{FilePath}

        @param useTLS: Enable the use of TLS.
        @type useTLS: L{bool}

        @param fieldNameToAttributesMap: A mapping of field names to LDAP
            attribute names.
        @type fieldNameToAttributesMap: mapping with L{NamedConstant} keys
            and sequence of L{unicode} values

        @param recordTypeSchemas: Schema information for record types.
        @type recordTypeSchemas: mapping from L{NamedConstant} to
            L{RecordTypeSchema}

        @param extraFilters: A dict (keyed off recordType) of extra filter
            fragments to AND in to any generated queries.
        @type extraFilters: L{dicts} of L{unicode}
        """
        self.url = url
        self._baseDN = baseDN
        self._credentials = credentials
        self._timeout = timeout
        self._extraFilters = extraFilters
        self._tries = tries
        self._warningThresholdSeconds = warningThresholdSeconds

        if tlsCACertificateFile is None:
            self._tlsCACertificateFile = None
        else:
            self._tlsCACertificateFile = tlsCACertificateFile.path

        if tlsCACertificateDirectory is None:
            self._tlsCACertificateDirectory = None
        else:
            self._tlsCACertificateDirectory = tlsCACertificateDirectory.path

        self._useTLS = useTLS

        if _debug:
            self._debug = 255
        else:
            self._debug = None

        if self.fieldName.recordType in fieldNameToAttributesMap:
            raise TypeError("Record type field may not be mapped")

        if BaseFieldName.uid not in fieldNameToAttributesMap:
            raise DirectoryConfigurationError("Mapping for uid required")

        self._fieldNameToAttributesMap = fieldNameToAttributesMap

        self._attributeToFieldNameMap = {}
        for name, attributes in fieldNameToAttributesMap.iteritems():
            for attribute in attributes:
                if ":" in attribute:
                    attribute, ignored = attribute.split(":", 1)
                self._attributeToFieldNameMap.setdefault(
                    attribute, []).append(name)

        self._recordTypeSchemas = recordTypeSchemas

        attributesToFetch = set()
        for attributes in fieldNameToAttributesMap.values():
            for attribute in attributes:
                if ":" in attribute:
                    attribute, ignored = attribute.split(":", 1)
                attributesToFetch.add(attribute.encode("utf-8"))
        self._attributesToFetch = list(attributesToFetch)

        # Threaded connection pool.
        # The connection size limit here is the size for connections doing
        # queries.
        # There will also be one-off connections for authentications which
        # also run in their own threads.
        # Thus the threadpool max ought to be larger than the connection max
        # to allow for both pooled query connections and one-off auth-only
        # connections.
        self.ownThreadpool = ownThreadpool
        if self.ownThreadpool:
            self.threadpool = ThreadPool(
                minthreads=1, maxthreads=threadPoolMax,
                name="LDAPDirectoryService",
            )
        else:
            # Use the default threadpool but adjust its size to fit our needs
            self.threadpool = reactor.getThreadPool()
            self.threadpool.adjustPoolsize(
                max(threadPoolMax, self.threadpool.max))

        # Separate pools for LDAP queries and LDAP binds.
        self.connectionPools = {
            "query": ConnectionPool(
                "query", self, credentials, queryConnectionMax),
            "auth": ConnectionPool("auth", self, None, authConnectionMax),
        }
        self.poolStats = collections.defaultdict(int)

        reactor.callWhenRunning(self.start)
        reactor.addSystemEventTrigger("during", "shutdown", self.stop)

    def getPreferredRecordTypesOrder(self):
        # Not doing this in __init__() because we get our recordTypes
        # assigned later
        if not hasattr(self, "_preferredRecordTypesOrder"):
            self._preferredRecordTypesOrder = []
            for recordTypeName in [
                "user", "location", "resource", "group", "address"
            ]:
                try:
                    recordType = self.recordType.lookupByName(recordTypeName)
                    self._preferredRecordTypesOrder.append(recordType)
                except ValueError:
                    pass

        return self._preferredRecordTypesOrder

    def start(self):
        """
        Start up this service.  Initialize the threadpool (if we own it).
        """
        if self.ownThreadpool:
            self.threadpool.start()

    def stop(self):
        """
        Stop the service.  Stop the threadpool if we own it and do other
        clean-up.
        """
        if self.ownThreadpool:
            self.threadpool.stop()

        # FIXME: we should probably also close the pool of active
        # connections too.

    @property
    def realmName(self):
        return u"{self.url}".format(self=self)

    class Connection(object):
        """
        ContextManager object for getting a connection from the pool.  On
        exit the connection will be put back in the pool if no exception was
        raised.  Otherwise, the connection will be removed from the active
        connection list, which will allow a new "clean" connection to be
        created later if needed.
        """

        def __init__(self, ds, poolName):
            self.pool = ds.connectionPools[poolName]

        def __enter__(self):
            self.connection = self.pool.getConnection()
            return self.connection

        def __exit__(self, exc_type, exc_val, exc_tb):
            if exc_type is None:
                self.pool.returnConnection(self.connection)
                return True
            else:
                self.pool.failedConnection(self.connection)
                return False

    def _authenticateUsernamePassword(self, dn, password):
        """
        Open a secondary connection to the LDAP server and try binding to
        it with the given credentials.

        @returns: True if the password is correct, False otherwise
        @rtype: deferred C{bool}

        @raises: L{LDAPConnectionError} if unable to connect.
        """
        d = deferToThreadPool(
            reactor, self.threadpool,
            self._authenticateUsernamePassword_inThread, dn, password)
        qsize = self.threadpool._queue.qsize()
        if qsize > 0:
            self.log.error(
                "LDAP thread pool overflowing: {qsize}", qsize=qsize)
            self.poolStats["connection-thread-blocked"] += 1
        return d

    def _authenticateUsernamePassword_inThread(self, dn, password,
                                               testStats=None):
        """
        Open a secondary connection to the LDAP server and try binding to
        it with the given credentials.  This method is always called in a
        thread.

        @returns: True if the password is correct, False otherwise
        @rtype: C{bool}

        @raises: L{LDAPConnectionError} if unable to connect.
        """
        self.log.debug("Authenticating {dn}", dn=dn)

        # Retry if we get ldap.SERVER_DOWN
        for retryNumber in xrange(self._tries):

            # For unit tests, a bit of instrumentation so we can examine
            # retryNumber:
            if testStats is not None:
                testStats["retryNumber"] = retryNumber

            try:
                with DirectoryService.Connection(self, "auth") as connection:
                    try:
                        # During testing, allow an exception to be raised.
                        # Note: I tried to use patch() to accomplish this
                        # but that seemed to create a race condition in the
                        # restoration of the patched value and that would
                        # cause unit tests to occasionally fail.
                        if testStats is not None:
                            if "raise" in testStats:
                                raise testStats["raise"]

                        connection.simple_bind_s(dn, password)
                        self.log.debug("Authenticated {dn}", dn=dn)
                        return True
                    except (
                        ldap.INAPPROPRIATE_AUTH,
                        ldap.INVALID_CREDENTIALS,
                        ldap.INVALID_DN_SYNTAX,
                    ):
                        self.log.debug("Unable to authenticate {dn}", dn=dn)
                        return False
                    except ldap.CONSTRAINT_VIOLATION:
                        self.log.info("Account locked {dn}", dn=dn)
                        return False
                    except ldap.SERVER_DOWN as e:
                        # Catch this below for retry
                        raise e
                    except Exception as e:
                        self.log.error(
                            "Unexpected error {error} trying to "
                            "authenticate {dn}", error=str(e), dn=dn)
                        return False
                    else:
                        # Do an unauthenticated bind on this connection at
                        # the end in case the server limits the number of
                        # concurrent auths by a given user.
                        connection.simple_bind_s("", "")
            except ldap.SERVER_DOWN as e:
                self.log.error("LDAP server unavailable")
                if retryNumber + 1 == self._tries:
                    # We've hit SERVER_DOWN self._tries times, giving up.
                    raise LDAPQueryError("LDAP server down", e)
                else:
                    self.log.error("LDAP connection failure; retrying...")

    def _recordsFromQueryString(self, queryString, recordTypes=None,
                                limitResults=None, timeoutSeconds=None):
        d = deferToThreadPool(
            reactor, self.threadpool,
            self._recordsFromQueryString_inThread,
            queryString, recordTypes,
            limitResults=limitResults,
            timeoutSeconds=timeoutSeconds)
        qsize = self.threadpool._queue.qsize()
        if qsize > 0:
            self.log.error(
                "LDAP thread pool overflowing: {qsize}", qsize=qsize)
            self.poolStats["connection-thread-blocked"] += 1
        return d

    def _addExtraFilter(self, recordType, queryString):
        if self._extraFilters and self._extraFilters.get(recordType, ""):
            queryString = u"(&{extra}{query})".format(
                extra=self._extraFilters[recordType], query=queryString)
        return queryString

    def _recordsFromQueryString_inThread(self, queryString, recordTypes=None,
                                         limitResults=None,
                                         timeoutSeconds=None,
                                         testStats=None):
        # This method is always called in a thread.

        if recordTypes is None:
            # recordTypes = list(self.recordTypes())

            # Quick hack to optimize the order in which we query by record
            # type:
            recordTypes = self.getPreferredRecordTypesOrder()

        # Retry if we get ldap.SERVER_DOWN
        for retryNumber in xrange(self._tries):

            # For unit tests, a bit of instrumentation so we can examine
            # retryNumber:
            if testStats is not None:
                testStats["retryNumber"] = retryNumber

            records = []

            try:
                with DirectoryService.Connection(self, "query") as connection:

                    for recordType in recordTypes:

                        if limitResults is not None:
                            if limitResults < 1:
                                break

                        try:
                            rdn = self._recordTypeSchemas[
                                recordType].relativeDN
                        except KeyError:
                            # Skip this unknown record type
                            continue

                        rdn = (
                            ldap.dn.str2dn(rdn.lower()) +
                            ldap.dn.str2dn(self._baseDN.lower())
                        )
                        filteredQuery = self._addExtraFilter(
                            recordType, queryString)
                        self.log.debug(
                            "Performing LDAP query: "
                            "{rdn} {query} {recordType}{limit}{timeout}",
                            rdn=rdn,
                            query=filteredQuery.encode("utf-8"),
                            recordType=recordType,
                            limit=(
                                " limit={}".format(limitResults)
                                if limitResults else ""
                            ),
                            timeout=(
                                " timeout={}".format(timeoutSeconds)
                                if timeoutSeconds else ""
                            ),
                        )
                        try:
                            startTime = time.time()
                            s = ldap.async.List(connection)
                            s.startSearch(
                                ldap.dn.dn2str(rdn),
                                ldap.SCOPE_SUBTREE,
                                filteredQuery.encode("utf-8"),
                                attrList=self._attributesToFetch,
                                timeout=(
                                    timeoutSeconds
                                    if timeoutSeconds else -1
                                ),
                                sizelimit=(
                                    limitResults
                                    if limitResults else 0
                                ),
                            )
                            s.processResults()
                        except ldap.SIZELIMIT_EXCEEDED as e:
                            self.log.debug(
                                "LDAP result limit exceeded: {limit}",
                                limit=limitResults,
                            )
                        except ldap.TIMELIMIT_EXCEEDED as e:
                            self.log.warn(
                                "LDAP timeout exceeded: {timeout} seconds",
                                timeout=timeoutSeconds,
                            )
                        except ldap.FILTER_ERROR as e:
                            self.log.error(
                                "Unable to perform query {query!r}: {err}",
                                query=queryString, err=e)
                            raise LDAPQueryError("Unable to perform query", e)
                        except ldap.NO_SUCH_OBJECT as e:
                            # self.log.warn(
                            #     "RDN {rdn} does not exist, skipping",
                            #     rdn=rdn
                            # )
                            continue
                        except ldap.INVALID_SYNTAX as e:
                            self.log.error(
                                "LDAP invalid syntax {query!r}: {err}",
                                query=queryString, err=e)
                            continue
                        except ldap.SERVER_DOWN as e:
                            # Catch this below for retry
                            raise e
                        except Exception as e:
                            self.log.error(
                                "LDAP error {query!r}: {err}",
                                query=queryString, err=e)
                            raise LDAPQueryError("Unable to perform query", e)

                        reply = [
                            resultItem
                            for _ignore_resultType, resultItem
                            in s.allResults
                        ]

                        totalTime = time.time() - startTime
                        if totalTime > self._warningThresholdSeconds:
                            if filteredQuery and len(filteredQuery) > 500:
                                filteredQuery = "%s..." % (
                                    filteredQuery[:500],
                                )
                            self.log.error(
                                "LDAP query exceeded threshold: "
                                "{totalTime:.2f} seconds for {rdn} {query} "
                                "(#results={resultCount})",
                                totalTime=totalTime, rdn=rdn,
                                query=filteredQuery, resultCount=len(reply))

                        newRecords = self._recordsFromReply(
                            reply, recordType=recordType)

                        self.log.debug(
                            "Records from LDAP query "
                            "({rdn} {query} {recordType}): {count}",
                            rdn=rdn, query=queryString,
                            recordType=recordType, count=len(newRecords))

                        if limitResults is not None:
                            limitResults = limitResults - len(newRecords)

                        records.extend(newRecords)

            except ldap.SERVER_DOWN as e:
                self.log.error("LDAP server unavailable")
                if retryNumber + 1 == self._tries:
                    # We've hit SERVER_DOWN self._tries times, giving up.
                    raise LDAPQueryError("LDAP server down", e)
                else:
                    self.log.error("LDAP connection failure; retrying...")
            else:
                # Only retry if we got ldap.SERVER_DOWN, otherwise break out
                # of loop.
                break

        self.log.debug(
            "LDAP result count ({query}): {count}",
            query=queryString, count=len(records))

        return records

    def _recordWithDN(self, dn):
        d = deferToThreadPool(
            reactor, self.threadpool, self._recordWithDN_inThread, dn)
        qsize = self.threadpool._queue.qsize()
        if qsize > 0:
            self.log.error(
                "LDAP thread pool overflowing: {qsize}", qsize=qsize)
            self.poolStats["connection-thread-blocked"] += 1
        return d

    def _recordWithDN_inThread(self, dn, testStats=None):
        """
        @param dn: The DN of the record to search for
        @type dn: C{str}
        """
        # This method is always called in a thread.

        records = []

        # Retry if we get ldap.SERVER_DOWN
        for retryNumber in xrange(self._tries):

            # For unit tests, a bit of instrumentation:
            if testStats is not None:
                testStats["retryNumber"] = retryNumber

            try:
                with DirectoryService.Connection(self, "query") as connection:
                    self.log.debug("Performing LDAP DN query: {dn}", dn=dn)

                    try:
                        reply = connection.search_s(
                            dn,
                            ldap.SCOPE_SUBTREE,
                            "(objectClass=*)",
                            attrlist=self._attributesToFetch
                        )
                        records = self._recordsFromReply(reply)
                    except ldap.NO_SUCH_OBJECT:
                        records = []
                    except ldap.INVALID_DN_SYNTAX:
                        self.log.warn("Invalid LDAP DN syntax: '{dn}'", dn=dn)
                        records = []
            except ldap.SERVER_DOWN as e:
                self.log.error("LDAP server unavailable")
                if retryNumber + 1 == self._tries:
                    # We've hit SERVER_DOWN self._tries times, giving up
                    raise LDAPQueryError("LDAP server down", e)
                else:
                    self.log.error("LDAP connection failure; retrying...")
            else:
                # Only retry if we got ldap.SERVER_DOWN, otherwise break out
                # of loop
                break

        if len(records):
            return records[0]
        else:
            return None

    def _recordsFromReply(self, reply, recordType=None):
        records = []

        for dn, recordData in reply:

            # Determine the record type
            if recordType is None:
                recordType = recordTypeForDN(
                    self._baseDN, self._recordTypeSchemas, dn)

            if recordType is None:
                recordType = recordTypeForRecordData(
                    self._recordTypeSchemas, recordData)

            if recordType is None:
                self.log.debug(
                    "Ignoring LDAP record data; unable to determine record "
                    "type: {recordData!r}",
                    recordData=recordData,
                )
                continue

            # Populate a fields dictionary
            fields = {}

            for fieldName, attributeRules in (
                self._fieldNameToAttributesMap.iteritems()
            ):
                valueType = self.fieldName.valueType(fieldName)

                for attributeRule in attributeRules:
                    attributeName = attributeRule.split(":")[0]
                    if attributeName in recordData:
                        values = recordData[attributeName]

                        if valueType in (unicode, UUID):
                            if not isinstance(values, list):
                                values = [values]

                            if valueType is unicode:
                                newValues = []
                                for v in values:
                                    if isinstance(v, unicode):
                                        # because the ldap unit test produces
                                        # unicode values (?)
                                        newValues.append(v)
                                    else:
                                        try:
                                            newValues.append(
                                                unicode(v, "utf-8"))
                                        except UnicodeDecodeError:
                                            # Log and re-raise so the net
                                            # behavior is as before during
                                            # debugging
                                            self.log.error(
                                                "Received non-UTF-8 bytes "
                                                "from LDAP for {dn} in "
                                                "{name}",
                                                dn=dn, name=fieldName)
                                            raise
                            else:
                                try:
                                    newValues = [
                                        valueType(v) for v in values]
                                except Exception as e:
                                    self.log.warn(
                                        "Can't parse value {name} {values} "
                                        "({error})",
                                        name=fieldName, values=values,
                                        error=str(e))
                                    continue

                            if self.fieldName.isMultiValue(fieldName):
                                if fieldName in fields:
                                    fields[fieldName].extend(newValues)
                                else:
                                    fields[fieldName] = newValues
                            else:
                                # First one in the list wins
                                if fieldName not in fields:
                                    fields[fieldName] = newValues[0]

                        elif valueType is bool:
                            if not isinstance(values, list):
                                values = [values]
                            if ":" in attributeRule:
                                ignored, trueValue = attributeRule.split(":")
                            else:
                                trueValue = "true"

                            for value in values:
                                if value == trueValue:
                                    fields[fieldName] = True
                                    break
                            else:
                                fields[fieldName] = False

                        elif issubclass(valueType, Names):
                            if not isinstance(values, list):
                                values = [values]

                            _ignore_attribute, attributeValue, fieldValue = (
                                attributeRule.split(":"))

                            for value in values:
                                if value == attributeValue:
                                    # convert to a constant
                                    try:
                                        fieldValue = (
                                            valueType.lookupByName(
                                                fieldValue))
                                        fields[fieldName] = fieldValue
                                    except ValueError:
                                        pass
                                    break

                        else:
                            raise LDAPConfigurationError(
                                "Unknown value type {0} for field {1}".format(
                                    valueType, fieldName))

            # Skip any results missing the uid, which is a required field
            if self.fieldName.uid not in fields:
                continue

            # Set record type and dn fields
            fields[self.fieldName.recordType] = recordType
            fields[self.fieldName.dn] = dn.decode("utf-8")

            # Make a record object from fields.
            record = DirectoryRecord(self, fields)
            records.append(record)

        # self.log.debug("LDAP results: {records}", records=records)

        return records
def test_contemporaneous_requests():
    '''
    We're going to create two request-response cycles here:

    Cycle 1 will begin.
    Cycle 2 will begin.
    Cycle 2 will return.
    Cycle 1 will return.

    This way, we can prove that the crosstown_traffic created by cycle 1
    is not resolved by the return of cycle 2.
    '''
    tp = ThreadPool(maxthreads=20)
    tp.start()

    log.debug("\n\nStarting the two stream stuff.")

    request1 = DummyRequest([b'r1'])
    request1.isSecure = lambda: False
    request1.content = b"Nothing really here."  # bytes, matching request2
    request1.requestHeaders.addRawHeader('llamas', 'dingo')
    request1.client = IPv4Address("TCP", b"50.0.50.0", 5000)

    hr = HendrixWSGIResource(reactor, tp, wsgi_application)
    yield deferToThreadPool(reactor, tp, hr.render, request1)

    request2 = DummyRequest([b'r2'])
    request2.isSecure = lambda: False
    request2.content = b"Nothing really here."
    request2.requestHeaders.addRawHeader('llamas', 'dingo')
    request2.client = IPv4Address("TCP", b"100.0.50.0", 5000)

    yield deferToThreadPool(reactor, tp, hr.render, request2)

    assert nameSpace.async_task_was_run
    tp.stop()
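# A stripped-down sketch of the dispatch pattern this test exercises: a
# shared ThreadPool renders each request off the reactor thread through
# deferToThreadPool, and the caller yields each Deferred so the two cycles
# complete in a known order. render_request is a hypothetical stand-in for
# HendrixWSGIResource.render, not part of the test above.
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool

def render_request(label):
    return 'rendered %s' % label  # blocking render work would happen here

@inlineCallbacks
def drive_two_cycles():
    tp = ThreadPool(maxthreads=20)
    tp.start()
    try:
        first = yield deferToThreadPool(reactor, tp, render_request, 'r1')
        second = yield deferToThreadPool(reactor, tp, render_request, 'r2')
        assert (first, second) == ('rendered r1', 'rendered r2')
    finally:
        tp.stop()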
class DBScheduler(object):
    '''
    Database operation scheduler

    We will have one or more read threads and only one write thread.
    '''
    log = logging.getLogger('raceday.DBScheduler')

    def __init__(self):
        from twisted.internet import reactor  # imported here, inside __init__
        self.reactor = reactor

        engine = get_engine()
        # create_schema(engine)

        self.read_pool = ThreadPool(
            minthreads=1, maxthreads=16, name="ReadPool")
        self.write_pool = ThreadPool(
            minthreads=1, maxthreads=1, name="WritePool")
        self.read_pool.start()
        self.write_pool.start()

        self.signals = SignalManager(dispatcher.Any).connect(
            self.stop_threadpools, spider_closed)

        self.counters = defaultdict(lambda: Counter())
        self.cache = defaultdict(lambda: dict())

        self.write_queue = Queue()
        self.writelock = False  # Write queue mutex

    def stop_threadpools(self):
        self.read_pool.stop()
        self.write_pool.stop()
        for counter, results in self.counters.iteritems():
            print(counter)
            for modelname, count in results.iteritems():
                print(' ', modelname.__name__, '-', count)

    def _do_save(self):
        assert not isInIOThread()
        while not self.write_queue.empty():
            items = []
            try:
                self.writelock = True
                try:
                    while True:
                        items.append(self.write_queue.get_nowait())
                except Empty:
                    pass
                session = Session()
                try:
                    session.add_all(items)
                    session.commit()
                except:
                    session.rollback()
                    raise
                finally:
                    session.close()
            finally:
                self.writelock = False

    def save(self, obj):
        self.write_queue.put(obj)
        if self.writelock:
            return None
        else:
            return deferToThreadPool(
                self.reactor, self.write_pool, self._do_save)

    def _do_get_id(self, model, unique, fval, fields):
        assert not isInIOThread()
        return Session().query(model).filter(
            getattr(model, unique) == fval).one().id

    @inlineCallbacks
    def get_id(self, model, unique, fields):
        '''
        Get an ID from the cache or from the database.
        If it doesn't exist, create the item.
        All database operations are done from a separate thread.
        '''
        assert isInIOThread()
        fval = fields[unique]
        try:
            result = self.cache[model][fval]
            self.counters['hit'][model] += 1
            returnValue(result)
        except KeyError:
            self.counters['miss'][model] += 1

        selectors = {unique: fval}
        result, created = yield deferToThreadPool(
            self.reactor, self.read_pool, get_or_create,
            model, fields, **selectors)
        result = result.id
        if created:
            self.counters['db_create'][model] += 1
        else:
            self.counters['db_hit'][model] += 1

        self.cache[model][fval] = result
        returnValue(result)
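# A minimal sketch of the single-writer idea above, with hypothetical names:
# writes are queued from the reactor thread, and a one-thread ThreadPool
# drains the queue so at most one write transaction runs at a time, leaving
# a wider pool free for reads.
from queue import Queue, Empty

from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool

write_pool = ThreadPool(minthreads=1, maxthreads=1, name='WritePool')
write_pool.start()
write_queue = Queue()

def _drain():
    # Runs in the single writer thread; batches everything queued so far.
    items = []
    try:
        while True:
            items.append(write_queue.get_nowait())
    except Empty:
        pass
    return items  # a real implementation would commit these in one session

def save(obj):
    # Returns a Deferred that fires with the batch written on the pool.
    write_queue.put(obj)
    return deferToThreadPool(reactor, write_pool, _drain)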
class HeadlessDownloadHandler(object): lazy = False _default_handler_cls = HTTP11DownloadHandler def __init__(self, settings): if "SELENIUM_GRID_URL" not in settings: raise NotConfigured("SELENIUM_GRID_URL has to be set") if "SELENIUM_NODES" not in settings: raise NotConfigured("SELENIUM_NODES has to be set") if "SELENIUM_CAPABILITIES" not in settings: raise NotConfigured("SELENIUM_CAPABILITIES has to be set") self.grid_url = settings["SELENIUM_GRID_URL"] self.selenium_nodes = settings["SELENIUM_NODES"] self.capabilities = settings["SELENIUM_CAPABILITIES"] selenium_proxy = settings.get("SELENIUM_PROXY", None) if selenium_proxy: self.set_selenium_proxy(selenium_proxy) self._drivers = set() self._data = threading.local() self._threadpool = ThreadPool(self.selenium_nodes, self.selenium_nodes) self._default_handler = self._default_handler_cls(settings) def close(self): for driver in self._drivers: driver.quit() self._threadpool.stop() def set_selenium_proxy(self, selenium_proxy): proxy = Proxy() proxy.http_proxy = selenium_proxy proxy.ftp_proxy = selenium_proxy proxy.sslProxy = selenium_proxy proxy.no_proxy = None proxy.proxy_type = ProxyType.MANUAL proxy.add_to_capabilities(self.capabilities) self.capabilities["acceptSslCerts"] = True def download_request(self, request, spider): if isinstance(request, HeadlessRequest): if not self._threadpool.started: self._threadpool.start() return threads.deferToThreadPool(reactor, self._threadpool, self.process_request, request, spider) return self._default_handler.download_request(request, spider) def process_request(self, request, spider): driver = self.get_driver(spider) try: driver.get(request.url) if request.driver_callback is not None: request.driver_callback(driver) body = to_bytes(driver.page_source) curr_url = driver.current_url except WebDriverException as e: raise ResponseFailed("WebDriverException %s" % e) return HtmlResponse(curr_url, body=body, encoding="utf-8", request=request) def get_driver(self, spider): try: driver = self._data.driver except AttributeError: driver = Remote(command_executor=self.grid_url, desired_capabilities=self.capabilities) self._drivers.add(driver) self._data.driver = driver return driver
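# The handler above keeps one remote driver per pool thread with
# threading.local: a thread lazily creates its driver on first use, and a
# shared set remembers every instance so close() can quit them all. A
# reduced sketch of that pattern, with a hypothetical make_driver() factory:
import threading

from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool

_local = threading.local()
_all_drivers = set()

def make_driver():
    return object()  # stands in for Remote(command_executor=..., ...)

def get_driver():
    try:
        return _local.driver  # this thread already has a driver
    except AttributeError:
        _local.driver = make_driver()
        _all_drivers.add(_local.driver)  # remembered for cleanup
        return _local.driver

pool = ThreadPool(4, 4)
pool.start()
d = deferToThreadPool(reactor, pool, get_driver)  # one driver per thread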
class UpdateTest(unittest.TestCase): def setUp(self): self.db = MockStore() GSM.registerUtility(MockZStorm(self.db)) self.tp = ThreadPool(0, 2) self.sm = MockServerManager(reactor, SERVERS) self.updater = Updater(Transactor(self.tp), self.sm) self.tp.start() def tearDown(self): self.tp.stop() def _H(self, **k): h = Home() for k, v in k.iteritems(): setattr(h, k, v) self.db.add(h) return h def _S(self, **k): h = HomeState() for k, v in k.iteritems(): setattr(h, k, v) self.db.add(h) return h @defer.inlineCallbacks def test_creation(self): home = self._H( server_name = "foo", path = "/test", ) fooserv = yield self.sm.getServer("foo") self.assertNotIn((home.id, "foo"), self.db.objects[HomeState]) self.assertNotIn("/data/homes/test", fooserv.known_paths) done = yield self.updater.updateOne(home, []) self.assertIn((home.id, "foo"), self.db.objects[HomeState]) self.assertIn("/data/homes/test", self.sm.servers["foo"].known_paths) @defer.inlineCallbacks def test_move(self): home = self._H( server_name = "foo", path = "/test", ) status = self._S( id = home.id, server_name = "foo", path = "/old_dir", status = HomeState.ACTIVE, ) fooserv = yield self.sm.getServer("foo") fooserv.known_paths.add("/data/homes/old_dir") done = yield self.updater.updateOne(home, [status]) self.assertIn((home.id, "foo"), self.db.objects[HomeState]) status = self.db.objects[HomeState][(home.id, "foo")] self.assertEquals(status.path, home.path) self.assertIn("/data/homes/test", self.sm.servers["foo"].known_paths) self.assertNotIn("/data/homes/old_dir", self.sm.servers["foo"].known_paths) @defer.inlineCallbacks def test_archive(self): home = self._H( server_name = "foo", path = None, ) status = self._S( id = home.id, server_name = "foo", path = "/foo", status = HomeState.ACTIVE, ) fooserv = yield self.sm.getServer("foo") fooserv.known_paths.add("/data/homes/foo") self.assertNotIn("/data/archive/foo", fooserv.known_paths) done = yield self.updater.updateOne(home, [status]) status = self.db.objects[HomeState][(home.id, "foo")] self.assertEquals(status.status, HomeState.ARCHIVED) self.assertIn("/data/archive/foo", fooserv.known_paths) self.assertNotIn("/data/homes/foo", fooserv.known_paths) @defer.inlineCallbacks def test_sync(self): home = self._H( server_name = "foo", path="/foo", ) status = self._S( id=home.id, server_name="bar", path="/bar", status=HomeState.ACTIVE, ) fooserv = yield self.sm.getServer("foo") barserv = yield self.sm.getServer("bar") barserv.known_paths.add("/data/homes/bar") self.assertNotIn("/data/homes/foo", fooserv.known_paths) done = yield self.updater.updateOne(home, [status]) status = self.db.objects[HomeState][(home.id, "foo")] self.assertEquals(status.server_name, "foo") self.assertEquals(status.path, "/foo") self.assertEquals(status.status, HomeState.ACTIVE) self.assertNotIn((home.id, "bar"), self.db.objects[HomeState]) self.assertIn("/data/homes/foo", fooserv.known_paths) self.assertNotIn("/data/homes/bar", barserv.known_paths) @defer.inlineCallbacks def test_sync_missing_source(self): home = self._H( server_name = "foo", path="/foo", ) status = self._S( id=home.id, server_name="bar", path="/bar", status=HomeState.ACTIVE, ) fooserv = yield self.sm.getServer("foo") barserv = yield self.sm.getServer("bar") self.assertNotIn("/data/homes/foo", fooserv.known_paths) done = yield self.updater.updateOne(home, [status]) status = self.db.objects[HomeState][(home.id, "foo")] self.assertEquals(status.server_name, "foo") self.assertEquals(status.path, "/foo") self.assertEquals(status.status, HomeState.ACTIVE) 
self.assertNotIn((home.id, "bar"), self.db.objects[HomeState]) self.assertIn("/data/homes/foo", fooserv.known_paths) self.assertNotIn("/data/homes/bar", barserv.known_paths) @defer.inlineCallbacks def test_load_remote_archive(self): home = self._H( server_name="foo", path="/foo", ) status = self._S( id=home.id, server_name="bar", path="/baz", status=HomeState.ARCHIVED, ) fooserv = yield self.sm.getServer("foo") barserv = yield self.sm.getServer("bar") barserv.known_paths.add("/data/archive/baz") self.assertNotIn("/data/homes/foo", fooserv.known_paths) done = yield self.updater.updateOne(home, [status]) self.assertIn("/data/homes/foo", fooserv.known_paths) self.assertNotIn("/data/homes/baz", barserv.known_paths) self.assertNotIn((home.id, "bar"), self.db.objects[HomeState]) newstate = self.db.objects[HomeState][(home.id, "foo")] self.assertEquals(newstate.path, "/foo") self.assertEquals(newstate.status, HomeState.ACTIVE) self.assertEquals(newstate.server_name, "foo") @defer.inlineCallbacks def test_load_local_archive(self): home = self._H( server_name="bar", path="/foo", ) status = self._S( id=home.id, server_name="bar", path="/baz", status=HomeState.ARCHIVED, ) barserv = yield self.sm.getServer("bar") barserv.known_paths.add("/data/archive/baz") self.assertNotIn("/data/homes/foo", barserv.known_paths) done = yield self.updater.updateOne(home, [status]) self.assertIn("/data/homes/foo", barserv.known_paths) self.assertNotIn("/data/homes/baz", barserv.known_paths) newstate = self.db.objects[HomeState][(home.id, "bar")] self.assertEquals(newstate.path, "/foo") self.assertEquals(newstate.status, HomeState.ACTIVE) self.assertEquals(newstate.server_name, "bar") @defer.inlineCallbacks def test_clean_others(self): home = self._H( server_name="bar", path="/bar", ) s1 = self._S( id=home.id, server_name="bar", path="/bar", status=HomeState.ACTIVE, ) s2 = self._S( id=home.id, server_name="foo", path="/bar", status=HomeState.ACTIVE, ) fooserv = yield self.sm.getServer("foo") barserv = yield self.sm.getServer("bar") fooserv.known_paths.add("/data/homes/bar") barserv.known_paths.add("/data/homes/bar") done = yield self.updater.updateOne(home, [s1, s2]) self.assertIn("/data/homes/bar", barserv.known_paths) self.assertNotIn("/data/homes/bar", fooserv.known_paths) self.assertIn((home.id, "bar"), self.db.objects[HomeState]) self.assertNotIn((home.id, "foo"), self.db.objects[HomeState]) @defer.inlineCallbacks def test_unknown_server_home(self): home = self._H( server_name="baz", path="/bar", ) s1 = self._S( id=home.id, server_name="bar", path="/bar", status=HomeState.ACTIVE, ) barserv = yield self.sm.getServer("bar") barserv.known_paths.add("/data/homes/bar") done = yield self.updater.updateOne(home, [s1]).addErrback( lambda err:err.trap(defer.FirstError)) self.assertIn("/data/homes/bar", barserv.known_paths) self.assertIn((home.id, "bar"), self.db.objects[HomeState]) self.assertNotIn((home.id, "baz"), self.db.objects[HomeState]) @defer.inlineCallbacks def test_unknown_server_source(self): home = self._H( server_name="bar", path="/bar", ) s1 = self._S( id=home.id, server_name="baz", path="/bar", status=HomeState.ARCHIVED, ) barserv = yield self.sm.getServer("bar") done = yield self.updater.updateOne(home, [s1]).addErrback( lambda err:err.trap(defer.FirstError)) self.assertNotIn("/data/homes/bar", barserv.known_paths) self.assertNotIn((home.id, "bar"), self.db.objects[HomeState]) self.assertIn((home.id, "baz"), self.db.objects[HomeState])
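# The test case above brackets every test with ThreadPool lifecycle calls:
# start() in setUp and stop() in tearDown, so no worker threads outlive a
# test run under trial. A minimal skeleton of that arrangement:
from twisted.internet import defer, reactor
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool
from twisted.trial import unittest

class ThreadPoolTestCase(unittest.TestCase):
    def setUp(self):
        self.tp = ThreadPool(0, 2)
        self.tp.start()

    def tearDown(self):
        self.tp.stop()

    @defer.inlineCallbacks
    def test_runs_in_pool(self):
        result = yield deferToThreadPool(reactor, self.tp, lambda: 42)
        self.assertEqual(result, 42)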
class NodeEngagementMutex:
    """
    TODO: Does this belong on middleware?

    TODO: There are a couple of ways this can break. If one of the jobs
    hangs, the whole thing will hang. Also, if there are fewer successfully
    completed than percent_to_complete_before_release, the partial queue
    will never release.

    TODO: Make registry per... I guess Policy? It's weird to be able to
    accidentally enact again.
    """
    log = Logger("Policy")

    def __init__(self,
                 callable_to_engage,  # TODO: typing.Protocol
                 nodes,
                 network_middleware,
                 percent_to_complete_before_release=5,
                 note=None,
                 threadpool_size=120,
                 timeout=20,
                 *args,
                 **kwargs):
        self.f = callable_to_engage
        self.nodes = nodes
        self.network_middleware = network_middleware
        self.args = args
        self.kwargs = kwargs

        self.completed = {}
        self.failed = {}

        self._started = False
        self._finished = False
        self.timeout = timeout

        self.percent_to_complete_before_release = percent_to_complete_before_release

        self._partial_queue = Queue()
        self._completion_queue = Queue()
        self._block_until_this_many_are_complete = math.ceil(
            len(nodes) * self.percent_to_complete_before_release / 100)
        self.nodes_contacted_during_partial_block = False
        self.when_complete = Deferred()  # TODO: Allow cancelling via KB Interrupt or some other way?

        if note is None:
            self._repr = f"{callable_to_engage} to {len(nodes)} nodes"
        else:
            self._repr = f"{note}: {callable_to_engage} to {len(nodes)} nodes"

        self._threadpool = ThreadPool(minthreads=threadpool_size,
                                      maxthreads=threadpool_size,
                                      name=self._repr)
        self.log.info(f"NEM spinning up {self._threadpool}")
        self._threadpool.callInThread(self._bail_on_timeout)

    def __repr__(self):
        return self._repr

    def _bail_on_timeout(self):
        while True:
            if self.when_complete.called:
                return
            duration = datetime.datetime.now() - self._started
            if duration.seconds >= self.timeout:
                try:
                    self._threadpool.stop()
                except AlreadyQuit:
                    raise RuntimeError(
                        "Is there a race condition here? If this line is being hit, it's a bug.")
                raise RuntimeError(f"Timed out. Nodes completed: {self.completed}")
            time.sleep(.5)

    def block_until_success_is_reasonably_likely(self):
        """
        https://www.youtube.com/watch?v=OkSLswPSq2o
        """
        if len(self.completed) < self._block_until_this_many_are_complete:
            try:
                completed_for_reasonable_likelihood_of_success = self._partial_queue.get(
                    timeout=self.timeout)  # TODO: Shorter timeout here?
            except Empty:
                raise RuntimeError(f"Timed out. Nodes completed: {self.completed}")
            self.log.debug(
                f"{len(self.completed)} nodes were contacted while blocking for a little while.")
            return completed_for_reasonable_likelihood_of_success
        else:
            return self.completed

    def block_until_complete(self):
        if self.total_disposed() < len(self.nodes):
            try:
                # Interesting opportunity to pass some data, like the list
                # of contacted nodes above.
                _ = self._completion_queue.get(timeout=self.timeout)
            except Empty:
                raise RuntimeError(f"Timed out. Nodes completed: {self.completed}")
        if not reactor.running and not self._threadpool.joined:
            # If the reactor isn't running, the user *must* call this,
            # because this is where we stop.
            self._threadpool.stop()

    def _handle_success(self, response, node):
        if response.status_code == 201:
            self.completed[node] = response
        else:
            assert False  # TODO: What happens if this is a 300 or 400 level response? (A 500 response will propagate as an error and be handled in the errback chain.)

        if self.nodes_contacted_during_partial_block:
            self._consider_finalizing()
        else:
            if len(self.completed) >= self._block_until_this_many_are_complete:
                contacted = tuple(self.completed.keys())
                self.nodes_contacted_during_partial_block = contacted
                self.log.debug(f"Blocked for a little while, completed {contacted} nodes")
                self._partial_queue.put(contacted)
        return response

    def _handle_error(self, failure, node):
        self.failed[node] = failure  # TODO: Add a failfast mode?
        self._consider_finalizing()
        self.log.warn(f"{node} failed: {failure}")

    def total_disposed(self):
        return len(self.completed) + len(self.failed)

    def _consider_finalizing(self):
        if not self._finished:
            if self.total_disposed() == len(self.nodes):
                # TODO: Consider whether this can possibly hang.
                self._finished = True
                if reactor.running:
                    reactor.callInThread(self._threadpool.stop)
                self._completion_queue.put(self.completed)
                self.when_complete.callback(self.completed)
                self.log.info(f"{self} finished.")
        else:
            raise RuntimeError("Already finished.")

    def _engage_node(self, node):
        maybe_coro = self.f(node, network_middleware=self.network_middleware,
                            *self.args, **self.kwargs)
        d = ensureDeferred(maybe_coro)
        d.addCallback(self._handle_success, node)
        d.addErrback(self._handle_error, node)
        return d

    def start(self):
        if self._started:
            raise RuntimeError("Already started.")
        self._started = datetime.datetime.now()
        self.log.info(f"NEM Starting {self._threadpool}")
        for node in self.nodes:
            self._threadpool.callInThread(self._engage_node, node)
        self._threadpool.start()
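# A reduced sketch of the fan-out above, using hypothetical contact() work:
# every node is queued with callInThread before start(), exactly as the
# class does, and a plain Queue gives the caller a way to block for the
# results, since ThreadPool has no join-with-timeout of its own.
from queue import Queue

from twisted.python.threadpool import ThreadPool

nodes = ['node-a', 'node-b', 'node-c']
done = Queue()

def contact(node):
    done.put((node, 'ok'))  # real work (an HTTP call, etc.) goes here

pool = ThreadPool(minthreads=len(nodes), maxthreads=len(nodes))
for node in nodes:
    pool.callInThread(contact, node)  # queued; runs once the pool starts
pool.start()
results = dict(done.get(timeout=10) for _ in nodes)
pool.stop()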
def run(self): """Override Process run method to provide a custom wrapper for the API. This provides a continuous loop for watching the service while keeping an ear open to the main process from openContentPlatform, listening for any interrupt requests. """ ## Setup requested log handler try: ## Twisted imports here to avoid issues with epoll on Linux from twisted.internet import reactor, ssl from twisted.python.filepath import FilePath from twisted.web.server import Site from twisted.web.wsgi import WSGIResource from twisted.python.threadpool import ThreadPool print('Starting {}'.format(self.serviceName)) self.getLocalLogger() self.logger.info('Starting {}'.format(self.serviceName)) self.logger.info('Setting up the API application...') ## Setup shared resources for our WSGIResource instances to use self.getSharedLogger() self.getSharedDbPool() ## Create a PID file for system administration purposes pidEntryService(self.serviceName, env, self.pid) ## Reference the magic WSGI throwable from our root module using Hug application = apiResourceRoot.__hug_wsgi__ ## Setup the WSGI to be hosted through Twisted's web server wsgiThreadPool = ThreadPool() wsgiThreadPool.start() ## For some reason the system event wasn't working all the time, ## so I'm adding an explicit wsgiThreadPool.stop() below as well, ## which was needed before reactor.stop() would properly cleanup. reactor.addSystemEventTrigger('after', 'shutdown', wsgiThreadPool.stop) resource = WSGIResource(reactor, wsgiThreadPool, application) self.logger.info('calling listener on {}:{}.'.format( str(self.serviceEndpoint), self.listeningPort)) if self.useCertificates: ## Use TLS to encrypt the communication certData = FilePath( os.path.join( env.privateInternalCertPath, self.globalSettings.get( 'ocpCertificateCaFile'))).getContent() certificate = ssl.PrivateCertificate.loadPEM(certData) reactor.listenSSL(self.listeningPort, Site(resource), certificate.options()) else: ## Plain text communication reactor.listenTCP(self.listeningPort, Site(resource), interface=self.serviceEndpoint) ## Normally we'd just call reactor.run() here and let twisted handle ## the wait loop while watching for signals. The problem is that we ## need openContentPlatform (parent process) to manage this process. ## So this is a bit hacky in that I'm using the reactor code, but I ## am manually calling what would be called if I just called run(): reactor.startRunning() ## Start event wait loop while reactor._started and not self.shutdownEvent.is_set(): try: ## Four lines from twisted.internet.main.mainloop: reactor.runUntilCurrent() t2 = reactor.timeout() t = reactor.running and t2 reactor.doIteration(t) except: exception = traceback.format_exception( sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]) self.logger.error('Exception in {}: {}'.format( self.serviceName, str(exception))) break if self.shutdownEvent.is_set(): self.logger.info('Process received shutdownEvent') with suppress(Exception): wsgiThreadPool.stop() with suppress(Exception): reactor.stop() except: exception = traceback.format_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]) self.logger.error('Exception in {}: {}'.format( self.serviceName, str(exception))) ## Cleanup pidRemoveService(self.serviceName, env, self.pid) self.logger.info('Stopped {}'.format(self.serviceName)) print('Stopped {}'.format(self.serviceName)) ## end run return
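# The service above hosts a WSGI app on Twisted by pairing WSGIResource with
# a dedicated ThreadPool and registering the pool's stop() as a shutdown
# trigger. A minimal standalone version of that wiring (the WSGI callable
# here is a stub, not the Hug application used above):
from twisted.internet import reactor
from twisted.python.threadpool import ThreadPool
from twisted.web.server import Site
from twisted.web.wsgi import WSGIResource

def application(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'hello\n']

wsgi_pool = ThreadPool()
wsgi_pool.start()
# WSGI requests run on wsgi_pool threads; stop the pool at shutdown or the
# process can hang on exit, as the comments above note.
reactor.addSystemEventTrigger('after', 'shutdown', wsgi_pool.stop)
reactor.listenTCP(8080, Site(WSGIResource(reactor, wsgi_pool, application)))
reactor.run()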
class HathorManager: """ HathorManager manages the node with the help of other specialized classes. Its primary objective is to handle DAG-related matters, ensuring that the DAG is always valid and connected. """ class NodeState(Enum): # This node is still initializing INITIALIZING = 'INITIALIZING' # This node is ready to establish new connections, sync, and exchange transactions. READY = 'READY' def __init__(self, reactor: IReactorCore, peer_id: Optional[PeerId] = None, network: Optional[str] = None, hostname: Optional[str] = None, pubsub: Optional[PubSubManager] = None, wallet: Optional[BaseWallet] = None, tx_storage: Optional[TransactionStorage] = None, peer_storage: Optional[Any] = None, default_port: int = 40403, wallet_index: bool = False, stratum_port: Optional[int] = None, ssl: bool = True, enable_sync_v1: bool = True, enable_sync_v2: bool = False, capabilities: Optional[List[str]] = None, checkpoints: Optional[List[Checkpoint]] = None, rng: Optional[Random] = None, soft_voided_tx_ids: Optional[Set[bytes]] = None) -> None: """ :param reactor: Twisted reactor which handles the mainloop and the events. :param peer_id: Id of this node. If not given, a new one is created. :param network: Name of the network this node participates. Usually it is either testnet or mainnet. :type network: string :param hostname: The hostname of this node. It is used to generate its entrypoints. :type hostname: string :param pubsub: If not given, a new one is created. :type pubsub: :py:class:`hathor.pubsub.PubSubManager` :param tx_storage: Required storage backend. :type tx_storage: :py:class:`hathor.transaction.storage.transaction_storage.TransactionStorage` :param peer_storage: If not given, a new one is created. :type peer_storage: :py:class:`hathor.p2p.peer_storage.PeerStorage` :param default_port: Network default port. It is used when only ip addresses are discovered. :type default_port: int :param wallet_index: If should add a wallet index in the storage :type wallet_index: bool :param stratum_port: Stratum server port. Stratum server will only be created if it is not None. :type stratum_port: Optional[int] """ from hathor.metrics import Metrics from hathor.p2p.factory import HathorClientFactory, HathorServerFactory from hathor.p2p.manager import ConnectionsManager if not (enable_sync_v1 or enable_sync_v2): raise TypeError( f'{type(self).__name__}() at least one sync version is required' ) if tx_storage is None: raise TypeError( f'{type(self).__name__}() missing 1 required positional argument: \'tx_storage\'' ) self.log = logger.new() if rng is None: rng = Random() self.rng = rng self.reactor = reactor if hasattr(self.reactor, 'addSystemEventTrigger'): self.reactor.addSystemEventTrigger('after', 'shutdown', self.stop) self.state: Optional[HathorManager.NodeState] = None self.profiler: Optional[Any] = None # Hostname, used to be accessed by other peers. self.hostname = hostname # Remote address, which can be different from local address. 
self.remote_address = None self.my_peer = peer_id or PeerId() self.network = network or 'testnet' self.is_started: bool = False self.cpu = cpu # XXX: first checkpoint must be genesis (height=0) self.checkpoints: List[Checkpoint] = checkpoints or [] self.checkpoints_ready: List[bool] = [False] * len(self.checkpoints) if not self.checkpoints or self.checkpoints[0].height > 0: self.checkpoints.insert(0, Checkpoint(0, settings.GENESIS_BLOCK_HASH)) self.checkpoints_ready.insert(0, True) else: self.checkpoints_ready[0] = True # XXX Should we use a singleton or a new PeerStorage? [msbrogli 2018-08-29] self.pubsub = pubsub or PubSubManager(self.reactor) self.tx_storage = tx_storage self.tx_storage.pubsub = self.pubsub if wallet_index and self.tx_storage.with_index: assert self.tx_storage.indexes is not None self.log.debug('enable wallet indexes') self.tx_storage.indexes.enable_address_index(self.pubsub) self.tx_storage.indexes.enable_tokens_index() self.metrics = Metrics( pubsub=self.pubsub, avg_time_between_blocks=settings.AVG_TIME_BETWEEN_BLOCKS, tx_storage=self.tx_storage, reactor=self.reactor, ) self.soft_voided_tx_ids = soft_voided_tx_ids or set() self.consensus_algorithm = ConsensusAlgorithm(self.soft_voided_tx_ids) self.peer_discoveries: List[PeerDiscovery] = [] self.ssl = ssl self.server_factory = HathorServerFactory(self.network, self.my_peer, node=self, use_ssl=ssl) self.client_factory = HathorClientFactory(self.network, self.my_peer, node=self, use_ssl=ssl) self.connections = ConnectionsManager(self.reactor, self.my_peer, self.server_factory, self.client_factory, self.pubsub, self, ssl, whitelist_only=False, rng=self.rng, enable_sync_v1=enable_sync_v1, enable_sync_v2=enable_sync_v2) self.wallet = wallet if self.wallet: self.wallet.pubsub = self.pubsub self.wallet.reactor = self.reactor if stratum_port: # XXX: only import if needed from hathor.stratum import StratumFactory self.stratum_factory: Optional[StratumFactory] = StratumFactory( manager=self, port=stratum_port) else: self.stratum_factory = None # Set stratum factory for metrics object self.metrics.stratum_factory = self.stratum_factory self._allow_mining_without_peers = False # Thread pool used to resolve pow when sending tokens self.pow_thread_pool = ThreadPool(minthreads=0, maxthreads=settings.MAX_POW_THREADS, name='Pow thread pool') # List of addresses to listen for new connections (eg: [tcp:8000]) self.listen_addresses: List[str] = [] # Full verification execute all validations for transactions and blocks when initializing the node # Can be activated on the command line with --full-verification self._full_verification = False # List of whitelisted peers self.peers_whitelist: List[str] = [] # List of capabilities of the peer if capabilities is not None: self.capabilities = capabilities else: self.capabilities = DEFAULT_CAPABILITIES def start(self) -> None: """ A factory must be started only once. And it is usually automatically started. 
""" if self.is_started: raise Exception('HathorManager is already started') self.is_started = True self.log.info('start manager', network=self.network) # If it's a full verification, we save on the storage that we are starting it # this is required because if we stop the initilization in the middle, the metadata # saved on the storage is not reliable anymore, only if we finish it if self._full_verification: self.tx_storage.start_full_verification() else: # If it's a fast initialization and the last time a full initialization stopped in the middle # we can't allow the full node to continue, so we need to remove the storage and do a full sync # or execute an initialization with full verification if self.tx_storage.is_running_full_verification(): self.log.error( 'Error initializing node. The last time you started your node you did a full verification ' 'that was stopped in the middle. The storage is not reliable anymore and, because of that, ' 'you must initialize with a full verification again or remove your storage and do a full sync.' ) sys.exit(-1) # If self.tx_storage.is_running_manager() is True, the last time the node was running it had a sudden crash # because of that, we must run a full verification because some storage data might be wrong. # The metadata is the only piece of the storage that may be wrong, not the blocks and transactions. if self.tx_storage.is_running_manager(): self.log.error( 'Error initializing node. The last time you executed your full node it wasn\'t stopped correctly. ' 'The storage is not reliable anymore and, because of that, so you must run a full verification ' 'or remove your storage and do a full sync.') sys.exit(-1) self.state = self.NodeState.INITIALIZING self.pubsub.publish(HathorEvents.MANAGER_ON_START) self.connections.start() self.pow_thread_pool.start() # Disable get transaction lock when initializing components self.tx_storage.disable_lock() # Initialize manager's components. self._initialize_components() if self._full_verification: # Before calling self._initialize_components() I start 'full verification' mode and after that I need to # finish it. It's just to know if the full node has stopped a full initialization in the middle self.tx_storage.finish_full_verification() self.tx_storage.enable_lock() # Metric starts to capture data self.metrics.start() for description in self.listen_addresses: self.listen(description) self.do_discovery() self.start_time = time.time() if self.wallet: self.wallet.start() if self.stratum_factory: self.stratum_factory.start() # Start running self.tx_storage.start_running_manager() def stop(self) -> Deferred: if not self.is_started: raise Exception('HathorManager is already stopped') self.is_started = False waits = [] self.log.info('stop manager') self.tx_storage.stop_running_manager() self.connections.stop() self.pubsub.publish(HathorEvents.MANAGER_ON_STOP) if self.pow_thread_pool.started: self.pow_thread_pool.stop() # Metric stops to capture data self.metrics.stop() if self.wallet: self.wallet.stop() if self.stratum_factory: wait_stratum = self.stratum_factory.stop() if wait_stratum: waits.append(wait_stratum) return defer.DeferredList(waits) def do_discovery(self) -> None: """ Do a discovery and connect on all discovery strategies. """ for peer_discovery in self.peer_discoveries: peer_discovery.discover_and_connect(self.connections.connect_to) def start_profiler(self, *, reset: bool = False) -> None: """ Start profiler. It can be activated from a web resource, as well. 
""" if reset or not self.profiler: import cProfile self.profiler = cProfile.Profile() self.profiler.enable() def stop_profiler(self, save_to: Optional[str] = None) -> None: """ Stop the profile and optionally save the results for future analysis. :param save_to: path where the results will be saved :type save_to: str """ assert self.profiler is not None self.profiler.disable() if save_to: self.profiler.dump_stats(save_to) def _initialize_components(self) -> None: """You are not supposed to run this method manually. You should run `doStart()` to initialize the manager. This method runs through all transactions, verifying them and updating our wallet. """ self.log.info('initialize') if self.wallet: self.wallet._manually_initialize() t0 = time.time() t1 = t0 cnt = 0 cnt2 = 0 t2 = t0 h = 0 block_count = 0 tx_count = 0 self.tx_storage.pre_init() assert self.tx_storage.indexes is not None # After introducing soft voided transactions we need to guarantee the full node is not using # a database that already has the soft voided transaction before marking them in the metadata # Any new sync from the beginning should work fine or starting with the latest snapshot # that already has the soft voided transactions marked for soft_voided_id in settings.SOFT_VOIDED_TX_IDS: try: soft_voided_tx = self.tx_storage.get_transaction( soft_voided_id) except TransactionDoesNotExist: # This database does not have this tx that should be soft voided # so it's fine, we will mark it as soft voided when we get it through sync pass else: soft_voided_meta = soft_voided_tx.get_metadata() voided_set = soft_voided_meta.voided_by or set() # If the tx is not marked as soft voided, then we can't continue the initialization if settings.SOFT_VOIDED_ID not in voided_set: self.log.error( 'Error initializing node. Your database is not compatible with the current version of the' ' full node. You must use the latest available snapshot or sync from the beginning.' ) sys.exit(-1) assert {soft_voided_id, settings.SOFT_VOIDED_ID}.issubset(voided_set) # Checkpoints as {height: hash} checkpoint_heights = {} for cp in self.checkpoints: checkpoint_heights[cp.height] = cp.hash # self.start_profiler() if self._full_verification: self.log.debug('reset all metadata') for tx in self.tx_storage.get_all_transactions(): tx.reset_metadata() self.log.debug('load blocks and transactions') for tx in self.tx_storage._topological_sort(): if self._full_verification: tx.update_initial_metadata() assert tx.hash is not None tx_meta = tx.get_metadata() t2 = time.time() dt = LogDuration(t2 - t1) dcnt = cnt - cnt2 tx_rate = '?' 
if dt == 0 else dcnt / dt h = max(h, tx_meta.height) if dt > 30: ts_date = datetime.datetime.fromtimestamp( self.tx_storage.latest_timestamp) if h == 0: self.log.debug('start loading transactions...') else: self.log.info('load transactions...', tx_rate=tx_rate, tx_new=dcnt, dt=dt, total=cnt, latest_ts=ts_date, height=h) t1 = t2 cnt2 = cnt cnt += 1 # It's safe to skip block weight verification during initialization because # we trust the difficulty stored in metadata skip_block_weight_verification = True if block_count % settings.VERIFY_WEIGHT_EVERY_N_BLOCKS == 0: skip_block_weight_verification = False try: if self._full_verification: # TODO: deal with invalid tx if self.tx_storage.is_tx_needed(tx.hash): assert isinstance(tx, Transaction) tx._height_cache = self.tx_storage.needed_index_height( tx.hash) if tx.can_validate_full(): assert self.tx_storage.indexes is not None self.tx_storage.add_to_indexes(tx) assert tx.validate_full(skip_block_weight_verification= skip_block_weight_verification) self.consensus_algorithm.update(tx) self.tx_storage.indexes.mempool_tips.update(tx) self.step_validations([tx]) else: assert tx.validate_basic( skip_block_weight_verification= skip_block_weight_verification) self.tx_storage.add_to_deps_index( tx.hash, tx.get_all_dependencies()) self.tx_storage.add_needed_deps(tx) self.tx_storage.save_transaction(tx, only_metadata=True) else: # TODO: deal with invalid tx if not tx_meta.validation.is_final(): if not tx_meta.validation.is_checkpoint(): assert tx_meta.validation.is_at_least_basic( ), f'invalid: {tx.hash_hex}' self.tx_storage.add_needed_deps(tx) elif tx.is_transaction and tx_meta.first_block is None and not tx_meta.voided_by: assert self.tx_storage.indexes is not None self.tx_storage.indexes.mempool_tips.update(tx) self.tx_storage.add_to_indexes(tx) if tx.is_transaction and tx_meta.voided_by: self.tx_storage.del_from_indexes(tx) except (InvalidNewTransaction, TxValidationError): self.log.error('unexpected error when initializing', tx=tx, exc_info=True) raise if tx.is_block: block_count += 1 # this works because blocks on the best chain are iterated from lower to higher height assert tx.hash is not None assert tx_meta.validation.is_at_least_basic() if not tx_meta.voided_by and tx_meta.validation.is_fully_connected( ): # XXX: this might not be needed when making a full init because the consensus should already have self.tx_storage.indexes.height.add_reorg( tx_meta.height, tx.hash, tx.timestamp) # Check if it's a checkpoint block if tx_meta.height in checkpoint_heights: if tx.hash == checkpoint_heights[tx_meta.height]: del checkpoint_heights[tx_meta.height] else: # If the hash is different from checkpoint hash, we stop the node self.log.error( 'Error initializing the node. Checkpoint validation error.' ) sys.exit() else: tx_count += 1 if time.time() - t2 > 1: dt = LogDuration(time.time() - t2) self.log.warn('tx took too long to load', tx=tx.hash_hex, dt=dt) # we have to have a best_block by now # assert best_block is not None self.log.debug('done loading transactions') # Check if all checkpoints in database are ok my_best_height = self.tx_storage.get_height_best_block() if checkpoint_heights: # If I have checkpoints that were not validated I must check if they are all in a height I still don't have first = min(list(checkpoint_heights.keys())) if first <= my_best_height: # If the height of the first checkpoint not validated is lower than the height of the best block # Then it's missing this block self.log.error( 'Error initializing the node. 
Checkpoint validation error.' ) sys.exit() # restart all validations possible deps_size = self.tx_storage.count_deps_index() if deps_size > 0: self.log.debug('run pending validations', deps_size=deps_size) depended_final_txs: List[BaseTransaction] = [] for tx_hash in self.tx_storage.iter_deps_index(): if not self.tx_storage.transaction_exists(tx_hash): continue tx = self.tx_storage.get_transaction(tx_hash) if tx.get_metadata().validation.is_final(): depended_final_txs.append(tx) self.step_validations(depended_final_txs) new_deps_size = self.tx_storage.count_deps_index() self.log.debug('pending validations finished', changes=deps_size - new_deps_size) best_height = self.tx_storage.get_height_best_block() if best_height != h: self.log.warn('best height doesn\'t match', best_height=best_height, max_height=h) # self.stop_profiler(save_to='profiles/initializing.prof') self.state = self.NodeState.READY tdt = LogDuration(t2 - t0) tx_rate = '?' if tdt == 0 else cnt / tdt self.log.info('ready', tx_count=cnt, tx_rate=tx_rate, total_dt=tdt, height=h, blocks=block_count, txs=tx_count) def add_listen_address(self, addr: str) -> None: self.listen_addresses.append(addr) def add_peer_discovery(self, peer_discovery: PeerDiscovery) -> None: self.peer_discoveries.append(peer_discovery) def get_new_tx_parents(self, timestamp: Optional[float] = None) -> List[bytes]: """Select which transactions will be confirmed by a new transaction. :return: The hashes of the parents for a new transaction. :rtype: List[bytes(hash)] """ timestamp = timestamp or self.reactor.seconds() parent_txs = self.generate_parent_txs(timestamp) return list(parent_txs.get_random_parents(self.rng)) def generate_parent_txs(self, timestamp: Optional[float]) -> 'ParentTxs': """Select which transactions will be confirmed by a new block. This method tries to return a stable result, such that for a given timestamp and storage state it will always return the same. """ if timestamp is None: timestamp = self.reactor.seconds() can_include_intervals = sorted( self.tx_storage.get_tx_tips(timestamp - 1)) assert can_include_intervals, 'tips cannot be empty' max_timestamp = max(int(i.begin) for i in can_include_intervals) must_include: List[bytes] = [] assert len(can_include_intervals ) > 0, f'invalid timestamp "{timestamp}", no tips found"' if len(can_include_intervals) < 2: # If there is only one tip, let's randomly choose one of its parents. must_include_interval = can_include_intervals[0] must_include = [must_include_interval.data] can_include_intervals = sorted( self.tx_storage.get_tx_tips(must_include_interval.begin - 1)) can_include = [i.data for i in can_include_intervals] return ParentTxs(max_timestamp, can_include, must_include) def allow_mining_without_peers(self) -> None: """Allow mining without being synced to at least one peer. It should be used only for debugging purposes. """ self._allow_mining_without_peers = True def can_start_mining(self) -> bool: """ Return whether we can start mining. 
""" if self._allow_mining_without_peers: return True return self.connections.has_synced_peer() def get_block_templates(self, parent_block_hash: Optional[bytes] = None, timestamp: Optional[int] = None) -> BlockTemplates: """ Cached version of `make_block_templates`, cache is invalidated when latest_timestamp changes.""" if parent_block_hash is not None: return BlockTemplates( [self.make_block_template(parent_block_hash, timestamp)], storage=self.tx_storage) return BlockTemplates(self.make_block_templates(timestamp), storage=self.tx_storage) # FIXME: the following caching scheme breaks tests: # cached_timestamp: Optional[int] # cached_block_template: BlockTemplates # cached_timestamp, cached_block_template = getattr(self, '_block_templates_cache', (None, None)) # if cached_timestamp == self.tx_storage.latest_timestamp: # return cached_block_template # block_templates = BlockTemplates(self.make_block_templates(), storage=self.tx_storage) # setattr(self, '_block_templates_cache', (self.tx_storage.latest_timestamp, block_templates)) # return block_templates def make_block_templates(self, timestamp: Optional[int] = None ) -> Iterator[BlockTemplate]: """ Makes block templates for all possible best tips as of the latest timestamp. Each block template has all the necessary info to build a block to be mined without requiring further information from the blockchain state. Which is ideal for use by external mining servers. """ for parent_block_hash in self.tx_storage.get_best_block_tips(): yield self.make_block_template(parent_block_hash, timestamp) def make_block_template(self, parent_block_hash: bytes, timestamp: Optional[int] = None) -> BlockTemplate: """ Makes a block template using the given parent block. """ parent_block = self.tx_storage.get_transaction(parent_block_hash) assert isinstance(parent_block, Block) parent_txs = self.generate_parent_txs( parent_block.timestamp + settings.MAX_DISTANCE_BETWEEN_BLOCKS) if timestamp is None: current_timestamp = int( max(self.tx_storage.latest_timestamp, self.reactor.seconds())) else: current_timestamp = timestamp return self._make_block_template(parent_block, parent_txs, current_timestamp) def make_custom_block_template( self, parent_block_hash: bytes, parent_tx_hashes: List[bytes], timestamp: Optional[int] = None) -> BlockTemplate: """ Makes a block template using the given parent block and txs. 
""" parent_block = self.tx_storage.get_transaction(parent_block_hash) assert isinstance(parent_block, Block) # gather the actual txs to query their timestamps parent_tx_list: List[Transaction] = [] for tx_hash in parent_tx_hashes: tx = self.tx_storage.get_transaction(tx_hash) assert isinstance(tx, Transaction) parent_tx_list.append(tx) max_timestamp = max(tx.timestamp for tx in parent_tx_list) parent_txs = ParentTxs(max_timestamp, parent_tx_hashes, []) if timestamp is None: current_timestamp = int( max(self.tx_storage.latest_timestamp, self.reactor.seconds())) else: current_timestamp = timestamp return self._make_block_template(parent_block, parent_txs, current_timestamp) def _make_block_template(self, parent_block: Block, parent_txs: 'ParentTxs', current_timestamp: int, with_weight_decay: bool = False) -> BlockTemplate: """ Further implementation of making block template, used by make_block_template and make_custom_block_template """ assert parent_block.hash is not None # the absolute minimum would be the previous timestamp + 1 timestamp_abs_min = parent_block.timestamp + 1 # and absolute maximum limited by max time between blocks if not parent_block.is_genesis: timestamp_abs_max = parent_block.timestamp + settings.MAX_DISTANCE_BETWEEN_BLOCKS - 1 else: timestamp_abs_max = 0xffffffff assert timestamp_abs_max > timestamp_abs_min # actual minimum depends on the timestamps of the parent txs # it has to be at least the max timestamp of parents + 1 timestamp_min = max(timestamp_abs_min, parent_txs.max_timestamp + 1) assert timestamp_min <= timestamp_abs_max # when we have weight decay, the max timestamp will be when the next decay happens if with_weight_decay and settings.WEIGHT_DECAY_ENABLED: # we either have passed the first decay or not, the range will vary depending on that if timestamp_min > timestamp_abs_min + settings.WEIGHT_DECAY_ACTIVATE_DISTANCE: timestamp_max_decay = timestamp_min + settings.WEIGHT_DECAY_WINDOW_SIZE else: timestamp_max_decay = timestamp_abs_min + settings.WEIGHT_DECAY_ACTIVATE_DISTANCE timestamp_max = min(timestamp_abs_max, timestamp_max_decay) else: timestamp_max = timestamp_abs_max timestamp = min(max(current_timestamp, timestamp_min), timestamp_max) weight = daa.calculate_next_weight(parent_block, timestamp) parent_block_metadata = parent_block.get_metadata() height = parent_block_metadata.height + 1 parents = [parent_block.hash] + parent_txs.must_include parents_any = parent_txs.can_include # simplify representation when you only have one to choose from if len(parents) + len(parents_any) == 3: parents.extend(sorted(parents_any)) parents_any = [] assert len(parents) + len( parents_any) >= 3, 'There should be enough parents to choose from' assert 1 <= len(parents) <= 3, 'Impossible number of parents' if __debug__ and len(parents) == 3: assert len( parents_any ) == 0, 'Extra parents to choose from that cannot be chosen' return BlockTemplate( versions={ TxVersion.REGULAR_BLOCK.value, TxVersion.MERGE_MINED_BLOCK.value }, reward=daa.get_tokens_issued_per_block(height), weight=weight, timestamp_now=current_timestamp, timestamp_min=timestamp_min, timestamp_max=timestamp_max, parents=parents, parents_any=parents_any, height=height, score=sum_weights(parent_block_metadata.score, weight), ) def generate_mining_block( self, timestamp: Optional[int] = None, parent_block_hash: Optional[bytes] = None, data: bytes = b'', address: Optional[bytes] = None, merge_mined: bool = False) -> Union[Block, MergeMinedBlock]: """ Generates a block ready to be mined. 
The block includes new issued tokens, parents, and the weight. :return: A block ready to be mined :rtype: :py:class:`hathor.transaction.Block` """ if address is None: if self.wallet is None: raise ValueError( 'No wallet available and no mining address given') address = self.wallet.get_unused_address_bytes(mark_as_used=False) assert address is not None block = self.get_block_templates( parent_block_hash, timestamp ).generate_mining_block( rng=self.rng, merge_mined=merge_mined, address=address or None, # XXX: because we allow b'' for explicit empty output script data=data, ) return block def get_tokens_issued_per_block(self, height: int) -> int: """Return the number of tokens issued (aka reward) per block of a given height.""" return daa.get_tokens_issued_per_block(height) def submit_block(self, blk: Block, fails_silently: bool = True) -> bool: """Used by submit block from all mining APIs. """ tips = self.tx_storage.get_best_block_tips() parent_hash = blk.get_block_parent_hash() if parent_hash not in tips: self.log.warn('submit_block(): Ignoring block: parent not a tip', blk=blk.hash_hex) return False return self.propagate_tx(blk, fails_silently=fails_silently) def propagate_tx(self, tx: BaseTransaction, fails_silently: bool = True) -> bool: """Push a new transaction to the network. It is used by both the wallet and the mining modules. :return: True if the transaction was accepted :rtype: bool """ if tx.storage: assert tx.storage == self.tx_storage, 'Invalid tx storage' else: tx.storage = self.tx_storage return self.on_new_tx(tx, fails_silently=fails_silently, propagate_to_peers=True) @cpu.profiler('on_new_tx') def on_new_tx(self, tx: BaseTransaction, *, conn: Optional[HathorProtocol] = None, quiet: bool = False, fails_silently: bool = True, propagate_to_peers: bool = True, skip_block_weight_verification: bool = False, sync_checkpoints: bool = False, partial: bool = False) -> bool: """ New method for adding transactions or blocks that steps the validation state machine. :param tx: transaction to be added :param conn: optionally specify the protocol instance where this tx was received from :param quiet: if True will not log when a new tx is accepted :param fails_silently: if False will raise an exception when tx cannot be added :param propagate_to_peers: if True will relay the tx to other peers if it is accepted :param skip_block_weight_verification: if True will not check the tx PoW :param sync_checkpoints: if True and also partial=True, will try to validate as a checkpoint and set the proper validation state, this is used for adding txs from the sync-checkpoints phase :param partial: if True will accept txs that can't be fully validated yet (because of missing parent/input) but will run a basic validation of what can be validated (PoW and other basic fields) """ assert tx.hash is not None if self.tx_storage.transaction_exists(tx.hash): if not fails_silently: raise InvalidNewTransaction( 'Transaction already exists {}'.format(tx.hash_hex)) self.log.warn('on_new_tx(): Transaction already exists', tx=tx.hash_hex) return False if tx.timestamp - self.reactor.seconds( ) > settings.MAX_FUTURE_TIMESTAMP_ALLOWED: if not fails_silently: raise InvalidNewTransaction( 'Ignoring transaction in the future {} (timestamp={})'. 
format(tx.hash_hex, tx.timestamp)) self.log.warn('on_new_tx(): Ignoring transaction in the future', tx=tx.hash_hex, future_timestamp=tx.timestamp) return False tx.storage = self.tx_storage try: metadata = tx.get_metadata() except TransactionDoesNotExist: if not fails_silently: raise InvalidNewTransaction('missing parent') self.log.warn('on_new_tx(): missing parent', tx=tx.hash_hex) return False if metadata.validation.is_invalid(): if not fails_silently: raise InvalidNewTransaction('previously marked as invalid') self.log.warn('on_new_tx(): previously marked as invalid', tx=tx.hash_hex) return False # if partial=False (the default) we don't even try to partially validate transactions if not partial or (metadata.validation.is_fully_connected() or tx.can_validate_full()): if isinstance(tx, Transaction) and self.tx_storage.is_tx_needed( tx.hash): tx._height_cache = self.tx_storage.needed_index_height(tx.hash) if not metadata.validation.is_fully_connected(): try: tx.validate_full(sync_checkpoints=sync_checkpoints) except HathorError as e: if not fails_silently: raise InvalidNewTransaction( 'full validation failed') from e self.log.warn('on_new_tx(): full validation failed', tx=tx.hash_hex, exc_info=True) return False # The method below adds the tx as a child of the parents # This needs to be called right before the save because we were adding the children # in the tx parents even if the tx was invalid (failing the verifications above) # then I would have a children that was not in the storage tx.update_initial_metadata() self.tx_storage.save_transaction(tx, add_to_indexes=True) try: self.consensus_algorithm.update(tx) except HathorError as e: if not fails_silently: raise InvalidNewTransaction( 'consensus update failed') from e self.log.warn('on_new_tx(): consensus update failed', tx=tx.hash_hex) return False else: assert tx.validate_full(skip_block_weight_verification=True) self.tx_fully_validated(tx) elif sync_checkpoints: metadata.children = self.tx_storage.children_from_deps(tx.hash) try: tx.validate_checkpoint(self.checkpoints) except HathorError: if not fails_silently: raise InvalidNewTransaction('checkpoint validation failed') self.log.warn('on_new_tx(): checkpoint validation failed', tx=tx.hash_hex, exc_info=True) return False self.tx_storage.save_transaction(tx) self.tx_storage.add_to_deps_index(tx.hash, tx.get_all_dependencies()) self.tx_storage.add_needed_deps(tx) else: if isinstance(tx, Block) and not tx.has_basic_block_parent(): if not fails_silently: raise InvalidNewTransaction( 'block parent needs to be at least basic-valid') self.log.warn( 'on_new_tx(): block parent needs to be at least basic-valid', tx=tx.hash_hex) return False if not tx.validate_basic(): if not fails_silently: raise InvalidNewTransaction('basic validation failed') self.log.warn('on_new_tx(): basic validation failed', tx=tx.hash_hex) return False # The method below adds the tx as a child of the parents # This needs to be called right before the save because we were adding the children # in the tx parents even if the tx was invalid (failing the verifications above) # then I would have a children that was not in the storage tx.update_initial_metadata() self.tx_storage.save_transaction(tx) self.tx_storage.add_to_deps_index(tx.hash, tx.get_all_dependencies()) self.tx_storage.add_needed_deps(tx) if tx.is_transaction: self.tx_storage.remove_from_needed_index(tx.hash) try: self.step_validations([tx]) except (AssertionError, HathorError) as e: if not fails_silently: raise InvalidNewTransaction('step validations failed') 
from e self.log.warn('on_new_tx(): step validations failed', tx=tx.hash_hex, exc_info=True) return False if not quiet: ts_date = datetime.datetime.fromtimestamp(tx.timestamp) now = datetime.datetime.fromtimestamp(self.reactor.seconds()) if tx.is_block: self.log.info('new block', tx=tx, ts_date=ts_date, time_from_now=tx.get_time_from_now(now)) else: self.log.info('new tx', tx=tx, ts_date=ts_date, time_from_now=tx.get_time_from_now(now)) if propagate_to_peers: # Propagate to our peers. self.connections.send_tx_to_peers(tx) return True def step_validations(self, txs: Iterable[BaseTransaction]) -> None: """ Step all validations until none can be stepped anymore. """ # cur_txs will be empty when there are no more new txs that reached full # validation because of an initial trigger for ready_tx in txs: assert ready_tx.hash is not None self.tx_storage.remove_ready_for_validation(ready_tx.hash) for tx in map(self.tx_storage.get_transaction, self.tx_storage.next_ready_for_validation()): assert tx.hash is not None tx.update_initial_metadata() try: assert tx.validate_full() except (AssertionError, HathorError): # TODO raise else: self.tx_storage.save_transaction(tx, only_metadata=True, add_to_indexes=True) self.consensus_algorithm.update(tx) # save and process its dependencies even if it became invalid # because invalidation state also has to propagate to children self.tx_storage.remove_ready_for_validation(tx.hash) self.tx_fully_validated(tx) def tx_fully_validated(self, tx: BaseTransaction) -> None: """ Handle operations that need to happen once the tx becomes fully validated. This might happen immediately after we receive the tx, if we have all dependencies already. Or it might happen later. """ assert tx.hash is not None assert self.tx_storage.indexes is not None # Publish to pubsub manager the new tx accepted, now that it's full validated self.pubsub.publish(HathorEvents.NETWORK_NEW_TX_ACCEPTED, tx=tx) self.tx_storage.del_from_deps_index(tx.hash) self.tx_storage.indexes.mempool_tips.update(tx) if self.wallet: # TODO Remove it and use pubsub instead. self.wallet.on_new_tx(tx) def listen(self, description: str, use_ssl: Optional[bool] = None) -> None: endpoint = self.connections.listen(description, use_ssl) if self.hostname: proto, _, _ = description.partition(':') address = '{}://{}:{}'.format(proto, self.hostname, endpoint._port) self.my_peer.entrypoints.append(address) def has_sync_version_capability(self) -> bool: return settings.CAPABILITY_SYNC_VERSION in self.capabilities def add_peer_to_whitelist(self, peer_id): if not settings.ENABLE_PEER_WHITELIST: return if peer_id in self.peers_whitelist: self.log.info('peer already in whitelist', peer_id=peer_id) else: self.peers_whitelist.append(peer_id) def remove_peer_from_whitelist_and_disconnect(self, peer_id: str) -> None: if not settings.ENABLE_PEER_WHITELIST: return if peer_id in self.peers_whitelist: self.peers_whitelist.remove(peer_id) # disconnect from node self.connections.drop_connection_by_peer_id(peer_id)
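# The manager above creates pow_thread_pool with minthreads=0, starts it in
# start(), and in stop() only calls stop() when the pool's started flag is
# set, avoiding errors if the pool never ran. A condensed sketch of that
# guarded lifecycle, with a hypothetical blocking mine() standing in for the
# proof-of-work the pool is meant to resolve:
from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool

pow_pool = ThreadPool(minthreads=0, maxthreads=2, name='Pow thread pool')

def mine(block):
    return block  # CPU-bound nonce search would happen here

def start():
    pow_pool.start()

def resolve_pow(block):
    # Runs mine() off the reactor thread; returns a Deferred with the result.
    return deferToThreadPool(reactor, pow_pool, mine, block)

def stop():
    if pow_pool.started:  # same guard as HathorManager.stop()
        pow_pool.stop()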
class SQLCipherU1DBSync(SQLCipherDatabase):
    """
    Soledad syncer implementation.
    """

    _sync_enc_pool = None

    """
    The name of the database file for the local symmetrically encrypted
    documents to be synced.
    """
    LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db'

    """
    Period or recurrence of the Looping Call that will do the encryption to
    the syncdb (in seconds).
    """
    ENCRYPT_LOOP_PERIOD = 1

    """
    A dictionary that holds locks which avoid multiple sync attempts from
    the same database replica.
    """
    syncing_lock = defaultdict(threading.Lock)

    def __init__(self, opts, soledad_crypto, replica_uid, cert_file,
                 defer_encryption=False):

        self._opts = opts
        self._path = opts.path
        self._crypto = soledad_crypto
        self.__replica_uid = replica_uid
        self._cert_file = cert_file

        self._sync_db_key = opts.sync_db_key
        self._sync_db = None
        self._sync_enc_pool = None

        # we store syncers in a dictionary indexed by the target URL. We also
        # store a hash of the auth info in case auth info expires and we need
        # to rebuild the syncer for that target. The final self._syncers
        # format is the following:
        #
        #   self._syncers = {'<url>': ('<auth_hash>', syncer), ...}

        self._syncers = {}

        # Storage for the documents received during a sync
        self.received_docs = []

        self.running = False

        self._sync_threadpool = None
        self._initialize_sync_threadpool()

        self._reactor = reactor
        self._reactor.callWhenRunning(self._start)

        self._db_handle = None
        self._initialize_main_db()

        # the sync_db is used both for deferred encryption and decryption, so
        # we want to initialize it anyway to allow for all combinations of
        # deferred encryption and decryption configurations.
        self._initialize_sync_db(opts)

        if defer_encryption:
            # initialize syncing queue encryption pool
            self._sync_enc_pool = encdecpool.SyncEncrypterPool(
                self._crypto, self._sync_db)

        self.shutdownID = None

    @property
    def _replica_uid(self):
        return str(self.__replica_uid)

    def _start(self):
        if not self.running:
            self._sync_threadpool.start()
            self.shutdownID = self._reactor.addSystemEventTrigger(
                'during', 'shutdown', self.finalClose)
            self.running = True

    def _defer_to_sync_threadpool(self, meth, *args, **kwargs):
        return deferToThreadPool(
            self._reactor, self._sync_threadpool, meth, *args, **kwargs)

    def _initialize_main_db(self):

        def _init_db():
            self._db_handle = initialize_sqlcipher_db(
                self._opts, check_same_thread=False)
            self._real_replica_uid = None
            self._ensure_schema()
            self.set_document_factory(soledad_doc_factory)

        return self._defer_to_sync_threadpool(_init_db)

    def _initialize_sync_threadpool(self):
        """
        Initialize a ThreadPool with exactly one thread, which will be used
        to run all the network-blocking calls for syncing on a separate
        thread.

        TODO this needs to be ported away from urllib and into twisted async
        calls, and then we can ditch this syncing thread and reintegrate into
        the main reactor.
        """
        # XXX if the number of threads in this thread pool is ever changed,
        # we should make sure that no operations on the database occur
        # before the database has been initialized.
        self._sync_threadpool = ThreadPool(0, 1)

    def _initialize_sync_db(self, opts):
        """
        Initialize the database of symmetrically encrypted documents to be
        synced, and the queue to communicate with subprocess workers.
        :param opts:
        :type opts: SQLCipherOptions
        """
        soledad_assert(opts.sync_db_key is not None)

        sync_db_path = None
        if opts.path != ":memory:":
            sync_db_path = "%s-sync" % opts.path
        else:
            sync_db_path = ":memory:"

        # we copy incoming options because the opts object might be used
        # somewhere else
        sync_opts = SQLCipherOptions.copy(
            opts, path=sync_db_path, create=True)
        self._sync_db = getConnectionPool(
            sync_opts, extra_queries=self._sync_db_extra_init)

    @property
    def _sync_db_extra_init(self):
        """
        Queries for creating tables for the local sync documents db if needed.
        They are passed as extra initialization to initialize_sqlcipher_db.

        :rtype: tuple of strings
        """
        maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)"
        encr = encdecpool.SyncEncrypterPool
        decr = encdecpool.SyncDecrypterPool
        sql_encr_table_query = (maybe_create % (
            encr.TABLE_NAME, encr.FIELD_NAMES))
        sql_decr_table_query = (maybe_create % (
            decr.TABLE_NAME, decr.FIELD_NAMES))
        return (sql_encr_table_query, sql_decr_table_query)

    def sync(self, url, creds=None, defer_decryption=True):
        """
        Synchronize documents with remote replica exposed at url.

        This method defers a sync to a 1-threaded threadpool. The main
        database initialization was deferred to that thread during this
        object's initialization. As there's currently only one thread in that
        threadpool, the db init was queued before this method was called, so
        we don't need to actually wait for the db to be ready. If this ever
        changes, we should add a thread-safe condition to ensure the db is
        ready before using it.

        :param url: The url of the target replica to sync with.
        :type url: str
        :param creds: optional dictionary giving credentials to authorize the
                      operation with the server.
        :type creds: dict
        :param defer_decryption: Whether to defer the decryption process using
                                 the intermediate database. If False,
                                 decryption will be done inline.
        :type defer_decryption: bool

        :return: A Deferred, that will fire with the local generation (type
                 `int`) before the synchronisation was performed.
        :rtype: Deferred
        """
        # the following context manager blocks until the syncing lock can be
        # acquired.
        with self._syncer(url, creds=creds) as syncer:

            def _record_received_docs(result):
                # beware, closure. syncer is in scope.
                self.received_docs = syncer.received_docs
                return result

            # XXX could mark the critical section here...
            d = syncer.sync(defer_decryption=defer_decryption)
            d.addCallback(_record_received_docs)
            return d

    @contextmanager
    def _syncer(self, url, creds=None):
        """
        Accessor for the synchronizer.

        As we reuse the same synchronizer for every sync, there can be only
        one instance synchronizing the same database replica at the same time.
        Because of that, this method blocks until the syncing lock can be
        acquired.

        :param creds: optional dictionary giving credentials to authorize the
                      operation with the server.
        :type creds: dict
        """
        with self.syncing_lock[self._path]:
            syncer = self._get_syncer(url, creds=creds)
            yield syncer

    @property
    def syncing(self):
        lock = self.syncing_lock[self._path]
        acquired_lock = lock.acquire(False)
        if acquired_lock is False:
            return True
        lock.release()
        return False

    def _get_syncer(self, url, creds=None):
        """
        Get a synchronizer for ``url`` using ``creds``.

        :param url: The url of the target replica to sync with.
        :type url: str
        :param creds: optional dictionary giving credentials to authorize the
                      operation with the server.
        :type creds: dict

        :return: A synchronizer.
        :rtype: Synchronizer
        """
        # we want to store at most one syncer for each url, so we also store a
        # hash of the connection credentials and replace the stored syncer for
        # a certain url if credentials have changed.
        h = sha256(json.dumps([url, creds])).hexdigest()
        cur_h, syncer = self._syncers.get(url, (None, None))
        if syncer is None or h != cur_h:
            syncer = SoledadSynchronizer(
                self,
                SoledadHTTPSyncTarget(
                    url,
                    # XXX is the replica_uid ready?
                    self._replica_uid,
                    creds=creds,
                    crypto=self._crypto,
                    cert_file=self._cert_file,
                    sync_db=self._sync_db,
                    sync_enc_pool=self._sync_enc_pool))
            self._syncers[url] = (h, syncer)
        # in order to reuse the same synchronizer multiple times we have to
        # reset its state (i.e. the number of documents received from target
        # and inserted in the local replica).
        syncer.num_inserted = 0
        return syncer

    #
    # Symmetric encryption of syncing docs
    #

    def get_generation(self):
        # FIXME
        # XXX this SHOULD BE a callback
        return self._get_generation()

    def finalClose(self):
        """
        This should only be called by the shutdown trigger.
        """
        self.shutdownID = None
        self._sync_threadpool.stop()
        self.running = False

    def close(self):
        """
        Close the syncer and sync database in an orderly manner.
        """
        # drop all open syncers (clear() avoids mutating the dict while
        # iterating over its keys)
        self._syncers.clear()
        # stop the encryption pool
        if self._sync_enc_pool is not None:
            self._sync_enc_pool.close()
            self._sync_enc_pool = None
        # close the sync database
        if self._sync_db is not None:
            self._sync_db.close()
            self._sync_db = None
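# --- Editor's note: minimal sketch of the pattern above, with illustrative names. ---
# A ThreadPool(0, 1) serializes all blocking work on a single thread, and
# deferToThreadPool() bridges each result back to the reactor as a Deferred; this is
# the mechanism SQLCipherU1DBSync relies on to keep db init and sync calls ordered.
from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool
from twisted.python.threadpool import ThreadPool

serial_pool = ThreadPool(0, 1)
serial_pool.start()
reactor.addSystemEventTrigger('during', 'shutdown', serial_pool.stop)

def blocking_call():
    return 42  # stand-in for a blocking sqlcipher or network call

d = deferToThreadPool(reactor, serial_pool, blocking_call)
d.addCallback(lambda result: result)  # runs strictly after earlier pool work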
class DBScheduler(object):
    '''
    Database operation scheduler.

    We will have one or more read threads and only one write thread.
    '''
    log = logging.getLogger('hkjc.DBScheduler')

    def __init__(self):
        from twisted.internet import reactor  # imported here, inside __init__
        self.reactor = reactor

        engine = get_engine()
        create_schema(engine)

        self.read_pool = ThreadPool(
            minthreads=1, maxthreads=16, name="ReadPool")
        self.write_pool = ThreadPool(
            minthreads=1, maxthreads=1, name="WritePool")
        self.read_pool.start()
        self.write_pool.start()

        self.signals = SignalManager(dispatcher.Any).connect(
            self.stop_threadpools, spider_closed)

        self.counters = defaultdict(lambda: Counter())
        self.cache = defaultdict(lambda: dict())

        self.write_queue = Queue()
        self.writelock = False  # Write-queue busy flag

    def stop_threadpools(self):
        self.read_pool.stop()
        self.write_pool.stop()
        for counter, results in self.counters.iteritems():
            print(counter)
            for modelname, count in results.iteritems():
                print(' ', modelname.__name__, '-', count)

    def _do_save(self):
        assert not isInIOThread()
        while not self.write_queue.empty():
            items = []
            try:
                self.writelock = True
                try:
                    while True:
                        items.append(self.write_queue.get_nowait())
                except Empty:
                    pass
                session = Session()
                try:
                    session.add_all(items)
                    session.commit()
                except:
                    session.rollback()
                    raise
                finally:
                    session.close()
            finally:
                self.writelock = False

    def save(self, obj):
        self.write_queue.put(obj)
        if self.writelock:
            return None
        else:
            return deferToThreadPool(
                self.reactor, self.write_pool, self._do_save)

    def _do_get_id(self, model, unique, fval, fields):
        assert not isInIOThread()
        return Session().query(model).filter(
            getattr(model, unique) == fval).one().id

    @inlineCallbacks
    def get_id(self, model, unique, fields):
        '''
        Get an ID from the cache or from the database.
        If it does not exist, create the item.
        All database operations are done from a separate thread.
        '''
        assert isInIOThread()
        fval = fields[unique]
        try:
            result = self.cache[model][fval]
            self.counters['hit'][model] += 1
            returnValue(result)
        except KeyError:
            self.counters['miss'][model] += 1

        selectors = {unique: fval}
        result, created = yield deferToThreadPool(
            self.reactor, self.read_pool,
            get_or_create, model, fields, **selectors)
        result = result.id
        if created:
            self.counters['db_create'][model] += 1
        else:
            self.counters['db_hit'][model] += 1

        self.cache[model][fval] = result
        returnValue(result)
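# --- Editor's note: illustrative sketch of the write-coalescing idea above. ---
# A single writer thread drains everything queued so far and commits it in one
# transaction; `session_factory` is a hypothetical stand-in for SQLAlchemy's Session.
from queue import Empty, Queue

def drain_and_commit(write_queue, session_factory):
    items = []
    try:
        while True:
            items.append(write_queue.get_nowait())
    except Empty:
        pass
    if not items:
        return
    session = session_factory()
    try:
        session.add_all(items)
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()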
class Validator(object):
    DefaultTransactionFamilies = [
        endpoint_registry
    ]

    def __init__(self,
                 gossip_obj,
                 journal_obj,
                 stat_domains,
                 config,
                 windows_service=False,
                 http_port=None,
                 ):
        '''
        Creates a validator. As a current side-effect, does some
        initialization on its journal_obj argument.

        Args:
            gossip_obj: (gossip object; its LocalNode is a gossip.Node)
            journal_obj: (journal.Journal)
            config: (dict)
            windows_service: (bool)
            http_port: (int)
        '''
        self.status = 'stopped'

        self.config = config
        self.stat_domains = stat_domains

        self.gossip = gossip_obj
        node_obj = gossip_obj.LocalNode
        self._gossip_host = node_obj.NetHost
        self._gossip_port = node_obj.NetAddress
        self._endpoint_host = node_obj.endpoint_host
        self._endpoint_port = node_obj.endpoint_port
        self._endpoint_http_port = http_port

        self.journal = journal_obj

        self.profile = self.config.get('Profile', False)
        if self.profile:
            self.pr = cProfile.Profile()
            self.pr.enable()

        self.windows_service = windows_service

        # flag to indicate that a topology update is in progress
        self._topology_update_in_progress = False
        self.delay_start = self.config['DelayStart']

        # set up signal handlers for shutdown
        if not windows_service:
            signal.signal(signal.SIGTERM, self.handle_shutdown_signal)
            signal.signal(signal.SIGINT, self.handle_shutdown_signal)

        # ---------- Initialize the configuration ----------
        self.initialize_common_configuration()

        # ---------- Initialize the NodeMap ----------
        self.initialize_node_map()

        # ---------- Initialize the Ledger ----------
        self.initialize_ledger_object()

        maxsize = self.config.get("WebPoolSize", 8)
        self.web_thread_pool = ThreadPool(0, maxsize, "WebThreadPool")

    def handle_shutdown_signal(self, signum, frame):
        logger.warn('received shutdown signal')
        self.shutdown()

    def shutdown(self):
        """
        Shutdown the validator. There are several things that need to happen
        on shutdown: 1) disconnect this node from the network, 2) close all
        the databases, and 3) shutdown twisted. We need time for each to
        finish.
""" self.status = 'stopping' if self.profile: self.pr.create_stats() loc = os.path.join(self.config.get('DataDirectory', '/tmp'), '{0}.cprofile'.format( self.config.get('NodeName', str(os.getpid())))) self.pr.dump_stats(loc) # send the transaction to remove this node from the endpoint # registry (or send it to the web server) if self.gossip is not None: self.unregister_endpoint(self.gossip.LocalNode) # Need to wait long enough for all the shutdown packets to be sent out reactor.callLater(1.0, self.handle_ledger_shutdown) def handle_ledger_shutdown(self): self.journal.shutdown() self.gossip.shutdown() # Need to wait long enough for all the shutdown packets to be sent out # if a shutdown packet was the reason for the shutdown reactor.callLater(1.0, self.handle_shutdown) def handle_shutdown(self): self.web_thread_pool.stop() reactor.stop() self.status = 'stopped' def initialize_common_configuration(self): # Handle the common configuration variables if 'NetworkFlowRate' in self.config: token_bucket.TokenBucket.DefaultDripRate = self.config[ 'NetworkFlowRate'] if 'NetworkBurstRate' in self.config: token_bucket.TokenBucket.DefaultDripRate = self.config[ 'NetworkBurstRate'] if 'AdministrationNode' in self.config: logger.info('set administration node to %s', self.config.get('AdministrationNode')) shutdown_message.AdministrationNode = self.config[ 'AdministrationNode'] if 'NetworkDelayRange' in self.config: node.Node.DelayRange = self.config['NetworkDelayRange'] if 'UseFixedDelay' in self.config: node.Node.UseFixedDelay = self.config['UseFixedDelay'] def initialize_node_map(self): self.NodeMap = {} for nodedata in self.config.get("Nodes", []): addr = (socket.gethostbyname(nodedata["Host"]), nodedata["Port"]) nd = node.Node(address=addr, identifier=nodedata["Identifier"], name=nodedata["NodeName"]) self.NodeMap[nodedata["NodeName"]] = nd def initialize_ledger_object(self): assert self.journal for txnfamily in self.DefaultTransactionFamilies: txnfamily.register_transaction_types(self.journal) self.gossip.onNodeDisconnect += self.handle_node_disconnect_event logger.info("starting ledger %s with id %s at network address %s", self.gossip.LocalNode, self.gossip.LocalNode.Identifier[:8], self.gossip.LocalNode.NetAddress) def add_transaction_family(self, txnfamily): txnfamily.register_transaction_types(self.journal) def pre_start(self): if self.delay_start is True: logger.debug("DelayStart is in effect, waiting for /start") reactor.callLater(1, self.pre_start) else: self.status = 'starting' self.start() def start(self): # add blacklist before we attempt any peering self.gossip.blacklist = self.config.get('Blacklist', []) # connect this node into the validator network self.initialize_ledger_connection() def handle_node_disconnect_event(self, nodeid): """ Handle the situation where a peer is marked as disconnected. """ logger.info('node %s dropped, reassess connectivity', nodeid) # first see if we are already handling the situation if self._topology_update_in_progress: logger.info('topology update already in progress') return # there are many possible policies for when to kick off # new topology probes. 
        # connectivity as a lower threshold
        minpeercount = self.config.get("InitialConnectivity", 1)
        peerlist = self.gossip.peer_list()
        if len(peerlist) <= minpeercount:
            def disconnect_callback():
                logger.info('topology update finished, %s peers connected',
                            len(self.gossip.peer_list()))

            logger.info('connectivity has dropped below minimal levels, '
                        'kick off topology update')
            self._topology_update_in_progress = True
            reactor.callLater(2.0, self.initialize_ledger_topology,
                              disconnect_callback)

    def _get_candidate_peers(self):
        """
        Return the candidate (potential) peers to send connection requests;
        in addition to the list of nodes directly specified in the
        configuration file, pull a list from the LedgerURL. Once the list of
        potential peers is constructed, pick from it those specified in the
        Peers configuration variable. If that is not enough, then pick more
        at random from the list.
        """
        # Continue to support existing config files with single
        # string values.
        if isinstance(self.config.get('LedgerURL'), basestring):
            urls = [self.config.get('LedgerURL')]
        else:
            urls = self.config.get('LedgerURL', [])

        # We randomize the url list here so that we avoid the
        # condition of a small number of validators referencing
        # each other's empty EndpointRegistries forever.
        random.shuffle(urls)
        for url in urls:
            logger.info('attempting to load peers using url %s', url)
            try:
                peers = self.get_endpoint_nodes(url)
                # If the Endpoint Registry is empty, try the next
                # url in the shuffled list
                if len(peers) == 0:
                    continue
                for peer in peers:
                    self.NodeMap[peer.Name] = peer
                break
            except MessageException as e:
                logger.error("Unable to get endpoints from LedgerURL: %s",
                             str(e))

        # We may also be able to rediscover peers via the persistence layer.
        for blockid in self.journal.global_store_map.persistmap_keys():
            blk = self.journal.global_store_map.get_block_store(blockid)
            sto = blk.get_transaction_store('/EndpointRegistryTransaction')
            for key in sto:
                nd = self._endpoint_info_to_node(sto[key])
                self.NodeMap[nd.Name] = nd

        # Build a list of nodes that we can use for the initial connection
        minpeercount = self.config.get("InitialConnectivity", 1)
        peerset = set(self.config.get('Peers', []))
        nodeset = set(self.NodeMap.keys())
        if len(peerset) < minpeercount and len(nodeset) > 0:
            nodeset.discard(self.gossip.LocalNode.Name)
            nodeset = nodeset.difference(peerset)
            peerset = peerset.union(random.sample(list(nodeset), min(
                minpeercount - len(peerset), len(nodeset))))

        return peerset

    def _connect_to_peers(self):
        min_peer_count = self.config.get("InitialConnectivity", 1)
        current_peer_count = len(self.gossip.peer_list())

        logger.debug("peer count is %d of %d",
                     current_peer_count, min_peer_count)

        if current_peer_count < min_peer_count:
            peerset = self._get_candidate_peers()

            # Add the candidate nodes to the gossip object so we can send
            # connect requests to them
            for peername in peerset:
                peer = self.NodeMap.get(peername)
                if peer:
                    logger.info('add peer %s with identifier %s', peername,
                                peer.Identifier)
                    connect_message.send_connection_request(self.gossip, peer)
                else:
                    logger.info('requested connection to unknown peer %s',
                                peername)

            return False
        else:
            return True

    def initialize_ledger_connection(self):
        """
        Connect the ledger to the rest of the network.
""" assert self.journal self.status = 'waiting for initial connections' if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_connection) else: callback = self.start_ledger if self.journal.restored is False: callback = self.start_journal_transfer reactor.callLater(2.0, self.initialize_ledger_topology, callback) def initialize_ledger_topology(self, callback): """ Make certain that there is at least one connected peer and then kick off the configured topology generation protocol. """ logger.debug('initialize ledger topology') if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_topology, callback) return self._topology_update_in_progress = False # and now its time to pick the topology protocol topology = self.config.get("TopologyAlgorithm", "RandomWalk") if topology == "RandomWalk": if 'TargetConnectivity' in self.config: random_walk.TargetConnectivity = self.config[ 'TargetConnectivity'] self.random_walk_initialization(callback) elif topology == "BarabasiAlbert": if 'MaximumConnectivity' in self.config: barabasi_albert.MaximumConnectivity = self.config[ 'MaximumConnectivity'] if 'MinimumConnectivity' in self.config: barabasi_albert.MinimumConnectivity = self.config[ 'MinimumConnectivity'] self.barabasi_initialization(callback) else: logger.error("unknown topology protocol %s", topology) self.shutdown() return def barabasi_initialization(self, callback): logger.info("ledger connections using BarabasiAlbert topology") barabasi_albert.start_topology_update(self.gossip, callback) def random_walk_initialization(self, callback): logger.info("ledger connections using RandomWalk topology") random_walk.start_topology_update(self.gossip, callback) def start_journal_transfer(self): self.status = 'transferring ledger' journal_transfer.start_journal_transfer(self.gossip, self.journal, self.start_ledger) def start_ledger(self): logger.info('ledger initialization complete') self.journal.initialization_complete() self.status = 'started' self.register_endpoint(self.gossip.LocalNode) def register_endpoint(self, node): txn = endpoint_registry.EndpointRegistryTransaction.register_node( node, httpport=self._endpoint_http_port) txn.sign_from_node(node) msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn logger.info('register endpoint %s with name %s', node.Identifier[:8], node.Name) self.gossip.broadcast_message(msg) def unregister_endpoint(self, node): txn = endpoint_registry.EndpointRegistryTransaction \ .unregister_node(node) txn.sign_from_node(node) # Since unregister is often called on shutdown, we really need to make # this a system message for the purpose of sending it out from our own # queue msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn logger.info('unregister endpoint %s with name %s', node.Identifier[:8], node.Name) self.gossip.broadcast_message(msg) def get_endpoint_nodes(self, url): client = EndpointClient(url) nodes = [] for epinfo in client.get_endpoint_list(): nodes.append(self._endpoint_info_to_node(epinfo)) return nodes @staticmethod def _endpoint_info_to_node(epinfo): addr = (socket.gethostbyname(epinfo["Host"]), epinfo["Port"]) nd = node.Node(address=addr, identifier=epinfo["NodeIdentifier"], name=epinfo["Name"]) return nd
class Validator(object):
    DefaultTransactionFamilies = [endpoint_registry]

    def __init__(
            self,
            gossip_obj,
            journal_obj,
            stat_domains,
            config,
            windows_service=False,
            http_port=None,
    ):
        '''
        Creates a validator. As a current side-effect, does some
        initialization on its journal_obj argument.

        Args:
            gossip_obj: (gossip object; its LocalNode is a gossip.Node)
            journal_obj: (journal.Journal)
            config: (dict)
            windows_service: (bool)
            http_port: (int)
        '''
        self.status = 'stopped'

        self.config = config
        self.stat_domains = stat_domains

        self.gossip = gossip_obj
        node_obj = gossip_obj.LocalNode
        self._gossip_host = node_obj.NetHost
        self._gossip_port = node_obj.NetAddress
        self._endpoint_host = node_obj.endpoint_host
        self._endpoint_port = node_obj.endpoint_port
        self._endpoint_http_port = http_port

        self.journal = journal_obj

        self.profile = self.config.get('Profile', False)
        if self.profile:
            self.pr = cProfile.Profile()
            self.pr.enable()

        self.windows_service = windows_service

        # flag to indicate that a topology update is in progress
        self._topology_update_in_progress = False
        self.delay_start = self.config['DelayStart']

        # set up signal handlers for shutdown
        if not windows_service:
            signal.signal(signal.SIGTERM, self.handle_shutdown_signal)
            signal.signal(signal.SIGINT, self.handle_shutdown_signal)

        # ---------- Initialize the configuration ----------
        self.initialize_common_configuration()

        # ---------- Initialize the NodeMap ----------
        self.initialize_node_map()

        # ---------- Initialize the Ledger ----------
        self.initialize_ledger_object()

        maxsize = self.config.get("WebPoolSize", 8)
        self.web_thread_pool = ThreadPool(0, maxsize, "WebThreadPool")

    def handle_shutdown_signal(self, signum, frame):
        logger.warn('received shutdown signal')
        self.shutdown()

    def shutdown(self):
        """
        Shutdown the validator. There are several things that need to happen
        on shutdown: 1) disconnect this node from the network, 2) close all
        the databases, and 3) shutdown twisted. We need time for each to
        finish.
""" self.status = 'stopping' if self.profile: self.pr.create_stats() loc = os.path.join( self.config.get('DataDirectory', '/tmp'), '{0}.cprofile'.format( self.config.get('NodeName', str(os.getpid())))) self.pr.dump_stats(loc) # send the transaction to remove this node from the endpoint # registry (or send it to the web server) if self.gossip is not None: self.unregister_endpoint(self.gossip.LocalNode) # Need to wait long enough for all the shutdown packets to be sent out reactor.callLater(1.0, self.handle_ledger_shutdown) def handle_ledger_shutdown(self): self.journal.shutdown() self.gossip.shutdown() # Need to wait long enough for all the shutdown packets to be sent out # if a shutdown packet was the reason for the shutdown reactor.callLater(1.0, self.handle_shutdown) def handle_shutdown(self): self.web_thread_pool.stop() reactor.stop() self.status = 'stopped' def initialize_common_configuration(self): self.genesis_ledger = self.config.get('GenesisLedger', False) # Handle the common configuration variables if 'NetworkFlowRate' in self.config: token_bucket.TokenBucket.DefaultDripRate = self.config[ 'NetworkFlowRate'] if 'NetworkBurstRate' in self.config: token_bucket.TokenBucket.DefaultDripRate = self.config[ 'NetworkBurstRate'] if 'AdministrationNode' in self.config: logger.info('set administration node to %s', self.config.get('AdministrationNode')) shutdown_message.AdministrationNode = self.config[ 'AdministrationNode'] if 'NetworkDelayRange' in self.config: node.Node.DelayRange = self.config['NetworkDelayRange'] if 'UseFixedDelay' in self.config: node.Node.UseFixedDelay = self.config['UseFixedDelay'] def initialize_node_map(self): self.NodeMap = {} for nodedata in self.config.get("Nodes", []): addr = (socket.gethostbyname(nodedata["Host"]), nodedata["Port"]) nd = node.Node(address=addr, identifier=nodedata["Identifier"], name=nodedata["NodeName"]) self.NodeMap[nodedata["NodeName"]] = nd def initialize_ledger_object(self): assert self.journal for txnfamily in self.DefaultTransactionFamilies: txnfamily.register_transaction_types(self.journal) self.gossip.onNodeDisconnect += self.handle_node_disconnect_event logger.info("starting ledger %s with id %s at network address %s", self.gossip.LocalNode, self.gossip.LocalNode.Identifier[:8], self.gossip.LocalNode.NetAddress) def add_transaction_family(self, txnfamily): txnfamily.register_transaction_types(self.journal) def pre_start(self): if self.delay_start is True: logger.debug("DelayStart is in effect, waiting for /start") reactor.callLater(1, self.pre_start) else: self.status = 'starting' self.start() def start(self): # add blacklist before we attempt any peering self.gossip.blacklist = self.config.get('Blacklist', []) # if this is the genesis ledger then there isn't anything left to do if self.genesis_ledger: self.start_ledger() return # if this isn't the genesis ledger then we need to connect # this node into the validator network self.initialize_ledger_connection() def handle_node_disconnect_event(self, nodeid): """ Handle the situation where a peer is marked as disconnected. """ logger.info('node %s dropped, reassess connectivity', nodeid) # first see if we are already handling the situation if self._topology_update_in_progress: logger.info('topology update already in progress') return # there are many possible policies for when to kick off # new topology probes. 
        # connectivity as a lower threshold
        minpeercount = self.config.get("InitialConnectivity", 1)
        peerlist = self.gossip.peer_list()
        if len(peerlist) <= minpeercount:
            def disconnect_callback():
                logger.info('topology update finished, %s peers connected',
                            len(self.gossip.peer_list()))

            logger.info('connectivity has dropped below minimal levels, '
                        'kick off topology update')
            self._topology_update_in_progress = True
            reactor.callLater(2.0, self.initialize_ledger_topology,
                              disconnect_callback)

    def _get_candidate_peers(self):
        """
        Return the candidate (potential) peers to send connection requests;
        in addition to the list of nodes directly specified in the
        configuration file, pull a list from the LedgerURL. Once the list of
        potential peers is constructed, pick from it those specified in the
        Peers configuration variable. If that is not enough, then pick more
        at random from the list.
        """
        # Continue to support existing config files with single
        # string values.
        if isinstance(self.config.get('LedgerURL'), basestring):
            urls = [self.config.get('LedgerURL')]
        else:
            urls = self.config.get('LedgerURL', [])

        # We randomize the url list here so that we avoid the
        # condition of a small number of validators referencing
        # each other's empty EndpointRegistries forever.
        random.shuffle(urls)
        for url in urls:
            logger.info('attempting to load peers using url %s', url)
            try:
                peers = self.get_endpoint_nodes(url)
                # If the Endpoint Registry is empty, try the next
                # url in the shuffled list
                if len(peers) == 0:
                    continue
                for peer in peers:
                    self.NodeMap[peer.Name] = peer
                break
            except MessageException as e:
                logger.error("Unable to get endpoints from LedgerURL: %s",
                             str(e))

        # We may also be able to rediscover peers via the persistence layer.
        for blockid in self.journal.global_store_map.persistmap_keys():
            blk = self.journal.global_store_map.get_block_store(blockid)
            sto = blk.get_transaction_store('/EndpointRegistryTransaction')
            for key in sto:
                nd = self._endpoint_info_to_node(sto[key])
                self.NodeMap[nd.Name] = nd

        # Build a list of nodes that we can use for the initial connection
        minpeercount = self.config.get("InitialConnectivity", 1)
        peerset = set(self.config.get('Peers', []))
        nodeset = set(self.NodeMap.keys())
        if len(peerset) < minpeercount and len(nodeset) > 0:
            nodeset.discard(self.gossip.LocalNode.Name)
            nodeset = nodeset.difference(peerset)
            peerset = peerset.union(
                random.sample(list(nodeset),
                              min(minpeercount - len(peerset), len(nodeset))))

        return peerset

    def _connect_to_peers(self):
        min_peer_count = self.config.get("InitialConnectivity", 1)
        current_peer_count = len(self.gossip.peer_list())

        logger.debug("peer count is %d of %d",
                     current_peer_count, min_peer_count)

        if current_peer_count < min_peer_count:
            peerset = self._get_candidate_peers()

            # Add the candidate nodes to the gossip object so we can send
            # connect requests to them
            for peername in peerset:
                peer = self.NodeMap.get(peername)
                if peer:
                    logger.info('add peer %s with identifier %s', peername,
                                peer.Identifier)
                    connect_message.send_connection_request(self.gossip, peer)
                    self.gossip.add_node(peer)
                else:
                    logger.info('requested connection to unknown peer %s',
                                peername)

            return False
        else:
            return True

    def initialize_ledger_connection(self):
        """
        Connect the ledger to the rest of the network.
""" assert self.journal self.status = 'waiting for initial connections' if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_connection) else: callback = self.start_ledger if self.journal.restored is False: callback = self.start_journal_transfer reactor.callLater(2.0, self.initialize_ledger_topology, callback) def initialize_ledger_topology(self, callback): """ Make certain that there is at least one connected peer and then kick off the configured topology generation protocol. """ logger.debug('initialize ledger topology') if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_topology, callback) return self._topology_update_in_progress = False # and now its time to pick the topology protocol topology = self.config.get("TopologyAlgorithm", "RandomWalk") if topology == "RandomWalk": if 'TargetConnectivity' in self.config: random_walk.TargetConnectivity = self.config[ 'TargetConnectivity'] self.random_walk_initialization(callback) elif topology == "BarabasiAlbert": if 'MaximumConnectivity' in self.config: barabasi_albert.MaximumConnectivity = self.config[ 'MaximumConnectivity'] if 'MinimumConnectivity' in self.config: barabasi_albert.MinimumConnectivity = self.config[ 'MinimumConnectivity'] self.barabasi_initialization(callback) else: logger.error("unknown topology protocol %s", topology) self.shutdown() return def barabasi_initialization(self, callback): logger.info("ledger connections using BarabasiAlbert topology") barabasi_albert.start_topology_update(self.gossip, callback) def random_walk_initialization(self, callback): logger.info("ledger connections using RandomWalk topology") random_walk.start_topology_update(self.gossip, callback) def start_journal_transfer(self): self.status = 'transferring ledger' journal_transfer.start_journal_transfer(self.gossip, self.journal, self.start_ledger) def start_ledger(self): logger.info('ledger initialization complete') self.journal.initialization_complete() self.status = 'started' self.register_endpoint(self.gossip.LocalNode) def register_endpoint(self, node): txn = endpoint_registry.EndpointRegistryTransaction.register_node( node, httpport=self._endpoint_http_port) txn.sign_from_node(node) msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn logger.info('register endpoint %s with name %s', node.Identifier[:8], node.Name) self.gossip.broadcast_message(msg) def unregister_endpoint(self, node): txn = endpoint_registry.EndpointRegistryTransaction \ .unregister_node(node) txn.sign_from_node(node) # Since unregister is often called on shutdown, we really need to make # this a system message for the purpose of sending it out from our own # queue msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn logger.info('unregister endpoint %s with name %s', node.Identifier[:8], node.Name) self.gossip.broadcast_message(msg) def get_endpoint_nodes(self, url): client = EndpointClient(url) nodes = [] for epinfo in client.get_endpoint_list(): nodes.append(self._endpoint_info_to_node(epinfo)) return nodes @staticmethod def _endpoint_info_to_node(epinfo): addr = (socket.gethostbyname(epinfo["Host"]), epinfo["Port"]) nd = node.Node(address=addr, identifier=epinfo["NodeIdentifier"], name=epinfo["Name"]) return nd
class HathorManager:
    """ HathorManager manages the node with the help of other specialized classes.

    Its primary objective is to handle DAG-related matters, ensuring that the
    DAG is always valid and connected.
    """

    class NodeState(Enum):
        # This node is still initializing
        INITIALIZING = 'INITIALIZING'

        # This node is ready to establish new connections, sync, and exchange transactions.
        READY = 'READY'

    def __init__(self, reactor: IReactorCore, peer_id: Optional[PeerId] = None, network: Optional[str] = None,
                 hostname: Optional[str] = None, pubsub: Optional[PubSubManager] = None,
                 wallet: Optional[BaseWallet] = None, tx_storage: Optional[TransactionStorage] = None,
                 peer_storage: Optional[Any] = None, default_port: int = 40403, wallet_index: bool = False,
                 stratum_port: Optional[int] = None, min_block_weight: Optional[int] = None,
                 ssl: bool = True) -> None:
        """
        :param reactor: Twisted reactor which handles the mainloop and the events.
        :param peer_id: Id of this node. If not given, a new one is created.
        :param network: Name of the network this node participates in. Usually it is either testnet or mainnet.
        :type network: string

        :param hostname: The hostname of this node. It is used to generate its entrypoints.
        :type hostname: string

        :param pubsub: If not given, a new one is created.
        :type pubsub: :py:class:`hathor.pubsub.PubSubManager`

        :param tx_storage: If not given, a :py:class:`TransactionMemoryStorage` one is created.
        :type tx_storage: :py:class:`hathor.transaction.storage.transaction_storage.TransactionStorage`

        :param peer_storage: If not given, a new one is created.
        :type peer_storage: :py:class:`hathor.p2p.peer_storage.PeerStorage`

        :param default_port: Network default port. It is used when only ip addresses are discovered.
        :type default_port: int

        :param wallet_index: Whether to add a wallet index to the storage.
        :type wallet_index: bool

        :param stratum_port: Stratum server port. Stratum server will only be created if it is not None.
        :type stratum_port: Optional[int]

        :param min_block_weight: Minimum weight for blocks.
        :type min_block_weight: Optional[int]
        """
        from hathor.p2p.factory import HathorServerFactory, HathorClientFactory
        from hathor.p2p.manager import ConnectionsManager
        from hathor.transaction.storage.memory_storage import TransactionMemoryStorage
        from hathor.metrics import Metrics

        self.log = logger.new()

        self.reactor = reactor
        if hasattr(self.reactor, 'addSystemEventTrigger'):
            self.reactor.addSystemEventTrigger('after', 'shutdown', self.stop)

        self.state: Optional[HathorManager.NodeState] = None
        self.profiler: Optional[Any] = None

        # Hostname, used to be accessed by other peers.
        self.hostname = hostname

        # Remote address, which can be different from local address.
        self.remote_address = None

        self.my_peer = peer_id or PeerId()
        self.network = network or 'testnet'

        # XXX Should we use a singleton or a new PeerStorage?
        # [msbrogli 2018-08-29]
        self.pubsub = pubsub or PubSubManager(self.reactor)
        self.tx_storage = tx_storage or TransactionMemoryStorage()
        self.tx_storage.pubsub = self.pubsub
        if wallet_index and self.tx_storage.with_index:
            self.tx_storage.wallet_index = WalletIndex(self.pubsub)
            self.tx_storage.tokens_index = TokensIndex()

        self.avg_time_between_blocks = settings.AVG_TIME_BETWEEN_BLOCKS
        self.min_block_weight = min_block_weight or settings.MIN_BLOCK_WEIGHT
        self.min_tx_weight = settings.MIN_TX_WEIGHT

        self.metrics = Metrics(
            pubsub=self.pubsub,
            avg_time_between_blocks=self.avg_time_between_blocks,
            tx_storage=tx_storage,
            reactor=self.reactor,
        )

        self.consensus_algorithm = ConsensusAlgorithm()

        self.peer_discoveries: List[PeerDiscovery] = []

        self.ssl = ssl
        self.server_factory = HathorServerFactory(self.network, self.my_peer, node=self, use_ssl=ssl)
        self.client_factory = HathorClientFactory(self.network, self.my_peer, node=self, use_ssl=ssl)
        self.connections = ConnectionsManager(self.reactor, self.my_peer, self.server_factory, self.client_factory,
                                              self.pubsub, self, ssl)

        self.wallet = wallet
        if self.wallet:
            self.wallet.pubsub = self.pubsub
            self.wallet.reactor = self.reactor

        # When manager is in test mode we reduce the weight of blocks/transactions.
        self.test_mode: int = 0

        # Multiplier coefficient to adjust the minimum weight of a normal tx to 18
        self.min_tx_weight_coefficient = 1.6
        # Amount in which tx min weight reaches the middle point between the minimum and maximum weight.
        self.min_tx_weight_k = 100

        self.stratum_factory = StratumFactory(manager=self, port=stratum_port) if stratum_port else None
        # Set stratum factory for metrics object
        self.metrics.stratum_factory = self.stratum_factory

        self._allow_mining_without_peers = False

        # Thread pool used to resolve pow when sending tokens
        self.pow_thread_pool = ThreadPool(minthreads=0, maxthreads=settings.MAX_POW_THREADS,
                                          name='Pow thread pool')

        # List of addresses to listen for new connections (eg: [tcp:8000])
        self.listen_addresses: List[str] = []

    def start(self) -> None:
        """ A factory must be started only once. And it is usually automatically started.
        """
        self.log.info('Starting HathorManager...')
        self.log.info('Network: {network}', network=self.network)
        self.state = self.NodeState.INITIALIZING
        self.pubsub.publish(HathorEvents.MANAGER_ON_START)
        self.connections.start()
        self.pow_thread_pool.start()

        # Initialize manager's components.
        self._initialize_components()

        for description in self.listen_addresses:
            self.listen(description, ssl=self.ssl)

        for peer_discovery in self.peer_discoveries:
            peer_discovery.discover_and_connect(self.connections.connect_to)

        self.start_time = time.time()

        # Metrics starts capturing data.
        self.metrics.start()

        if self.wallet:
            self.wallet.start()

        if self.stratum_factory:
            self.stratum_factory.start()

    def stop(self) -> Deferred:
        waits = []

        self.log.info('Stopping HathorManager...')
        self.connections.stop()
        self.pubsub.publish(HathorEvents.MANAGER_ON_STOP)
        if self.pow_thread_pool.started:
            self.pow_thread_pool.stop()

        # Metrics stops capturing data.
        self.metrics.stop()

        if self.wallet:
            self.wallet.stop()

        if self.stratum_factory:
            wait_stratum = self.stratum_factory.stop()
            if wait_stratum:
                waits.append(wait_stratum)

        return defer.DeferredList(waits)

    def start_profiler(self) -> None:
        """ Start profiler. It can be activated from a web resource, as well.
""" if not self.profiler: import cProfile self.profiler = cProfile.Profile() self.profiler.enable() def stop_profiler(self, save_to: Optional[str] = None) -> None: """ Stop the profile and optionally save the results for future analysis. :param save_to: path where the results will be saved :type save_to: str """ assert self.profiler is not None self.profiler.disable() if save_to: self.profiler.dump_stats(save_to) def _initialize_components(self) -> None: """You are not supposed to run this method manually. You should run `doStart()` to initialize the manager. This method runs through all transactions, verifying them and updating our wallet. """ self.log.info('Initializing node...') if self.wallet: self.wallet._manually_initialize() t0 = time.time() t1 = t0 cnt = 0 # self.start_profiler() for tx in self.tx_storage._topological_sort(): assert tx.hash is not None t2 = time.time() if t2 - t1 > 5: ts_date = datetime.datetime.fromtimestamp( self.tx_storage.latest_timestamp) self.log.info( 'Verifying transations in storage... avg={avg:.4f} tx/s total={total} (latest timedate: {ts})', avg=cnt / (t2 - t0), total=cnt, ts=ts_date, ) t1 = t2 cnt += 1 try: assert self.on_new_tx(tx, quiet=True, fails_silently=False) except (InvalidNewTransaction, TxValidationError): pretty_json = json.dumps(tx.to_json(), indent=4) self.log.error( 'An unexpected error occurred when initializing {tx.hash_hex}\n' '{pretty_json}', tx=tx, pretty_json=pretty_json) raise if time.time() - t2 > 1: self.log.warn( 'Warning: {} took {} seconds to be processed.'.format( tx.hash.hex(), time.time() - t2)) # self.stop_profiler(save_to='profiles/initializing.prof') self.state = self.NodeState.READY self.log.info( 'Node successfully initialized (total={total}, avg={avg:.2f} tx/s in {dt} seconds).', total=cnt, avg=cnt / (t2 - t0), dt=t2 - t0, ) def add_listen_address(self, addr: str) -> None: self.listen_addresses.append(addr) def add_peer_discovery(self, peer_discovery: PeerDiscovery) -> None: self.peer_discoveries.append(peer_discovery) def get_new_tx_parents(self, timestamp: Optional[float] = None) -> List[bytes]: """Select which transactions will be confirmed by a new transaction. :return: The hashes of the parents for a new transaction. :rtype: List[bytes(hash)] """ timestamp = timestamp or self.reactor.seconds() ret = list(self.tx_storage.get_tx_tips(timestamp - 1)) random.shuffle(ret) ret = ret[:2] if len(ret) == 1: # If there is only one tip, let's randomly choose one of its parents. parents = list(self.tx_storage.get_tx_tips(ret[0].begin - 1)) ret.append(random.choice(parents)) assert len(ret) == 2, 'timestamp={} tips={}'.format( timestamp, [x.hex() for x in self.tx_storage.get_tx_tips(timestamp - 1)]) return [x.data for x in ret] def allow_mining_without_peers(self) -> None: """Allow mining without being synced to at least one peer. It should be used only for debugging purposes. """ self._allow_mining_without_peers = True def can_start_mining(self) -> bool: """ Return whether we can start mining. """ if self._allow_mining_without_peers: return True return self.connections.has_synced_peer() def generate_mining_block( self, timestamp: Optional[float] = None, parent_block_hash: Optional[bytes] = None, data: bytes = b'', address: Optional[bytes] = None, merge_mined: bool = False) -> Union[Block, MergeMinedBlock]: """ Generates a block ready to be mined. The block includes new issued tokens, parents, and the weight. 
        :return: A block ready to be mined
        :rtype: :py:class:`hathor.transaction.Block`
        """
        from hathor.transaction.scripts import create_output_script

        if not timestamp:
            timestamp = max(self.tx_storage.latest_timestamp, self.reactor.seconds())
        if parent_block_hash is None:
            tip_blocks = self.tx_storage.get_best_block_tips(timestamp)
        else:
            tip_blocks = [parent_block_hash]
        parent_block = self.tx_storage.get_transaction(random.choice(tip_blocks))
        if not parent_block.is_genesis and timestamp - parent_block.timestamp > settings.MAX_DISTANCE_BETWEEN_BLOCKS:
            timestamp = parent_block.timestamp + settings.MAX_DISTANCE_BETWEEN_BLOCKS
        assert timestamp is not None
        tip_txs = self.get_new_tx_parents(timestamp - 1)

        assert len(tip_blocks) >= 1
        assert len(tip_txs) == 2

        parents = [parent_block.hash] + tip_txs

        parents_tx = [self.tx_storage.get_transaction(x) for x in parents]

        timestamp1 = int(timestamp)
        timestamp2 = max(x.timestamp for x in parents_tx) + 1

        if address is None:
            if self.wallet is None:
                raise ValueError('No wallet available and no mining address given')
            address = self.wallet.get_unused_address_bytes(mark_as_used=False)

        height = parent_block.get_metadata().height + 1
        amount = self.get_tokens_issued_per_block(height)

        output_script = create_output_script(address)
        tx_outputs = [TxOutput(amount, output_script)]

        cls: Union[Type['Block'], Type['MergeMinedBlock']]
        if merge_mined:
            cls = MergeMinedBlock
        else:
            cls = Block
        blk = cls(outputs=tx_outputs, parents=parents, storage=self.tx_storage, data=data)
        blk.timestamp = max(timestamp1, timestamp2)
        blk.weight = self.calculate_block_difficulty(blk)
        return blk

    def get_tokens_issued_per_block(self, height: int) -> int:
        """Return the number of tokens issued (aka reward) per block of a given height."""
        return hathor.util._get_tokens_issued_per_block(height)

    def validate_new_tx(self, tx: BaseTransaction) -> bool:
        """ Process incoming transaction during initialization.
        These transactions came only from storage.
        """
        assert tx.hash is not None

        if self.state == self.NodeState.INITIALIZING:
            if tx.is_genesis:
                return True

        else:
            if tx.is_genesis:
                raise InvalidNewTransaction('Genesis? {}'.format(tx.hash.hex()))

            if tx.timestamp - self.reactor.seconds() > settings.MAX_FUTURE_TIMESTAMP_ALLOWED:
                raise InvalidNewTransaction('Ignoring transaction in the future {} (timestamp={})'.format(
                    tx.hash.hex(), tx.timestamp))

        # Verify the transaction; raises a TxValidationError if the tx is not valid.
        tx.verify()

        if tx.is_block:
            tx = cast(Block, tx)
            assert tx.hash is not None  # XXX: it appears that after the cast the narrowing from the earlier assert is lost

            # Validate minimum block difficulty
            block_weight = self.calculate_block_difficulty(tx)
            if tx.weight < block_weight - settings.WEIGHT_TOL:
                raise InvalidNewTransaction(
                    'Invalid new block {}: weight ({}) is smaller than the minimum weight ({})'.format(
                        tx.hash.hex(), tx.weight, block_weight))

            parent_block = tx.get_block_parent()
            tokens_issued_per_block = self.get_tokens_issued_per_block(parent_block.get_metadata().height + 1)
            if tx.sum_outputs != tokens_issued_per_block:
                raise InvalidNewTransaction(
                    'Invalid number of issued tokens tag=invalid_issued_tokens'
                    ' tx.hash={tx.hash_hex} issued={tx.sum_outputs} allowed={allowed}'.format(
                        tx=tx,
                        allowed=tokens_issued_per_block,
                    ))
        else:
            assert tx.hash is not None  # XXX: it appears that after the cast the narrowing from the earlier assert is lost

            # Validate minimum tx difficulty
            min_tx_weight = self.minimum_tx_weight(tx)
            if tx.weight < min_tx_weight - settings.WEIGHT_TOL:
                raise InvalidNewTransaction(
                    'Invalid new tx {}: weight ({}) is smaller than the minimum weight ({})'.format(
                        tx.hash.hex(), tx.weight, min_tx_weight))

        return True

    def propagate_tx(self, tx: BaseTransaction, fails_silently: bool = True) -> bool:
        """Push a new transaction to the network. It is used by both the wallet and the mining modules.

        :return: True if the transaction was accepted
        :rtype: bool
        """
        if tx.storage:
            assert tx.storage == self.tx_storage, 'Invalid tx storage'
        else:
            tx.storage = self.tx_storage
        return self.on_new_tx(tx, fails_silently=fails_silently)

    def on_new_tx(self, tx: BaseTransaction, *, conn: Optional[HathorProtocol] = None,
                  quiet: bool = False, fails_silently: bool = True, propagate_to_peers: bool = True) -> bool:
        """This method is called when any transaction arrives.

        If `fails_silently` is False, it may raise either InvalidNewTransaction or TxValidationError.

        :return: True if the transaction was accepted
        :rtype: bool
        """
        assert tx.hash is not None
        if self.state != self.NodeState.INITIALIZING:
            if self.tx_storage.transaction_exists(tx.hash):
                if not fails_silently:
                    raise InvalidNewTransaction('Transaction already exists {}'.format(tx.hash.hex()))
                self.log.debug('on_new_tx(): Already have transaction {}'.format(tx.hash.hex()))
                return False

        try:
            assert self.validate_new_tx(tx) is True
        except (InvalidNewTransaction, TxValidationError) as e:
            # Discard invalid Transaction/block.
            self.log.debug('Transaction/Block discarded', tx=tx, exc=e)
            if not fails_silently:
                raise
            return False

        if self.state != self.NodeState.INITIALIZING:
            self.tx_storage.save_transaction(tx)
        else:
            tx.reset_metadata()
            self.tx_storage._add_to_cache(tx)

        try:
            tx.update_initial_metadata()
            self.consensus_algorithm.update(tx)
        except Exception:
            pretty_json = json.dumps(tx.to_json(), indent=4)
            self.log.error('An unexpected error occurred when processing {tx.hash_hex}\n'
                           '{pretty_json}', tx=tx, pretty_json=pretty_json)
            self.tx_storage.remove_transaction(tx)
            raise

        if not quiet:
            ts_date = datetime.datetime.fromtimestamp(tx.timestamp)
            if tx.is_block:
                self.log.info('New block found', tag='new_block', tx=tx, ts_date=ts_date,
                              time_from_now=tx.get_time_from_now())
            else:
                self.log.info('New transaction found', tag='new_tx', tx=tx, ts_date=ts_date,
                              time_from_now=tx.get_time_from_now())

        if propagate_to_peers:
            # Propagate to our peers.
            self.connections.send_tx_to_peers(tx)

        if self.wallet:
            # TODO Remove it and use pubsub instead.
            self.wallet.on_new_tx(tx)

        # Publish to pubsub manager the new tx accepted
        self.pubsub.publish(HathorEvents.NETWORK_NEW_TX_ACCEPTED, tx=tx)

        return True

    def get_weight_decay_amount(self, distance: int) -> float:
        """Return the amount to be reduced in the weight of the block."""
        if not settings.WEIGHT_DECAY_ENABLED:
            return 0.0
        if distance < settings.WEIGHT_DECAY_ACTIVATE_DISTANCE:
            return 0.0

        dt = distance - settings.WEIGHT_DECAY_ACTIVATE_DISTANCE

        # Calculate the number of windows.
        n_windows = 1 + (dt // settings.WEIGHT_DECAY_WINDOW_SIZE)
        return n_windows * settings.WEIGHT_DECAY_AMOUNT

    def calculate_block_difficulty(self, block: Block) -> float:
        """ Calculate block difficulty according to the ascendants of `block`,
        aka DAA/difficulty adjustment algorithm.

        The algorithm used is described in [RFC 22](https://gitlab.com/HathorNetwork/rfcs/merge_requests/22).

        The new difficulty must not be less than `self.min_block_weight`.
        """
        # In test mode we don't validate the block difficulty
        if self.test_mode & TestMode.TEST_BLOCK_WEIGHT:
            return 1.0

        if block.is_genesis:
            return self.min_block_weight

        root = block
        parent = root.get_block_parent()
        N = min(2 * settings.BLOCK_DIFFICULTY_N_BLOCKS, parent.get_metadata().height - 1)
        K = N // 2
        T = self.avg_time_between_blocks
        S = 5
        if N < 10:
            return self.min_block_weight

        blocks: List[Block] = []
        while len(blocks) < N + 1:
            root = root.get_block_parent()
            assert isinstance(root, Block)
            assert root is not None
            blocks.append(root)

        # TODO: revise if this assertion can be safely removed
        assert blocks == sorted(blocks, key=lambda tx: -tx.timestamp)
        blocks = list(reversed(blocks))

        assert len(blocks) == N + 1
        solvetimes, weights = zip(*(
            (block.timestamp - prev_block.timestamp, block.weight)
            for prev_block, block in hathor.util.iwindows(blocks, 2)
        ))
        assert len(solvetimes) == len(weights) == N, f'got {len(solvetimes)}, {len(weights)} expected {N}'

        sum_solvetimes = 0.0
        logsum_weights = 0.0

        prefix_sum_solvetimes = [0]
        for x in solvetimes:
            prefix_sum_solvetimes.append(prefix_sum_solvetimes[-1] + x)

        # Loop through N most recent blocks. N is the most recently solved block.
        for i in range(K, N):
            solvetime = solvetimes[i]
            weight = weights[i]
            x = (prefix_sum_solvetimes[i + 1] - prefix_sum_solvetimes[i - K]) / K
            ki = K * (x - T)**2 / (2 * T * T)
            ki = max(1, ki / S)
            sum_solvetimes += ki * solvetime
            logsum_weights = sum_weights(logsum_weights, log(ki, 2) + weight)

        weight = logsum_weights - log(sum_solvetimes, 2) + log(T, 2)

        # Apply weight decay
        weight -= self.get_weight_decay_amount(block.timestamp - parent.timestamp)

        # Apply minimum weight
        if weight < self.min_block_weight:
            weight = self.min_block_weight

        return weight

    def minimum_tx_weight(self, tx: BaseTransaction) -> float:
        """ Returns the minimum weight for the param tx.

        The minimum is calculated by the following function:

            w = alpha * log(size, 2) + 4.0 / (1 + k / amount) + 4.0

        :param tx: tx to calculate the minimum weight
        :type tx: :py:class:`hathor.transaction.transaction.Transaction`

        :return: minimum weight for the tx
        :rtype: float
        """
        # In test mode we don't validate the minimum weight for tx
        # We do this to allow generating many txs for testing
        if self.test_mode & TestMode.TEST_TX_WEIGHT:
            return 1

        if tx.is_genesis:
            return self.min_tx_weight

        tx_size = len(tx.get_struct())

        # We need to take into consideration the decimal places because it is inside the amount.
        # For instance, if one wants to transfer 20 HTRs, the amount will be 2000.
# Max below is preventing division by 0 when handling authority methods that have no outputs amount = max(1, tx.sum_outputs) / (10**settings.DECIMAL_PLACES) weight = (+self.min_tx_weight_coefficient * log(tx_size, 2) + 4 / (1 + self.min_tx_weight_k / amount) + 4) # Make sure the calculated weight is at least the minimum weight = max(weight, self.min_tx_weight) return weight def listen(self, description: str, ssl: bool = False) -> None: endpoint = self.connections.listen(description, ssl) if self.hostname: proto, _, _ = description.partition(':') address = '{}://{}:{}'.format(proto, self.hostname, endpoint._port) self.my_peer.entrypoints.append(address)
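# --- Editor's note: hedged sketch, not part of the HathorManager API above. ---
# How a CPU-bound proof-of-work could be pushed onto `pow_thread_pool` without
# blocking the reactor; `is_solved` is a hypothetical predicate checking the block
# hash against its weight. The pool must already have been start()-ed.
from twisted.internet import reactor
from twisted.internet.threads import deferToThreadPool

def mine_async(manager, block, is_solved):
    def resolve_pow():
        while not is_solved(block):
            block.nonce += 1
        return block
    return deferToThreadPool(reactor, manager.pow_thread_pool, resolve_pow)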
class WorkerPool:
    """
    A generalized class that can start multiple workers in a thread pool with values
    drawn from the given value factory object, and wait for their completion and for a
    given number of successes (a worker returning something without throwing an exception).
    """

    class TimedOut(WorkerPoolException):
        """Raised if waiting for the target number of successes timed out."""
        def __init__(self, timeout: float, *args, **kwargs):
            self.timeout = timeout
            super().__init__(message_prefix=f"Execution timed out after {timeout}s",
                             *args, **kwargs)

    class OutOfValues(WorkerPoolException):
        """Raised if the value factory is out of values, but the target number was not reached."""
        def __init__(self, *args, **kwargs):
            super().__init__(message_prefix="Execution stopped before completion - not enough available values",
                             *args, **kwargs)

    def __init__(self,
                 worker: Callable[[Any], Any],
                 value_factory: Callable[[int], Optional[List[Any]]],
                 target_successes,
                 timeout: float,
                 stagger_timeout: float = 0,
                 threadpool_size: Optional[int] = None):

        # TODO: make stagger_timeout a part of the value factory?

        self._worker = worker
        self._value_factory = value_factory
        self._timeout = timeout
        self._stagger_timeout = stagger_timeout
        self._target_successes = target_successes

        thread_pool_kwargs = {}
        if threadpool_size is not None:
            thread_pool_kwargs['minthreads'] = threadpool_size
            thread_pool_kwargs['maxthreads'] = threadpool_size
        self._threadpool = ThreadPool(**thread_pool_kwargs)

        # These three tasks must be run in separate threads
        # to avoid being blocked by workers in the thread pool.
        self._bail_on_timeout_thread = Thread(target=self._bail_on_timeout)
        self._produce_values_thread = Thread(target=self._produce_values)
        self._process_results_thread = Thread(target=self._process_results)

        self._successes = {}
        self._failures = {}
        self._started_tasks = 0
        self._finished_tasks = 0

        self._cancel_event = Event()
        self._result_queue = Queue()
        self._target_value = Future()
        self._producer_error = Future()
        self._results_lock = Lock()
        self._threadpool_stop_lock = Lock()
        self._threadpool_stopped = False

    def start(self):
        # TODO: check if already started?
        self._threadpool.start()
        self._produce_values_thread.start()
        self._process_results_thread.start()
        self._bail_on_timeout_thread.start()

    def cancel(self):
        """
        Cancels the tasks enqueued in the thread pool and stops the producer thread.
        """
        self._cancel_event.set()

    def _stop_threadpool(self):
        # This can be called from multiple threads
        # (`join()` itself can be called from multiple threads,
        # and we also attempt to stop the pool from the `_process_results()` thread).
        with self._threadpool_stop_lock:
            if not self._threadpool_stopped:
                self._threadpool.stop()
                self._threadpool_stopped = True

    def _check_for_producer_error(self):
        # Check for any unexpected exceptions in the producer thread
        if self._producer_error.is_set():
            # Will raise if the Future was set with an exception
            self._producer_error.get()

    def join(self):
        """
        Waits for all the threads to finish.
        Can be called several times.
        """
        self._produce_values_thread.join()
        self._process_results_thread.join()
        self._bail_on_timeout_thread.join()

        # In most cases `_threadpool` will be stopped by the `_process_results()` thread.
        # But in case there's some unexpected bug in its code, we're making sure the pool is stopped
        # to avoid the whole process hanging.
        self._stop_threadpool()

        self._check_for_producer_error()

    def _sleep(self, timeout):
        """
        Sleeps for a given timeout, can be interrupted by a cancellation event.
""" if self._cancel_event.wait(timeout): raise Cancelled def block_until_target_successes(self) -> Dict: """ Blocks until the target number of successes is reached. Returns a dictionary of values matched to results. Can be called several times. """ self._check_for_producer_error() result = self._target_value.get() if result == TIMEOUT_TRIGGERED: raise self.TimedOut(timeout=self._timeout, failures=self.get_failures()) elif result == PRODUCER_STOPPED: raise self.OutOfValues(failures=self.get_failures()) return result def get_failures(self) -> Dict: """ Get the current failures, as a dictionary of values to thrown exceptions. """ with self._results_lock: return dict(self._failures) def get_successes(self) -> Dict: """ Get the current successes, as a dictionary of values to worker return values. """ with self._results_lock: return dict(self._successes) def _bail_on_timeout(self): """ A service thread that cancels the pool on timeout. """ if not self._cancel_event.wait(timeout=self._timeout): self._target_value.set(TIMEOUT_TRIGGERED) self._cancel_event.set() def _worker_wrapper(self, value): """ A wrapper that catches exceptions thrown by the worker and sends the results to the processing thread. """ try: # If we're in the cancelled state, interrupt early self._sleep(0) result = self._worker(value) self._result_queue.put(Success(value, result)) except Cancelled as e: self._result_queue.put(e) except BaseException as e: self._result_queue.put(Failure(value, sys.exc_info())) def _process_results(self): """ A service thread that processes worker results and waits for the target number of successes to be reached. """ producer_stopped = False success_event_reached = False while True: result = self._result_queue.get() if result == PRODUCER_STOPPED: producer_stopped = True else: self._finished_tasks += 1 if isinstance(result, Success): with self._results_lock: self._successes[result.value] = result.result len_successes = len(self._successes) if not success_event_reached and len_successes == self._target_successes: # A protection for the case of repeating values. # Only trigger the target value once. success_event_reached = True self._target_value.set(self.get_successes()) if isinstance(result, Failure): with self._results_lock: self._failures[result.value] = result.exc_info if success_event_reached: # no need to continue processing results self.cancel() # to cancel the timeout thread break if producer_stopped and self._finished_tasks == self._started_tasks: self.cancel() # to cancel the timeout thread self._target_value.set(PRODUCER_STOPPED) break self._stop_threadpool() def _produce_values(self): while True: try: with self._results_lock: len_successes = len(self._successes) batch = self._value_factory(len_successes) if not batch: break self._started_tasks += len(batch) for value in batch: # There is a possible race between `callInThread()` and `stop()`, # But we never execute them at the same time, # because `join()` checks that the producer thread is stopped. self._threadpool.callInThread(self._worker_wrapper, value) self._sleep(self._stagger_timeout) except Cancelled: break except BaseException: self._producer_error.set_exception() self.cancel() break self._result_queue.put(PRODUCER_STOPPED)
class DbPool(object, service.Service):
    """\
    Manage a pool of database connections.

    TODO: shrink the pool.
    TODO: issue periodic keepalive requests.
    """
    timeout = 70  # a bit over a minute
    implements(service.IService)

    def __init__(self, *a, **k):
        """\
        Create a pool of database connections, for processing
        (a sequence of) SQL commands in the background.
        """
        k['_single_thread'] = True
        self.db = []
        self.args = a
        self.kwargs = k
        self.lock = Lock()
        self.cleaner = None
        self._tb = {}
        self.stopping = False
        self.threads = ThreadPool(minthreads=2, maxthreads=100, name="Database")
        self.threads.start()
        #reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
        reactor.addSystemEventTrigger('after', 'shutdown', self._dump)
        reactor.addSystemEventTrigger('after', 'shutdown', self.stop2)

    def stop2(self):
        if self.db is not None:
            for db in self.db:
                db[0].close("AfterShutdown Service")
        self.threads.stop()

    def stop(self):
        self.stopping = True

    def _get_db(self, tid=None):
        if self.db:
            r = self.db.pop()[0]
            s = "OLD"
        else:
            r = _DbThread(self)
            s = "NEW"
        if tid:
            debug(s, r.tid, tid)
            r.tid = tid
        else:
            debug(s, r.tid)
        return r

    def _put_db(self, db):
        if self.db is None or self.stopping:
            db.close("Shutdown")
            return db.done
        if db.q is None:
            raise RuntimeError("Queueing closed DB handle")
        for d in self.db:
            if db is d[0]:
                raise RuntimeError("DoubleQueued")
        db.count = 0
        try:
            t = time() + self.timeout
            self.db.append((db, t))
            if self.cleaner is None:
                self.cleaner = reactor.callLater(self.timeout, self._clean)
        except Exception:
            print_exc()
        else:
            debug("BACK", db.tid)

    def _clean(self):
        self.cleaner = None
        t = time()
        while self.db and self.db[0][1] <= t:
            db = self.db.pop(0)[0]
            db.close("Timeout")
        if self.db:
            self.cleaner = reactor.callLater(self.db[0][1] - t, self._clean)

    def __del__(self):
        if self.cleaner:
            # cancel the pending cleanup call
            self.cleaner.cancel()
            self.cleaner = None
        while self.db:
            db = self.db.pop(0)[0]
            db.close("Nonref Parent")

    def stopService(self):
        super(DbPool, self).stopService()
        if self.cleaner:
            self.cleaner.cancel()
            self.cleaner = None
        dbl = self.db
        self.db = None
        dl = []
        for db in dbl:
            db = db[0]
            db.close("Shutdown Service")
            dl.append(db.done)
        return DeferredList(dl)

    def __call__(self, job=None, retry=0):
        """\
        Get a new connection from the database pool (or start a new one)
        and return a thread handler. Usage:

        >>> @inlineCallbacks
        >>> def something(...):
        >>>     dbpool = DbPool(...)  # arguments like sqlmix.Db()
        >>>     with dbpool() as db:
        >>>         d = db.Do("...")
        >>>         assert(isinstance(d,twisted.internet.defer.Deferred))
        >>>         res = yield d

        The transaction will be committed if you leave the "with" block
        normally or with a CommitThread exception. Otherwise, it will be
        rolled back.

        Note that you must use the @inlineCallbacks method if you want to
        use the database connection within the block. Otherwise, control
        will have left the "with" block and the connection will be dead.

        Alternately, you can pass a procedure and an optional repeat count:

        >>> def proc(db):
        >>>     d = db.Do("...")
        >>>     return d
        >>> d = dbpool(proc, 10)

        The procedure will be retried up to 10 times if there are errors;
        if they persist, the first error will be re-raised.
""" if not job: return self._get_db() return self._call(job,retry) @inlineCallbacks def _call(self, job, retry): global tid tid += 1 mtid = tid debug("STARTCALL",job,retry,mtid) e1 = None try: while True: db = self._get_db(mtid) self._note(db) try: debug("CALL JOB",mtid) d = job(db) debug("RET JOB",mtid,d) def pr(r): debug("RES JOB",mtid,r) return r d.addBoth(pr) res = yield d except (EnvironmentError,NameError): self._denote(db) yield db.rollback() raise except Exception: self._denote(db) yield db.rollback() if retry: retry -= 1 continue raise except BaseException: self._denote(db) yield db.rollback() raise else: self._denote(db) if isinstance(res,BaseException): yield db.rollback() if isinstance(res,(EnvironmentError,NameError)): returnValue( res ) elif isinstance(res,Exception): if retry: retry -= 1 continue returnValue( res ) else: # BaseException returnValue( res ) else: yield db.commit() returnValue( res ) finally: debug("ENDCALL",job,retry) def _note(self,x): if not _DEBUG: return import inspect self._tb[x.tid] = inspect.stack(1) def _denote(self,x): if not _DEBUG: return del self._tb[x.tid] def _dump(self): if not _DEBUG: return for a,b in self._tb.items(): #(<frame object at 0x8a1b724>, '/mnt/daten/src/git/sqlmix/sqlmix/twisted.py', 250, '_note', ['\t\tself._tb[x.tid] = inspect.stack(1)\n'], 0) print >>sys.stderr,"Stack",a for fr,f,l,fn,lin,lini in b[::-1]: if fn == "__call__": break print >>sys.stderr,"Line %d in %s: %s" % (l,f,fn) print >>sys.stderr,"\t"+lin[lini].strip()
class DockerClient: """ Asynchronous Docker client (living on a background thread pool). """ _docker = docker log = txaio.make_logger() CONSOLE_HISTORY = 60 WAIT_TIMEOUT = 1 EXCLUDE_DIRS_ANY = ['.cache'] # type: List[str] def __init__(self, reactor, controller): """ Set up our async Docker interface. """ self._reactor = reactor self._controller = controller self._finished = True self._channels = None self._threadpool = None self._events = None def console(self, section, status): self.log.info(f'docker - {section} - {status}') def startup(self): """ Startup Docker client. """ if not self._finished: self.log.warn('Docker client already running!') return self.console('module', 'starting') self._finished = False self._channels = Channels() # dedicated threadpool for docker work self._threadpool = ThreadPool(minthreads=4, maxthreads=100, name='docker') self._threadpool.start() # our 'events' pub/sub docker events emulator threads.deferToThreadPool(self._reactor, self._threadpool, self.events) # our 'logs' pub/sub docker console output threads.deferToThreadPool(self._reactor, self._threadpool, self.logs) # our 'keepalive' monitor threads.deferToThreadPool(self._reactor, self._threadpool, self.keepalive) self.console('module', 'started') def shutdown(self): """ Shutdown Docker client. """ if self._finished: self.console('module', 'already stopped') return self.console('module', 'stopping') self._finished = True self.console('keepalive', 'stopping') if self._events: self.console('events', 'stopping') self._events.close() if self._threadpool: self.console('threads', 'stopping') self._threadpool.stop() self.console('module', 'stopped') def keepalive(self): """ Monitor all our active channels and expire any once keepalive's stop """ self.console('keepalive', 'started') while not self._finished: self._channels.expire() for x in range(10): if self._finished: break time.sleep(self.WAIT_TIMEOUT) self.console('keepalive', 'stopped') def logs(self): """ Forward console logs from containers back into Crossbar """ self.console('logs', 'started') while not self._finished: worklist = self._channels.select(self.WAIT_TIMEOUT) for (key, events) in worklist: id = key.data['id'] line = os.read(key.fd, 8192) if not line: self._channels.silence(id) continue tty_id = self._channels.get_tty(id) if tty_id >= 0: self._reactor.callFromThread( self._controller.publish, f'crossbar.worker.{self._controller._uri_prefix}.docker.tty_{tty_id}', {'line': line.decode('utf-8')}) time.sleep(0.1) self.console('logs', 'stopped') def events(self): """ Called from node controller in a background thread to watch (blocking!) for Docker events and publish those as WAMP events on the main thread. """ self.console('events', 'started') # DOCKER records logs with 1 second granularity, so it will potentiall have MANY lines with # the same timestamp. Asking for timestamp + delta is unsafe as it can only ask for the # next second, which will potentially lose all records logged which were effectively logged # against the previous second, but happened "after" that last call to "events". Docker "should" # log with time.time() or similar, but sadly ... # IF there is a problem with "events" in docker, it needs to be identified and fixed. # - this routine has been live on "demo1" for 9 months with no problems reported. while not self._finished: # # "events" will close if docker is restarted, we aim to survive that event ... 
#
            try:
                self._events = self._docker.from_env().events()
                for event in self._events:
                    if self._finished:
                        break
                    # json.loads() accepts the raw bytes from the event stream
                    event = json.loads(event)
                    ident = event.get('id')
                    if not ident:
                        continue
                    etype = event.get('Type')
                    eactn = event.get('Action')
                    topic = u'crossbar.worker.{}.docker.on_{}_{}'.format(
                        self._controller._uri_prefix, etype, eactn)
                    if etype == 'container' and eactn == 'restart':
                        self.watch(ident, self._channels.get_tty(ident))
                    try:
                        payload = {'id': ident}
                        self.log.debug('publish : {topic} => {packet}',
                                       topic=topic,
                                       packet=payload)
                        if self._controller:
                            self._reactor.callFromThread(
                                self._controller.publish, topic, payload)
                    except Exception as e:
                        self.log.error(
                            'Error: not able to handle event type :: {} ({})'.format(topic, e))
            except Exception as e:
                self.log.error(f'error in "events" - {str(e)}')

        self.console('events', 'stopped')

    @inlineCallbacks
    def create(self, image, kwargs):
        """
        Create a new container and get it ready to run
        """
        def shim(image, **kwargs):
            client = self._docker.from_env()
            try:
                container = client.containers.create(image, **kwargs)
                return {'id': container.id}
            except docker.errors.ImageNotFound:
                self.log.info('No Image ({image}) attempting to pull', image=image)
                try:
                    client.images.pull(image)
                    container = client.containers.create(image, **kwargs)
                    return {'id': container.id}
                except docker.errors.APIError:
                    # Exception() takes a single pre-formatted message
                    raise Exception('Docker failed to pull ({})'.format(image))

        self.log.debug('docker create :: {image} -> {kw}', image=image, kw=kwargs)
        kwargs['detach'] = True
        kwargs['tty'] = True
        kwargs['stdin_open'] = True
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool,
                                                shim, image, **kwargs))

    @inlineCallbacks
    def get_info(self):
        """
        Recover information about our docker installation.
        Shell command: ``crossbar shell --realm mrealm1 show docker node1``
        """
        def shim():
            return self._docker.from_env().info()

        self.log.debug('docker get_info')
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim))

    @inlineCallbacks
    def get_containers(self):
        """
        Recover a list of container ID's
        """
        def shim():
            return [
                c.id for c in self._docker.from_env().containers.list(all=True)
            ]

        self.log.debug('docker get_containers')
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim))

    @inlineCallbacks
    def get_container(self, id):
        """
        Recover information about one specific container (by id)
        """
        def shim(id):
            try:
                return self._docker.from_env().containers.get(id).attrs
            except Exception as e:
                return {
                    'error': 'unable to get container details',
                    'traceback': str(e)
                }

        self.log.debug('docker get_container -> {id}', id=id)
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool,
                                                shim, id))

    @inlineCallbacks
    def get_images(self):
        """
        Recover a list of image ID's
        Shell command: ``crossbar shell --realm mrealm1 list docker-images node1``
        """
        def shim():
            return [c.id for c in self._docker.from_env().images.list()]

        self.log.debug('docker get_images')
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim))

    @inlineCallbacks
    def delete_image(self, id):
        """
        Purge old images
        """
        def shim():
            try:
                return self._docker.from_env().images.remove(id)
            except Exception as e:
                self.log.debug('delete_image failed: {err}', err=str(e))
                return {
                    'error': 'unable to remove image',
                    'traceback': str(e)
                }

        self.log.debug('docker delete_image')
        return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim))

    @inlineCallbacks
    def get_image(self, id):
        """
        Recover information about one specific image (by id)
        Shell command: ``crossbar shell --realm
mrealm1 show docker-image node1 4bbb66`` """ def shim(id): try: return self._docker.from_env().images.get(id).attrs except Exception as e: return { 'error': 'unable to get image', 'traceback': str(e) } self.log.debug('docker get_image -> {id}', id=id) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id)) @inlineCallbacks def df(self): """ Get information relating to docker's usage of available storage """ def shim(): return self._docker.from_env().df() self.log.debug('docker df') return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim)) @inlineCallbacks def ping(self): """ Bounce a message off docket to see if it's running """ def shim(): try: return self._docker.from_env().ping() except Exception: return False self.log.debug('docker ping') return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim)) @inlineCallbacks def version(self): """ Get the version information of our docker instance """ def shim(): return self._docker.from_env().version() self.log.debug('docker version') return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim)) @inlineCallbacks def container(self, id, cmd): """ Operate on a specific container (by id) """ def shim(id, cmd): container = self._docker.from_env().containers.get(id) if hasattr(container, cmd): return getattr(container, cmd)() raise Exception('no such command :: {}'.format(cmd)) self.log.debug('docker container -> {id} + {cmd}', id=id, cmd=cmd) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id, cmd)) @inlineCallbacks def start(self, id): """ Specific container routine that needs a non-default timeout """ def shim(id): container = self._docker.from_env().containers.get(id) status = container.start() tty_id = self._channels.get_tty(id) if tty_id >= 0: client = docker.APIClient() params = { 'stdin': 1, 'stdout': 1, 'stderr': 1, 'stream': 1, 'timestamps': 0, 'logs': 0 } socket = client.attach_socket(id, params) self._channels.close(id) self._channels.create(id, socket, tty_id) status = {'status': 'OK', 'id': id, 'tty_id': tty_id} return status self.log.debug('docker container start -> {id}', id=id) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id)) @inlineCallbacks def restart(self, id): """ Specific container routine that needs a non-default timeout """ def shim(id): container = self._docker.from_env().containers.get(id) try: container.restart(timeout=1) except Exception as e: self.log.error( 'Exception while trying to restart container') self.log.error(str(e)) tty_id = self._channels.get_tty(id) client = self._docker.APIClient() params = { 'stdin': 1, 'stdout': 1, 'stderr': 1, 'stream': 1, 'timestamps': 0, 'logs': 0 } socket = client.attach_socket(id, params) self._channels.close(id) self._channels.create(id, socket, tty_id) return {'status': 'OK', 'id': id, 'tty_id': tty_id} self.log.info('docker container restart -> {id}', id=id) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id)) @inlineCallbacks def image(self, id, cmd): """ Operate on a specific image (by id) """ def shim(id, cmd): image = self._docker.from_env().images.get(id) if hasattr(image, cmd): return getattr(image, cmd)() raise Exception('no such command :: {}'.format(cmd)) self.log.debug('docker image -> {id} + {cmd}', id=id, cmd=cmd) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id, cmd)) @inlineCallbacks def prune(self, filter): """ Prune docker images """ def shim(filter): 
return self._docker.from_env().images.prune(filter) self.log.debug('docker prune -> {filter}', filter=filter) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, filter)) @inlineCallbacks def backlog(self, id): """ Request historical console logs / buffer """ def shim(id): if not self._channels.exists(id): return {'status': 'NOTFOUND'} client = self._docker.from_env() try: container = client.containers.get(id) # FIXME: NotFound except: return {'status': 'OK', 'packet': ''} lines = container.logs(stdout=1, stderr=1, stream=0, timestamps=1, tail=60) return { 'status': 'OK', 'packet': lines[-16384:].decode('utf-8') } self.log.debug('docker backlog -> {id}', id=id) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id)) def request_tty(self, id): return {'status': 'OK', 'tty_id': self._channels.get_next_id(id)} @inlineCallbacks def watch(self, id, tty_id): """ Watch the console of the specified container """ def shim(id): try: client = self._docker.from_env() container = client.containers.get(id) except docker.errors.NotFound: return {'status': 'NOTFOUND', 'packet': ''} if self._channels.exists(id) or self._channels.tty_exists( tty_id): self._channels.set_tty(id, tty_id) buffer = container.logs(stdout=1, stderr=1, stream=0, timestamps=0, tail=self.CONSOLE_HISTORY) else: client = self._docker.APIClient() params = { 'stdin': 1, 'stdout': 1, 'stderr': 1, 'stream': 1, 'timestamps': 0, 'logs': 0 } socket = client.attach_socket(id, params) buffer = container.logs(stdout=1, stderr=1, stream=0, timestamps=0, tail=self.CONSOLE_HISTORY) self._channels.create(id, socket, tty_id) buffer = buffer.decode('utf-8') # attempt to clean broken ESC sequence if len(buffer) > 16384: buffer = buffer[-16384:] for i in range(12): if ord(buffer[i]) == 27: buffer = buffer[i:] break return { 'status': 'OK', 'id': id, 'tty_id': tty_id, 'buffer': buffer } self.log.debug('docker watch -> {id}', id=id) return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, id)) def keystroke(self, id, data): """ Enter a new keystroke into a container console """ if not self._channels.exists(id): return {'status': 'NOTFOUND'} if isinstance(data, list): for item in data: if item['action'] == 'keepalive': self._channels.keepalive(id) elif item['action'] == 'size_console': client = self._docker.from_env() container = client.containers.get(id) container.resize(item['rows'], item['cols']) elif item['action'] == 'size_shell': client = self._docker.APIClient() client.exec_resize(id, item['rows'], item['cols']) elif item['action'] == 'close': self._channels.close(id) else: self.log.error('unknown keystroke command: {cmd}', cmd=item) return self._channels.write(id, data) return {'status': 'OK'} @inlineCallbacks def shell(self, container, tty_id, kwargs={}): """ Execute a shell in a running container """ client = self._docker.APIClient() def shim(image, **kwargs): kwargs['tty'] = True kwargs['stdin'] = True cmd = '/bin/bash' return client.exec_create(container, cmd, **kwargs) self.log.debug('docker shell :: {container} -> {kw}', container=container, kw=kwargs) execId = (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim, container, **kwargs)) id = execId.get('Id') def shim2(): socket = client.exec_start(id, detach=False, tty=True, socket=True) self._channels.create(id, socket, tty_id, True) return {'status': 'OK', 'id': id} return (yield threads.deferToThreadPool(self._reactor, self._threadpool, shim2)) def fs_root(self, id, path): while path and path[0] 
== '/':
            path = path[1:]
        container = self._docker.from_env().containers.get(id)
        for point in container.attrs.get('Mounts', []):
            dst = point.get('Destination', '')
            src = point.get('Source', '')
            if f'/{path}'.startswith(f'{dst}/'):
                path = "/".join(path.split('/')[1:])
                return os.path.join(src, path)
        raise Exception(f'invalid path "{path}"')

    def fs_open(self, id, path):
        """
        Read the filesystem structure for the given container
        """
        while path and path[0] == '/':
            path = path[1:]
        files = []
        dirs = []
        container = self._docker.from_env().containers.get(id)
        if not path:
            for point in container.attrs.get('Mounts', []):
                dirs.append(point.get('Destination'))
        else:
            # ensure a trailing slash; testing the last character
            if path[-1] != '/':
                path += '/'
            for point in container.attrs.get('Mounts', []):
                dst = point.get('Destination', '')
                src = point.get('Source', '')
                self.log.debug(f'path={path} dst={dst}')
                if not f'/{path}'.startswith(f'{dst}/'):
                    continue
                path = "/".join(path.split('/')[1:])
                root = os.path.join(src, path)
                self.log.debug(f'root={root}')
                with os.scandir(root) as iterator:
                    for entry in iterator:
                        if entry.is_file():
                            files.append(entry.name)
                        elif entry.is_dir():
                            if entry.name not in self.EXCLUDE_DIRS_ANY:
                                dirs.append(entry.name)
        dirs.sort()
        files.sort()
        return {'dirs': dirs, 'files': files}

    def fs_get(self, id, path):
        """
        Recover a file from a container filesystem
        """
        with open(self.fs_root(id, path)) as io:
            return {'data': io.read()}

    def fs_put(self, id, path, data):
        """
        Store a file into a Docker container
        """
        with open(self.fs_root(id, path), 'w') as io:
            io.write(data)
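# A rough sketch of driving DockerClient; `controller` stands in for the
# Crossbar node controller (it must provide publish() and _uri_prefix, which
# is all this class uses), so consider it an assumption.

from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks

@inlineCallbacks
def docker_demo(controller):
    client = DockerClient(reactor, controller)
    client.startup()              # starts the dedicated 'docker' thread pool
    if (yield client.ping()):     # each call runs blocking SDK work on the pool
        version = yield client.version()
        print(version.get('Version'))
    client.shutdown()             # stops the helper threads and the pool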
class Validator(object): DefaultTransactionFamilies = [ # IntegerKey, endpoint_registry ] def __init__(self, config, windows_service): self.status = 'stopped' self.Config = config # Parse the listen directives from the configuration so # we know what to bind gossip protocol to listen_directives = parse_listen_directives(self.Config) # If the gossip listen address is 0.0.0.0, then there must be # an Endpoint.Host entry as we don't know what to put in the # endpoint registry otherwise. if listen_directives['gossip'].host == '0.0.0.0' and \ ('Endpoint' not in self.Config or 'Port' not in self.Config['Endpoint']): raise Exception( 'gossip listen address is 0.0.0.0, but endpoint host ' 'missing from configuration') self._gossip_host = listen_directives['gossip'].host self._gossip_port = listen_directives['gossip'].port # The endpoint host/port and HTTP port come from the listen data, but # can be overridden by the configuration. self._endpoint_host = self._gossip_host self._endpoint_port = self._gossip_port self._endpoint_http_port = None if 'http' in listen_directives: self._endpoint_http_port = listen_directives['http'].port # See if we need to override the endpoint data endpoint_cfg = self.Config.get('Endpoint', None) if endpoint_cfg is not None: if 'Host' in endpoint_cfg: self._endpoint_host = endpoint_cfg['Host'] if 'Port' in endpoint_cfg: self._endpoint_port = int(endpoint_cfg['Port']) if 'HttpPort' in endpoint_cfg: self._endpoint_http_port = int(endpoint_cfg['HttpPort']) # Finally, if the endpoint host is 'localhost', we need to convert it # because to another host, obviously 'localhost' won't mean "us" if self._endpoint_host == 'localhost': self._endpoint_host = socket.gethostbyname(self._endpoint_host) self.profile = self.Config.get('Profile', False) if self.profile: self.pr = cProfile.Profile() self.pr.enable() self.windows_service = windows_service # flag to indicate that a topology update is in progress self._topology_update_in_progress = False self.delaystart = self.Config['DelayStart'] # set up signal handlers for shutdown if not windows_service: signal.signal(signal.SIGTERM, self.handle_shutdown_signal) signal.signal(signal.SIGINT, self.handle_shutdown_signal) # ---------- Initialize the configuration ---------- self.initialize_common_configuration() self.initialize_ledger_specific_configuration() # ---------- Initialize the NodeMap ---------- self.initialize_node_map() # ---------- Initialize the Ledger ---------- self.initialize_ledger_object() self.web_thread_pool = ThreadPool(0, 8, "WebThreadPool") def handle_shutdown_signal(self, signum, frame): logger.warn('received shutdown signal') self.shutdown() def shutdown(self): """ Shutdown the validator. There are several things that need to happen on shutdown: 1) disconnect this node from the network, 2) close all the databases, and 3) shutdown twisted. We need time for each to finish. 
""" self.status = 'stopping' if self.profile: self.pr.create_stats() loc = os.path.join(self.Config.get('DataDirectory', '/tmp'), '{0}.cprofile'.format( self.Config.get('NodeName', str(os.getpid())))) self.pr.dump_stats(loc) # send the transaction to remove this node from the endpoint # registry (or send it to the web server) self.unregister_endpoint(self.Ledger.LocalNode, self.EndpointDomain) # Need to wait long enough for all the shutdown packets to be sent out reactor.callLater(1.0, self.handle_ledger_shutdown) def handle_ledger_shutdown(self): self.Ledger.shutdown() # Need to wait long enough for all the shutdown packets to be sent out # if a shutdown packet was the reason for the shutdown reactor.callLater(1.0, self.handle_shutdown) def handle_shutdown(self): self.web_thread_pool.stop() reactor.stop() self.status = 'stopped' def initialize_common_configuration(self): self.GenesisLedger = self.Config.get('GenesisLedger', False) # Handle the common configuration variables if 'NetworkFlowRate' in self.Config: token_bucket.TokenBucket.DefaultDripRate = self.Config[ 'NetworkFlowRate'] if 'NetworkBurstRate' in self.Config: token_bucket.TokenBucket.DefaultDripRate = self.Config[ 'NetworkBurstRate'] if 'AdministrationNode' in self.Config: logger.info('set administration node to %s', self.Config.get('AdministrationNode')) shutdown_message.AdministrationNode = self.Config[ 'AdministrationNode'] if 'NetworkDelayRange' in self.Config: node.Node.DelayRange = self.Config['NetworkDelayRange'] if 'UseFixedDelay' in self.Config: node.Node.UseFixedDelay = self.Config['UseFixedDelay'] def initialize_ledger_specific_configuration(self): """ Initialize any ledger type specific configuration options, expected to be overridden """ pass def initialize_node_map(self): self.NodeMap = {} for nodedata in self.Config.get("Nodes", []): addr = (socket.gethostbyname(nodedata["Host"]), nodedata["Port"]) nd = node.Node(address=addr, identifier=nodedata["Identifier"], name=nodedata["ShortName"]) self.NodeMap[nodedata["ShortName"]] = nd def initialize_ledger_object(self): # Create the local ledger instance name = self.Config['NodeName'] addr = \ (socket.gethostbyname(self._gossip_host), self._gossip_port) endpoint_addr = (self._endpoint_host, self._endpoint_port) signingkey = signed_object.generate_signing_key( wifstr=self.Config.get('SigningKey')) identifier = signed_object.generate_identifier(signingkey) nd = node.Node(address=addr, identifier=identifier, signingkey=signingkey, name=name, endpoint_address=endpoint_addr) self.initialize_ledger_from_node(nd) assert self.Ledger for txnfamily in self.DefaultTransactionFamilies: txnfamily.register_transaction_types(self.Ledger) self.Ledger.onNodeDisconnect += self.handle_node_disconnect_event logger.info("starting ledger %s with id %s at network address %s", self.Ledger.LocalNode, self.Ledger.LocalNode.Identifier[:8], self.Ledger.LocalNode.NetAddress) def initialize_ledger_from_node(self, node): """ Initialize the ledger object for the local node, expected to be overridden """ self.Ledger = None def add_transaction_family(self, txnfamily): txnfamily.register_transaction_types(self.Ledger) def pre_start(self): if self.delaystart is True: logger.debug("DelayStart is in effect, waiting for /start") reactor.callLater(1, self.pre_start) else: self.status = 'starting' self.start() def start(self): # if this is the genesis ledger then there isn't anything left to do if self.GenesisLedger: self.start_ledger() return # if this isn't the genesis ledger then we need to connect # this 
        # this node into the validator network
        self.initialize_ledger_connection()

    def handle_node_disconnect_event(self, nodeid):
        """
        Handle the situation where a peer is marked as disconnected.
        """
        logger.info('node %s dropped, reassess connectivity', nodeid)

        # first see if we are already handling the situation
        if self._topology_update_in_progress:
            logger.info('topology update already in progress')
            return

        # there are many possible policies for when to kick off
        # new topology probes. for the moment, just use the initial
        # connectivity as a lower threshold
        minpeercount = self.Config.get("InitialConnectivity", 1)
        peerlist = self.Ledger.peer_list()
        if len(peerlist) <= minpeercount:
            def disconnect_callback():
                logger.info('topology update finished, %s peers connected',
                            len(self.Ledger.peer_list()))

            logger.info('connectivity has dropped below minimal levels, '
                        'kick off topology update')
            self._topology_update_in_progress = True
            reactor.callLater(2.0, self.initialize_ledger_topology,
                              disconnect_callback)

    def _get_candidate_peers(self):
        """
        Return the candidate (potential) peers to send connection requests;
        in addition to the list of nodes directly specified in the
        configuration file, pull a list from the LedgerURL. Once the list of
        potential peers is constructed, pick from it those specified in the
        Peers configuration variable. If that is not enough, then pick more
        at random from the list.
        """
        # Continue to support existing config files with single
        # string values.
        if isinstance(self.Config.get('LedgerURL'), basestring):
            urls = [self.Config.get('LedgerURL')]
        else:
            urls = self.Config.get('LedgerURL', [])

        # We randomize the url list here so that we avoid the
        # condition of a small number of validators referencing
        # each other's empty EndpointRegistries forever.
        random.shuffle(urls)
        for url in urls:
            logger.info('attempting to load peers using url %s', url)
            try:
                peers = self.get_endpoint_nodes(url)
                # If the Endpoint Registry is empty, try the next
                # url in the shuffled list
                if len(peers) == 0:
                    continue
                for peer in peers:
                    self.NodeMap[peer.Name] = peer
                break
            except MessageException as e:
                logger.error("Unable to get endpoints from LedgerURL: %s",
                             str(e))

        # We may also be able to rediscover peers via the persistence layer.
if self.Ledger.Restore: for blockid in self.Ledger.GlobalStoreMap.persistmap_keys(): blk = self.Ledger.GlobalStoreMap.get_block_store(blockid) sto = blk.get_transaction_store('/EndpointRegistryTransaction') for key in sto: nd = self._endpoint_info_to_node(sto[key]) self.NodeMap[nd.Name] = nd # Build a list of nodes that we can use for the initial connection minpeercount = self.Config.get("InitialConnectivity", 1) peerset = set(self.Config.get('Peers', [])) nodeset = set(self.NodeMap.keys()) if len(peerset) < minpeercount and len(nodeset) > 0: nodeset.discard(self.Ledger.LocalNode.Name) nodeset = nodeset.difference(peerset) peerset = peerset.union(random.sample(list(nodeset), min( minpeercount - len(peerset), len(nodeset)))) return peerset def _connect_to_peers(self): min_peer_count = self.Config.get("InitialConnectivity", 1) current_peer_count = len(self.Ledger.peer_list()) logger.debug("peer count is %d of %d", current_peer_count, min_peer_count) if current_peer_count < min_peer_count: peerset = self._get_candidate_peers() # Add the candidate nodes to the gossip object so we can send # connect requests to them for peername in peerset: peer = self.NodeMap.get(peername) if peer: logger.info('add peer %s with identifier %s', peername, peer.Identifier) connect_message.send_connection_request(self.Ledger, peer) self.Ledger.add_node(peer) else: logger.info('requested connection to unknown peer %s', peername) return False else: return True def initialize_ledger_connection(self): """ Connect the ledger to the rest of the network. """ assert self.Ledger self.status = 'waiting for initial connections' if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_connection) else: reactor.callLater(2.0, self.initialize_ledger_topology, self.start_journal_transfer) def initialize_ledger_topology(self, callback): """ Make certain that there is at least one connected peer and then kick off the configured topology generation protocol. 
""" logger.debug('initialize ledger topology') if not self._connect_to_peers(): reactor.callLater(2.0, self.initialize_ledger_topology, callback) return self._topology_update_in_progress = False # and now its time to pick the topology protocol topology = self.Config.get("TopologyAlgorithm", "RandomWalk") if topology == "RandomWalk": if 'TargetConnectivity' in self.Config: random_walk.TargetConnectivity = self.Config[ 'TargetConnectivity'] self.random_walk_initialization(callback) elif topology == "BarabasiAlbert": if 'MaximumConnectivity' in self.Config: barabasi_albert.MaximumConnectivity = self.Config[ 'MaximumConnectivity'] if 'MinimumConnectivity' in self.Config: barabasi_albert.MinimumConnectivity = self.Config[ 'MinimumConnectivity'] self.barabasi_initialization(callback) else: logger.error("unknown topology protocol %s", topology) self.shutdown() return def barabasi_initialization(self, callback): logger.info("ledger connections using BarabasiAlbert topology") barabasi_albert.start_topology_update(self.Ledger, callback) def random_walk_initialization(self, callback): logger.info("ledger connections using RandomWalk topology") random_walk.start_topology_update(self.Ledger, callback) def start_journal_transfer(self): self.status = 'transferring ledger' if not journal_transfer.start_journal_transfer(self.Ledger, self.start_ledger): self.start_ledger() def start_ledger(self): logger.info('ledger initialization complete') self.Ledger.initialization_complete() self.status = 'started' self.register_endpoint(self.Ledger.LocalNode, self.EndpointDomain) def register_endpoint(self, node, domain='/'): txn = endpoint_registry.EndpointRegistryTransaction.register_node( node, domain, httpport=self._endpoint_http_port) txn.sign_from_node(node) msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn msg.SenderID = str(node.Identifier) msg.sign_from_node(node) logger.info('register endpoint %s with name %s', node.Identifier[:8], node.Name) self.Ledger.handle_message(msg) def unregister_endpoint(self, node, domain='/'): txn = endpoint_registry.EndpointRegistryTransaction \ .unregister_node(node) txn.sign_from_node(node) # Since unregister is often called on shutdown, we really need to make # this a system message for the purpose of sending it out from our own # queue msg = endpoint_registry.EndpointRegistryTransactionMessage() msg.Transaction = txn msg.SenderID = str(node.Identifier) msg.sign_from_node(node) logger.info('unregister endpoint %s with name %s', node.Identifier[:8], node.Name) self.Ledger.handle_message(msg) def get_endpoint_nodes(self, url): client = EndpointRegistryClient(url) nodes = [] for epinfo in client.get_endpoint_list(domain=self.EndpointDomain): nodes.append(self._endpoint_info_to_node(epinfo)) return nodes @staticmethod def _endpoint_info_to_node(epinfo): addr = (socket.gethostbyname(epinfo["Host"]), epinfo["Port"]) nd = node.Node(address=addr, identifier=epinfo["NodeIdentifier"], name=epinfo["Name"]) return nd
class ServerTwisted(AbstractServer): """ "ServerTwisted" is responsible to start the HTTP Twisted server. :author: direct Netware Group et al. :copyright: (C) direct Netware Group - All rights reserved :package: pas.http :subpackage: core :since: v1.0.0 :license: https://www.direct-netware.de/redirect?licenses;mpl2 Mozilla Public License, v. 2.0 """ def __init__(self): """ Constructor __init__(ServerTwisted) :since: v1.0.0 """ AbstractServer.__init__(self) self.log_observer = None """ @TODO """ self.reactor = None """ Twisted reactor instance """ self.thread_pool = None """ @TODO """ log_handler = NamedLoader.get_singleton("dNG.data.logging.LogHandler", False) if (log_handler is not None): log_handler.add_logger("twisted") self.log_observer = log.PythonLoggingObserver("twisted") self.log_observer.start() log.startLoggingWithObserver(self.log_observer.emit, setStdout = False) # # def _configure(self): """ Configures the server :since: v1.0.0 """ listener_host = Settings.get("pas_http_twisted_server_host", self.socket_hostname) self.port = int(Settings.get("pas_http_twisted_server_port", 8080)) self.reactor = reactor self.reactor.addSystemEventTrigger('before', 'shutdown', self.stop) server_description = "tcp:{0:d}".format(self.port) if (listener_host == ""): self.host = Settings.get("pas_http_server_preferred_hostname", self.socket_hostname) else: self.host = listener_host server_description += ":interface={0}".format(self.host) # self.thread_pool = ThreadPool() self.thread_pool.start() if (self._log_handler is not None): self._log_handler.info("pas.http.core Twisted server starts at '{0}:{1:d}'", listener_host, self.port, context = "pas_http_core") server = serverFromString(self.reactor, server_description) server.listen(Site(WSGIResource(reactor, self.thread_pool, HttpWsgi1Request))) """ Configure common paths and settings """ AbstractServer._configure(self) # def run(self): """ Runs the server :since: v1.0.0 """ self.reactor.startRunning(installSignalHandlers = False) with ExceptionLogTrap("pas_http_core"): self.reactor.mainLoop() # def stop(self, params = None, last_return = None): """ Stop the server :param params: Parameter specified :param last_return: The return value from the last hook called. :return: (mixed) Return value :since: v1.0.0 """ if (self.thread_pool is not None): self.thread_pool.stop() self.thread_pool = None # if (self.reactor is not None): self.reactor.stop() self.reactor = None # if (self.log_observer is not None): self.log_observer.stop() self.log_observer = None # return AbstractServer.stop(self, params, last_return)
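# The core wiring ServerTwisted performs, reduced to a standalone sketch with
# a trivial WSGI callable standing in for HttpWsgi1Request: serve a WSGI app
# from a dedicated ThreadPool while the reactor handles the network I/O.

from twisted.internet import reactor
from twisted.internet.endpoints import serverFromString
from twisted.python.threadpool import ThreadPool
from twisted.web.server import Site
from twisted.web.wsgi import WSGIResource

def application(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'hello from the WSGI thread pool\n']

pool = ThreadPool()
pool.start()
# stop the pool on shutdown, otherwise the process can hang on exit
reactor.addSystemEventTrigger('before', 'shutdown', pool.stop)

serverFromString(reactor, 'tcp:8080').listen(
    Site(WSGIResource(reactor, pool, application)))
reactor.run()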
class WorkerPool: """ A generalized class that can start multiple workers in a thread pool with values drawn from the given value factory object, and wait for their completion and a given number of successes (a worker returning something without throwing an exception). """ class TimedOut(Exception): "Raised if waiting for the target number of successes timed out." class OutOfValues(Exception): "Raised if the value factory is out of values, but the target number was not reached." def __init__(self, worker: Callable[[Any], Any], value_factory: Callable[[int], Optional[List[Any]]], target_successes, timeout: float, stagger_timeout: float = 0, threadpool_size: int = None): # TODO: make stagger_timeout a part of the value factory? self._worker = worker self._value_factory = value_factory self._timeout = timeout self._stagger_timeout = stagger_timeout self._target_successes = target_successes thread_pool_kwargs = {} if threadpool_size is not None: thread_pool_kwargs['minthreads'] = threadpool_size thread_pool_kwargs['maxthreads'] = threadpool_size self._threadpool = ThreadPool(**thread_pool_kwargs) # These three tasks must be run in separate threads # to avoid being blocked by workers in the thread pool. self._bail_on_timeout_thread = Thread(target=self._bail_on_timeout) self._produce_values_thread = Thread(target=self._produce_values) self._process_results_thread = Thread(target=self._process_results) self._successes = {} self._failures = {} self._started_tasks = 0 self._finished_tasks = 0 self._cancel_event = Event() self._result_queue = Queue() self._target_value = SetOnce() self._unexpected_error = SetOnce() self._results_lock = Lock() self._stopped = False def start(self): # TODO: check if already started? self._threadpool.start() self._produce_values_thread.start() self._process_results_thread.start() self._bail_on_timeout_thread.start() def cancel(self): """ Cancels the tasks enqueued in the thread pool and stops the producer thread. """ self._cancel_event.set() def join(self): """ Waits for all the threads to finish. Can be called several times. """ if self._stopped: return # or raise AlreadyStopped? self._produce_values_thread.join() self._process_results_thread.join() self._bail_on_timeout_thread.join() # protect from a possible race try: self._threadpool.stop() except AlreadyQuit: pass self._stopped = True if self._unexpected_error.is_set(): e = self._unexpected_error.get() raise RuntimeError(f"Unexpected error in the producer thread: {e}") def _sleep(self, timeout): """ Sleeps for a given timeout, can be interrupted by a cancellation event. """ if self._cancel_event.wait(timeout): raise Cancelled def block_until_target_successes(self) -> Dict: """ Blocks until the target number of successes is reached. Returns a dictionary of values matched to results. Can be called several times. """ if self._unexpected_error.is_set(): # So that we don't raise it again when join() is called e = self._unexpected_error.get_and_clear() raise RuntimeError(f"Unexpected error in the producer thread: {e}") result = self._target_value.get() if result == TIMEOUT_TRIGGERED: raise self.TimedOut() elif result == PRODUCER_STOPPED: raise self.OutOfValues() return result def get_failures(self) -> Dict: """ Get the current failures, as a dictionary of values to thrown exceptions. """ with self._results_lock: return dict(self._failures) def get_successes(self) -> Dict: """ Get the current successes, as a dictionary of values to worker return values. 
""" with self._results_lock: return dict(self._successes) def _bail_on_timeout(self): """ A service thread that cancels the pool on timeout. """ if not self._cancel_event.wait(timeout=self._timeout): self._target_value.set(TIMEOUT_TRIGGERED) self._cancel_event.set() def _worker_wrapper(self, value): """ A wrapper that catches exceptions thrown by the worker and sends the results to the processing thread. """ try: # If we're in the cancelled state, interrupt early self._sleep(0) result = self._worker(value) self._result_queue.put(Success(value, result)) except Cancelled as e: self._result_queue.put(e) except BaseException as e: self._result_queue.put(Failure(value, str(e))) def _process_results(self): """ A service thread that processes worker results and waits for the target number of successes to be reached. """ producer_stopped = False success_event_reached = False while True: result = self._result_queue.get() if result == PRODUCER_STOPPED: producer_stopped = True else: self._finished_tasks += 1 if isinstance(result, Success): with self._results_lock: self._successes[result.value] = result.result len_successes = len(self._successes) if not success_event_reached and len_successes == self._target_successes: # A protection for the case of repeating values. # Only trigger the target value once. success_event_reached = True self._target_value.set(self.get_successes()) if isinstance(result, Failure): with self._results_lock: self._failures[result.value] = result.exception if producer_stopped and self._finished_tasks == self._started_tasks: self.cancel() # to cancel the timeout thread self._target_value.set(PRODUCER_STOPPED) break def _produce_values(self): while True: try: with self._results_lock: len_successes = len(self._successes) batch = self._value_factory(len_successes) if not batch: break self._started_tasks += len(batch) for value in batch: # There is a possible race between `callInThread()` and `stop()`, # But we never execute them at the same time, # because `join()` checks that the producer thread is stopped. self._threadpool.callInThread(self._worker_wrapper, value) self._sleep(self._stagger_timeout) except Cancelled: break except BaseException as e: self._unexpected_error.set(e) self.cancel() break self._result_queue.put(PRODUCER_STOPPED)
class DBScheduler(object):
    '''
    Database operation scheduler

    We will have one or more read threads and only one write thread.
    '''
    def __init__(self, spider):
        from twisted.internet import reactor  # imported here, inside the constructor
        self.spider = spider
        ''' Used for logging for now '''
        self.reactor = reactor
        ''' Used for thread pools '''
        engine = get_engine()
        create_schema(engine)
        self.thread_pool = ThreadPool(
            minthreads=1, maxthreads=13, name="ReadPool")
        # There should be only one thread in the write_pool.
        # Never increase its maxthreads value.
        self.write_pool = ProfiledThreadPool(
            minthreads=1, maxthreads=1, name="WritePool")
        self.thread_pool.start()
        self.write_pool.start()
        self.signals = SignalManager(dispatcher.Any).connect(
            self.stop_threadpools, spider_closed)
        self.reporter = Reporter()
        ''' Reporter is used for statistics collection '''
        self.counters = self.reporter.counters
        self.cache = defaultdict(lambda: dict())
        self.write_queue = Queue()
        self.writelock = False  # Write queue mutex

    def stop_threadpools(self):
        self.thread_pool.stop()
        self.write_pool.stop()
        for line in self.reporter.get_report().splitlines():
            log.msg(line)

    def _do_save_item(self, item):
        ''' Save items one by one '''
        assert not isInIOThread()
        session = Session()
        session.add(item)
        try:
            session.commit()
            self.reporter.saved(item.__class__, item)
            result = True
        except IntegrityError as error:
            session.rollback()
            result = False
        finally:
            session.close()
        return result

    def _do_save(self):
        assert not isInIOThread()
        while not self.write_queue.empty():
            items = []
            try:
                self.writelock = True
                try:
                    while True:
                        items.append(self.write_queue.get_nowait())
                except Empty:
                    pass
                session = Session()
                try:
                    session.add_all(items)
                    session.commit()
                    # All items were unique.
                    # All of them are counted
                    for item in items:
                        self.reporter.saved(item.__class__, item)
                except IntegrityError as error:
                    # This is needed because we are calling from the thread
                    self.spider.log(
                        'Exception occurred while saving objects: {}'.format(
                            error),
                        level=log.WARNING)
                    self.spider.log(
                        traceback.format_exc(), level=log.DEBUG)
                    session.rollback()
                    self.spider.log(
                        'Saving {} items one by one'.format(len(items)))
                    for item in items:
                        # Saving items one by one
                        self._do_save_item(item)
                except Exception:
                    session.rollback()
                    raise
                finally:
                    session.close()
            finally:
                self.writelock = False

    def save(self, obj):
        '''
        Save an object. Very effective if we know that the object doesn't
        exist within the database. If the object already exists - ignore it.
        TODO: Maybe we need to implement a strategy where an object update
        is performed on duplicate.
        '''
        self.write_queue.put(obj)
        if self.writelock:
            return None
        else:
            return deferToThreadPool(
                self.reactor, self.write_pool, self._do_save)

    def _do_update_if_changed(self, model, selector, updated):
        '''
        Update a model matching some *selector* dict, but only if it has
        changed. For each custom situation a custom query should be built
        using the *case* function. This function is very general.
        '''
        assert not isInIOThread()
        result_query = update(model)
        for field, value in selector.iteritems():
            result_query = result_query.where(
                getattr(model, field) == value)
        result_query = result_query.where(
            reduce(or_, [getattr(model, field) != value
                         for field, value in updated.iteritems()]))
        result_query = result_query.values(**updated)
        session = Session()
        try:
            result = session.execute(result_query)
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
        return result.rowcount

    @inlineCallbacks
    def update_if_changed(self, model, selector, updated):
        result = 0
        item = dict(selector)
        item.update(updated)
        old_item = yield self.get_changed(model, selector, updated)
        if old_item is not None:
            result = yield deferToThreadPool(
                self.reactor, self.thread_pool,
                self._do_update_if_changed, model, selector, updated)
        if result:
            self.reporter.updated(
                model, dicthash(selector), item, old_item)
        else:
            self.reporter.unchanged(model, dicthash(selector))
        returnValue(result)

    def _do_update(self, model, selector, updated):
        assert not isInIOThread()
        session = Session()
        try:
            result = session.query(model).filter_by(**selector).update(updated)
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
        return result

    def update(self, model, selector, updated):
        '''
        Update a model matching some *selector* dict, replacing its values
        from the *updated* dict. A universal solution, but slow. Really slow.
        For each custom situation a custom query should be built using the
        *case* function.
        '''
        return deferToThreadPool(
            self.reactor, self.thread_pool,
            self._do_update, model, selector, updated)

    def _do_exists(self, model, selector):
        session = Session()
        try:
            result = bool(
                session.query(model.id).filter_by(**selector).scalar())
            return result
        finally:
            session.close()

    def exists(self, model, selector):
        ''' Check whether an object matching the selector exists '''
        return deferToThreadPool(
            self.reactor, self.thread_pool,
            self._do_exists, model, selector)

    def _do_is_changed(self, model, selector, updated):
        session = Session()
        # filter_by() accepts keyword selectors; filter() only takes
        # SQL expressions
        result_query = session.query(model.id).filter_by(**selector)
        result_query = result_query.filter(
            reduce(or_, [getattr(model, field) != value
                         for field, value in updated.iteritems()]))
        try:
            result = bool(result_query.scalar())
        finally:
            session.close()
        return result

    def is_changed(self, model, selector, updated):
        ''' Check whether model fields are changed '''
        return deferToThreadPool(
            self.reactor, self.thread_pool,
            self._do_is_changed, model, selector, updated)

    def _do_get_changed(self, model, selector, updated):
        session = Session()
        query = session.query(model).filter_by(**selector)
        query = query.filter(
            reduce(or_, [getattr(model, field) != value
                         for field, value in updated.iteritems()]))
        try:
            item = query.first()
            if item is not None:
                item = row2dict(item)
            return item
        finally:
            session.close()

    def get_changed(self, model, selector, updated):
        ''' Return the model if it's changed and None if it's unchanged '''
        return deferToThreadPool(
            self.reactor, self.thread_pool,
            self._do_get_changed, model, selector, updated)

    def _do_get_id(self, model, unique, fval, fields):
        assert not isInIOThread()
        session = Session()
        try:
            result = session.query(model.id).filter(
                getattr(model, unique) == fval).one().id
            return result
        finally:
            session.close()

    @inlineCallbacks
    def get_id(self, model, unique, fields, update_existing=False):
        '''
        Get an ID from the cache or from the database.
        If it doesn't exist - create the item.
        All database operations are done in a separate thread.

        - update_existing: Update the object if it exists within the database.
        '''
        assert isInIOThread()
        fval = fields[unique]
        try:
            result = self.cache[model][fval]
            self.counters['cache.hit'][model] += 1
            returnValue(result)
        except KeyError:
            self.counters['cache.miss'][model] += 1
        selectors = {unique: fval}
        result, created = yield deferToThreadPool(
            self.reactor, self.thread_pool,
            get_or_create, model, fields, **selectors)
        if created:
            self.reporter.saved(model, result)
        else:
            self.counters['db.cache.hit'][model] += 1
            if update_existing:
                yield self.update_if_changed(
                    model, {unique: fval}, fields)
        result = result.id
        self.cache[model][fval] = result
        returnValue(result)
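# Typical use from a Scrapy spider callback (Site and Page are hypothetical
# SQLAlchemy models); every method returns a Deferred, so the calls compose
# with inlineCallbacks.

from twisted.internet.defer import inlineCallbacks

@inlineCallbacks
def store_page(scheduler, page):
    # get_id() consults the in-memory cache first and creates the row
    # when it is missing
    site_id = yield scheduler.get_id(Site, 'domain', {'domain': page['domain']})
    # save() funnels the write through the single-threaded WritePool
    yield scheduler.save(Page(site_id=site_id, url=page['url']))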
def stop(self):
    # Close each thread's database connection alongside the thread itself.
    # Bind conn as a default argument so every closure keeps its own
    # connection; a bare lambda would capture only the last one.
    for tid, conn in self.pool.connections.items():
        for thread in self.threads:
            if thread.ident == tid:
                thread._db_close = lambda conn=conn: self.pool.disconnect(conn)
    ThreadPool.stop(self)
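# For context: the stop() above belongs to a ThreadPool subclass that pairs
# worker threads with database connections. A skeletal, assumed shape of the
# enclosing class (`connection_pool` and its API are hypothetical):

from twisted.python.threadpool import ThreadPool

class DBThreadPool(ThreadPool):
    def __init__(self, connection_pool, **kwargs):
        ThreadPool.__init__(self, **kwargs)
        # expected to expose .connections ({thread ident: connection})
        # and .disconnect(connection)
        self.pool = connection_pool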