def hashValue_(self, value):
    """Return the hash of *value*.

    Objects exposing a `.hash` attribute supply their own hash; anything
    else falls back to python's builtin hash, truncated to 32 bits and
    wrapped in a Hash.Hash.
    """
    if hasattr(value, 'hash'):
        return value.hash

    logging.debug("Using python hash on type '%s'. %s", type(value), str(value))
    truncated = ctypes.c_uint32(hash(value)).value
    return Hash.Hash(truncated)
def setScriptRoots(self, scriptPath, computedValues):
    """Register the computation hashes of *computedValues* as the
    persistent-cache script dependencies for the script at *scriptPath*."""
    dependencyHashes = Hash.ImmutableTreeSetOfHash()
    for computed in computedValues:
        computationId = self.getComputationIdForDefinition(
            computed.cumulusComputationDefinition)
        dependencyHashes = dependencyHashes + computationId.computationHash

    logging.info(
        "PersistentCacheIndex setting script dependencies for %s to %s hashes",
        scriptPath,
        len(dependencyHashes))

    self.persistentCacheIndex.setScriptDependencies(scriptPath, dependencyHashes)
def test_checkpointingGarbageCollectionGraph(self):
    # Verifies the persistent cache's reachability accounting: checkpointed
    # computations count as "reachable" only once some script declares their
    # computation hash as a dependency.
    simulation = self.createSimulation()
    # fresh simulation: nothing persisted yet
    self.assertTrue(len(simulation.objectStore.listValues()) == 0)
    try:
        #give the simulation a couple of seconds to pick a scheduler
        self.assertTrue(simulation.waitForGlobalScheduler(timeout=2.0))

        compId1 = simulation.submitComputation(expensiveChildCachecalls(0))
        compId2 = simulation.submitComputation(expensiveChildCachecalls(1))

        simulation.waitForGlobalScheduler()

        # let the computations make some progress before checkpointing
        time.sleep(1.0)

        simulation.getGlobalScheduler(
            ).triggerFullCheckpointsOnOutstandingComputations()

        self.waitForAllCheckpointsToClear(simulation)

        cache = simulation.getWorkerVdm(0).getPersistentCacheIndex()

        # checkpoints landed in the cache, but no script references them yet,
        # so nothing is reachable
        self.assertTrue(cache.totalComputationsInCache() > 0)
        self.assertEqual(cache.totalReachableComputationsInCache(), 0)

        # pin the first computation as a script root: some, but not all,
        # cached computations become reachable
        cache.setScriptDependencies(
            "script",
            HashNative.ImmutableTreeSetOfHash() + compId1.computationHash)

        self.assertTrue(cache.totalReachableComputationsInCache() > 0)
        self.assertTrue(cache.totalReachableComputationsInCache() <
                        cache.totalComputationsInCache())

        # pin the second root too: now everything in the cache is reachable
        cache.setScriptDependencies(
            "script2",
            HashNative.ImmutableTreeSetOfHash() + compId2.computationHash)

        self.assertEqual(cache.totalComputationsInCache(),
                         cache.totalReachableComputationsInCache())
    finally:
        simulation.teardown()
def connectToRemoteWorker(self, machineId, ip, port, guid):
    """Open a channel to the worker at ip:port and run the outgoing handshake.

    Writes our version, the target's id, our own identity, and the handshake
    guid, then waits for the remote builtin hash. Returns the queue-like
    channel on success, or None if the handshake times out or the builtins
    disagree.
    """
    logging.info(
        "Attempting to connect to machine %s on %s:%s with guid %s",
        machineId, ip, port, guid)

    rawChannel = self._channelFactory.createChannel((ip, port))

    # handshake: version, expected worker id, our identity, then the guid
    rawChannel.write(ufora.version)
    rawChannel.write(machineId.__getstate__())
    rawChannel.write(
        CumulusNative.CumulusClientOrMachine.Machine(self.machineId).__getstate__())
    rawChannel.write(guid.__getstate__())

    logging.info("CumulusService %s wrote handshake for %s with guid %s",
                 self.machineId, machineId, guid)

    queueChannel = rawChannel.makeQueuelike(self.callbackScheduler)

    response = queueChannel.getTimeout(HANDSHAKE_TIMEOUT)
    if response is None:
        logging.error(
            "While attempting to add worker %s with guid %s, "
            "CumulusWorker %s did not receive a builtin hash message during handshake",
            machineId, guid, self.machineId)
        return None

    remoteBuiltinHash = Hash.Hash(0)
    remoteBuiltinHash.__setstate__(response)

    localBuiltinHash = ModuleImporter.builtinModuleImplVal().hash
    if remoteBuiltinHash != localBuiltinHash:
        logging.critical(
            "CumulusWorker %s could not connect to CumulusWorker %s as they have "
            "different builtins; former's builtin hash: %s, latter's builtin hash: %s",
            self.machineId,
            machineId,
            localBuiltinHash,
            remoteBuiltinHash)
        queueChannel.disconnect()
        return None

    return queueChannel
def logBadUforaVersionOnChannel(self, version):
    """Log a handshake payload that failed the ufora-version check.

    A known failure mode is a peer skipping the version message and sending
    its machineId first, so try decoding the payload as a MachineId to make
    the log entry actionable.
    """
    try:
        anId = CumulusNative.MachineId(Hash.Hash(0))
        anId.__setstate__(version)
        logging.error(
            "CumulusService %s received a bad version message that is, "
            "in fact, a machineId: %s",
            self.machineId,
            anId
            )
    except Exception:
        # narrowed from a bare `except:`, which would also have swallowed
        # KeyboardInterrupt/SystemExit; decode failure just means the payload
        # is not a machineId
        logging.error(
            "CumulusService %s received a bad version message that is not a machineId: %s",
            self.machineId,
            repr(version))
def isOwnHashInHandshakeMessage(self, message):
    """Return True iff *message* deserializes to this machine's own MachineId.

    Used during handshake to verify an incoming connection was really
    intended for this worker. Logs and returns False on a missing message,
    an undecodable payload, or a mismatched id.
    """
    if message is None:
        logging.error(
            "CumulusService %s didn't receive own Id in handshake.",
            self.machineId)
        return False

    try:
        machineId = CumulusNative.MachineId(Hash.Hash(0))
        machineId.__setstate__(message)
    except Exception:
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # propagate; the string sentinel marks an undecodable payload
        machineId = "not a valid machine ID"

    if isinstance(machineId, str) or machineId != self.machineId:
        logging.error(
            "CumulusWorker %s received connection intended for another machine (%s). %s != %s",
            self.machineId,
            machineId,
            repr(message),
            repr(self.machineId.__getstate__()))
        return False

    return True
def test_cumulusReconnectSharedState(self):
    """Workers that lose their shared-state connection should rejoin, and the
    persistent cache should reconnect its view and remain usable."""
    # bind up front so the finally block can't hit a NameError if setup throws
    cluster = None
    cumulusActiveMachines = None
    try:
        cluster = InMemoryCluster.InMemoryCluster()
        listener = WorkerCounterListener()

        cumulusActiveMachines = CumulusActiveMachines.CumulusActiveMachines(
            cluster.sharedStateViewFactory
            )
        cumulusActiveMachines.addListener(listener)
        cumulusActiveMachines.startService()

        self.dialToCount(2, cluster, listener, blocking=True)

        cluster.disconnectAllWorkersFromSharedState()

        # allow the disconnect to be observed before dialing back up
        time.sleep(10.0)

        self.dialToCount(4, cluster, listener, blocking=True)

        # bugfix: was assertTrue(len(cluster.cumuli), 4), which always passed
        # for any nonempty list because 4 was treated as the failure message
        self.assertEqual(len(cluster.cumuli), 4)

        for cumulus in cluster.cumuli:
            RetryAssert.waitUntilTrue(
                cumulus.cumulusWorker.hasEstablishedHandshakeWithExistingMachines,
                2.0)

        #at this point, the persistent cache should work
        persistentCacheIndex = cluster.cumuli[0].persistentCacheIndex

        self.assertTrue(persistentCacheIndex.hasConnectedView())
        self.assertTrue(persistentCacheIndex.timesViewReconnected() > 0)

        persistentCacheIndex.addPage(
            HashNative.Hash.sha1("page"),
            HashNative.ImmutableTreeSetOfHash(),
            1,
            HashNative.Hash.sha1("page"))
    finally:
        if cumulusActiveMachines is not None:
            cumulusActiveMachines.stopService()
        if cluster is not None:
            cluster.stop()
def clientId(ix):
    """Build a CumulusClientId whose underlying hash encodes *ix*."""
    idHash = HashNative.Hash(ix)
    return CumulusNative.CumulusClientId(idHash)
def machineId(ix, seed = None):
    """Build a MachineId from *ix*, optionally perturbed by hashing *seed*."""
    idHash = HashNative.Hash(ix)
    if seed is None:
        return CumulusNative.MachineId(idHash)
    return CumulusNative.MachineId(idHash + HashNative.Hash.sha1(seed))
def connectToWorker(self, machineId, ip, port, guid):
    """Open a channel to worker *machineId* at ip:port, run the client-side
    handshake, and verify the worker's builtins match ours.

    Returns the queue-like channel on success, None if the builtins disagree.
    Raises AssertionError if the worker never answers the handshake, and
    re-raises if the worker's builtin-hash payload cannot be decoded.
    """
    with self.lock_:
        stringChannel = self.channelFactory_.createChannel((ip, port))
        builtinsHash = ModuleImporter.builtinModuleImplVal().hash
        clientId = self.cumulusClientId
        callbackScheduler = self.callbackScheduler

        # handshake: version, expected worker id, our client identity, guid
        logging.info("Client %s writing version message '%s' to %s",
                     clientId, ufora.version, machineId)
        stringChannel.write(ufora.version)

        logging.info("Client %s writing client ID message to %s",
                     clientId, machineId)
        stringChannel.write(machineId.__getstate__())

        logging.info("Client %s writing expected machineId message to %s",
                     clientId, machineId)
        stringChannel.write(
            CumulusNative.CumulusClientOrMachine.Client(
                clientId).__getstate__())

        logging.info("Client %s writing guid %s to %s",
                     clientId, guid, machineId)
        stringChannel.write(guid.__getstate__())

        channelAsQueue = stringChannel.makeQueuelike(callbackScheduler)

        msg = channelAsQueue.getTimeout(HANDSHAKE_TIMEOUT)
        if msg is None:
            logging.error(
                "While attempting to add worker %s, CumulusClient %s did not "
                "receive a builtin hash message during handshake",
                machineId,
                clientId)
        # NOTE(review): stripped under `python -O`, in which case a None msg
        # fails below in __setstate__ instead — confirm callers expect
        # AssertionError here
        assert msg is not None

        logging.info("Client %s received serialized worker's builtin hash",
                     clientId)

        try:
            workersBuiltinHash = HashNative.Hash(0)
            workersBuiltinHash.__setstate__(msg)
        except Exception:
            # narrowed from a bare `except:`; still logged and re-raised
            logging.info("Client received a bad worker hash: %s of size %s",
                         repr(msg), len(msg))
            raise

        builtinsAgree = workersBuiltinHash == builtinsHash
        if not builtinsAgree:
            logging.critical(
                "Could not connect CumulusClient %s to CumulusWorker %s as they "
                "have different builtins; client's builtin hash: %s, worker's "
                "builtin hash: %s. Disconnecting channel",
                clientId,
                machineId,
                builtinsHash,
                workersBuiltinHash)
            channelAsQueue.disconnect()
            return None

        return channelAsQueue
def machineId(ix):
    """Build a MachineId whose underlying hash encodes *ix*."""
    idHash = HashNative.Hash(ix)
    return CumulusNative.MachineId(idHash)
def connectToRemoteWorker(self, machineId, ip, port, guid):
    """Connect to the worker at ip:port and run the full outgoing handshake.

    Returns the queue-like channel on success, or None if the handshake
    times out or the builtins disagree. Channel-creation failures are
    logged and re-raised.
    """
    logging.debug("Attempting to connect to machine %s on %s:%s with guid %s",
                  machineId, ip, port, guid)
    try:
        stringChannel = self._channelFactory.createChannel((ip, port))
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # not logged as network failures; the error is re-raised either way
        logging.error("CAN'T CONNECT TO WORKER ON %s:%s!\n"
                      "This may be a temporary failure but if the problem persists, "
                      "check the workers' network configuration and verify "
                      "that the machines can see each other.",
                      ip, port)
        raise

    # handshake: version, expected worker id, our identity, then the guid
    stringChannel.write(ufora.version)
    stringChannel.write(machineId.__getstate__())

    #initiate the handshake
    stringChannel.write(
        CumulusNative.CumulusClientOrMachine.Machine(
            self.machineId
            ).__getstate__()
        )
    stringChannel.write(guid.__getstate__())

    logging.debug("CumulusService %s wrote handshake for %s with guid %s",
                  self.machineId, machineId, guid)

    channelAsQueue = stringChannel.makeQueuelike(self.callbackScheduler)

    msg = channelAsQueue.getTimeout(HANDSHAKE_TIMEOUT)
    if msg is None:
        logging.error("CAN'T CONNECT TO WORKER ON %s:%s!\n"
                      "While attempting to add worker %s with guid %s, "
                      "Worker %s did not receive a builtin hash message "
                      "during handshake.\n"
                      "Verify that the ufora worker is running on the remote machine.",
                      ip, port, machineId, guid, self.machineId)
        return None

    otherWorkersBuiltinHash = Hash.Hash(0)
    otherWorkersBuiltinHash.__setstate__(msg)

    builtinsAgree = otherWorkersBuiltinHash == ModuleImporter.builtinModuleImplVal().hash
    if not builtinsAgree:
        logging.critical("CAN'T CONNECT TO WORKER ON %s:%s!\n"
                         "Worker %s could not connect to Worker %s as they have "
                         "different builtins; former's builtin hash: %s, latter's builtin hash: "
                         "%s\n"
                         "Verify that both machines run the same ufora version.",
                         ip, port,
                         self.machineId,
                         machineId,
                         ModuleImporter.builtinModuleImplVal().hash,
                         otherWorkersBuiltinHash)
        channelAsQueue.disconnect()
        return None

    return channelAsQueue
def doChannelHandshake(self, channel):
    """Accept-side handshake: read the peer's version, our own machine id,
    the peer's identity, and a guid; reply with our builtin hash and
    register the channel under its (identity, guid) group.

    Disconnects the channel on any failure; never raises.
    """
    try:
        logging.debug("Worker %s beginning channel handshake", self.machineId)

        version = channel.getTimeout(HANDSHAKE_TIMEOUT)
        if version is None:
            logging.error(
                "CAN'T ACCEPT CONNECTION!\n"
                "CumulusService %s couldn't read client version within the configured timeout",
                self.machineId
                )
            # bugfix: previously fell through and treated the timeout (None)
            # as a bad version, logging a second, misleading error before
            # disconnecting
            channel.disconnect()
            return

        if version != ufora.version:
            self.logBadUforaVersionOnChannel(version)
            channel.disconnect()
            return

        logging.debug(
            "CumulusService %s accepted connection from client with version %s",
            self.machineId,
            version
            )

        # the peer must echo back our own machine id
        msgThatShouldBeMyOwnHash = channel.getTimeout(HANDSHAKE_TIMEOUT)
        if not self.isOwnHashInHandshakeMessage(msgThatShouldBeMyOwnHash):
            channel.disconnect()
            return

        msg = channel.getTimeout(HANDSHAKE_TIMEOUT)
        if msg is None:
            logging.error(
                "CAN'T ACCEPT CONNECTION!\n"
                "Worker %s didn't received remote machine ID during handshake",
                self.machineId
                )
            channel.disconnect()
            return

        # decode the peer's identity (client or machine)
        clientOrMachine = CumulusNative.CumulusClientOrMachine.Machine(
            CumulusNative.MachineId(
                Hash.Hash(0)
                )
            )
        clientOrMachine.__setstate__(msg)

        hashGuid = Hash.Hash(0)
        msg = channel.getTimeout(HANDSHAKE_TIMEOUT)
        if msg is None:
            logging.error(
                "CAN'T ACCEPT CONNECTION!\n"
                "Worker %s didn't received handshake GUID",
                self.machineId
                )
            channel.disconnect()
            return
        hashGuid.__setstate__(msg)

        logging.debug(
            "Worker %s accepted connection with guid %s from %s",
            self.machineId,
            hashGuid,
            clientOrMachine
            )

        # complete the handshake by sending back our builtin hash
        channel.write(
            ModuleImporter.builtinModuleImplVal().hash.__getstate__()
            )

        with self.lock:
            self._channelListener.setGroupIdForAcceptedChannel(
                channel,
                (clientOrMachine, hashGuid)
                )

        logging.debug("CumulusService %s added a channel to group %s",
                      self.machineId, (clientOrMachine, hashGuid))
    except:
        logging.error("FAILED TO PROCESS INCOMING CONNECTION: %s",
                      traceback.format_exc())
        channel.disconnect()