示例#1
0
 def _getInfo(allMesosNodes, ip):
     """
     Return the node info to use for the node at ``ip``.

     ``self`` is not a parameter of this function, so it has to be supplied by an
     enclosing scope.

     :param allMesosNodes: mapping from node IP to the info reported for that node
     :param ip: IP of the node to look up
     :return: the tracked info (with ``workers`` zeroed if the batch system says the node
              is not in use), or a placeholder ``NodeInfo`` with 0 workers if the IP is
              not in ``allMesosNodes``
     """
     try:
         nodeInfo = allMesosNodes[ip]
     except KeyError:
         # Mesos has never seen this node, which leaves 3 possibilities:
         # 1) the node is still launching mesos & will come online soon
         # 2) no jobs have been assigned to this worker, so the executor was never
         #    launched and no executorInfo reporting 0 workers was ever sent
         # 3) mesos crashed before launching & the worker will never come online
         # No workers are running in any of these cases, so faking executor info with
         # 0 workers is safe. No money is wasted in cases 1/2 either, since the actual
         # termination still waits for the end of the node's billing cycle.
         return NodeInfo(coresTotal=1, coresUsed=0, requestedCores=0,
                         memoryTotal=1, memoryUsed=0, requestedMemory=0,
                         workers=0)

     # Node was tracked but we haven't seen this in the last 10 minutes
     if not self.scaler.leader.batchSystem.nodeInUse(ip):
         # No report in the last 10 minutes & no tasks running as far as we know:
         # fake an executorInfo without workers, otherwise this node would never be
         # considered for termination.
         nodeInfo.workers = 0
     # Otherwise jobs may still be running despite the node not reporting to mesos,
     # so the info is left untouched & the node can't be terminated.
     return nodeInfo
示例#2
0
 def _getInfo(allMesosNodes, ip):
     """
     Fetch the info for the node at ``ip``, faking idle info where appropriate.

     ``self`` is not a parameter of this function, so it has to be supplied by an
     enclosing scope.

     :param allMesosNodes: mapping from node IP to the info reported for that node
     :param ip: IP of the node to look up
     :return: a placeholder ``NodeInfo`` with 0 workers when ``ip`` is missing from
              ``allMesosNodes``; otherwise the tracked info, with ``workers`` reset to 0
              when the batch system reports the node as not in use
     """
     try:
         tracked = allMesosNodes[ip]
     except KeyError:
         # The node was never seen by mesos. Either
         # 1) it is still launching mesos & will come online soon, or
         # 2) no jobs were assigned to this worker, so the executor never launched and
         #    we never got an executorInfo back indicating 0 workers running, or
         # 3) mesos crashed before launching and the worker will never come online.
         # In each case no workers are running, so faking executor info with 0 workers
         # is safe.
         placeholder = NodeInfo(coresTotal=1,
                                coresUsed=0,
                                requestedCores=0,
                                memoryTotal=1,
                                memoryUsed=0,
                                requestedMemory=0,
                                workers=0)
         return placeholder

     # Node was tracked but we haven't seen this in the last 10 minutes
     nodeIsBusy = self.scaler.leader.batchSystem.nodeInUse(ip)
     if nodeIsBusy:
         # Jobs may still be running even though the node is not reporting to mesos,
         # so the node can't be terminated and its info is returned unchanged.
         return tracked

     # Nothing reported in the last 10 minutes & no tasks were running last we knew.
     # Fake an executorInfo with no worker so that this node can be considered for
     # termination.
     tracked.workers = 0
     return tracked
示例#3
0
    def frameworkMessage(self, driver, executorId, agentId, message):
        """
        Invoked when an executor sends a message.

        The message is decoded, parsed with ``ast.literal_eval`` and must evaluate to a
        dict holding a mandatory ``'address'`` entry. The only optional entry understood
        is ``'nodeInfo'``, a dict of keyword arguments for ``NodeInfo``; it is combined
        with the cores and memory requested by the running jobs of the sending executor.

        :param driver: not used by this method
        :param executorId: object whose ``value`` identifies the sending executor
        :param agentId: object whose ``value`` identifies the agent the executor runs on
        :param message: the encoded message as received
        :raises RuntimeError: if the message has a field other than 'address' or 'nodeInfo'
        """

        # Take it out of base 64 encoding from Protobuf
        decodedText = decode_data(message).decode()

        log.debug('Got framework message from executor %s running on agent %s: %s',
                  executorId.value, agentId.value, decodedText)
        payload = ast.literal_eval(decodedText)
        assert isinstance(payload, dict)

        # The address is mandatory; everything left in the payload afterwards is optional.
        nodeAddress = payload.pop('address')
        executor = self._registerNode(nodeAddress, agentId.value)

        for fieldName, fieldValue in payload.items():
            if k_is_unknown := fieldName != 'nodeInfo':
                raise RuntimeError("Unknown message field '%s'." % fieldName)

            assert isinstance(fieldValue, dict)
            # Only the jobs running on the executor that sent this message count
            # towards its requested resources.
            ownTasks = [task for task in self.runningJobMap.values()
                        if task.executorID == executorId.value]
            requestedCores = sum(task.cores for task in ownTasks)
            requestedMemory = sum(task.memory for task in ownTasks)
            executor.nodeInfo = NodeInfo(requestedCores=requestedCores,
                                         requestedMemory=requestedMemory,
                                         **fieldValue)
            self.executors[nodeAddress] = executor
示例#4
0
 def getNodes(self):
     """
     Describe every worker in ``self.workers`` as a node.

     :return: dict mapping each worker's position in ``self.workers`` to a ``NodeInfo``
              with zero cores and memory, and ``workers`` set to 1 if the worker's
              ``busyEvent`` is set, else 0
     """
     nodes = {}
     for index, worker in enumerate(self.workers):
         busyCount = 1 if worker.busyEvent.is_set() else 0
         nodes[index] = NodeInfo(cores=0, memory=0, workers=busyCount)
     return nodes
示例#5
0
 def getNodes(self):
     """
     Describe every worker in ``self.nodesToWorker`` as a node.

     :return: dict keyed by ``(position, self.preemptable)``, where position is the
              worker's index among ``self.nodesToWorker.values()``; each value is a
              ``NodeInfo`` whose ``workers`` is 1 if the worker's ``busyEvent`` is set,
              else 0
     """
     return {
         (position, self.preemptable): NodeInfo(
             coresTotal=0, coresUsed=0, requestedCores=1,
             memoryTotal=0, memoryUsed=0, requestedMemory=1,
             workers=1 if w.busyEvent.is_set() else 0)
         for position, w in enumerate(self.nodesToWorker.values())
     }
示例#6
0
 def getNodes(self, preemptable=False, timeout=None):
     """
     Describe the nodes in ``self.nodesToWorker`` with the given preemptability.

     :param preemptable: only nodes whose ``preemptable`` attribute equals this are returned
     :param timeout: accepted but not used by this implementation
     :return: dict mapping each matching node's ``privateIP`` to a ``NodeInfo`` whose
              ``workers`` is 1 if the node's worker has its ``busyEvent`` set, else 0
     """
     return {
         candidate.privateIP: NodeInfo(
             coresTotal=0, coresUsed=0, requestedCores=1,
             memoryTotal=0, memoryUsed=0, requestedMemory=1,
             workers=1 if assigned.busyEvent.is_set() else 0)
         for candidate, assigned in self.nodesToWorker.items()
         if candidate.preemptable == preemptable
     }
示例#7
0
 def getNodes(self):
     """
     Describe every worker in ``self.workers`` as a node.

     :return: dict mapping each worker's position in ``self.workers`` to a ``NodeInfo``
              whose ``workers`` is 1 if the worker's ``busyEvent`` is set, else 0
     """
     result = {}
     for position, worker in enumerate(self.workers):
         if worker.busyEvent.is_set():
             activeWorkers = 1
         else:
             activeWorkers = 0
         result[position] = NodeInfo(coresTotal=0, coresUsed=0, requestedCores=1,
                                     memoryTotal=0, memoryUsed=0, requestedMemory=1,
                                     workers=activeWorkers)
     return result
示例#8
0
 def frameworkMessage(self, driver, executorId, slaveId, message):
     """
     Invoked when an executor sends a message.

     The message is parsed with ``ast.literal_eval`` and must evaluate to a dict holding
     a mandatory ``'address'`` entry. The only optional entry understood is
     ``'nodeInfo'``, a dict of keyword arguments for ``NodeInfo`` that is stored on the
     executor returned by ``self._registerNode``.

     :param driver: not used by this method
     :param executorId: object whose ``value`` identifies the sending executor
     :param slaveId: object whose ``value`` identifies the slave the executor runs on
     :param message: textual Python literal of the message dict
     :raises KeyError: if the message has no 'address' field
     :raises RuntimeError: if the message has a field other than 'address' or 'nodeInfo'
     """
     log.debug(
         'Got framework message from executor %s running on slave %s: %s',
         executorId.value, slaveId.value, message)
     message = ast.literal_eval(message)
     assert isinstance(message, dict)
     # Handle the mandatory fields of a message
     nodeAddress = message.pop('address')
     executor = self._registerNode(nodeAddress, slaveId.value)
     # Handle optional message fields. items() is used instead of iteritems() because
     # the latter does not exist on Python 3 dicts; items() works on Python 2 and 3.
     for k, v in message.items():
         if k == 'nodeInfo':
             assert isinstance(v, dict)
             executor.nodeInfo = NodeInfo(**v)
         else:
             raise RuntimeError("Unknown message field '%s'." % k)
示例#9
0
 def frameworkMessage(self, driver, executorId, slaveId, message):
     """
     Invoked when an executor sends a message.

     The message is parsed with ``ast.literal_eval`` and must evaluate to a dict holding
     a mandatory ``'address'`` entry. The executor record for that address is looked up
     in ``self.executors``; a new record is created and stored when none exists or when
     the existing one belongs to a different slave. The record's ``lastSeen`` is then
     set to the current time. The only optional entry understood is ``'nodeInfo'``, a
     dict of keyword arguments for ``NodeInfo`` that is stored on the record.

     :param driver: not used by this method
     :param executorId: not used by this method
     :param slaveId: identifier of the slave the executor runs on, compared against the
            ``slaveId`` of the stored executor record
     :param message: textual Python literal of the message dict
     :raises KeyError: if the message has no 'address' field
     :raises RuntimeError: if the message has a field other than 'address' or 'nodeInfo'
     """
     message = ast.literal_eval(message)
     assert isinstance(message, dict)
     # Handle the mandatory fields of a message
     nodeAddress = message.pop('address')
     executor = self.executors.get(nodeAddress)
     if executor is None or executor.slaveId != slaveId:
         # Unknown address, or the address is now served by a different slave:
         # start over with a fresh record that has no node info yet.
         executor = Expando(nodeAddress=nodeAddress,
                            slaveId=slaveId,
                            nodeInfo=None)
         self.executors[nodeAddress] = executor
     executor.lastSeen = time.time()
     # Handle optional message fields. items() is used instead of iteritems() because
     # the latter does not exist on Python 3 dicts; items() works on Python 2 and 3.
     for k, v in message.items():
         if k == 'nodeInfo':
             assert isinstance(v, dict)
             executor.nodeInfo = NodeInfo(**v)
         else:
             raise RuntimeError("Unknown message field '%s'." % k)