Exemplo n.º 1
0
    def get_nodeinfolist(self):
        """Return the known Mesos nodes, keyed by node name.

        Every line of ``/etc/clues2/mesos_vnodes.info`` is registered as a node
        in state ``NodeInfo.OFF`` with placeholder resources.  Each slave
        returned by ``self._obtain_mesos_nodes()`` whose hostname equals the
        name of a registered node then replaces that entry with one built from
        the resources and state reported for the slave.

        Returns:
            dict: node name -> ``NodeInfo``.
        """
        nodeinfolist = {}
        infile = open_file('/etc/clues2/mesos_vnodes.info')
        if infile:
            # try/finally so the handle is released even if a NodeInfo cannot
            # be built for one of the lines.
            try:
                for line in infile:
                    name = line.rstrip('\n')
                    # Illustrative values for Clues, since the node is not
                    # running, we cannot know the real values
                    slots_count = 1
                    memory_total = 1572864000
                    # Create a fake queue
                    keywords = {
                        'hostname': TypedClass.auto(name),
                        'queues': TypedList(
                            [TypedClass.auto(q) for q in ["default"]]),
                    }
                    nodeinfolist[name] = NodeInfo(
                        name, slots_count, slots_count, memory_total,
                        memory_total, keywords)
                    nodeinfolist[name].state = NodeInfo.OFF
            finally:
                infile.close()

        mesos_slaves = self._obtain_mesos_nodes()
        if not mesos_slaves:
            return nodeinfolist

        used_nodes = self._obtain_mesos_used_nodes()
        for mesos_slave in mesos_slaves['slaves']:
            name = mesos_slave['hostname']
            # Iterate over a snapshot of the keys: the dict is written to
            # inside the loop.
            for node in list(nodeinfolist):
                if name != nodeinfolist[node].name:
                    continue

                state = infer_clues_node_state(
                    mesos_slave["id"], mesos_slave["active"], used_nodes)
                slots_count = float(mesos_slave['resources']['cpus'])
                memory_total = calculate_memory_bytes(
                    mesos_slave['resources']['mem'])

                used_cpu, used_mem = self._obtain_cpu_mem_used_in_mesos_node(
                    mesos_slave["id"])
                slots_free = slots_count - used_cpu
                memory_free = memory_total - used_mem

                # Create a fake queue
                keywords = {
                    'hostname': TypedClass.auto(name),
                    'queues': TypedList(
                        [TypedClass.auto(q) for q in ["default"]]),
                }
                nodeinfolist[name] = NodeInfo(
                    name, slots_count, slots_free, memory_total, memory_free,
                    keywords)
                nodeinfolist[name].state = state

        return nodeinfolist
Exemplo n.º 2
0
    def get_nodeinfolist(self):
        """Return the SLURM nodes, keyed by node name.

        Runs the command stored in ``self._nodes`` through ``run_command`` and
        parses its output with ``parse_scontrol``.  One ``NodeInfo`` is built
        per parsed record; a record that cannot be converted is logged and
        skipped.

        Returns:
            ``collections.OrderedDict`` of node name -> ``NodeInfo``, or
            ``None`` when the command cannot be run or its output parsed.
        """
        # Example output of "scontrol show nodes":
        #   NodeName=wn0 Arch=x86_64 CoresPerSocket=1
        #   CPUAlloc=0 CPUErr=0 CPUTot=1 CPULoad=0.02 Features=(null)
        #   Gres=(null)
        #   NodeAddr=wn0 NodeHostName=wn0 Version=14.11
        #   OS=Linux RealMemory=1 AllocMem=0 Sockets=1 Boards=1
        #   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1
        #   BootTime=2015-04-28T13:12:21 SlurmdStartTime=2015-04-28T13:16:32
        #   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
        #   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
        nodeinfolist = collections.OrderedDict()

        try:
            records = parse_scontrol(run_command(self._nodes))
        except Exception as ex:
            # Report the actual failure; the previous code could only ever
            # print its " " placeholder here.
            _LOGGER.error(
                "could not obtain information about SLURM nodes %s (%s)" %
                (self._server_ip, ex))
            return None

        if not records:
            return nodeinfolist

        for key in records:
            try:
                name = str(key["NodeName"])
                slots_count = int(key["CPUTot"])
                slots_free = slots_count - int(key["CPUAlloc"])
                # NOTE: the values are handed to _translate_mem_value with a
                # ".GB" suffix appended.
                memory_total = _translate_mem_value(key["RealMemory"] + ".GB")
                memory_free = memory_total - _translate_mem_value(
                    key["AllocMem"] + ".GB")
                state = infer_clues_node_state(self, str(key["State"]))

                keywords = {}
                queues = get_partition(self, name)
                keywords['hostname'] = TypedClass.auto(name)
                if queues:
                    keywords['queues'] = TypedList(
                        [TypedClass.auto(q) for q in queues])

                nodeinfolist[name] = NodeInfo(name, slots_count, slots_free,
                                              memory_total, memory_free,
                                              keywords)
                nodeinfolist[name].state = state
            except Exception:
                # Best effort: a malformed record must not hide the others.
                _LOGGER.error("Error adding node: %s." % key)

        return nodeinfolist
Exemplo n.º 3
0
    def get_nodeinfolist(self):
        """Return the SLURM nodes, keyed by node name.

        Runs the command stored in ``self._nodes`` through ``run_command`` and
        parses its output with ``parse_scontrol``; one ``NodeInfo`` is built
        per parsed record.

        Returns:
            dict of node name -> ``NodeInfo``, or ``None`` when the command
            cannot be run or its output parsed.
        """
        # Example output of "scontrol show nodes":
        #   NodeName=wn0 Arch=x86_64 CoresPerSocket=1
        #   CPUAlloc=0 CPUErr=0 CPUTot=1 CPULoad=0.02 Features=(null)
        #   Gres=(null)
        #   NodeAddr=wn0 NodeHostName=wn0 Version=14.11
        #   OS=Linux RealMemory=1 AllocMem=0 Sockets=1 Boards=1
        #   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1
        #   BootTime=2015-04-28T13:12:21 SlurmdStartTime=2015-04-28T13:16:32
        #   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
        #   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
        nodeinfolist = {}

        try:
            records = parse_scontrol(run_command(self._nodes))
        except Exception as ex:
            # Report the actual failure; the previous code could only ever
            # print its " " placeholder here.
            _LOGGER.error("could not obtain information about SLURM nodes %s (%s)" % (self._server_ip, ex))
            return None

        if not records:
            return nodeinfolist

        for key in records:
            name = str(key["NodeName"])
            slots_count = int(key["CPUTot"])
            slots_free = slots_count - int(key["CPUAlloc"])
            # NOTE: the values are handed to _translate_mem_value with a
            # ".GB" suffix appended.
            memory_total = _translate_mem_value(key["RealMemory"] + ".GB")
            memory_free = memory_total - _translate_mem_value(key["AllocMem"] + ".GB")
            state = infer_clues_node_state(self, str(key["State"]))

            keywords = {}
            queues = get_partition(self, name)
            keywords['hostname'] = TypedClass.auto(name)
            # Truthiness test so that a missing (None) partition list is
            # treated like an empty one instead of raising.
            if queues:
                keywords['queues'] = TypedList([TypedClass.auto(q) for q in queues])

            nodeinfolist[name] = NodeInfo(name, slots_count, slots_free, memory_total, memory_free, keywords)
            nodeinfolist[name].state = state

        return nodeinfolist
Exemplo n.º 4
0
    def _get_NodeInfo(self, info_node, default_info_node):
        """Create the ``NodeInfo`` of one node from queried and default data.

        Args:
            info_node: mapping read for the keys ``node_class``, ``resources``,
                ``status``, ``any_job_is_running`` and ``name``.
            default_info_node: mapping read for the keys ``keywords``, ``cpus``
                and ``memory``.  Its ``keywords`` dict is reused as-is (not
                copied), so the queue override below is written into it.

        Returns:
            ``NodeInfo`` whose ``state`` is IDLE, USED, OFF or UNKNOWN.
        """
        keywords = default_info_node['keywords']
        # Only read to fail early (KeyError) when the defaults carry no queues.
        default_queues = keywords['queues']

        # A valid node_class overrides the queues; an empty one silently keeps
        # the defaults; anything else is reported and keeps the defaults too.
        node_class = info_node['node_class']
        if node_class in self._queues:
            keywords['queues'] = TypedList([TypedClass.auto(node_class)])
        elif node_class != '':
            _LOGGER.error(
                " '%s' (node_class of Nomad Client) is not a valid queue, queue is set to queue of file %s."
                % (node_class, self._nodes_info_file))

        # Defaults are illustrative: when the node is not running the real
        # values cannot be known.
        default_cpus = default_info_node['cpus']
        default_memory = default_info_node['memory']

        # Values obtained from queries take precedence over the defaults.
        resources = info_node['resources']
        slots_count = default_cpus
        if 'slots_count' in resources:
            slots_count = resources['slots_count']
        memory_total = default_memory
        if 'memory_total' in resources:
            memory_total = resources['memory_total']

        slots_free = default_cpus
        if 'slots_used' in resources:
            slots_free = float(slots_count) - float(resources['slots_used'])
        memory_free = default_memory
        if 'memory_used' in resources:
            memory_free = float(memory_total) - float(resources['memory_used'])

        # Translate the reported status into a Clues state.
        status = info_node['status']
        if status == self._state_on:
            if info_node['any_job_is_running']:
                state = NodeInfo.USED
            else:
                state = NodeInfo.IDLE
        elif status == self._state_off:
            state = NodeInfo.OFF
        else:
            state = NodeInfo.UNKNOWN

        node = NodeInfo(info_node['name'], slots_count, slots_free,
                        memory_total, memory_free, keywords)
        node.state = state
        return node
Exemplo n.º 5
0
    def get_nodeinfolist(self):
        """Return the known Mesos nodes, keyed by node name.

        Every line of ``/etc/clues2/mesos_vnodes.info`` is registered as a node
        in state ``NodeInfo.OFF`` using ``self._node_slots`` and
        ``self._node_memory`` as placeholder resources.  Each slave returned by
        ``self._obtain_mesos_nodes()`` is then matched against the registered
        nodes, either by name or by the IP address the node name resolves to,
        and a matching entry is replaced with one built from the resources and
        state reported for the slave.

        Returns:
            ``collections.OrderedDict`` of node name -> ``NodeInfo``.
        """
        nodeinfolist = collections.OrderedDict()
        infile = open_file('/etc/clues2/mesos_vnodes.info')
        if infile:
            # try/finally so the handle is released even if a NodeInfo cannot
            # be built for one of the lines.
            try:
                for line in infile:
                    name = line.rstrip('\n')
                    # Illustrative values for Clues, since the node is not
                    # running, we cannot know the real values
                    slots_count = self._node_slots
                    memory_total = self._node_memory
                    # Create a fake queue
                    keywords = {
                        'hostname': TypedClass.auto(name),
                        'queues': TypedList(
                            [TypedClass.auto(q) for q in ["default"]]),
                    }
                    nodeinfolist[name] = NodeInfo(name, slots_count,
                                                  slots_count, memory_total,
                                                  memory_total, keywords)
                    nodeinfolist[name].state = NodeInfo.OFF
            finally:
                infile.close()

        mesos_slaves = self._obtain_mesos_nodes()
        if not mesos_slaves:
            return nodeinfolist

        used_nodes = self._obtain_mesos_used_nodes()
        # Node key -> resolved IP (None when resolution failed).  Filled on
        # first use so each node is resolved at most once per call instead of
        # once per slave.
        resolved_ips = {}
        for mesos_slave in mesos_slaves['slaves']:
            name = mesos_slave['hostname']
            # Iterate over a snapshot of the keys: the dict is written to
            # inside the loop.
            for node in list(nodeinfolist):
                node_name = nodeinfolist[node].name
                if node not in resolved_ips:
                    try:
                        resolved_ips[node] = socket.gethostbyname(node_name)
                    except Exception:
                        _LOGGER.warning("Error resolving node ip %s" %
                                        node_name)
                        resolved_ips[node] = None

                if name != node_name and name != resolved_ips[node]:
                    continue

                # From here on the node is known by its configured name, even
                # when the slave was matched through its IP address.
                name = node_name
                state = infer_clues_node_state(
                    mesos_slave["id"], mesos_slave["active"], used_nodes)
                slots_count = float(mesos_slave['resources']['cpus'])
                memory_total = calculate_memory_bytes(
                    mesos_slave['resources']['mem'])

                used_cpu, used_mem = self._obtain_cpu_mem_used_in_mesos_node(
                    mesos_slave["id"])
                slots_free = slots_count - used_cpu
                memory_free = memory_total - used_mem

                # Create a fake queue
                keywords = {
                    'hostname': TypedClass.auto(name),
                    'queues': TypedList(
                        [TypedClass.auto(q) for q in ["default"]]),
                }
                nodeinfolist[name] = NodeInfo(
                    name, slots_count, slots_free, memory_total,
                    memory_free, keywords)
                nodeinfolist[name].state = state

        return nodeinfolist
Exemplo n.º 6
0
    def get_nodeinfolist(self):
        """Return the HTCondor worker nodes, keyed by node name.

        When ``get_worker_nodes_list_from_Startd()`` returns worker nodes, each
        one is added as ``NodeInfo.IDLE`` (activity ``"Idle"``) or
        ``NodeInfo.USED`` (any other activity).  For used nodes the free slots
        and memory are reduced by the ``RequestCpus`` and ``ImageSize`` of every
        job whose remote hosts include the node, and clamped at zero.  When no
        worker node is returned, the names listed in
        ``/etc/clues2/condor_vnodes.info`` are added as ``NodeInfo.OFF`` with
        placeholder resources.

        Returns:
            ``collections.OrderedDict`` of node name -> ``NodeInfo``, or
            ``None`` if a worker node has no readable ``Activity`` or the
            vnodes file cannot be processed.
        """

        def _attr(classad, attribute, default):
            # Best-effort read: any failure to fetch the attribute yields the
            # default instead of aborting the whole listing.
            try:
                return classad[attribute]
            except Exception:
                return default

        nodeinfolist = collections.OrderedDict()
        worker_nodes = get_worker_nodes_list_from_Startd()
        if len(worker_nodes) > 0:
            for worker_node in worker_nodes:
                activity = _attr(worker_node, "Activity", "undefined")
                if activity == "undefined":
                    _LOGGER.warning(
                        "could not obtain information about nodes.")
                    return None

                name = _attr(worker_node, "Name", "")
                slots = _attr(worker_node, "TotalSlots", 0)
                memory = _attr(worker_node, "Memory", 0)
                slots_free = slots
                memory_free = memory
                state = NodeInfo.IDLE

                if activity != "Idle":
                    state = NodeInfo.USED
                    # Subtract what every job placed on this node has claimed.
                    for scheduler in get_schedulers_list_from_Schedd():
                        jobs = htcondor.Schedd(scheduler).query()
                        for job in jobs:
                            try:
                                nodes = job["AllRemoteHosts"].split(",")
                            except Exception:
                                try:
                                    nodes = [job["RemoteHost"]]
                                except Exception:
                                    nodes = []
                            if name not in nodes:
                                continue
                            slots_free -= _attr(job, "RequestCpus", 0)
                            # NOTE(review): "/" is true division on Python 3,
                            # so mem may be fractional - confirm intent.
                            try:
                                mem = (job["ImageSize"] + 1023) / 1024
                            except Exception:
                                mem = 0
                            memory_free -= mem
                    if slots_free < 0:
                        slots_free = 0
                    if memory_free < 0:
                        memory_free = 0

                keywords = {
                    'hostname': TypedClass.auto(name),
                    'queues': TypedList(
                        [TypedClass.auto(q) for q in ["default"]]),
                }
                nodeinfolist[name] = NodeInfo(name, slots, slots_free,
                                              memory, memory_free, keywords)
                nodeinfolist[name].state = state
        else:
            try:
                # "with" closes the file even when a line cannot be processed.
                with open('/etc/clues2/condor_vnodes.info', 'r') as infile:
                    for line in infile:
                        name = line.rstrip('\n')
                        # Illustrative values for Clues, since the node is not
                        # running, we cannot know the real values
                        slots_count = 1
                        memory_total = 1572864000
                        # Create a fake queue
                        keywords = {
                            'hostname': TypedClass.auto(name),
                            'queues': TypedList(
                                [TypedClass.auto(q) for q in ["default"]]),
                        }
                        nodeinfolist[name] = NodeInfo(name, slots_count,
                                                      slots_count,
                                                      memory_total,
                                                      memory_total, keywords)
                        nodeinfolist[name].state = NodeInfo.OFF
            except Exception:
                _LOGGER.warning("could not obtain information about nodes.")
                return None
        return nodeinfolist
Exemplo n.º 7
0
    def get_nodeinfolist(self):
        """Return the known Mesos nodes, keyed by node name.

        Runs the command stored in ``self._nodes``, decodes its output as JSON
        and opens ``/etc/clues2/mesos_vnodes.info``.  Every line of that file
        is registered as a node in state ``NodeInfo.OFF`` with placeholder
        resources; every element found under the top-level keys of the JSON
        document whose ``hostname`` equals a registered node name then replaces
        that entry with the reported resources and state.

        Returns:
            dict of node name -> ``NodeInfo``, or ``None`` when the command,
            the JSON decoding or opening the vnodes file fails.
        """
        # Example output of
        #   /usr/bin/curl -L -X GET http://mesosserverpublic:5050/master/slaves
        # {
        #     "slaves": [
        #         {
        #             "active": true,
        #             "attributes": {},
        #             "hostname": "10.0.0.84",
        #             "id": "20150716-115932-1063856798-5050-14165-S0",
        #             "pid": "slave(1)@10.0.0.84:5051",
        #             "registered_time": 1437487335.75923,
        #             "reregistered_time": 1437487335.75927,
        #             "resources": {
        #                 "cpus": 1,
        #                 "disk": 13438,
        #                 "mem": 623,
        #                 "ports": "[31000-32000]"
        #             }
        #         }
        #     ]
        # }
        nodeinfolist = {}

        output = " "
        try:
            output = run_command(self._nodes)
            json_data = json.loads(output)
            infile = open('/etc/clues2/mesos_vnodes.info', 'r')
        except Exception:
            _LOGGER.error("could not obtain information about MESOS nodes %s (%s)" % (self._server_ip, output))
            return None

        # try/finally so the handle is released even if a NodeInfo cannot be
        # built for one of the lines.
        try:
            for line in infile:
                name = line.rstrip('\n')
                # Illustrative values for Clues, since the node is not running, we cannot know the real values
                slots_count = 1
                memory_total = 1572864000

                # Create a fake queue
                keywords = {}
                keywords['hostname'] = TypedClass.auto(name)
                keywords['queues'] = TypedList([TypedClass.auto(q) for q in ["default"]])

                nodeinfolist[name] = NodeInfo(name, slots_count, slots_count, memory_total, memory_total, keywords)
                nodeinfolist[name].state = NodeInfo.OFF
        finally:
            infile.close()

        if not json_data:
            return nodeinfolist

        for _section, details in json_data.items():
            used_nodes = obtain_used_nodes()
            for element in details:
                name = element['hostname']
                # Iterate over a snapshot of the keys: the dict is written to
                # inside the loop.
                for node in list(nodeinfolist):
                    if name != nodeinfolist[node].name:
                        continue

                    state = infer_clues_node_state(element["id"], element["active"], used_nodes)
                    slots_count = float(element['resources']['cpus'])
                    memory_total = element['resources']['mem'] * 1048576

                    used_cpu, used_mem = obtain_cpu_mem_used(element["id"])
                    slots_free = slots_count - used_cpu
                    memory_free = memory_total - used_mem

                    # Create a fake queue
                    keywords = {}
                    keywords['hostname'] = TypedClass.auto(name)
                    keywords['queues'] = TypedList([TypedClass.auto(q) for q in ["default"]])

                    nodeinfolist[name] = NodeInfo(name, slots_count, slots_free, memory_total, memory_free, keywords)
                    nodeinfolist[name].state = state

        return nodeinfolist
Exemplo n.º 8
0
 def get_nodeinfolist(self):
     """Return the HTCondor worker nodes, keyed by node name.

     Worker nodes are located through ``htcondor.Collector().locateAll`` for
     the Startd daemon type.  Each one is added as ``NodeInfo.IDLE`` (activity
     ``"Idle"``) or ``NodeInfo.USED`` (any other activity).  For used nodes
     the free slots and memory are reduced by the ``RequestCpus`` and
     ``ImageSize`` of every job whose remote hosts include the node, and
     clamped at zero.  When no worker node is located, the names listed in
     ``/etc/clues2/condor_vnodes.info`` are added as ``NodeInfo.OFF`` with
     placeholder resources.

     Returns:
         dict of node name -> ``NodeInfo``, or ``None`` if a worker node has
         no readable ``Activity`` or the vnodes file cannot be processed.
     """

     def _attr(classad, attribute, default):
         # Best-effort read: any failure to fetch the attribute yields the
         # default instead of aborting the whole listing.
         try:
             return classad[attribute]
         except Exception:
             return default

     nodeinfolist = {}
     collector = htcondor.Collector()
     try:
         worker_nodes = collector.locateAll(htcondor.DaemonTypes.Startd)
     except Exception:
         worker_nodes = []

     if len(worker_nodes) > 0:
         for worker_node in worker_nodes:
             activity = _attr(worker_node, "Activity", "undefined")
             if activity == "undefined":
                 _LOGGER.warning("could not obtain information about nodes.")
                 return None

             name = _attr(worker_node, "Name", "")
             slots = _attr(worker_node, "TotalSlots", 0)
             memory = _attr(worker_node, "Memory", 0)
             slots_free = slots
             memory_free = memory
             state = NodeInfo.IDLE

             if activity != "Idle":
                 state = NodeInfo.USED
                 try:
                     schedulers = collector.locateAll(htcondor.DaemonTypes.Schedd)
                 except Exception:
                     schedulers = []
                 # Subtract what every job placed on this node has claimed.
                 for scheduler in schedulers:
                     jobs = htcondor.Schedd(scheduler).query()
                     for job in jobs:
                         try:
                             nodes = job["AllRemoteHosts"].split(",")
                         except Exception:
                             try:
                                 nodes = [job["RemoteHost"]]
                             except Exception:
                                 nodes = []
                         if name not in nodes:
                             continue
                         slots_free -= _attr(job, "RequestCpus", 0)
                         # NOTE(review): "/" is true division on Python 3, so
                         # mem may be fractional - confirm intent.
                         try:
                             mem = (job["ImageSize"] + 1023)/1024
                         except Exception:
                             mem = 0
                         memory_free -= mem
                 if slots_free < 0:
                     slots_free = 0
                 if memory_free < 0:
                     memory_free = 0

             keywords = {}
             keywords['hostname'] = TypedClass.auto(name)
             keywords['queues'] = TypedList([TypedClass.auto(q) for q in ["default"]])
             nodeinfolist[name] = NodeInfo(name, slots, slots_free, memory, memory_free, keywords)
             nodeinfolist[name].state = state
     else:
         try:
             # "with" closes the file even when a line cannot be processed.
             with open('/etc/clues2/condor_vnodes.info', 'r') as infile:
                 for line in infile:
                     name = line.rstrip('\n')
                     # Illustrative values for Clues, since the node is not running, we cannot know the real values
                     slots_count = 1
                     memory_total = 1572864000
                     # Create a fake queue
                     keywords = {}
                     keywords['hostname'] = TypedClass.auto(name)
                     keywords['queues'] = TypedList([TypedClass.auto(q) for q in ["default"]])
                     nodeinfolist[name] = NodeInfo(name, slots_count, slots_count, memory_total, memory_total, keywords)
                     nodeinfolist[name].state = NodeInfo.OFF
         except Exception:
             _LOGGER.warning("could not obtain information about nodes.")
             return None
     return nodeinfolist
Exemplo n.º 9
0
    def get_nodeinfolist(self):
        """Return the Kubernetes nodes plus the configured virtual nodes.

        Real nodes come from a GET on ``self._nodes_api_url_path``; nodes
        carrying a ``NoSchedule``, ``PreferNoSchedule`` or ``NoExecute`` taint
        are skipped.  Usage is computed by ``self._get_node_used_resources``
        from the pod list obtained from ``self._pods_api_url_path``.  A node
        is OFF when its ``Ready`` condition is not ``"True"``, USED when it
        runs more pods than system pods, and IDLE otherwise.  Entries of
        ``/etc/clues2/kubernetes_vnodes.info`` whose name is not already
        present are appended in state OFF.

        Returns:
            ``collections.OrderedDict`` of node name -> ``NodeInfo``.
        """
        nodeinfolist = collections.OrderedDict()

        nodes_data = self._create_request('GET', self._nodes_api_url_path,
                                          self.auth_data)
        if nodes_data:
            pods_data = self._create_request('GET', self._pods_api_url_path,
                                             self.auth_data)
            if not pods_data:
                _LOGGER.error(
                    "Error getting Kubernetes pod list. Node usage will not be obtained."
                )

            for node in nodes_data["items"]:
                name = node["metadata"]["name"]
                allocatable = node["status"]["allocatable"]
                memory_total = self._get_memory_in_bytes(allocatable["memory"])
                slots_total = int(allocatable["cpu"])
                pods_total = int(allocatable["pods"])

                # Get Taints
                skip_node = False
                if 'taints' in node["spec"] and node["spec"]['taints']:
                    for taint in node["spec"]['taints']:
                        if taint['effect'] in [
                                "NoSchedule", "PreferNoSchedule", "NoExecute"
                        ]:
                            skip_node = True
                            _LOGGER.debug(
                                "Node %s is tainted with %s, skiping." %
                                (name, taint['effect']))
                if skip_node:
                    continue

                used_mem, used_cpus, used_pods, system_pods = self._get_node_used_resources(
                    name, pods_data)

                memory_free = memory_total - used_mem
                slots_free = slots_total - used_cpus
                pods_free = pods_total - used_pods

                # Ready unless a "Ready" condition says otherwise.
                is_ready = True
                for condition in node["status"]["conditions"]:
                    if condition['type'] == "Ready" and condition['status'] != "True":
                        is_ready = False

                keywords = {
                    'pods_free': TypedNumber(pods_free),
                    'nodeName': TypedClass(name, TypedClass.STRING)
                }
                # Add labels as keywords
                for key, value in node["metadata"]["labels"].items():
                    keywords[key] = TypedClass(value, TypedClass.STRING)

                nodeinfolist[name] = NodeInfo(name, slots_total, slots_free,
                                              memory_total, memory_free,
                                              keywords)
                if not is_ready:
                    nodeinfolist[name].state = NodeInfo.OFF
                elif (used_pods - system_pods) > 0:
                    nodeinfolist[name].state = NodeInfo.USED
                else:
                    nodeinfolist[name].state = NodeInfo.IDLE
        else:
            _LOGGER.error("Error getting Kubernetes node list.")

        # Add the "virtual" nodes
        try:
            # "with" releases the file handle as soon as it has been parsed.
            with open('/etc/clues2/kubernetes_vnodes.info', 'r') as vnodes_file:
                vnodes = json.load(vnodes_file)
            for vnode in vnodes:
                name = vnode["name"]
                if name in nodeinfolist:
                    continue

                keywords = {
                    'pods_free': TypedNumber(self._node_pods),
                    'nodeName': TypedClass(name, TypedClass.STRING)
                }

                cpus = self._node_slots
                if "cpu" in vnode:
                    cpus = int(vnode["cpu"])

                memory = self._node_memory
                if "memory" in vnode:
                    memory = self._get_memory_in_bytes(vnode["memory"])

                if "queues" in vnode:
                    queues = vnode["queues"].split(",")
                    if queues:
                        keywords['queues'] = TypedList(
                            [TypedClass.auto(q) for q in queues])

                if "keywords" in vnode:
                    for keypair in vnode["keywords"].split(','):
                        # Split on the first "=" only, so that a value may
                        # itself contain "=" without being truncated.
                        parts = keypair.split('=', 1)
                        keywords[parts[0].strip()] = TypedClass(
                            parts[1].strip(), TypedClass.STRING)

                nodeinfolist[name] = NodeInfo(name, cpus, cpus, memory,
                                              memory, keywords)
                nodeinfolist[name].state = NodeInfo.OFF
        except Exception as ex:
            _LOGGER.error(
                "Error processing file /etc/clues2/kubernetes_vnodes.info: %s"
                % str(ex))

        return nodeinfolist
Exemplo n.º 10
0
    def get_nodeinfolist(self):
        """Build the node list from the SGE ``qhost`` XML report.

        Every ``host`` element except the generic "global" one becomes a
        NodeInfo carrying its slot and memory figures plus the ``hostname``,
        ``queues`` (only when the host has queues) and ``hostgroups``
        keywords.

        Returns:
            An OrderedDict mapping hostname to NodeInfo, or None when
            ``self._parse_qhost_xml()`` returns None.
        """
        hostgroups = self._get_hostgroups()

        dom = self._parse_qhost_xml()
        if dom is None:
            return None

        nodeinfolist = collections.OrderedDict()
        for h in dom.getElementsByTagName("host"):
            hostname = h.getAttribute("name")
            # ignore the generic host "global"
            if hostname == "global":
                continue

            # get the host values to get the information
            memory_total = 0
            memory_used = 0
            powered_on = False
            for hv in h.getElementsByTagName("hostvalue"):
                valuename = hv.getAttribute("name")
                if valuename == "load_avg":
                    # If the load_avg is defined, the node is considered to be on
                    # TODO: Try to improve this
                    if hv.firstChild.nodeValue != "-":
                        powered_on = True
                elif valuename == "mem_total":
                    if hv.firstChild.nodeValue != "-":
                        memory_total = self._translate_mem_value(
                            hv.firstChild.nodeValue)
                elif valuename == "mem_used":
                    if hv.firstChild.nodeValue != "-":
                        memory_used = self._translate_mem_value(
                            hv.firstChild.nodeValue)

            # Get the info about the queues
            used_slots = 0
            total_slots = 0
            node_queues = []
            for q in h.getElementsByTagName("queue"):
                queue_name = q.getAttribute("name")
                node_queues.append(TypedClass.auto(str(queue_name)))

                # Get the queue values
                queue_used_slots = 0
                queue_total_slots = 0
                queue_state = None
                for qv in q.getElementsByTagName("queuevalue"):
                    queuevaluename = qv.getAttribute("name")
                    if queuevaluename == "slots_used":
                        queue_used_slots = int(qv.firstChild.nodeValue)
                    elif queuevaluename == "slots":
                        queue_total_slots = int(qv.firstChild.nodeValue)
                    elif queuevaluename == "state_string":
                        # The state element may be empty
                        if qv.firstChild is not None:
                            queue_state = qv.firstChild.nodeValue

                flags = "" if queue_state is None else queue_state.lower()

                # if some of the queues are in "Alarm Unknown" state the node is down
                if "au" in flags:
                    powered_on = False

                # Slots of disabled/suspended queues are left out of the totals
                if "d" in flags or "s" in flags:
                    _LOGGER.debug(queue_name + "@" + hostname + " is in " +
                                  queue_state + " state. Ignoring this slots")
                else:
                    used_slots += queue_used_slots
                    total_slots += queue_total_slots

            keywords = {'hostname': TypedClass.auto(hostname)}
            if node_queues:
                keywords['queues'] = TypedList(node_queues)

            # .items() works on both Python 2 and 3 (iteritems() is 2-only)
            keywords['hostgroups'] = TypedList([
                TypedClass.auto(str(hg))
                for hg, nodelist in hostgroups.items()
                if hostname in nodelist
            ])

            free_slots = total_slots - used_slots

            if not powered_on:
                node_state = NodeInfo.OFF
            elif free_slots > 0:
                node_state = NodeInfo.IDLE
            else:
                node_state = NodeInfo.USED

            # NOTE(review): -1 looks like an "unknown memory" marker coming
            # from _translate_mem_value -- confirm against that helper.
            memory_free = -1
            if memory_total != -1:
                memory_free = memory_total - memory_used

            nodeinfolist[hostname] = NodeInfo(hostname, total_slots,
                                              free_slots, memory_total,
                                              memory_free, keywords)
            nodeinfolist[hostname].state = node_state

        return nodeinfolist
Exemplo n.º 11
0
    def _get_nodeinfolist(self, server_nodes_info):
        """Merge the nodes declared in the nodes info file with live Nomad clients.

        The JSON file at ``self._nodes_info_file`` supplies the default (OFF)
        description of every node. Clients reported by the given servers that
        are IDLE or USED replace the default entry of the same name; clients
        not declared in the file are only logged as a warning.

        Args:
            server_nodes_info: iterable of server nodes, each passed to
                ``self._get_Clients_by_Server`` and
                ``self._get_Client_resources``.

        Returns:
            An OrderedDict mapping node name to NodeInfo.
        """
        nodeinfolist = collections.OrderedDict()
        default_node_info = collections.OrderedDict()

        # DEFAULT NODE INFO
        try:
            # The context manager closes the file even if parsing fails
            with open(self._nodes_info_file, 'r') as nodes_file:
                vnodes = json.load(nodes_file)

            for vnode in vnodes:
                node = {
                    'name': vnode["name"],
                    'state': NodeInfo.OFF,
                    'keywords': {},
                }

                node['cpus'] = float(self._default_cpu_node)
                if "cpu" in vnode:
                    node['cpus'] = int(vnode["cpu"])

                node['memory'] = _get_memory_in_bytes(
                    self._default_memory_node)
                if "memory" in vnode:
                    node['memory'] = _get_memory_in_bytes(vnode["memory"])

                if "keywords" in vnode:
                    # Comma separated list of key=value pairs
                    for keypair in vnode["keywords"].split(','):
                        parts = keypair.split('=')
                        node['keywords'][parts[0].strip()] = TypedClass(
                            parts[1].strip(), TypedClass.STRING)

                if "queues" in vnode:
                    queues = vnode["queues"].split(",")
                else:  # All queues to the node
                    queues = self._queues
                node['keywords']['queues'] = TypedList(
                    [TypedClass.auto(q) for q in queues])

                default_node_info[node['name']] = node

        except Exception as ex:
            # Best effort: nodes parsed before the failure are kept
            _LOGGER.error("Error processing file %s: %s" %
                          (self._nodes_info_file, str(ex)))

        for server_node in server_nodes_info:
            # Obtain ID, Name, Status, NodeClass and if the Client is running some job
            clients = self._get_Clients_by_Server(server_node)

            # Obtain Resources and Queues
            for client_id in clients:
                info_client = clients[client_id]
                if info_client['state'] not in [NodeInfo.IDLE, NodeInfo.USED]:
                    continue  # Client is not ON

                # Obtain Client node address for checking used resources
                info_client['resources'] = self._get_Client_resources(
                    server_node, client_id)

                client_name = info_client['name']
                if client_name in default_node_info:  # Valid node for CLUES and IM
                    nodeinfolist[client_name] = self._get_NodeInfo(
                        info_client, default_node_info[client_name])
                else:
                    _LOGGER.warning(
                        "Nomad Client with name '%s' founded using Nomad Server API but not exists this node in the configuration file %s"
                        % (client_name, self._nodes_info_file))

        # Add nodes from nomad_info file to the list
        for namenode, node_info in default_node_info.items():
            if namenode not in nodeinfolist:
                nodeinfolist[namenode] = NodeInfo(namenode, node_info['cpus'],
                                                  node_info['cpus'],
                                                  node_info['memory'],
                                                  node_info['memory'],
                                                  node_info['keywords'])
                nodeinfolist[namenode].state = node_info['state']

        # Print all nodes in log with keywords
        for node in nodeinfolist.values():
            pairs = "".join(keyword + ":" + str(node.keywords[keyword]) + ","
                            for keyword in node.keywords)
            text = ("%s + keywords={ " % (str(node))) + pairs
            # Drop the trailing separator before closing the brace
            _LOGGER.debug(text[:-1] + "}")

        return nodeinfolist
Exemplo n.º 12
0
    def get_nodeinfolist(self):
        """Build the node list from the Mesos vnodes file and the live slaves.

        Nodes declared in ``/etc/clues2/mesos_vnodes.info`` are first added in
        the OFF state with their declared (or default) resources. Every slave
        returned by ``self._obtain_mesos_nodes()`` whose hostname equals a
        declared node name, or the IP address that name resolves to, then
        replaces that node's entry with the resources reported by Mesos.

        Returns:
            An OrderedDict mapping node name to NodeInfo.
        """
        nodeinfolist = collections.OrderedDict()
        try:
            # The context manager closes the file even if parsing fails
            with open('/etc/clues2/mesos_vnodes.info', 'r') as vnodes_file:
                vnodes = json.load(vnodes_file)

            for vnode in vnodes:
                name = vnode["name"]
                if name in nodeinfolist:
                    continue

                keywords = {'hostname': TypedClass(name, TypedClass.STRING)}

                slots_count = slots_free = self._node_slots
                if "cpu" in vnode:
                    slots_count = slots_free = int(vnode["cpu"])

                memory_total = memory_free = self._node_memory
                if "memory" in vnode:
                    memory_total = memory_free = get_memory_in_bytes(
                        vnode["memory"])

                if "keywords" in vnode:
                    # Comma separated list of key=value pairs
                    for keypair in vnode["keywords"].split(','):
                        parts = keypair.split('=')
                        keywords[parts[0].strip()] = TypedClass(
                            parts[1].strip(), TypedClass.STRING)

                nodeinfolist[name] = NodeInfo(name, slots_count, slots_free,
                                              memory_total, memory_free,
                                              keywords)
                nodeinfolist[name].state = NodeInfo.OFF
        except Exception as ex:
            # Best effort: nodes parsed before the failure are kept
            _LOGGER.error("Error processing file /etc/clues2/mesos_vnodes.info: %s" % str(ex))

        mesos_slaves = self._obtain_mesos_nodes()
        if mesos_slaves:
            # Node name -> resolved IP (None when resolution failed), so each
            # name is looked up at most once per call instead of once per slave.
            node_ips = {}

            for mesos_slave in mesos_slaves['slaves']:
                slave_host = mesos_slave['hostname']

                for node in nodeinfolist:
                    name = nodeinfolist[node].name

                    if slave_host != name:
                        if name not in node_ips:
                            try:
                                node_ips[name] = socket.gethostbyname(name)
                            except Exception:
                                # Not a bare except: KeyboardInterrupt and
                                # SystemExit must still propagate.
                                _LOGGER.warning("Error resolving node ip %s" % name)
                                node_ips[name] = None
                        if slave_host != node_ips[name]:
                            continue

                    slots_count = float(mesos_slave['resources']['cpus'])
                    memory_total = calculate_memory_bytes(mesos_slave['resources']['mem'])
                    used_cpu = float(mesos_slave['used_resources']['cpus'])
                    used_mem = calculate_memory_bytes(mesos_slave['used_resources']['mem'])

                    if not mesos_slave["active"]:
                        state = NodeInfo.OFF
                    elif used_cpu > 0 or used_mem > 0:
                        state = NodeInfo.USED
                    else:
                        state = NodeInfo.IDLE

                    slots_free = slots_count - used_cpu
                    memory_free = memory_total - used_mem

                    # The live entry only carries the hostname keyword; the
                    # keywords read from the vnodes file are not carried over.
                    keywords = {'hostname': TypedClass.auto(name)}

                    nodeinfolist[name] = NodeInfo(
                        name, slots_count, slots_free, memory_total, memory_free, keywords)
                    nodeinfolist[name].state = state
                    # A slave describes a single node: stop at the first match
                    break

        return nodeinfolist
Exemplo n.º 13
0
Arquivo: sge.py Projeto: amcaar/clues
	def get_nodeinfolist(self):
		"""Build the node list from the SGE ``qhost`` XML report.

		Every ``host`` element except the generic "global" one becomes a
		NodeInfo carrying its slot and memory figures plus the ``hostname``,
		``queues`` (only when the host has queues) and ``hostgroups``
		keywords.

		Returns:
			A dict mapping hostname to NodeInfo, or None when
			``self._parse_qhost_xml()`` returns None.
		"""
		hostgroups = self._get_hostgroups()

		dom = self._parse_qhost_xml()
		if dom is None:
			return None

		nodeinfolist = {}
		for h in dom.getElementsByTagName("host"):
			hostname = h.getAttribute("name")
			# ignore the generic host "global"
			if hostname == "global":
				continue

			# get the host values to get the information
			memory_total = 0
			memory_used = 0
			powered_on = False
			for hv in h.getElementsByTagName("hostvalue"):
				valuename = hv.getAttribute("name")
				if valuename == "load_avg":
					# If the load_avg is defined, the node is considered to be on
					# TODO: Try to improve this
					if hv.firstChild.nodeValue != "-":
						powered_on = True
				elif valuename == "mem_total":
					if hv.firstChild.nodeValue != "-":
						memory_total = self._translate_mem_value(hv.firstChild.nodeValue)
				elif valuename == "mem_used":
					if hv.firstChild.nodeValue != "-":
						memory_used = self._translate_mem_value(hv.firstChild.nodeValue)

			# Get the info about the queues
			used_slots = 0
			total_slots = 0
			node_queues = []
			for q in h.getElementsByTagName("queue"):
				queue_name = q.getAttribute("name")
				node_queues.append(TypedClass.auto(str(queue_name)))

				# Get the queue values
				queue_used_slots = 0
				queue_total_slots = 0
				queue_state = None
				for qv in q.getElementsByTagName("queuevalue"):
					queuevaluename = qv.getAttribute("name")
					if queuevaluename == "slots_used":
						queue_used_slots = int(qv.firstChild.nodeValue)
					elif queuevaluename == "slots":
						queue_total_slots = int(qv.firstChild.nodeValue)
					elif queuevaluename == "state_string":
						# The state element may be empty
						if qv.firstChild is not None:
							queue_state = qv.firstChild.nodeValue

				flags = "" if queue_state is None else queue_state.lower()

				# if some of the queues are in "Alarm Unknown" state the node is down
				if "au" in flags:
					powered_on = False

				# Slots of disabled/suspended queues are left out of the totals
				if "d" in flags or "s" in flags:
					_LOGGER.debug(queue_name + "@" + hostname + " is in " + queue_state + " state. Ignoring this slots")
				else:
					used_slots += queue_used_slots
					total_slots += queue_total_slots

			keywords = {'hostname': TypedClass.auto(hostname)}
			if node_queues:
				keywords['queues'] = TypedList(node_queues)

			# .items() works on both Python 2 and 3 (iteritems() is 2-only)
			node_hgs = []
			for hg, nodelist in hostgroups.items():
				if hostname in nodelist:
					node_hgs.append(TypedClass.auto(str(hg)))
			keywords['hostgroups'] = TypedList(node_hgs)

			free_slots = total_slots - used_slots

			if not powered_on:
				node_state = NodeInfo.OFF
			elif free_slots > 0:
				node_state = NodeInfo.IDLE
			else:
				node_state = NodeInfo.USED

			# NOTE(review): -1 looks like an "unknown memory" marker coming
			# from _translate_mem_value -- confirm against that helper.
			memory_free = -1
			if memory_total != -1:
				memory_free = memory_total - memory_used

			nodeinfolist[hostname] = NodeInfo(hostname, total_slots, free_slots, memory_total, memory_free, keywords)
			nodeinfolist[hostname].state = node_state

		return nodeinfolist