Exemplo n.º 1
0
    def deactivate_profile(self, job=None):
        """
        Deactivate the power profile of a job.

        When _running_excl() reports true for the job, current_eoe is
        reset to None on each vnode named by _get_vnode_names() (looked up
        in the current event's vnode_list) before the request is passed to
        the underlying PMI object.

        :param job: job to act on; when None, the job of the current PBS
                    event is used.
        :returns: the value returned by the PMI object's
                  _deactivate_profile() for this job.
        """
        self._check_pmi()

        if job is None:
            job = pbs.event().job
        if _running_excl(job):
            pbs.logjobmsg(job.id, "PMI: reset current_eoe")
            for h in _get_vnode_names(job):
                try:
                    pbs.event().vnode_list[h].current_eoe = None
                except Exception:
                    # Best effort: a vnode that cannot be updated must not
                    # stop the deactivation.  Only Exception is caught so
                    # SystemExit/KeyboardInterrupt still propagate.
                    pass
        return self.__pmi._deactivate_profile(job)
Exemplo n.º 2
0
    def deactivate_profile(self, job=None):
        """
        Deactivate the power profile of a job.

        If the job is running exclusively according to _running_excl(),
        the current_eoe attribute of every vnode returned by
        _get_vnode_names() is cleared in the event's vnode_list, then the
        PMI object's _deactivate_profile() is called.

        :param job: job to act on; defaults to pbs.event().job when None.
        :returns: the result of the PMI object's _deactivate_profile().
        """
        self._check_pmi()

        if job is None:
            job = pbs.event().job
        if _running_excl(job):
            pbs.logjobmsg(job.id, "PMI: reset current_eoe")
            for h in _get_vnode_names(job):
                try:
                    pbs.event().vnode_list[h].current_eoe = None
                except Exception:
                    # Clearing current_eoe is best effort; skip vnodes that
                    # fail.  A bare "except:" would also swallow SystemExit
                    # and KeyboardInterrupt, so only Exception is caught.
                    pass
        return self.__pmi._deactivate_profile(job)
Exemplo n.º 3
0
    def _activate_profile(self, profile_name, job):
        """
        Activate power settings for a job on its compute nodes.

        The value returned by job_energy() is written to the job's energy
        file when it is not None.  If the job is running exclusively, a
        set_power_cap command is launched with the job's pcap_node and/or
        pcap_accelerator resource values.

        :param profile_name: profile name; only used in the debug log
                             message.
        :param job: the PBS job being started.
        :returns: False when nidlist() reports no compute nodes for the
                  job, True otherwise.
        """
        pbs.logmsg(pbs.LOG_DEBUG,
                   "Cray: %s activate '%s'" % (job.id, str(profile_name)))

        nids, cnt = nidlist(job)
        if cnt == 0:
            pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
            return False

        energy = job_energy(job, nids, cnt)
        if energy is not None:
            # Context manager guarantees the file is closed even if the
            # write fails.
            with open(energy_file(job), "w") as f:
                f.write(str(energy))

        # If this is the only job, set nodes to capped power.
        if _running_excl(job):
            cmd = "set_power_cap --nids " + nids
            doit = False

            pcap = job.Resource_List['pcap_node']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: pcap node %d" % pcap)
                cmd += " --node " + str(pcap)
                doit = True
            pcap = job.Resource_List['pcap_accelerator']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: pcap accel %d" % pcap)
                cmd += " --accel " + str(pcap)
                doit = True

            # Only run the command when at least one cap was requested.
            if doit:
                launch(job.id, cmd)
            else:
                pbs.logjobmsg(job.id, "Cray: no power cap to set")

        return True
Exemplo n.º 4
0
    def _activate_profile(self, profile_name, job):
        """
        Apply power settings for a job that is starting.

        Stores the result of job_energy() in the file named by
        energy_file() (skipped when the result is None) and, for a job
        that _running_excl() reports as exclusive, launches set_power_cap
        with the requested node and accelerator caps.

        :param profile_name: profile name, used only for debug logging.
        :param job: the PBS job being started.
        :returns: False if the job has no compute nodes (cnt == 0 from
                  nidlist()), True otherwise.
        """
        pbs.logmsg(pbs.LOG_DEBUG, "Cray: %s activate '%s'" %
                   (job.id, str(profile_name)))

        nids, cnt = nidlist(job)
        if cnt == 0:
            pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
            return False

        energy = job_energy(job, nids, cnt)
        if energy is not None:
            # "with" closes the file on every path, including a failed
            # write, so the descriptor is never leaked.
            with open(energy_file(job), "w") as f:
                f.write(str(energy))

        # If this is the only job, set nodes to capped power.
        if _running_excl(job):
            cmd = "set_power_cap --nids " + nids
            doit = False

            pcap = job.Resource_List['pcap_node']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: pcap node %d" % pcap)
                cmd += " --node " + str(pcap)
                doit = True
            pcap = job.Resource_List['pcap_accelerator']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: pcap accel %d" % pcap)
                cmd += " --accel " + str(pcap)
                doit = True

            # Skip the launch entirely when neither cap was requested.
            if doit:
                launch(job.id, cmd)
            else:
                pbs.logjobmsg(job.id, "Cray: no power cap to set")

        return True
Exemplo n.º 5
0
    def _deactivate_profile(self, job):
        """
        Undo power settings for a finished job and record its energy use.

        Steps, in order:
          1. Remove the job's energy file (best effort).
          2. If the job ran exclusively, launch set_power_cap with a value
             of 0 for each cap the job requested (best effort).
          3. Read the job's RUR file, sum the "energy_used" metric of every
             "energy" plugin line, and delete the file.
          4. Convert the sum from joules to kWh and store it in
             job.resources_used["energy"] when no value is present yet or
             the new value is larger than the existing one.

        The RUR file is only parsed when it is owned by uid 0 and is not
        world-writable; otherwise it is deleted unread.

        :param job: the PBS job being ended.
        :returns: True when an energy value was obtained from RUR data;
                  False when the job has no compute nodes, the RUR file
                  cannot be opened or fails the permission check, or no
                  "energy" plugin line was found.
        """
        pbs.logmsg(pbs.LOG_DEBUG, "Cray: deactivate %s" % job.id)
        nids, cnt = nidlist(job)
        if cnt == 0:
            pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
            return False

        # remove initial energy file
        try:
            os.unlink(energy_file(job))
        except Exception:
            # Best effort: failure to remove the file is ignored.
            pass

        # If this is the only job, undo any power cap we set.
        if _running_excl(job):
            cmd = "set_power_cap --nids " + nids
            doit = False

            pcap = job.Resource_List['pcap_node']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: remove pcap node %d" % pcap)
                cmd += " --node 0"
                doit = True
            pcap = job.Resource_List['pcap_accelerator']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: remove pcap accel %d" % pcap)
                cmd += " --accel 0"
                doit = True

            if doit:
                try:
                    launch(job.id, cmd)
                except Exception:
                    # Best effort: a failed cap removal must not prevent
                    # the energy accounting below.
                    pass
            else:
                pbs.logjobmsg(job.id, "Cray: no power cap to remove")

        # Get final energy value from RUR data
        name = rur_file(job)
        try:
            rurfp = open(name, "r")
        except Exception:
            pbs.logjobmsg(job.id, "Cray: no RUR data")
            return False

        energy = 0
        seen = False  # track if energy plugin is seen
        apid_fmt = 'Cray:RUR: {"apid":%s,"apid_energy":%dJ,"job_energy":%dJ}'

        # The context manager closes the file on every path, including an
        # exception from fstat() or from reading, so the handle cannot leak.
        with rurfp:
            sbuf = os.fstat(rurfp.fileno())
            # Only trust a file owned by root that others cannot write to.
            trusted = (sbuf.st_uid == 0) and not (sbuf.st_mode & stat.S_IWOTH)
            if not trusted:
                pbs.logjobmsg(job.id, "Cray: RUR file permission: %s" % name)
            else:
                pbs.logjobmsg(job.id, "Cray: reading RUR file: %s" % name)
                for line in rurfp:
                    # Each line is split as "plugin : apid : metrics".
                    plugin, _, rest = line.partition(" : ")
                    if plugin != "energy":  # check that the plugin is energy
                        continue

                    apid, _, metstr = rest.partition(" : ")
                    seen = True
                    try:  # parse the metric list
                        # NOTE(review): eval() executes arbitrary
                        # expressions from the file; the ownership and
                        # permission check above is the only guard.
                        # Consider ast.literal_eval if the metric list is
                        # always a plain literal -- confirm against the
                        # RUR output format.
                        metlist = eval(metstr, {})
                        # metlist is consumed as a flat sequence of
                        # alternating key, value entries.
                        metrics = dict(metlist[i:i + 2]
                                       for i in range(0, len(metlist), 2))
                        joules = metrics["energy_used"]
                        energy += joules
                        pbs.logjobmsg(job.id,
                                      apid_fmt % (apid, joules, energy))
                    except Exception as e:
                        pbs.logjobmsg(
                            job.id,
                            "Cray:RUR: energy_used not found: %s" % str(e))

        # The RUR file is removed whether or not it was trusted.
        os.unlink(name)
        if not trusted:
            return False

        if not seen:
            pbs.logjobmsg(job.id, "Cray:RUR: no energy plugin")
            return False

        old_energy = job.resources_used["energy"]
        new_energy = float(energy) / 3600000.0  # joules -> kWh
        if old_energy is None:
            pbs.logjobmsg(job.id, "Cray:RUR: energy %fkWh" % new_energy)
            job.resources_used["energy"] = new_energy
        elif new_energy > old_energy:
            pbs.logjobmsg(
                job.id,
                "Cray:RUR: energy %fkWh replaces periodic energy %fkWh" %
                (new_energy, old_energy))
            job.resources_used["energy"] = new_energy
        else:
            # Keep the existing value when it is at least as large.
            pbs.logjobmsg(
                job.id, "Cray:RUR: energy %fkWh last periodic usage %fkWh" %
                (new_energy, old_energy))
        return True
Exemplo n.º 6
0
    def _deactivate_profile(self, job):
        pbs.logmsg(pbs.LOG_DEBUG, "Cray: deactivate %s" % job.id)
        nids, cnt = nidlist(job)
        if cnt == 0:
            pbs.logjobmsg(job.id, "Cray: no compute nodes for power setting")
            return False

        # remove initial energy file
        try:
            os.unlink(energy_file(job))
        except Exception:
            pass

        # If this is the only job, undo any power cap we set.
        if _running_excl(job):
            cmd = "set_power_cap --nids " + nids
            doit = False

            pcap = job.Resource_List['pcap_node']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: remove pcap node %d" % pcap)
                cmd += " --node 0"
                doit = True
            pcap = job.Resource_List['pcap_accelerator']
            if pcap is not None:
                pbs.logjobmsg(job.id, "Cray: remove pcap accel %d" % pcap)
                cmd += " --accel 0"
                doit = True

            if doit:
                try:
                    launch(job.id, cmd)
                except Exception:
                    pass
            else:
                pbs.logjobmsg(job.id, "Cray: no power cap to remove")

        # Get final energy value from RUR data
        name = rur_file(job)
        try:
            rurfp = open(name, "r")
        except Exception:
            pbs.logjobmsg(job.id, "Cray: no RUR data")
            return False

        sbuf = os.fstat(rurfp.fileno())
        if (sbuf.st_uid != 0) or (sbuf.st_mode & stat.S_IWOTH):
            pbs.logjobmsg(job.id, "Cray: RUR file permission: %s" % name)
            rurfp.close()
            os.unlink(name)
            return False

        pbs.logjobmsg(job.id, "Cray: reading RUR file: %s" % name)
        energy = 0
        seen = False        # track if energy plugin is seen
        for line in rurfp:
            plugin, _, rest = line.partition(" : ")
            if plugin != "energy":		# check that the plugin is energy
                continue

            apid, _, metstr = rest.partition(" : ")
            seen = True
            try:						# parse the metric list
                metlist = eval(metstr, {})
                metrics = dict(metlist[i:i + 2] for i in range(0,
                                                               len(metlist), 2))
                joules = metrics["energy_used"]
                energy += joules
                pbs.logjobmsg(job.id,
                              'Cray:RUR: {"apid":%s,"apid_energy":%dJ,"job_energy":%dJ}' %
                              (apid, joules, energy))
            except Exception, e:
                pbs.logjobmsg(job.id,
                              "Cray:RUR: energy_used not found: %s" % str(e))