Exemplo n.º 1
0
def get_hosts_metric(hosts, metric, from_ts=None, to_ts=None, resolution=None):
    """Get metric values from Grid'5000 metrology API

    One worker thread is started per site (as grouped by
    ``group_hosts``); the per-site results are merged into a single
    dict once every thread has finished.

    :param hosts: List of hosts

    :param metric: Grid'5000 metrology metric to fetch (eg: "power",
      "cpu_user")

    :param from_ts: Time from which metric is collected, in any type
      supported by `execo.time_utils.get_unixts`, optional.

    :param to_ts: Time until which metric is collected, in any type
      supported by `execo.time_utils.get_unixts`, optional.

    :param resolution: time resolution, in any type supported by
      `execo.time_utils.get_seconds`, optional.

    :return: A dict of host -> dict with entries 'from' (unix
      timestamp in seconds, as returned from g5k api), 'to' (unix
      timestamp in seconds, as returned from g5k api), 'resolution'
      (in seconds, as returned from g5k api), type (the type of
      metric, as returned by g5k api), 'values': a list of tuples
      (timestamp, metric value). Some g5k metrics (the kwapi ones)
      return both the timestamps and values as separate lists, in
      which case this function only takes care of gathering them in
      tuples (note also that for these metrics, it seems that 'from',
      'to', 'resolution' returned by g5k api are inconsistent with the
      timestamps list. In this case this function makes no correction
      and returns everything 'as is'). Some other g5k metrics (the
      ganglia ones) only return the values, in which case this
      function generates the timestamps of the tuples from 'from',
      'to', 'resolution'.
    """
    # Optional arguments are only normalised when supplied. Identity tests
    # are used so that values with a custom ``__ne__`` cannot confuse the
    # "was it given?" check.
    if from_ts is not None:
        from_ts = int(get_unixts(from_ts))
    if to_ts is not None:
        to_ts = int(get_unixts(to_ts))
    if resolution is not None:
        resolution = get_seconds(resolution)

    grouped_hosts = group_hosts(hosts)
    site_threads = []
    for site in grouped_hosts:
        site_th = threading.Thread(target=__get_site_metrics,
                                   args=(site, grouped_hosts, metric, from_ts,
                                         to_ts, resolution))
        site_th.start()
        site_threads.append(site_th)

    res = {}
    for site_th in site_threads:
        site_th.join()
        # NOTE(review): ``res`` is not a standard Thread attribute; presumably
        # __get_site_metrics stores its result on the thread that runs it -
        # confirm against that helper.
        res.update(site_th.res)
    return res
Exemplo n.º 2
0
    def make_reservation(self):
        """Perform a reservation of the required number of nodes.

        The search starts one minute from now and first looks at a window
        of three days (plus one minute). While ``self._get_nodes`` reports
        no nodes, the window is moved forward by three days at a time, the
        new window starting one walltime before the end of the previous
        one. The process exits with a non-zero status once the window start
        is more than six weeks away.

        On success the job is submitted with ``oarsub`` and the resulting
        job id and frontend are stored in ``self.oar_job_id`` and
        ``self.frontend``.
        """
        # Local import: only the abort path below needs it.
        import sys

        logger.info('Performing reservation')
        now = int(time.time() +
                  timedelta_to_seconds(datetime.timedelta(minutes=1)))
        starttime = now
        endtime = int(
            starttime +
            timedelta_to_seconds(datetime.timedelta(days=3, minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)

        search_time = 3 * 24 * 60 * 60  # 3 days
        walltime_seconds = get_seconds(self.options.walltime)

        iteration = 0
        while not n_nodes:
            iteration += 1
            logger.info(
                'Not enough nodes found between %s and %s, '
                'increasing time window', format_date(starttime),
                format_date(endtime))
            # Overlap consecutive windows by one walltime so that a job
            # straddling the previous window's end is not missed.
            starttime = max(now,
                            now + iteration * search_time - walltime_seconds)
            endtime = int(now + (iteration + 1) * search_time)

            startdate, n_nodes = self._get_nodes(starttime, endtime)
            six_weeks_ahead = int(
                time.time() +
                timedelta_to_seconds(datetime.timedelta(weeks=6)))
            if starttime > six_weeks_ahead:
                logger.error(
                    'There are not enough nodes on %s for your '
                    'experiments, abort ...', self.cluster)
                # The bare exit() helper comes from the ``site`` module and
                # is meant for interactive use; sys.exit is always available
                # and lets the failure show in the exit status.
                sys.exit(1)

        jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                    name=self.__class__.__name__)
        sub = jobs_specs[0][0]
        sub.walltime = self.options.walltime
        if self.use_kadeploy:
            sub.additional_options = '-t deploy'
        else:
            sub.additional_options = '-t allow_classic_ssh'
        sub.reservation_date = startdate
        (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
        logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                    format_date(startdate), str(n_nodes), str(self.oar_job_id))
Exemplo n.º 3
0
    def make_reservation(self):
        """Perform a reservation of the required number of nodes.

        Candidate time windows are probed with ``self._get_nodes`` until
        one yields a non-empty node count:

        * the first window runs from one minute in the future to three
          days and one minute after that;
        * each following window ends three days later than the previous
          one and starts one walltime before the previous window's end.

        If the start of the window moves more than six weeks into the
        future, an error is logged and the process exits with status 1.
        Otherwise the job is submitted through ``oarsub`` and
        ``self.oar_job_id`` / ``self.frontend`` are set from its first
        result.
        """
        # Local import: only used when the search has to be abandoned.
        import sys

        logger.info('Performing reservation')
        now = int(time.time() +
                  timedelta_to_seconds(datetime.timedelta(minutes=1)))
        starttime = now
        endtime = int(starttime +
                      timedelta_to_seconds(datetime.timedelta(days=3,
                                                              minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)

        search_time = 3 * 24 * 60 * 60  # 3 days
        walltime_seconds = get_seconds(self.options.walltime)

        iteration = 0
        while not n_nodes:
            iteration += 1
            logger.info('Not enough nodes found between %s and %s, '
                        'increasing time window',
                        format_date(starttime), format_date(endtime))
            starttime = max(now, now +
                            iteration * search_time - walltime_seconds)
            endtime = int(now + (iteration + 1) * search_time)

            startdate, n_nodes = self._get_nodes(starttime, endtime)
            if starttime > int(time.time() + timedelta_to_seconds(
                    datetime.timedelta(weeks=6))):
                logger.error('There are not enough nodes on %s for your '
                             'experiments, abort ...', self.cluster)
                # Use sys.exit rather than the interactive-only exit()
                # helper, and report the failure through the exit status.
                sys.exit(1)

        jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                    name=self.__class__.__name__)
        sub = jobs_specs[0][0]
        sub.walltime = self.options.walltime
        # The OAR job type depends on whether the nodes will be deployed.
        if self.use_kadeploy:
            sub.additional_options = '-t deploy'
        else:
            sub.additional_options = '-t allow_classic_ssh'
        sub.reservation_date = startdate
        (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
        logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                    format_date(startdate),
                    str(n_nodes), str(self.oar_job_id))
Exemplo n.º 4
0
def get_hosts_jobs(hosts, walltime, out_of_chart=False):
    """Find the first slot when the hosts are available and return a
    list of jobs_specs

    The candidate start dates are the limits returned by
    ``_slots_limits`` for the planning of ``hosts``; the first one (in
    the order returned) at which every host of a known Grid'5000 cluster
    has a free slot covering ``walltime`` is used as reservation date.

    :param hosts: list of hosts, given either as ``Host`` objects or as
      host names

    :param walltime: duration of reservation, in any format supported by
      ``get_seconds``

    :param out_of_chart: forwarded unchanged to ``get_planning``

    :return: a list of ``(OarSubmission, site)`` tuples, one per site of
      the planning, or None when no suitable slot is found
    """
    # Build a real list: ``hosts`` is iterated once per site below, which a
    # lazy ``map`` object (Python 3) would not survive.
    hosts = [x.address if isinstance(x, Host) else x for x in hosts]
    planning = get_planning(elements=hosts, out_of_chart=out_of_chart)
    limits = _slots_limits(planning)
    walltime = get_seconds(walltime)
    # The cluster list does not depend on the loop variables; fetch it once.
    g5k_clusters = get_g5k_clusters()

    def _all_hosts_free(start):
        """Tell whether every planned host is free on [start, start + walltime]."""
        end = start + walltime
        for site_planning in planning.values():
            for cluster, cluster_planning in site_planning.items():
                if cluster not in g5k_clusters:
                    continue
                for host_planning in cluster_planning.values():
                    if not any(free_slot[0] <= start and free_slot[1] >= end
                               for free_slot in host_planning['free']):
                        return False
        return True

    for limit in limits:
        if _all_hosts_free(limit):
            startdate = limit
            break
    else:
        logger.error('Unable to find a slot for %s', hosts)
        return None

    jobs_specs = []
    for site in planning:
        site_hosts = [get_host_longname(h) for h in hosts
                      if get_host_site(h) == site]
        sub_res = "{host in ('" + "','".join(site_hosts) + "')}/nodes=" + str(
            len(site_hosts))
        jobs_specs.append((OarSubmission(resources=sub_res,
                                         reservation_date=startdate), site))

    return jobs_specs
Exemplo n.º 5
0
Arquivo: utils.py Projeto: badock/vm5k
def get_hosts_jobs(hosts, walltime, out_of_chart=False):
    """Find the first slot when the hosts are available and return a
    list of jobs_specs

    Each limit returned by ``_slots_limits`` is tried in turn as a start
    date; the first one at which all hosts belonging to a known
    Grid'5000 cluster are free for ``walltime`` is retained.

    :param hosts: list of hosts (``Host`` instances are replaced by
      their ``address``)

    :param walltime: duration of reservation, converted with
      ``get_seconds``

    :param out_of_chart: passed through to ``get_planning``

    :return: list of ``(OarSubmission, site)`` tuples with one entry per
      site of the planning; None if no start date fits
    """
    # A list (not a one-shot ``map`` iterator) is required because the
    # hosts are filtered again for every site further down.
    hosts = [h.address if isinstance(h, Host) else h for h in hosts]
    planning = get_planning(elements=hosts, out_of_chart=out_of_chart)
    limits = _slots_limits(planning)
    walltime = get_seconds(walltime)
    # Loop-invariant: look the cluster names up once instead of once per
    # (limit, cluster) pair.
    known_clusters = get_g5k_clusters()

    for limit in limits:
        slot_end = limit + walltime
        all_host_free = True
        # ``.values()`` / ``.items()`` work on both Python 2 and Python 3
        # dicts, unlike ``itervalues`` / ``iteritems``.
        for site_planning in planning.values():
            for cluster, cluster_planning in site_planning.items():
                if cluster not in known_clusters:
                    continue
                for host_planning in cluster_planning.values():
                    host_free = any(
                        free_slot[0] <= limit and free_slot[1] >= slot_end
                        for free_slot in host_planning['free'])
                    if not host_free:
                        all_host_free = False
        if all_host_free:
            startdate = limit
            break
    else:
        logger.error('Unable to find a slot for %s', hosts)
        return None

    jobs_specs = []
    for site in planning:
        site_hosts = [get_host_longname(h) for h in hosts
                      if get_host_site(h) == site]
        sub_res = "{host in ('%s')}/nodes=%s" % ("','".join(site_hosts),
                                                 len(site_hosts))
        jobs_specs.append((OarSubmission(resources=sub_res,
                                         reservation_date=startdate), site))

    return jobs_specs
Exemplo n.º 6
0
def format_oar_duration(duration):
    """Return a duration formatted as ``hours:minutes:seconds`` for oar/oargrid.

    The three fields are written without zero padding and any fractional
    part of the seconds is dropped.

    :param duration: a duration in one of the formats handled by
      ``get_seconds``.
    """
    total = get_seconds(duration)
    remaining = total
    hours = (remaining - (remaining % 3600)) // 3600
    remaining -= hours * 3600
    minutes = (remaining - (remaining % 60)) // 60
    remaining -= minutes * 60
    remaining = int(remaining)
    # A field is only computed from the duration once the total reaches
    # that unit; below it, a literal zero is emitted.
    hours_field = "%i:" % hours if total >= 3600 else "0:"
    minutes_field = "%i:" % minutes if total >= 60 else "0:"
    return hours_field + minutes_field + "%i" % remaining
Exemplo n.º 7
0
def format_oar_duration(duration):
    """Build the ``h:m:s`` string used to express a duration to oar/oargrid.

    Fields are not zero padded; sub-second precision is discarded.

    :param duration: a duration in one of the formats handled by
      ``get_seconds``.
    """
    total_seconds = get_seconds(duration)
    rest = total_seconds
    hours = (rest - (rest % 3600)) // 3600
    rest -= hours * 3600
    minutes = (rest - (rest % 60)) // 60
    rest -= minutes * 60
    rest = int(rest)

    fields = []
    # Hours and minutes fall back to "0" while the total is below one
    # hour / one minute respectively.
    if total_seconds >= 3600:
        fields.append("%i" % hours)
    else:
        fields.append("0")
    if total_seconds >= 60:
        fields.append("%i" % minutes)
    else:
        fields.append("0")
    fields.append("%i" % rest)
    return ":".join(fields)
Exemplo n.º 8
0
def get_hosts_metric(hosts, metric, from_ts=None, to_ts=None, resolution=1):
    """Fetch the values of a metric from the Grid'5000 metrology API.

    The hosts are grouped with ``group_hosts`` and one thread per group
    (site) runs ``__get_site_metrics``; the per-site results are merged
    after all threads have been joined.

    :param hosts: List of hosts

    :param metric: Grid'5000 metrology metric to fetch (eg: "power",
      "cpu_user")

    :param from_ts: Time from which metric is collected, in any type
      supported by `execo.time_utils.get_unixts`, optional.

    :param to_ts: Time until which metric is collected, in any type
      supported by `execo.time_utils.get_unixts`, optional.

    :param resolution: time resolution, in any type supported by
      `execo.time_utils.get_seconds`, default 1 second.

    :return: A dict of host -> List of (timestamp, metric value)
      retrieved from API
    """
    from_ts = get_unixts(from_ts)
    to_ts = get_unixts(to_ts)
    resolution = get_seconds(resolution)
    grouped_hosts = group_hosts(hosts)

    workers = []
    for site in grouped_hosts:
        worker_args = (site, grouped_hosts, metric, from_ts, to_ts,
                       resolution)
        worker = threading.Thread(target=__get_site_metrics,
                                  args=worker_args)
        worker.start()
        workers.append(worker)

    merged = {}
    for worker in workers:
        worker.join()
        # NOTE(review): ``res`` is presumably attached to the thread by
        # __get_site_metrics - confirm against that helper.
        merged.update(worker.res)
    return merged
Exemplo n.º 9
0
def compute_slots(planning, walltime, excluded_elements=None):
    """Compute the slots limits and find the number of available nodes for
    each elements and for the given walltime.

    Return the list of slots where a slot is ``[ start, stop, freehosts ]`` and
    freehosts is a dict of Grid'5000 element with number of nodes available
    ``{'grid5000': 40, 'lyon': 20, 'reims': 10, 'stremi': 10 }``.

    When the planning contains vlans, freehosts also gets a ``'kavlan'``
    entry: for a single-site planning it is the number of free vlans whose
    numeric suffix is below 10; for a multi-site planning it is the list of
    sites owning a free vlan whose numeric suffix is above 10.

    WARNING: slots does not includes subnets

    :param planning: a dict of the resources planning, returned by
      ``get_planning``. It is modified in place when ``excluded_elements``
      is given.

    :param walltime: a duration in a format supported by get_seconds where the
      resources are available

    :param excluded_elements: list of elements that will not be included in
      the slots computation

    :return: the list of slots, sorted by start date
    """
    walltime = get_seconds(walltime)
    if excluded_elements is not None:
        _remove_excluded(planning, excluded_elements)
    limits = _slots_limits(planning)

    # Checking if we need to compile vlans planning: vlans are detected on
    # the first site only; several sites mean a global vlan is wanted.
    kavlan = False
    kavlan_global = False
    if len(planning) > 0 and 'vlans' in next(iter(planning.values())):
        kavlan_global = len(planning) > 1
        kavlan = not kavlan_global

    # Loop-invariant: fetch the cluster names once instead of once per
    # (limit, cluster) pair.
    g5k_clusters = get_g5k_clusters(queues=None)

    def _is_free(element_planning, start):
        """Tell whether one free slot covers [start, start + walltime]."""
        stop = start + walltime
        return any(free_slot[0] <= start and free_slot[1] >= stop
                   for free_slot in element_planning['free'])

    slots = []
    for limit in limits:
        free_host_names = []
        free_elements = {'grid5000': 0}
        free_vlans_global = []

        for site, site_planning in planning.items():
            free_elements[site] = 0

            for cluster, cluster_planning in site_planning.items():
                # Non-cluster entries of the site planning (e.g. 'vlans')
                # are skipped here.
                if cluster not in g5k_clusters:
                    continue
                free_elements[cluster] = 0
                for host, host_planning in cluster_planning.items():
                    if _is_free(host_planning, limit):
                        free_elements['grid5000'] += 1
                        free_elements[site] += 1
                        free_elements[cluster] += 1
                        free_host_names.append(host)

            if kavlan:
                free_vlans = 0
                for vlan, vlan_planning in site_planning['vlans'].items():
                    if (int(vlan.split('-')[1]) < 10
                            and _is_free(vlan_planning, limit)):
                        free_vlans += 1
                free_elements['kavlan'] = free_vlans
            elif kavlan_global:
                for vlan, vlan_planning in site_planning['vlans'].items():
                    if (int(vlan.split('-')[1]) > 10
                            and _is_free(vlan_planning, limit)):
                        free_vlans_global.append(site)
                free_elements['kavlan'] = free_vlans_global
                ## MISSING OTHER RESOURCES COMPUTATION
        logger.debug(''.join(', ' + host for host in free_host_names))
        slots.append([limit, limit + walltime, free_elements])

    slots.sort(key=itemgetter(0))
    return slots