def make_reservation(self):
    """Book the required number of nodes on the cluster.

    Scans successive three-day windows until one offers enough nodes,
    then submits a deploy-type OAR reservation starting at the slot
    returned by ``_get_nodes``.
    """
    logger.info('Performing reservation')
    one_minute = timedelta_to_seconds(datetime.timedelta(minutes=1))
    window_len = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))
    win_start = int(time.time() + one_minute)
    win_end = int(win_start + window_len)
    startdate, n_nodes = self._get_nodes(win_start, win_end)
    # Slide the search window forward until some slot has enough nodes.
    while not n_nodes:
        logger.info('No enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(win_start), format_date(win_end))
        win_start = win_end
        win_end = int(win_start + window_len)
        startdate, n_nodes = self._get_nodes(win_start, win_end)
        # Give up once the scan has gone six weeks into the future.
        if win_start > int(time.time() + timedelta_to_seconds(
                datetime.timedelta(weeks=6))):
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()
    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    job = jobs_specs[0][0]
    job.walltime = self.options.walltime
    job.additional_options = '-t deploy'
    job.reservation_date = startdate
    self.oar_job_id, self.frontend = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s',
                format_date(startdate), str(n_nodes))
def make_reservation(self):
    """Reserve the required number of nodes on the target cluster.

    Advances through consecutive three-day search windows until a
    suitable slot appears, then submits a deploy reservation via OAR.
    """
    logger.info('Performing reservation')
    window_start = int(time.time() +
                       timedelta_to_seconds(datetime.timedelta(minutes=1)))
    window_end = int(window_start + timedelta_to_seconds(
        datetime.timedelta(days=3, minutes=1)))
    startdate, n_nodes = self._get_nodes(window_start, window_end)
    while not n_nodes:
        logger.info('No enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(window_start), format_date(window_end))
        # Move to the next window: new start = old end.
        window_start, window_end = window_end, int(
            window_end + timedelta_to_seconds(
                datetime.timedelta(days=3, minutes=1)))
        startdate, n_nodes = self._get_nodes(window_start, window_end)
        horizon = int(time.time() + timedelta_to_seconds(
            datetime.timedelta(weeks=6)))
        # Abort once the search has moved more than six weeks ahead.
        if window_start > horizon:
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()
    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    submission = jobs_specs[0][0]
    submission.walltime = self.options.walltime
    submission.additional_options = '-t deploy'
    submission.reservation_date = startdate
    self.oar_job_id, self.frontend = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s',
                format_date(startdate), str(n_nodes))
def make_reservation(self):
    """Perform a reservation of the required number of nodes.

    The search widens by one three-day window per attempt; each new
    window starts one walltime before its nominal boundary so that
    slots straddling the boundary are not skipped. Submits either a
    deploy or an allow_classic_ssh job depending on ``use_kadeploy``.
    """
    logger.info('Performing reservation')
    base = int(time.time() +
               timedelta_to_seconds(datetime.timedelta(minutes=1)))
    begin = base
    end = int(begin + timedelta_to_seconds(
        datetime.timedelta(days=3, minutes=1)))
    startdate, n_nodes = self._get_nodes(begin, end)
    window = 3 * 24 * 60 * 60  # 3 days
    walltime_s = get_seconds(self.options.walltime)
    attempt = 0
    while not n_nodes:
        attempt += 1
        logger.info('Not enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(begin), format_date(end))
        # Back the window start up by one walltime (never before now).
        begin = max(base, base + attempt * window - walltime_s)
        end = int(base + (attempt + 1) * window)
        startdate, n_nodes = self._get_nodes(begin, end)
        if begin > int(time.time() + timedelta_to_seconds(
                datetime.timedelta(weeks=6))):
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()
    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    job = jobs_specs[0][0]
    job.walltime = self.options.walltime
    job.additional_options = ('-t deploy' if self.use_kadeploy
                              else '-t allow_classic_ssh')
    job.reservation_date = startdate
    self.oar_job_id, self.frontend = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                format_date(startdate), str(n_nodes),
                str(self.oar_job_id))
def make_reservation(self):
    """Perform a reservation of the required number of nodes.

    Widens the search one three-day step per retry, rewinding each new
    window's start by one walltime so slots crossing the previous
    boundary remain reachable.
    """
    logger.info('Performing reservation')
    now = int(time.time() +
              timedelta_to_seconds(datetime.timedelta(minutes=1)))
    lo = now
    hi = int(lo + timedelta_to_seconds(
        datetime.timedelta(days=3, minutes=1)))
    startdate, n_nodes = self._get_nodes(lo, hi)
    step = 3 * 24 * 60 * 60  # one search window: 3 days
    wt_seconds = get_seconds(self.options.walltime)
    tries = 0
    while not n_nodes:
        tries += 1
        logger.info('Not enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(lo), format_date(hi))
        lo = max(now, now + tries * step - wt_seconds)
        hi = int(now + (tries + 1) * step)
        startdate, n_nodes = self._get_nodes(lo, hi)
        six_weeks_out = int(time.time() + timedelta_to_seconds(
            datetime.timedelta(weeks=6)))
        # Stop searching past a six-week horizon.
        if lo > six_weeks_out:
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()
    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    submission = jobs_specs[0][0]
    submission.walltime = self.options.walltime
    if self.use_kadeploy:
        submission.additional_options = '-t deploy'
    else:
        submission.additional_options = '-t allow_classic_ssh'
    submission.reservation_date = startdate
    self.oar_job_id, self.frontend = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                format_date(startdate), str(n_nodes),
                str(self.oar_job_id))
def make_reservation_local(self):
    """Perform a reservation of the required number of nodes, with
    4000 IP.

    Scans three-day windows for availability on the selected cluster,
    then submits a besteffort + allow_classic_ssh OAR job. Because the
    job is besteffort, no reservation date is set on the submission.
    """
    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    endtime = int(starttime + timedelta_to_seconds(
        datetime.timedelta(days=3, minutes=1)))
    self.cluster = self.options.selected_cluster
    startdate, n_nodes = self._get_nodes(starttime, endtime)
    while not n_nodes:
        logger.info('No enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime + timedelta_to_seconds(
            datetime.timedelta(days=3, minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)
        # Abort once the search has gone six weeks into the future.
        if starttime > int(time.time() + timedelta_to_seconds(
                datetime.timedelta(weeks=6))):
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()
    # (Removed a dead `startdate = []` assignment: the value was never
    # used since besteffort jobs get no reservation_date.)
    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    sub = jobs_specs[0][0]
    # Strip backslashes and double quotes from the resources string —
    # presumably to undo quoting added by get_jobs_specs; TODO confirm.
    tmp = str(sub.resources).replace('\\', '')
    sub.resources = tmp.replace('"', '')
    sub.walltime = self.options.walltime
    sub.additional_options = '-t allow_classic_ssh -t besteffort'
    (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
    logger.info('Startdate: besteffort, n_nodes: %s', str(n_nodes))
def make_reservation(self):
    """Reserve nodes grid-wide through oargridsub.

    Finds the first slot able to host the experiment, caps the request
    at ``options.n_nodes``, and submits a deploy-type grid reservation
    for the whole slot duration.
    """
    logger.info('Performing reservation')
    not_before = int(time.time() +
                     timedelta_to_seconds(datetime.timedelta(minutes=1)))
    grid_planning = get_planning(elements=['grid5000'],
                                 starttime=not_before)
    free_slots = compute_slots(grid_planning, self.options.walltime)
    wanted = {"grid5000": 0}
    slot_begin, slot_end, available = find_first_slot(free_slots, wanted)
    # Never ask for more nodes than the slot offers.
    wanted['grid5000'] = min(available['grid5000'], self.options.n_nodes)
    to_reserve = distribute_hosts(available, wanted)
    specs = get_jobs_specs(to_reserve, name='Paasage_Simu')
    logger.info("try to reserve " + str(to_reserve))
    self.oargrid_job_id, _ = oargridsub(specs, slot_begin,
                                        walltime=slot_end - slot_begin,
                                        job_type="deploy")
    logger.info("Reservation done")
def make_reservation(self):
    """Reserve all available nodes of the selected cluster.

    Finds the first slot on ``options.selected_cluster`` long enough
    for the requested walltime, requests every node that slot offers,
    and submits a besteffort + allow_classic_ssh grid reservation.
    """
    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    planning = get_planning(elements=[self.options.selected_cluster],
                            starttime=starttime)
    slots = compute_slots(planning, self.options.walltime)
    wanted = {self.options.selected_cluster: 0}
    start_date, end_date, resources = find_first_slot(slots, wanted)
    # Take every node the first suitable slot offers.
    wanted[self.options.selected_cluster] = resources[
        self.options.selected_cluster]
    actual_resources = distribute_hosts(resources, wanted)
    job_specs = get_jobs_specs(actual_resources, name='Aevol_diff_area')
    logger.info("try to reserve " + str(actual_resources))
    # BUG FIX: job_type was ['besteffort"' 'allow_classic_ssh'] — two
    # adjacent literals (one with a stray double quote) implicitly
    # concatenated into the single malformed type
    # 'besteffort"allow_classic_ssh'. They must be separate entries.
    self.oargrid_job_id, _ = oargridsub(
        job_specs, walltime=end_date - start_date,
        job_type=['besteffort', 'allow_classic_ssh'])
    logger.info("Reservation done")
def get_planning(elements=['grid5000'], vlan=False, subnet=False,
                 storage=False, out_of_chart=False, starttime=None,
                 endtime=None, ignore_besteffort=True, queues='default'):
    """Retrieve the planning of the elements (site, cluster) and others
    resources.

    Element planning structure is ``{'busy': [(123456,123457), ... ],
    'free': [(123457,123460), ... ]}.``

    :param elements: a list of Grid'5000 elements ('grid5000', <site>,
      <cluster>)

    :param vlan: a boolean to ask for KaVLAN computation

    :param subnet: a boolean to ask for subnets computation

    :param storage: a boolean to ask for storage computation

    :param out_of_chart: if True, consider that days outside weekends
      are busy

    :param starttime: start of time period for which to compute the
      planning, defaults to now + 1 minute

    :param endtime: end of time period for which to compute the
      planning, defaults to 4 weeks from now

    :param ignore_besteffort: True by default, to consider the resources
      with besteffort jobs as available

    :param queues: list of oar queues for which to get the planning

    Return a dict whose keys are sites, whose values are dict whose keys
    are cluster, subnets, kavlan or storage, whose values are planning
    dicts, whose keys are hosts, subnet address range, vlan number or
    chunk id planning respectively.
    """
    # Default/normalize the time window boundaries to unix timestamps.
    if not starttime:
        starttime = int(time() + timedelta_to_seconds(timedelta(minutes=1)))
    starttime = int(get_unixts(starttime))
    if not endtime:
        endtime = int(starttime +
                      timedelta_to_seconds(timedelta(weeks=4, minutes=1)))
    endtime = int(get_unixts(endtime))
    if 'grid5000' in elements:
        # Whole grid requested: expand to every site (note: `elements`
        # is rebound too, so the cleaning pass below keeps everything).
        sites = elements = get_g5k_sites()
    else:
        # Map each element (site, cluster or host name) to its site.
        sites = list(
            set([site for site in elements if site in get_g5k_sites()] + [
                get_cluster_site(cluster) for cluster in elements
                if cluster in get_g5k_clusters(queues=queues)
            ] + [
                get_host_site(host) for host in elements
                if host in get_g5k_hosts()
                or get_host_shortname(host) in get_g5k_hosts()
            ]))
    if len(sites) == 0:
        logger.error('Wrong elements given: %s' % (elements, ))
        return None
    # Skeleton: one empty planning dict per cluster of each site.
    planning = {}
    for site in sites:
        planning[site] = {}
        for cluster in get_site_clusters(site, queues=queues):
            planning[site][cluster] = {}
    # Optional extra resource categories per site.
    for site in sites:
        if vlan:
            planning[site].update({'vlans': {}})
        if subnet:
            planning[site].update({'subnets': {}})
        if storage:
            planning[site].update({'storage': {}})
    # Fill in the 'busy' intervals using the configured backend.
    if _retrieve_method == 'API':
        _get_planning_API(planning, ignore_besteffort)
    elif _retrieve_method == 'PostgreSQL':
        _get_planning_PGSQL(planning, ignore_besteffort)
    if out_of_chart:
        _add_charter_to_planning(planning, starttime, endtime)

    # Normalize each element's planning: sort and merge busy intervals,
    # clip them to [starttime, endtime], then derive the 'free' list.
    for site_pl in planning.values():
        for res_pl in site_pl.values():
            for el_planning in res_pl.values():
                el_planning['busy'].sort()
                _merge_el_planning(el_planning['busy'])
                _trunc_el_planning(el_planning['busy'], starttime, endtime)
                _fill_el_planning_free(el_planning, starttime, endtime)

    # cleaning: drop hosts (and then empty clusters) that were not
    # explicitly requested. Iterate over `planning` while deleting from
    # the deep copy so iteration stays safe.
    real_planning = deepcopy(planning)
    for site, site_pl in planning.items():
        for cl, cl_pl in site_pl.items():
            if cl in ['vlans']:
                continue
            keep_cluster = False
            for h in cl_pl:
                if not (get_host_site(h) in elements
                        or get_host_cluster(h) in elements
                        or get_host_shortname(h) in elements
                        or h in elements):
                    del real_planning[site][cl][h]
                else:
                    keep_cluster = True
            if not keep_cluster:
                del real_planning[site][cl]

    return real_planning
def draw_slots(slots, colors=None, show=False, save=True, outfile=None):
    """Draw the number of nodes available for the clusters (requires
    Matplotlib >= 1.2.0)

    :param slots: a list of slot, as returned by ``compute_slots``

    :param colors: a dict to define element coloring ``{'element':
      (255., 122., 122.)}``

    :param show: display the slots versus time

    :param save: save the plot to outfile

    :param outfile: specify the output file"""
    startstamp = slots[0][0]
    endstamp = slots[-1][1]
    if outfile is None:
        outfile = 'slots_' + format_date(startstamp)

    logger.info('Saving slots diagram to %s', style.emph(outfile))

    if colors is None:
        colors = _set_colors()

    xfmt = MD.DateFormatter('%d %b, %H:%M ')

    # Pick an x-axis tick density suited to the time span drawn.
    if endstamp - startstamp <= timedelta_to_seconds(timedelta(days=7)):
        x_major_locator = MD.HourLocator(byhour=[9, 19])
    elif endstamp - startstamp <= timedelta_to_seconds(timedelta(days=17)):
        x_major_locator = MD.HourLocator(byhour=[9])
    else:
        x_major_locator = MD.AutoDateLocator()

    max_nodes = {}
    total_nodes = 0
    slot_limits = []
    total_list = []
    i_slot = 0
    # Build step-like series: each slot contributes its boundary twice
    # (start and next slot's start) with the node count duplicated, so
    # the stackplot renders flat segments per slot.
    for slot in slots:
        slot_limits.append(slot[0])
        if i_slot + 1 < len(slots):
            slot_limits.append(slots[i_slot + 1][0])
            i_slot += 1
        for element, n_nodes in slot[2].items():
            if element in get_g5k_clusters(queues=None):
                if not element in max_nodes:
                    max_nodes[element] = []
                max_nodes[element].append(n_nodes)
                max_nodes[element].append(n_nodes)
            if element == 'grid5000':
                total_list.append(n_nodes)
                total_list.append(n_nodes)
                # Track the grid-wide maximum for the y-axis limit.
                if n_nodes > total_nodes:
                    total_nodes = n_nodes
    slot_limits.append(endstamp)
    slot_limits.sort()
    dates = [unixts_to_datetime(ts) for ts in slot_limits]
    datenums = MD.date2num(dates)

    fig = PLT.figure(figsize=(15, 10), dpi=80)

    ax = PLT.subplot(111)
    ax.xaxis_date()
    # Shift the axes left to leave room for the legend on the right.
    box = ax.get_position()
    ax.set_position([box.x0 - 0.07, box.y0, box.width, box.height])
    ax.set_xlim(unixts_to_datetime(startstamp), unixts_to_datetime(endstamp))
    ax.set_xlabel('Time')
    ax.set_ylabel('Nodes available')
    ax.set_ylim(0, total_nodes * 1.1)
    ax.axhline(y=total_nodes, color='#000000', linestyle='-', linewidth=2,
               label='ABSOLUTE MAXIMUM')
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.xaxis.set_major_formatter(xfmt)
    ax.xaxis.set_major_locator(x_major_locator)
    PLT.xticks(rotation=15)

    # One stacked band per cluster (grid5000 totals are excluded).
    max_nodes_list = []
    p_legend = []
    p_rects = []
    p_colors = []
    for key, value in sorted(max_nodes.items()):
        if key != 'grid5000':
            max_nodes_list.append(value)
            p_legend.append(key)
            p_rects.append(PLT.Rectangle((0, 0), 1, 1, fc=colors[key]))
            p_colors.append(colors[key])
    plots = PLT.stackplot(datenums, max_nodes_list, colors=p_colors)
    PLT.legend(p_rects, p_legend, loc='center right', ncol=1, shadow=True,
               bbox_to_anchor=(1.2, 0.5))

    if show:
        PLT.show()
    if save:
        logger.debug('Saving file %s ...', outfile)
        PLT.savefig(outfile, dpi=300)
def make_reservation(self):
    """Perform a reservation of the required number of nodes.

    If ``self.oar_result`` is already populated, only validates and
    logs the existing reservations (job ids and host lists) and
    returns. Otherwise searches three-day windows for a slot that fits
    ``self.clusters``, then submits one deploy-type OAR job per site
    (each with a slash_22 subnet prefixed to its resources) and stores
    the results in ``self.oar_result``.
    """
    # Reuse path: reservations already exist, just report them.
    if self.oar_result:
        message = "Validated OAR_JOB_ID:"
        for job_id, site in self.oar_result:
            message += "\n%s: %s" % (site, job_id)
        logger.info(message)
        message = "The list of hosts:"
        for job_id, site in self.oar_result:
            hosts = get_oar_job_nodes(oar_job_id=job_id, frontend=site)
            message += "\n--- %s: %s nodes ---" % (site, len(hosts))
            for host in hosts:
                message += "\n%s" % (host.address)
        logger.info(message)
        return

    # Format the walltime as HH:MM:SS when it fits strftime's two-digit
    # fields (99:99:99 max), otherwise report raw seconds.
    if self.configs['walltime'] <= 99*3600+99*60+99:
        walltime = time.strftime('%H:%M:%S',
                                 time.gmtime(self.configs['walltime']))
    else:
        walltime = '%s seconds' % self.configs['walltime']

    message = 'You are requesting %s nodes for %s:' % (
        sum(self.clusters.values()), walltime)
    for cluster, n_nodes in self.clusters.items():
        message += "\n%s: %s nodes" % (cluster, n_nodes)
    logger.info(message)

    logger.info('Performing reservation .......')
    # Default the requested start time to one minute from now.
    if 'starttime' not in self.configs or self.configs['starttime'] is None:
        self.configs['starttime'] = int(
            time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1)))

    starttime = int(get_unixts(self.configs['starttime']))
    endtime = int(
        starttime + timedelta_to_seconds(datetime.timedelta(days=3,
                                                            minutes=1)))
    startdate = self._get_nodes(starttime, endtime)
    # Slide the three-day search window forward until a slot is found,
    # or give up six weeks after the requested start time.
    while startdate is None:
        logger.info('No enough nodes found between %s and %s, ' +
                    '\nIncreasing the window time....',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(
            starttime + timedelta_to_seconds(datetime.timedelta(days=3,
                                                                minutes=1)))
        startdate = self._get_nodes(starttime, endtime)
        if starttime > int(self.configs['starttime'] +
                           timedelta_to_seconds(datetime.timedelta(weeks=6))):
            # NOTE(review): this literal was broken across lines in the
            # extracted source; reconstructed as a single message.
            logger.error('What a pity! There is no slot which satisfies '
                         'your request until %s :(' % format_date(endtime))
            exit()

    jobs_specs = get_jobs_specs(self.clusters, name=self.job_name)
    for job_spec, site_name in jobs_specs:
        # Strip backslashes/quotes from the generated resources string
        # and prepend a /22 subnet request (the "4000 IP").
        tmp = str(job_spec.resources).replace('\\', '')
        job_spec.resources = 'slash_22=4+' + tmp.replace('"', '')
        job_spec.walltime = self.configs['walltime']
        # -t deploy to reserve node without deploying OS
        job_spec.additional_options = '-t deploy'
        # Small safety margin past the discovered start date.
        job_spec.reservation_date = startdate + 10

    self.oar_result = oarsub(jobs_specs)
    # Any failed submission (None job id) aborts the whole run.
    for oar_job_id, _ in self.oar_result:
        if oar_job_id is None:
            logger.info('Performing reservation FAILED')
            exit()
    message = "Reserved nodes successfully!!! \nOAR JOB ID:\n"
    for each in self.oar_result:
        message += "%s:%s," % (each[1], each[0])
    logger.info(message)