def _start_disk_copy(self, disks=None, backing_file_dir='/tmp'):
    """Start copying the backing files that are missing on the hosts.

    For every disk, the md5sum of the local file is compared with the
    md5sum of ``<backing_file_dir>/orig_<basename>`` computed on each host.
    A disk is queued for copy as soon as one host reports a different (or
    empty) checksum. The resulting action is stored in
    ``self.copy_actions``; it is started but not waited for.

    :param disks: list of backing file paths; when empty or None,
      ``self.backing_files`` is used
    :param backing_file_dir: remote directory holding the backing files
      (defaults to ``/tmp``, the previously hard-coded location)
    """
    disks_copy = []
    if not disks:
        disks = self.backing_files
    for bf in disks:
        logger.info('Treating ' + style.emph(bf))
        logger.debug("Checking frontend disk vs host disk")
        raw_disk = backing_file_dir + '/orig_' + bf.split('/')[-1]
        f_disk = Process('md5sum -b ' + bf).run()
        disk_hash = f_disk.stdout.split(' ')[0]
        # The remote command prints nothing when the file does not exist,
        # which yields an empty hash and therefore a mismatch.
        cmd = 'if [ -f ' + raw_disk + ' ]; ' + \
            'then md5sum -b ' + raw_disk + '; fi'
        h_disk = self.fact.get_remote(cmd, self.hosts).run()
        disk_ok = all(p.stdout.split(' ')[0] == disk_hash
                      for p in h_disk.processes)
        if disk_ok:
            logger.info("Disk " + style.emph(bf) +
                        " is already present, skipping copy")
        else:
            disks_copy.append(
                self.fact.get_fileput(self.hosts, [bf],
                                      remote_location=backing_file_dir))
    if disks_copy:
        self.copy_actions = ParallelActions(disks_copy).start()
    else:
        # Nothing to copy: store a trivial, already finished action instead
        # (presumably so that callers can handle copy_actions uniformly).
        self.copy_actions = Remote('ls', self.hosts[0]).run()
def _start_disk_copy(self, disks=None, backing_file_dir='/tmp'):
    """Queue and start the copy of backing files missing on the hosts.

    The local md5sum of each disk is compared with the one of
    ``<backing_file_dir>/orig_<basename>`` on every host; disks for which
    at least one host differs are sent with a fileput action. The started
    action (or a finished ``ls`` when nothing has to be copied) is stored
    in ``self.copy_actions``.

    :param disks: backing file paths, ``self.backing_files`` when falsy
    :param backing_file_dir: remote directory of the backing files
    """
    pending = []
    for bf in (disks or self.backing_files):
        logger.info('Treating ' + style.emph(bf))
        logger.debug("Checking frontend disk vs host disk")
        basename = bf.split('/')[-1]
        raw_disk = '%s/orig_' % backing_file_dir + basename
        local_sum = Process('md5sum -b ' + bf).run()
        disk_hash = local_sum.stdout.split(' ')[0]
        # Prints the checksum only when the remote file exists
        cmd = 'if [ -f ' + raw_disk + ' ]; ' + \
            'then md5sum -b ' + raw_disk + '; fi'
        remote_sums = self.fact.get_remote(cmd, self.hosts).run()
        mismatch = any(proc.stdout.split(' ')[0] != disk_hash
                       for proc in remote_sums.processes)
        if mismatch:
            put = self.fact.get_fileput(
                self.hosts, [bf],
                remote_location="%s" % backing_file_dir)
            pending.append(put)
        else:
            logger.info("Disk " + style.emph(bf) +
                        " is already present, skipping copy")
    if len(pending) > 0:
        self.copy_actions = ParallelActions(pending).start()
    else:
        self.copy_actions = Remote('ls', self.hosts[0]).run()
def _enable_bridge(self, name='br0'):
    """We need a bridge to have automatic DHCP configuration for the VM.

    Hosts without a bridge, or with a bridge whose name differs from
    ``name`` (which is destroyed first), get a shell script that rewrites
    ``/etc/network/interfaces`` and brings the bridge up. The method then
    polls the hosts with nmap on port 22 (at most 20 tries, 20 seconds
    apart) until all of them are reported up.

    :param name: name of the bridge to create
    """
    logger.detail('Configuring the bridge')
    hosts_br = self._get_bridge(self.hosts)
    nobr_hosts = []
    for host, br in hosts_br.items():
        if br is None:
            logger.debug('No bridge on host %s', style.host(host))
            nobr_hosts.append(host)
        elif br != name:
            logger.debug('Wrong bridge on host %s, destroying it',
                         style.host(host))
            SshProcess('ip link set ' + br + ' down ; brctl delbr ' + br,
                       host).run()
            nobr_hosts.append(host)
        else:
            # Log the actual bridge name (the literal 'name' was logged
            # before).
            logger.debug('Bridge %s is present on host %s',
                         style.emph(name), style.host(host))

    # Work on plain addresses from here on
    nobr_hosts = [x.address if isinstance(x, Host) else x
                  for x in nobr_hosts]

    if nobr_hosts:
        logger.debug('Creating bridge on %s', hosts_list(nobr_hosts))
        interfaces = '/etc/network/interfaces ; \n'
        script = ''.join([
            'export br_if=`ip route |grep default |cut -f 5 -d " "`; \n',
            'ifdown $br_if ; \n',
            'sed -i "s/$br_if inet dhcp/$br_if inet manual/g" ' + interfaces,
            'sed -i "s/auto $br_if//g" ' + interfaces,
            'echo " " >> ' + interfaces,
            'echo "auto ' + name + '" >> ' + interfaces,
            'echo "iface ' + name + ' inet dhcp" >> ' + interfaces,
            'echo " bridge_ports $br_if" >> ' + interfaces,
            'echo " bridge_stp off" >> ' + interfaces,
            'echo " bridge_maxwait 0" >> ' + interfaces,
            'echo " bridge_fd 0" >> ' + interfaces,
            'ifup ' + name])
        fd, br_script = mkstemp(dir='/tmp/', prefix='create_br_')
        # Close the descriptor even if the write fails
        with fdopen(fd, 'w') as f:
            f.write(script)

        self.fact.get_fileput(nobr_hosts, [br_script]).run()
        self.fact.get_remote('nohup sh ' + br_script.split('/')[-1],
                             nobr_hosts).run()

        logger.debug('Waiting for network restart')
        if_up = False
        nmap_tries = 0
        while (not if_up) and nmap_tries < 20:
            sleep(20)
            nmap_tries += 1
            nmap = Process('nmap ' + ' '.join(nobr_hosts) +
                           ' -p 22').run()
            for line in nmap.stdout.split('\n'):
                if 'Nmap done' in line:
                    # "Nmap done: N IP addresses (M hosts up) ...":
                    # the network is back when N == M.
                    fields = line.split()
                    if_up = fields[2] == fields[5].replace('(', '')
        if if_up:
            logger.debug('Network has been restarted')
        else:
            logger.warning('Some hosts were still unreachable after %s '
                           'nmap tries', nmap_tries)
    logger.detail('All hosts have the bridge %s', style.emph(name))
def check_update_deployed(undeployed_hosts, check_deployed_command, node_connection_params, vlan): #IGNORE:W0613
    """Return the hosts on which the deployment check command succeeds.

    The check command is run remotely on every host of
    ``undeployed_hosts`` (on its kavlan host name when ``vlan`` is set),
    with failure logging disabled and ``check_timeout`` as timeout.

    :param undeployed_hosts: hosts to check
    :param check_deployed_command: command run on the hosts
    :param node_connection_params: connection parameters for the remote
    :param vlan: kavlan identifier, or a false value when no vlan is used
    :returns: list of the hosts whose check process is ok
    """
    logger.debug(style.emph("check which hosts are already deployed among:") + " %s", undeployed_hosts)
    # Maps the name actually contacted to the original host
    if vlan:
        deployment_hostnames_mapping = dict(
            (get_kavlan_host_name(h, vlan), h) for h in undeployed_hosts)
    else:
        deployment_hostnames_mapping = dict(
            (h, h) for h in undeployed_hosts)
    deployed_check = get_remote(check_deployed_command,
                                list(deployment_hostnames_mapping),
                                connection_params = node_connection_params)
    for proc in deployed_check.processes:
        proc.nolog_exit_code = True
        proc.nolog_timeout = True
        proc.nolog_error = True
        proc.timeout = check_timeout
    deployed_check.run()
    newly_deployed = []
    for process in deployed_check.processes:
        logger.debug(''.join([
            style.emph("check on %s:" % (process.host,)),
            " %s\n" % (process,),
            style.emph("stdout:"),
            "\n%s\n" % (process.stdout),
            style.emph("stderr:"),
            "\n%s\n" % (process.stderr)]))
        original_host = deployment_hostnames_mapping[process.host.address]
        if (process.ok):
            newly_deployed.append(original_host)
            logger.debug("OK %s", original_host)
        else:
            logger.debug("KO %s", original_host)
    return newly_deployed
def _enable_bridge(self, name='br0'):
    """We need a bridge to have automatic DHCP configuration for the VM.

    A bridge called ``name`` is created on every host that has no bridge
    or a differently named one (destroyed beforehand). The creation is
    done by a generated shell script that edits
    ``/etc/network/interfaces``; the hosts are then polled with nmap on
    port 22, every 20 seconds and at most 20 times, until all are up.

    :param name: name of the bridge to create
    """
    logger.detail('Configuring the bridge')
    nobr_hosts = []
    for host, br in self._get_bridge(self.hosts).items():
        if br == name and br is not None:
            # Log the real bridge name instead of the literal 'name'
            logger.debug('Bridge %s is present on host %s',
                         style.emph(name), style.host(host))
            continue
        if br is None:
            logger.debug('No bridge on host %s', style.host(host))
        else:
            logger.debug('Wrong bridge on host %s, destroying it',
                         style.host(host))
            SshProcess('ip link set ' + br + ' down ; brctl delbr ' + br,
                       host).run()
        nobr_hosts.append(host)

    # Normalise Host objects to their address
    nobr_hosts = [h.address if isinstance(h, Host) else h
                  for h in nobr_hosts]

    if nobr_hosts:
        logger.debug('Creating bridge on %s', hosts_list(nobr_hosts))
        script = 'export br_if=`ip route |grep default |cut -f 5 -d " "`; \n' + \
            'ifdown $br_if ; \n' + \
            'sed -i "s/$br_if inet dhcp/$br_if inet manual/g" /etc/network/interfaces ; \n' + \
            'sed -i "s/auto $br_if//g" /etc/network/interfaces ; \n' + \
            'echo " " >> /etc/network/interfaces ; \n' + \
            'echo "auto ' + name + '" >> /etc/network/interfaces ; \n' + \
            'echo "iface ' + name + ' inet dhcp" >> /etc/network/interfaces ; \n' + \
            'echo " bridge_ports $br_if" >> /etc/network/interfaces ; \n' + \
            'echo " bridge_stp off" >> /etc/network/interfaces ; \n' + \
            'echo " bridge_maxwait 0" >> /etc/network/interfaces ; \n' + \
            'echo " bridge_fd 0" >> /etc/network/interfaces ; \n' + \
            'ifup ' + name
        fd, br_script = mkstemp(dir='/tmp/', prefix='create_br_')
        # The context manager closes the descriptor even on write errors
        with fdopen(fd, 'w') as f:
            f.write(script)

        self.fact.get_fileput(nobr_hosts, [br_script]).run()
        self.fact.get_remote('nohup sh ' + br_script.split('/')[-1],
                             nobr_hosts).run()

        logger.debug('Waiting for network restart')
        if_up = False
        nmap_tries = 0
        nmap_cmd = 'nmap ' + ' '.join(nobr_hosts) + ' -p 22'
        while (not if_up) and nmap_tries < 20:
            sleep(20)
            nmap_tries += 1
            nmap = Process(nmap_cmd).run()
            for line in nmap.stdout.split('\n'):
                if 'Nmap done' in line:
                    # Compare the number of scanned addresses with the
                    # number of hosts reported up on the summary line.
                    words = line.split()
                    if_up = words[2] == words[5].replace('(', '')
        if if_up:
            logger.debug('Network has been restarted')
        else:
            # Previously the success message was logged unconditionally
            logger.warning('Network restart could not be confirmed '
                           'after %s nmap tries', nmap_tries)
    logger.detail('All hosts have the bridge %s', style.emph(name))
def peace_flag(self, param1, param2, boxes=None, **kwargs):
    """Create a contour plot of the boxes final value as a function of
    two parameters.

    The final values are read from the ``Delta.final`` file of each result
    sub-directory of ``self.result_dir``. The sub-directory name pattern
    is derived from the first directory whose name contains ``param1``.
    The figure is saved as ``<param1>_<param2>.png`` in the result
    directory and then displayed.

    :param param1: name of the parameter used for the x axis
    :param param2: name of the parameter used for the y axis
    :param boxes: names of the boxes to read, all boxes when empty
    :param kwargs: values of the other parameters, used to fill the
      directory name pattern
    :raises ValueError: if no result directory name contains ``param1``,
      or if a value read from ``Delta.final`` is not a number
    """
    def _as_dirname(value):
        # Directory names hold the values without dot nor minus sign
        return str(value).replace('.', '').replace('-', '')

    x = self.parameters[param1]
    y = self.parameters[param2]
    param1 = param1.lower()
    param2 = param2.lower()

    box_names = list(self.Boxes.keys())
    if not boxes:
        boxes = box_names
    logger.info('Drawing contour plot of ' +
                ','.join([style.log_header(box) for box in boxes]) +
                ' final Delta for ' + style.emph(param1) +
                ' and ' + style.emph(param2) + ' ...')
    # Line numbers of the requested boxes in the Delta.final files
    i_box = set(box_names.index(box) for box in boxes)

    dir_string = None
    for d in listdir(self.result_dir):
        if param1 in d:
            dir_string = '-'.join([e + '-$' + e
                                   for e in d.split('-')[0::2]])
            break
    if dir_string is None:
        raise ValueError('No result directory found for parameter %s in %s'
                         % (param1, self.result_dir))
    for key, value in kwargs.items():
        dir_string = dir_string.replace("$" + key.lower(),
                                        _as_dirname(value))

    Delta = []
    for yy in y:
        # Fill the row of the current y value directly: looking it up with
        # y.index(yy) hits the wrong row when y holds duplicates.
        row = []
        for xx in x:
            infile = dir_string.replace("$" + param1, _as_dirname(xx))
            infile = infile.replace("$" + param2, _as_dirname(yy))
            path = self.result_dir + '/' + infile + '/Delta.final'
            with open(path) as f:
                for i, line in enumerate(f):
                    if i in i_box:
                        row.append(float(line.split(' ')[1]))
        Delta.append(row)

    plt.contourf(x, y, Delta)
    plt.xlabel(param1)
    plt.ylabel(param2)
    plt.colorbar(fraction=0.1)
    # Save before showing: once the window opened by show() is closed the
    # figure may be gone, which yields an empty image.
    plt.savefig(self.result_dir + '/' + param1 + '_' + param2 + '.png')
    plt.show()
def deploy(self):
    """Deploy the configured environment on the nodes.

    The nodes are put in the primary vlan, ``self.nodes`` and
    ``self.deployed_nodes`` are rewritten with their vlan names (sorted by
    address), and the deployed nodes are checked against the configured
    resources.

    :returns: the ``(deployed, undeployed)`` pair given by ``EX5.deploy``
    """
    def by_address(node):
        return node.address

    # The nodes go in the first vlan we have
    vlan = self._get_primary_vlan()
    vlan_id = vlan[1]

    forced = '(forced)' if self.force_deploy else ''
    logger.info("Deploying %s on %d nodes %s" % (self.config['env_name'],
                                                  len(self.nodes),
                                                  forced))
    deployment = EX5.Deployment(self.nodes,
                                env_name=self.config['env_name'],
                                vlan = vlan_id)
    deployed, undeployed = EX5.deploy(
        deployment, check_deployed_command=not self.force_deploy)

    # Report the nodes that could not be deployed
    if len(undeployed) > 0:
        logger.error("%d nodes where not deployed correctly:" % len(undeployed))
        for node in undeployed:
            logger.error(style.emph(node))

    # Switch node names to their vlan counterpart
    self.nodes = sorted(translate_to_vlan(self.nodes, vlan_id),
                        key = by_address)
    logger.info(self.nodes)
    deployed_hosts = [EX.Host(n) for n in deployed]
    self.deployed_nodes = sorted(translate_to_vlan(deployed_hosts, vlan_id),
                                 key = by_address)
    logger.info(self.deployed_nodes)

    check_nodes(nodes = self.deployed_nodes,
                resources = self.config['resources'],
                mode = self.config['role_distribution'])
    return deployed, undeployed
def initial_state(self, outdir=None):
    """Convert the dicts given from parameters to Numpy arrays.

    Logs the initial Delta and Mass of the boxes, plots the initial state,
    stores the ``_Mass``, ``_Flux`` and ``_Partcoeff`` arrays on the
    instance and writes one ``<box> <Delta>`` line per box to
    ``Delta.initial`` in the output directory.

    :param outdir: output directory, ``self.result_dir + '/'`` when None
    :returns: list of the initial Delta of the boxes
    """
    box_names = list(self.Boxes.keys())
    box_values = list(self.Boxes.values())
    deltas = [box['Delta'] for box in box_values]
    masses = [box['Mass'] for box in box_values]

    logger.info(style.log_header('Initial boxes configuration\n') +
                ''.ljust(8) +
                ''.join([style.emph(box.rjust(10)) for box in box_names]) +
                style.object_repr('\n' + 'Delta'.ljust(8)) +
                ''.join([str(delta).rjust(10) for delta in deltas]) +
                style.object_repr('\n' + 'Mass'.ljust(8)) +
                ''.join([str(mass).rjust(10) for mass in masses]))
    if outdir is None:
        outdir = self.result_dir + '/'
    self.plot_state(box_names, array(deltas), name='_initial',
                    outdir=outdir)

    self._Mass = array(masses)
    self._Flux = array([list(box.values())
                        for box in self.Flux.values()])
    self._Partcoeff = array([list(box.values())
                             for box in self.Partcoeff.values()])

    # The context manager closes the file even if a write fails
    with open(outdir + '/Delta.initial', 'w') as f:
        for box, delta in zip(box_names, deltas):
            f.write(box + ' ' + str(delta) + '\n')

    return deltas
def __init__(self, infile=None, resources=None, hosts=None, ip_mac=None, vlan=None, env_name=None, env_file=None, vms=None, distribution=None, outdir=None):
    """Set up the deployment from a topology file or explicit resources.

    :param infile: an XML file that describes the topology of the
      deployment

    :param resources: a dict whose keys are Grid'5000 sites and values are
      dicts with the keys hosts and ip_mac, where hosts is a list of
      execo.Host and ip_mac is a list of tuples (ip, mac)

    :param env_name: name of the Kadeploy environment, optionally
      prefixed by its owner as ``user:name``

    :param env_file: path to the Kadeploy environment file, only used when
      env_name is None

    :param vms: dict defining the virtual machines

    :param distribution: how to distribute the vms on the hosts
      (``round-robin`` , ``concentrated``, ``random``)

    :param outdir: directory to store the deployment files, a timestamped
      ``vm5k_...`` name when not given
    """
    # The deployment actions rely on taktuk and chainput
    self.fact = ActionFactory(remote_tool=TAKTUK,
                              fileput_tool=CHAINPUT,
                              fileget_tool=TAKTUK)
    self.kavlan = vlan if vlan else None
    self.kavlan_site = None

    # Environment: by name (with optional user), by file, or the default
    if env_name is not None:
        self.env_file = None
        if ':' in env_name:
            self.env_user, self.env_name = env_name.split(':')
        else:
            self.env_name, self.env_user = env_name, None
    elif env_file is not None:
        self.env_name = None
        self.env_user = None
        self.env_file = env_file
    else:
        self.env_name = 'vm5k'
        self.env_user = '******'
        self.env_file = None

    self.outdir = outdir if outdir else \
        'vm5k_' + strftime("%Y%m%d_%H%M%S_%z")

    self.copy_actions = None
    self.state = Element('vm5k')
    self._define_elements(infile, resources, hosts, vms, ip_mac,
                          distribution)

    logger.info('%s %s %s %s %s %s %s %s',
                len(self.sites), style.emph('sites'),
                len(self.clusters), style.user1('clusters'),
                len(self.hosts), style.host('hosts'),
                len(self.vms), style.vm('vms'))
def get_oar_job_vm5k_resources(jobs):
    """Build the resources dict needed by vm5k_deployment from OAR jobs.

    For each ``(oar_job_id, site)`` pair, waits for the job to start, then
    collects the addresses of its nodes and its (ip, mac) list. When the
    job has no subnet, the (ip, mac) list of its kavlan is used instead.
    The first 300 (ip, mac) entries are always left out.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs
    :returns: dict mapping each site to a dict with the keys ``hosts``,
      ``ip_mac`` and ``kavlan``
    """
    resources = {}
    for job, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), job)
        job_id = int(job)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        nodes = get_oar_job_nodes(job_id, site)
        hosts = [node.address for node in nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job_id, site)

        kavlan = None
        if len(ip_mac) == 0:
            # No subnet in the job, fall back on the kavlan addresses
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(job_id, site)
            if kavlan:
                assert (len(kavlan) == 1)
                kavlan = kavlan[0]
                ip_mac = get_kavlan_ip_mac(kavlan, site)

        site_resources = {}
        site_resources['hosts'] = hosts
        site_resources['ip_mac'] = ip_mac[300:]
        site_resources['kavlan'] = kavlan
        resources[site] = site_resources
    return resources
def get_oar_job_vm5k_resources(jobs):
    """Retrieve the hosts and (ip, mac) lists of a list of oar jobs.

    Every job is waited for before being queried. The (ip, mac) list comes
    from the subnets of the job or, when there is none, from its kavlan.
    The stored list skips the first 300 entries.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs
    :returns: the resources dict needed by vm5k_deployment, keyed by site,
      each value holding ``hosts``, ``ip_mac`` and ``kavlan``
    """
    def _kavlan_resources(job_id, site):
        # Returns (kavlan, ip_mac); ip_mac is None when there is no kavlan
        logger.debug('Retrieving kavlan')
        found = get_oar_job_kavlan(job_id, site)
        if not found:
            return found, None
        assert(len(found) == 1)
        return found[0], get_kavlan_ip_mac(found[0], site)

    resources = {}
    for oar_job_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s', style.emph(site),
                      oar_job_id)
        oar_job_id = int(oar_job_id)
        wait_oar_job_start(oar_job_id, site)
        logger.debug('Retrieving hosts')
        hosts = [host.address
                 for host in get_oar_job_nodes(oar_job_id, site)]
        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(oar_job_id, site)
        kavlan = None
        if len(ip_mac) == 0:
            kavlan, kavlan_ip_mac = _kavlan_resources(oar_job_id, site)
            if kavlan:
                ip_mac = kavlan_ip_mac
        resources[site] = dict([('hosts', hosts),
                                ('ip_mac', ip_mac[300:]),
                                ('kavlan', kavlan)])
    return resources
def generate_kolla_files(config_vars, kolla_vars, directory):
    """Generate the kolla files in ``directory``.

    Three files are produced: ``globals.yml`` (the static parameters
    merged with the generated ones), ``passwords.yml`` (copied from
    ``TEMPLATE_DIR``) and ``admin-openrc`` (rendered from its template).

    NOTE: ``config_vars`` is updated in place with ``kolla_vars``; this
    historical side effect is kept for callers that may rely on it.

    :param config_vars: dict of the static parameters from the config file
    :param kolla_vars: dict of the generated parameters; must contain
      ``kolla_internal_vip_address``
    :param directory: directory where the files are written
    :raises IOError: if the passwords file cannot be copied
    """
    import shutil

    # get the static parameters from the config file
    kolla_globals = config_vars
    # add the generated parameters
    kolla_globals.update(kolla_vars)
    # write to file in the result dir
    globals_path = os.path.join(directory, 'globals.yml')
    with open(globals_path, 'w') as f:
        yaml.dump(kolla_globals, f, default_flow_style=False)

    logger.info("Wrote " + style.emph(globals_path))

    # copy the passwords file; shutil.copy does not go through a shell, so
    # paths with spaces work and a failed copy is reported, not ignored
    passwords_path = os.path.join(directory, "passwords.yml")
    shutil.copy(os.path.join(TEMPLATE_DIR, 'passwords.yml'), passwords_path)
    logger.info("Password file is copied to %s" % (passwords_path))

    # admin openrc
    admin_openrc_path = os.path.join(directory, 'admin-openrc')
    admin_openrc_vars = {
        'keystone_address': kolla_vars['kolla_internal_vip_address']
    }
    render_template('templates/admin-openrc.jinja2',
                    admin_openrc_vars,
                    admin_openrc_path)
    logger.info("admin-openrc generated in %s" % (admin_openrc_path))
def get_host(self):
    """Returns the first host from an existing reservation (if any), or
    from a new reservation.

    The running jobs of the cluster site are searched for one named
    ``self.options.job_name``; when none is found a reservation is made.
    ``self.site`` and ``self.job_id`` are set as a side effect. The process
    exits with code 6 when no job could be obtained.

    :returns: the first node of the job
    """
    # Look if there is a running job
    self.site = get_cluster_site(self.config['cluster'])
    jobs = EX5.get_current_oar_jobs([self.site])

    self.job_id = None
    for job in jobs:
        info = EX5.get_oar_job_info(job[0], self.site)
        # .get(): a job info without a name must not abort the search
        if info.get('name') == self.options.job_name:
            self.job_id = job[0]
            break

    if self.job_id:
        logger.info('Using job %s' % style.emph(self.job_id))
    else:
        logger.info('Making a new reservation')
        self._make_reservation(self.site)

    if not self.job_id:
        logger.error("Could not get a reservation for the job")
        exit(6)

    EX5.wait_oar_job_start(self.job_id, self.site)
    # Query the nodes once and reuse the answer for display and return
    nodes = EX5.get_oar_job_nodes(self.job_id, self.site)
    pp(nodes)
    return nodes[0]
def boot_vms_by_core(vms):
    """Boot the virtual machines by rounds, one VM per cpuset at a time.

    The VMs are grouped by their ``cpuset`` value. Each round starts the
    next VM of every group and waits for these VMs to have started.

    :param vms: list of VM dicts with the keys ``host``, ``cpuset`` and
      ``id``; the host of the first VM is used in the log messages
    :returns: True when all VMs have booted (or ``vms`` is empty), False
      as soon as a round fails to start
    """
    n_vm = len(vms)
    if n_vm == 0:
        return True

    first_host = vms[0]['host']
    if isinstance(first_host, Host):
        host = first_host.address.split('.')[0]
    else:
        host = first_host.split('.')[0]

    # One queue of VMs per cpuset value
    sub_vms = {}
    for i_core in list(set(vm['cpuset'] for vm in vms)):
        sub_vms[i_core] = [vm for vm in vms if vm['cpuset'] == i_core]

    booted_vms = 0
    while len(sub_vms.keys()) > 0:
        vms_to_boot = []
        for i_core in sub_vms.keys():
            queue = sub_vms[i_core]
            vms_to_boot.append(queue.pop(0))
            if len(queue) == 0:
                del sub_vms[i_core]
        names = [vm['id'] for vm in sorted(vms_to_boot)]
        logger.info(style.Thread(host) + ': Starting VMS '+', '.join(names))
        start_vms(vms_to_boot).run()
        if not wait_vms_have_started(vms_to_boot):
            return False
        booted_vms += len(vms_to_boot)
        progress = str(booted_vms)+'/'+str(n_vm)
        logger.info(style.Thread(host)+': '+style.emph(progress))
    return True
def configure_libvirt(self, bridge='br0', libvirt_conf=None):
    """Enable a bridge if needed on the remote hosts, configure libvirt
    with a bridged network for the virtual machines, and restart service.

    :param bridge: name of the bridge to enable and to use for the
      libvirt network
    :param libvirt_conf: currently unused
    """
    # Pass the requested name: the bridge used to be created with the
    # default name whatever the value of ``bridge``.
    self._enable_bridge(bridge)
    self._libvirt_check_service()
    self._libvirt_uniquify()
    self._libvirt_bridged_network(bridge)
    logger.info('Restarting %s', style.emph('libvirt'))
    self.fact.get_remote('service libvirtd restart', self.hosts).run()
def _install_packages(self, other_packages=None, launch_disk_copy=True):
    """Installation of required packages on the hosts.

    Installs the base packages, optionally starts the disk copy, then
    installs the libvirt packages from the ``<debian_name>-backports``
    repository and finally the extra packages.

    :param other_packages: extra packages passed to
      ``_other_packages``, skipped when empty
    :param launch_disk_copy: start the copy of the backing files between
      the two installation steps
    """
    # DEBIAN_FRONTEND is the variable read by debconf; the previous
    # DEBIAN_MASTER had no effect.
    apt_env = 'export DEBIAN_FRONTEND=noninteractive ; '

    base_packages = 'uuid-runtime bash-completion taktuk locate htop init-system-helpers netcat-traditional'
    logger.info('Installing base packages \n%s', style.emph(base_packages))
    cmd = apt_env + 'apt-get update && apt-get ' + \
        'install -y --force-yes --no-install-recommends ' + base_packages
    install_base = self.fact.get_remote(cmd, self.hosts).run()
    self._actions_hosts(install_base)
    if launch_disk_copy:
        self._start_disk_copy()
    libvirt_packages = 'libvirt-bin virtinst python2.7 python-pycurl python-libxml2 qemu-kvm nmap libgmp10'
    logger.info('Installing libvirt packages \n%s',
                style.emph(libvirt_packages))
    cmd = apt_env + 'apt-get update && apt-get install -y --force-yes ' + \
        '-o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" ' + \
        '-t %s-backports ' % self.debian_name + \
        libvirt_packages
    install_libvirt = self.fact.get_remote(cmd, self.hosts).run()
    self._actions_hosts(install_libvirt)
    if other_packages:
        self._other_packages(other_packages)
def _other_packages(self, other_packages=None):
    """Installation of extra packages on the hosts.

    :param other_packages: package names separated by commas and/or
      spaces; nothing is done when empty or None
    """
    if not other_packages:
        # The default value (None) used to raise an AttributeError
        return
    other_packages = other_packages.replace(',', ' ')
    logger.info('Installing extra packages \n%s',
                style.emph(other_packages))
    # DEBIAN_FRONTEND is the variable read by debconf; the previous
    # DEBIAN_MASTER had no effect.
    cmd = 'export DEBIAN_FRONTEND=noninteractive ; ' + \
        'apt-get update && apt-get install -y --force-yes ' + \
        other_packages
    install_extra = self.fact.get_remote(cmd, self.hosts).run()
    self._actions_hosts(install_extra)
def _install_packages(self, other_packages=None, launch_disk_copy=True):
    """Installation of required packages on the hosts.

    The base packages are installed first, the disk copy is optionally
    started, then the libvirt packages are installed from the backports
    repository, followed by the extra packages.

    :param other_packages: extra packages passed to
      ``_other_packages``, skipped when empty
    :param launch_disk_copy: start the copy of the backing files between
      the two installation steps
    """
    # DEBIAN_FRONTEND is the variable read by debconf; the previous
    # DEBIAN_MASTER had no effect.
    noninteractive = 'export DEBIAN_FRONTEND=noninteractive ; '
    # Use the release name of the instance when it defines one, and keep
    # the historical 'wheezy' otherwise.
    release = getattr(self, 'debian_name', 'wheezy')

    base_packages = 'uuid-runtime bash-completion taktuk locate htop init-system-helpers netcat-traditional'
    logger.info('Installing base packages \n%s', style.emph(base_packages))
    cmd = noninteractive + 'apt-get update && apt-get ' + \
        'install -y --force-yes --no-install-recommends ' + base_packages
    install_base = self.fact.get_remote(cmd, self.hosts).run()
    self._actions_hosts(install_base)
    if launch_disk_copy:
        self._start_disk_copy()
    libvirt_packages = 'libvirt-bin virtinst python2.7 python-pycurl python-libxml2 qemu-kvm nmap libgmp10'
    logger.info('Installing libvirt packages \n%s',
                style.emph(libvirt_packages))
    cmd = noninteractive + \
        'apt-get update && apt-get install -y --force-yes ' + \
        '-o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" ' + \
        '-t %s-backports ' % release + \
        libvirt_packages
    install_libvirt = self.fact.get_remote(cmd, self.hosts).run()
    self._actions_hosts(install_libvirt)
    if other_packages:
        self._other_packages(other_packages)
def get_job_by_name(job_name, sites=None):
    """Look for a current job whose name is ``job_name``.

    The oar jobs of the current oargrid jobs are searched first, then the
    current oar jobs of ``sites``.

    :param job_name: name of the job to find
    :param sites: sites to search for oar jobs, all the Grid'5000 sites
      when empty
    :returns: ``(oargrid_job_id, None)`` for an oargrid job, the matching
      element of the current oar jobs for an oar job, ``(None, None)``
      when nothing is found
    """
    logger.detail('Looking for a job named %s', style.emph(job_name))
    if not sites:
        sites = get_g5k_sites()

    # Grid jobs first
    for g_job in get_current_oargrid_jobs():
        for oar_job in get_oargrid_job_oar_jobs(g_job):
            if get_oar_job_info(oar_job[0], oar_job[1])['name'] != job_name:
                continue
            logger.info('Oargridjob %s found !', style.emph(g_job))
            return g_job, None

    # Then the plain oar jobs of the sites
    for oar_job in get_current_oar_jobs(sites):
        if get_oar_job_info(oar_job[0], oar_job[1])['name'] != job_name:
            continue
        logger.info('Job %s found on site %s !', style.emph(oar_job[0]),
                    style.host(oar_job[1]))
        return oar_job
    return None, None
def check_update_deployed(undeployed_hosts, check_deployed_command, node_connection_params, vlan): #IGNORE:W0613
    """Find which of the given hosts are already deployed.

    ``check_deployed_command`` is run on each host, contacted through its
    kavlan host name when ``vlan`` is set. Exit code, timeout and error
    logging are disabled on the check processes, whose timeout is
    ``check_timeout``.

    :param undeployed_hosts: hosts to check
    :param check_deployed_command: command run on the hosts
    :param node_connection_params: connection parameters for the remote
    :param vlan: kavlan identifier, or a false value when no vlan is used
    :returns: list of the hosts on which the command succeeded
    """
    logger.debug(
        style.emph("check which hosts are already deployed among:") + " %s",
        undeployed_hosts)

    # contacted name -> host as given by the caller
    deployment_hostnames_mapping = {}
    for host in undeployed_hosts:
        if vlan:
            contact = get_kavlan_host_name(host, vlan)
        else:
            contact = host
        deployment_hostnames_mapping[contact] = host

    deployed_check = get_remote(check_deployed_command,
                                list(deployment_hostnames_mapping),
                                connection_params=node_connection_params)
    for check in deployed_check.processes:
        check.nolog_exit_code = check.nolog_timeout = check.nolog_error = True
        check.timeout = check_timeout
    deployed_check.run()

    newly_deployed = list()
    for process in deployed_check.processes:
        report = style.emph("check on %s:" % (process.host, ))
        report += " %s\n" % (process, )
        report += style.emph("stdout:")
        report += "\n%s\n" % (process.stdout)
        report += style.emph("stderr:")
        report += "\n%s\n" % (process.stderr)
        logger.debug(report)
        succeeded = process.ok
        host = deployment_hostnames_mapping[process.host.address]
        if succeeded:
            newly_deployed.append(host)
        logger.debug("OK %s" if succeeded else "KO %s", host)
    return newly_deployed
def add_backbone(self):
    """Add the Renater equipments to the graph, then drop the unused ones.

    Every backbone equipment becomes a node of kind ``renater``, linked to
    the equipments found behind its ports whose parsed uid contains
    ``renater-``. Renater nodes that are on no shortest path between the
    routers of two sites are removed afterwards.
    """
    logger.debug('Add %s network', style.emph('Renater'))
    for equip in self.data['network']['backbone']:
        src = equip['uid']
        self.add_node(src, kind='renater')
        for lc in equip['linecards']:
            for port in lc['ports']:
                if 'uid' not in port:
                    continue
                dst = _parse_port_uid(port['uid'])
                if 'renater-' not in dst:
                    continue
                # Port attributes take precedence over the defaults
                if 'rate' in port:
                    port_bw = port['rate']
                else:
                    port_bw = lc['rate']
                if 'latency' in port:
                    latency = port['latency']
                else:
                    latency = arbitrary_latency
                if 'kind' in port:
                    kind = port['kind']
                else:
                    kind = 'renater'
                logger.debug('* %s (%s, bw=%s, lat=%s)', dst, kind,
                             port_bw, latency)
                self.add_node(dst, kind=kind)
                if self.has_edge(src, dst):
                    continue
                self.add_edge(src, dst, _unique_link_key(src, dst),
                              bandwidth=port_bw, latency=latency,
                              active=True)

    # Removing unused one
    logger.debug('Removing unused Renater equipments')
    used_elements = []
    for site in self.get_sites():
        for dest in [s for s in self.get_sites() if s != site]:
            gw_src = self.get_site_router(site)[0]
            gw_dst = self.get_site_router(dest)[0]
            if gw_src is None or gw_dst is None:
                continue
            path = nx.shortest_path(self, gw_src, gw_dst)
            for element in path:
                if 'renater' in element and element not in used_elements:
                    used_elements.append(element)

    # Snapshot of the renater nodes: the graph is modified in the loop
    renater_nodes = [node for node, attrs in self.nodes_iter(data=True)
                     if attrs['kind'] == 'renater']
    for element in renater_nodes:
        if element in used_elements:
            continue
        logger.debug('removing %s' % (element, ))
        self.remove_node(element)
def get_job(self):
    """Get the hosts from an existing job (if any) or from a new job.
    This will perform a reservation if necessary.

    Sets ``self.gridjob``, ``self.nodes`` (sorted by address),
    ``self.start_date``, ``self.user``, ``self.jobs`` (list of
    ``(site, job_id)``) and ``self.vlans`` (list of ``(site, vlan_id)``).

    :returns: the oargrid job id
    """
    # Look if there is a running job or make a new reservation
    self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

    if self.gridjob is None:
        self._make_reservation()
    else:
        logger.info("Using running oargrid job %s" % style.emph(self.gridjob))

    # Wait for the job to start
    EX5.wait_oargrid_job_start(self.gridjob)

    attempts = 0
    self.nodes = None
    while self.nodes is None and attempts < MAX_ATTEMPTS:
        nodes = EX5.get_oargrid_job_nodes(self.gridjob)
        # Sort only a real answer: sorted() never returns None, so the
        # retry could not happen when it was applied unconditionally.
        if nodes is not None:
            self.nodes = sorted(nodes, key = lambda n: n.address)
        attempts += 1

    check_nodes(
            nodes = self.nodes,
            resources = self.config['resources'],
            mode = self.config['role_distribution'])

    # One query is enough for both the start date and the user
    job_info = EX5.get_oargrid_job_info(self.gridjob)

    # TODO - Start_date is never used, deadcode ? Ad_rien_ - August 11th 2016
    self.start_date = None
    if 'start_date' in job_info:
        self.start_date = job_info['start_date']

    ## filling some information about the jobs here
    self.user = None
    if 'user' in job_info:
        self.user = job_info['user']

    ## vlans information
    job_sites = EX5.get_oargrid_job_oar_jobs(self.gridjob)
    self.jobs = []
    self.vlans = []
    for (job_id, site) in job_sites:
        self.jobs.append((site, job_id))
        vlan_id = EX5.get_oar_job_kavlan(job_id, site)
        if vlan_id is not None:
            # Reuse the answer instead of querying the kavlan again
            self.vlans.append((site, vlan_id))

    return self.gridjob
def generate_inventory(roles, base_inventory, dest):
    """Write the inventory file.

    The groups generated from ``roles`` come first, followed by the
    unchanged content of the ``base_inventory`` file.

    :param roles: roles given to ``to_ansible_group_string``
    :param base_inventory: path of the inventory to append
    :param dest: path of the generated inventory
    """
    with open(dest, 'w') as inventory:
        inventory.write(to_ansible_group_string(roles))
        with open(base_inventory, 'r') as base:
            inventory.writelines(base)
    logger.info("Inventory file written to " + style.emph(dest))
def get_cpu_topology(cluster, xpdir=None):
    """Return the CPU topology of a cluster.

    The topology is read from the cache file ``<xpdir>/topo_<cluster>.xml``
    when it exists. Otherwise a node of the cluster is reserved for two
    minutes, the output of ``virsh capabilities`` is parsed and, when
    ``xpdir`` is given, written to the cache file.

    :param cluster: name of the cluster
    :param xpdir: directory of the cache file, no cache when empty or None
    :returns: a list holding, for each ``cell`` element of the
      capabilities, the list of the ids of its ``cpu`` elements
    """
    logger.info('Determining the architecture of cluster ' +
                style.emph(cluster))
    root = None
    # The same test decides whether the cache is read and written: with
    # `if xpdir` / `is not None` an empty xpdir left fname undefined.
    fname = xpdir + '/topo_' + cluster + '.xml' if xpdir else None

    # Trying to read topology from a directory
    if fname is not None:
        try:
            root = parse(fname).getroot()
        except Exception:
            # Missing or unreadable cache; unlike a bare except this lets
            # KeyboardInterrupt and SystemExit through.
            logger.info('No cache file found, will reserve a node and ' +
                        'determine topology from virsh capabilities')

    if root is None:
        frontend = get_cluster_site(cluster)
        submission = OarSubmission(
            resources="{cluster='" + cluster + "'}/nodes=1",
            walltime="0:02:00",
            job_type="allow_classic_ssh")
        ((job_id, _), ) = oarsub([(submission, frontend)])
        wait_oar_job_start(job_id, frontend)
        host = get_oar_job_nodes(job_id, frontend)[0]
        capa = SshProcess('virsh capabilities', host,
                          connection_params={
                              'user': default_frontend_connection_params['user']
                          }).run()
        oardel([(job_id, frontend)])
        root = fromstring(capa.stdout)
        if fname is not None:
            ElementTree(root).write(fname)

    cpu_topology = [[int(cpu.attrib['id']) for cpu in cell.findall('.//cpu')]
                    for cell in root.findall('.//cell')]
    logger.info(pformat(cpu_topology))
    return cpu_topology
def configure_service_node(self):
    """Set up the name service for the virtual machines.

    dnsmasq is configured on the fastest host, with all the other hosts as
    clients; it also acts as DHCP server when a kavlan is used.
    """
    dhcp = True if self.kavlan else False
    service = 'DNS/DHCP' if dhcp else 'DNS'

    service_node = get_fastest_host(self.hosts)
    short_name = service_node.split('.')[0]
    logger.info('Setting up %s on %s', style.emph(service),
                style.host(short_name))

    # Every host but the service node is a client
    clients = list(self.hosts)
    clients.remove(service_node)

    dnsmasq_server(service_node, clients, self.vms, dhcp)
def final_state(self, Delta_final, outdir=None):
    """Log, plot and save the final state of the boxes.

    One ``<box> <Delta>`` line per box is written to ``Delta.final`` in
    the output directory. Values whose absolute value reaches 1000 are
    left out of the log message only.

    :param Delta_final: final Delta values, in the order of
      ``self.Boxes``
    :param outdir: output directory, ``self.result_dir`` when None
    """
    if outdir is None:
        outdir = self.result_dir
    box_names = list(self.Boxes.keys())
    # style.object_repr: the previous `objec_repr` was a typo
    logger.info(style.log_header('Final boxes state\n') +
                ''.ljust(8) +
                ''.join([style.emph(box.rjust(10)) for box in box_names]) +
                style.object_repr('\n' + 'Delta'.ljust(8)) +
                ''.join([str(round(delta, 7)).rjust(10)
                         for delta in Delta_final
                         if absolute(delta) < 1000]))
    self.plot_state(box_names, Delta_final, name='_final', outdir=outdir)
    # The context manager closes the file even if a write fails
    with open(outdir + '/Delta.final', 'w') as f:
        for idx, box in enumerate(box_names):
            f.write(box + ' ' + str(Delta_final[idx]) + '\n')
def plot_state(self, boxes, deltas, name='', outdir=None):
    """Make a graph of a given state.

    The boxes are the nodes of the graph and every non-zero flux of
    ``self.Flux`` is an edge, oriented from the source to the destination
    of the flux (reversed for negative values). Only the nodes are drawn,
    with the color and shape of ``self.plots_conf``, and the figure is
    saved as ``state<name>.png``.

    :param boxes: names of the boxes
    :param deltas: Delta values, in the same order as ``boxes``
    :param name: suffix of the output file name
    :param outdir: output directory, ``self.result_dir`` when None
    """
    gr = nx.MultiDiGraph()
    for i_box, box in enumerate(boxes):
        if box not in gr:
            gr.add_node(box, attrib={'delta': round(deltas[i_box], 7)})

    for box_from, boxes_to in self.Flux.items():
        for box_to, flux in boxes_to.items():
            if flux > 0:
                gr.add_edge(box_from, box_to, attrib={'flux': flux})
            elif flux < 0:
                gr.add_edge(box_to, box_from, attrib={'flux': flux})

    pos = nx.graphviz_layout(gr, prog='neato')
    # One call per node: each has its own color and shape
    for p in gr.nodes():
        nx.draw_networkx_nodes(gr, pos, nodelist=[p],
                               node_color=self.plots_conf[p]['color'],
                               node_shape=self.plots_conf[p]['shape'])
    plt.axis('off')

    if outdir is None:
        outdir = self.result_dir
    outfile = outdir + '/state' + name + '.png'
    plt.savefig(outfile)
    plt.close()
    logger.info('State has been saved to ' + style.emph(outfile))
def show_resources(resources, msg='Resources', max_resources=None, queues='default'):
    """Print the resources in a fancy way.

    One line is logged per site that has resources, either for the site
    itself or for one of its clusters, followed by a ``Grid5000`` line
    when ``resources`` holds a ``grid5000`` key, or by the total of the
    cluster resources otherwise.

    :param resources: dict mapping sites, clusters and possibly
      ``grid5000`` to a number of resources
    :param msg: header of the message
    :param max_resources: dict of the maximum values, displayed after the
      resources as ``/<max>``
    :param queues: queues given to ``get_site_clusters`` to list the
      clusters of a site
    """
    if not max_resources:
        max_resources = {}

    def _max(element):
        # '/<max>' suffix of an element, empty when it has no maximum
        if element in max_resources:
            return '/' + str(max_resources[element])
        return ''

    total_hosts = 0
    log = style.log_header(msg) + '\n'

    for site in get_g5k_sites():
        site_added = False
        if site in resources:
            log += style.log_header(site).ljust(20) + ' ' + \
                str(resources[site]) + _max(site) + ' '
            site_added = True
        clusters = get_site_clusters(site, queues=queues)
        # The site header is needed at most once, so this test is done
        # before the loop rather than for every cluster.
        if clusters and not site_added and \
                set(get_site_clusters(site)) & set(resources.keys()):
            log += style.log_header(site).ljust(20) + _max(site) + ' '
            site_added = True
        for cluster in clusters:
            if cluster in resources:
                log += style.emph(cluster) + ': ' + \
                    str(resources[cluster]) + _max(cluster) + ' '
                total_hosts += resources[cluster]
        if site_added:
            log += '\n'

    if 'grid5000' in resources:
        log += style.log_header('Grid5000').ljust(20) + \
            str(resources['grid5000']) + _max('grid5000')
    elif total_hosts > 0:
        log += style.log_header('Total ').ljust(20) + str(total_hosts)
    logger.info(log)
def get_cpu_topology(cluster, xpdir=None):
    """Determine the CPU topology of a cluster.

    When ``xpdir`` is given, the topology is first looked up in the cache
    file ``<xpdir>/topo_<cluster>.xml``. Without a usable cache, one node
    of the cluster is reserved for two minutes to run
    ``virsh capabilities``, whose output is parsed and saved to the cache
    file when ``xpdir`` is given.

    :param cluster: name of the cluster
    :param xpdir: directory of the cache file, no cache when empty or None
    :returns: a list with one list of ``cpu`` ids (int) per ``cell``
      element of the capabilities
    """
    logger.info('Determining the architecture of cluster ' +
                style.emph(cluster))
    root = None
    use_cache = bool(xpdir)
    # Trying to read topology from a directory
    if use_cache:
        fname = xpdir + '/topo_' + cluster + '.xml'
        try:
            tree = parse(fname)
            root = tree.getroot()
        except Exception:
            # Narrower than the former bare except, which also caught
            # KeyboardInterrupt and SystemExit.
            logger.info('No cache file found, will reserve a node and ' +
                        'determine topology from virsh capabilities')

    if root is None:
        frontend = get_cluster_site(cluster)
        submission = OarSubmission(
            resources="{cluster='" + cluster + "'}/nodes=1",
            walltime="0:02:00",
            job_type="allow_classic_ssh")
        ((job_id, _), ) = oarsub([(submission, frontend)])
        wait_oar_job_start(job_id, frontend)
        host = get_oar_job_nodes(job_id, frontend)[0]
        user = default_frontend_connection_params['user']
        capa = SshProcess('virsh capabilities', host,
                          connection_params={'user': user}).run()
        oardel([(job_id, frontend)])
        root = fromstring(capa.stdout)
        # Same condition as for reading: testing `xpdir is not None` here
        # used fname although it was undefined for an empty xpdir.
        if use_cache:
            tree = ElementTree(root)
            tree.write(fname)

    cpu_topology = []
    for cell in root.findall('.//cell'):
        cpu_topology.append([int(cpu.attrib['id'])
                             for cpu in cell.findall('.//cpu')])
    logger.info(pformat(cpu_topology))
    return cpu_topology
def _make_reservation(self, site):
    """Make a new reservation of one node of the configured cluster.

    A free slot is searched for the configured walltime, then a deploy job
    named ``self.options.job_name`` is submitted (in the ``testing`` queue
    when the cluster belongs to it). ``self.job_id`` is set to the id of
    the submitted job. The process exits with code 4 when no slot is
    found.

    :param site: ignored, the site comes from the computed job specs
    """
    elements = {self.config['cluster']: 1}
    logger.info('Finding slot for the experiment '
                '\nrally %s:1',
                style.host(self.config['cluster']).rjust(5))
    walltime = self.config['walltime'].encode('ascii', 'ignore')

    planning = funk.get_planning(elements)
    slots = funk.compute_slots(planning,
                               walltime=walltime,
                               excluded_elements=EXCLUDED_ELEMENTS)
    startdate, _, resources = funk.find_free_slot(slots, elements)
    # Stop before the resources of a slot that does not exist are used
    if startdate is None:
        logger.error("Sorry, could not find the resources requested.")
        exit(4)

    resources = funk.distribute_hosts(resources, elements,
                                      EXCLUDED_ELEMENTS)
    jobs_specs = funk.get_jobs_specs(resources,
                                     name=self.options.job_name,
                                     excluded_elements=EXCLUDED_ELEMENTS)
    logger.debug('Jobs specs: %s', jobs_specs)

    sub, site = jobs_specs[0]
    sub.additional_options = "-t deploy"
    sub.reservation_date = startdate
    sub.walltime = walltime
    sub.name = self.options.job_name

    if 'testing' in EX5.get_cluster_attributes(
            self.config['cluster'])['queues']:
        sub.queue = 'testing'

    jobs = EX5.oarsub([(sub, site)])
    self.job_id = jobs[0][0]
    logger.info('Job %s will start at %s', style.emph(self.job_id),
                style.log_header(EX.time_utils.format_date(startdate)))
def plot_evolution(self, Delta, outdir=None):
    """Plot the evolution of the boxes over the years and save it as a PNG.

    One curve is drawn per box of ``self.Boxes`` whose first and last values
    in ``Delta`` differ by less than 1000. The figure is written to
    ``<outdir>/evolution.png``; ``outdir`` defaults to ``self.result_dir``.
    """
    figure = plt.figure()
    for column, name in enumerate(self.Boxes):
        drift = absolute(Delta[0, column] - Delta[-1, column])
        # only boxes that do not drift away are drawn
        if drift < 1000:
            plt.plot(self.time / 365., Delta[:, column], label=name,
                     color=self.plots_conf[name]['color'])
    plt.legend()
    plt.xlabel(r"Years")
    plt.ylabel(self.delta_name)

    target_dir = self.result_dir if outdir is None else outdir
    outfile = target_dir + '/evolution.png'
    plt.savefig(outfile)
    logger.info('Evolution has been saved to ' + style.emph(outfile))

    # release the figure
    figure.clf()
    plt.close()
    gc.collect()
def _make_reservation(self):
    """Submit a new oargrid reservation and remember its id.

    One resource criterion is built per cluster of
    ``self.config["resources"]`` (summing the node counts of its roles), the
    entries of ``self.config["vlans"]`` are appended per site, and one
    ``OarSubmission`` per site is handed to ``EX5.oargridsub``. On success
    the job id is stored in ``self.gridjob``; otherwise the process exits
    with status 26.
    """
    # site -> list of `oarsub -l` criteria
    per_site = {}

    # Number of nodes per cluster
    for cluster, roles in self.config["resources"].items():
        site = get_cluster_site(cluster)
        nb_nodes = reduce(operator.add, map(int, roles.values()))
        if site not in per_site:
            per_site[site] = []
        per_site[site].append(
            "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes))

    for site, vlan in self.config["vlans"].items():
        if site not in per_site:
            per_site[site] = []
        per_site[site].append(vlan)

    # Compute the specification for the reservation
    jobs_specs = []
    for site, site_criteria in per_site.items():
        submission = OarSubmission(resources='+'.join(site_criteria),
                                   name=self.config["name"])
        jobs_specs.append((submission, site))
    logger.info("Criteria for the reservation: %s" % pf(jobs_specs))

    # Make the reservation
    gridjob, _ = EX5.oargridsub(
        jobs_specs,
        reservation_date=self.config['reservation'],
        walltime=self.config['walltime'].encode('ascii', 'ignore'),
        job_type='deploy'
    )

    # TODO - move this upper to not have a side effect here
    if gridjob is None:
        logger.error("No oar job was created.")
        sys.exit(26)
    self.gridjob = gridjob
    logger.info("Using new oargrid job %s" % style.emph(self.gridjob))
def add_backbone(self):
    """Add the nodes corresponding to Renater equipments.

    Every backbone equipment of ``self.data['network']['backbone']`` becomes
    a node of kind ``'renater'``; each of its ports whose uid contains
    ``'renater-'`` becomes a node linked to it by an active edge carrying
    bandwidth and latency. When the graph does not cover every Grid'5000
    site, Renater nodes that lie on no shortest path between the routers of
    two sites of the graph are removed.
    """
    logger.debug('Add %s network', style.emph('Renater'))
    backbone = self.data['network']['backbone']
    for equip in backbone:
        src = equip['uid']
        self.add_node(src, kind='renater')
        for lc in equip['linecards']:
            for port in lc['ports']:
                if 'uid' not in port or 'renater-' not in port['uid']:
                    continue
                # Port-level values override the linecard / default ones
                bandwidth = port['rate'] if 'rate' in port else lc['rate']
                latency = port['latency'] if 'latency' in port \
                    else arbitrary_latency
                kind = port['kind'] if 'kind' in port else 'renater'
                dst = port['uid']
                logger.debug('* %s (%s, bw=%s, lat=%s)', dst, kind,
                             bandwidth, latency)
                self.add_node(dst, kind=kind)
                if not self.has_edge(src, dst):
                    self.add_edge(src, dst, bandwidth=bandwidth,
                                  latency=latency, active=True)

    # Removing unused one.
    # get_sites must be *called* here: comparing the bound method itself to
    # a list is always unequal, which made this test meaningless.
    sites = self.get_sites()
    if set(sites) != set(get_g5k_sites()):
        logger.debug('Removing unused Renater equipments')
        used_elements = set()
        for site in sites:
            for dest in sites:
                if dest == site:
                    continue
                gw_src = self.get_site_router(site)[0]
                gw_dst = self.get_site_router(dest)[0]
                used_elements.update(
                    el for el in nx.shortest_path(self, gw_src, gw_dst)
                    if 'renater' in el)

        # Materialize the candidates first: nodes are removed while looping
        renater_nodes = [name for name, attrs in self.nodes_iter(data=True)
                         if attrs['kind'] == 'renater']
        for element in renater_nodes:
            if element not in used_elements:
                self.remove_node(element)
def run_ansible(playbooks, inventory_path, extra_vars=None, tags=None):
    """Run each playbook against the inventory and log the hosts in error.

    :param playbooks: iterable of playbook paths, run in order
    :param inventory_path: path handed to ``Inventory``
    :param extra_vars: dict of extra variables given to every playbook;
      defaults to an empty dict
    :param tags: passed as ``only_tags`` to every playbook
    """
    # Build a fresh dict per call: a `{}` default would be one object shared
    # by every call of this function.
    if extra_vars is None:
        extra_vars = {}

    inventory = Inventory(inventory_path)

    for path in playbooks:
        logger.info("Running playbook %s with vars:\n%s" %
                    (style.emph(path), extra_vars))
        stats = ansible.callbacks.AggregateStats()
        playbook_cb = ansible.callbacks.PlaybookCallbacks(verbose=1)

        pb = ansible.playbook.PlayBook(
            playbook=path,
            inventory=inventory,
            extra_vars=extra_vars,
            stats=stats,
            callbacks=playbook_cb,
            only_tags=tags,
            runner_callbacks=ansible.callbacks.PlaybookRunnerCallbacks(
                stats, verbose=1))
        pb.run()

        # Collect the hosts that failed or could not be reached
        failed_hosts = []
        unreachable_hosts = []
        for h in pb.stats.processed.keys():
            t = pb.stats.summarize(h)
            if t['failures'] > 0:
                failed_hosts.append(h)
            if t['unreachable'] > 0:
                unreachable_hosts.append(h)

        if failed_hosts:
            logger.error("Failed hosts: %s" % failed_hosts)
        if unreachable_hosts:
            logger.error("Unreachable hosts: %s" % unreachable_hosts)
def boot_vms_by_core(vms):
    """Boot VMs in waves, starting at most one VM per cpuset in each wave.

    The VMs are grouped by their ``'cpuset'`` value; each wave takes the
    next VM of every group, starts them and waits until they have started.

    :param vms: list of VM dicts (``'id'``, ``'host'`` and ``'cpuset'`` keys
      are read); the host of the first VM is used to label log lines

    :return: True when every wave has started (or ``vms`` is empty), False
      as soon as one wave fails to start
    """
    n_vm = len(vms)
    if n_vm == 0:
        return True

    if isinstance(vms[0]['host'], Host):
        host = vms[0]['host'].address.split('.')[0]
    else:
        host = vms[0]['host'].split('.')[0]

    # Group the VMs by cpuset in a single pass
    sub_vms = {}
    for vm in vms:
        sub_vms.setdefault(vm['cpuset'], []).append(vm)

    booted_vms = 0
    while sub_vms:
        vms_to_boot = []
        # Iterate over a snapshot of the keys: exhausted groups are deleted
        # from the dict inside the loop.
        for i_core in list(sub_vms):
            vms_to_boot.append(sub_vms[i_core].pop(0))
            if not sub_vms[i_core]:
                del sub_vms[i_core]
        # Sort on the id: dicts themselves have no portable ordering
        logger.info(
            style.Thread(host) + ': Starting VMS ' + ', '.join(
                [vm['id'] for vm in sorted(vms_to_boot,
                                           key=lambda vm: vm['id'])]))
        start_vms(vms_to_boot).run()
        booted = wait_vms_have_started(vms_to_boot)
        if not booted:
            return False
        booted_vms += len(vms_to_boot)
        logger.info(
            style.Thread(host) + ': ' +
            style.emph(str(booted_vms) + '/' + str(n_vm)))
    return True
def create_par_file(self, comb):
    """Create the run directory on the job server and upload the par file.

    The remote directory ``parent_dir + slugify(comb) + '/'`` gets ``Img``
    and ``Op`` sub-directories. A par file is generated locally from
    ``template.nml`` with values of ``comb``, uploaded there and renamed to
    ``par``.
    """
    logger.info('Creating and uploading par file')
    slug = slugify(comb)
    comb_dir = parent_dir + slug + '/'
    logger.info('comb_dir = ' + comb_dir)

    # Create remote directories
    mkdir_cmd = 'mkdir -p %sImg ; mkdir -p %sOp ; ' % (comb_dir, comb_dir)
    SshProcess(mkdir_cmd, jobserver).run()

    # Generate par file
    par_file = 'par_' + slug
    logger.info('par_file = %s', style.emph(par_file))
    nml = f90nml.read('template.nml')
    rayleigh = float(comb['RA'])
    nml['refstate']['ra0'] = rayleigh
    nml['tracersin']['K_Fe'] = comb['KFe']
    geometry = nml['geometry']
    geometry['r_cmb'] = comb['RCMB']
    # vertical resolution grows with the Rayleigh number, capped at 128
    nztot = min(int(2 ** (math.log10(rayleigh) + 1)), 128)
    geometry['nztot'] = nztot
    geometry['nytot'] = int(math.pi * (comb['RCMB'] + 0.5) * nztot)
    nml.write(par_file, force=True)
    logger.info('Created par file ' + par_file)

    # Upload par file to remote directory
    Put([jobserver], [par_file], remote_location=comb_dir).run()
    rename_cmd = 'cd %s ; mv %s par' % (comb_dir, par_file)
    SshProcess(rename_cmd, jobserver).run()
    logger.info('Done')
def generate_kolla_files(config_vars, kolla_vars, directory):
    """Write globals.yml, passwords.yml and admin-openrc into ``directory``.

    ``globals.yml`` holds ``config_vars`` overridden by ``kolla_vars``;
    ``passwords.yml`` is copied from ``TEMPLATE_DIR``; ``admin-openrc`` is
    rendered from a template with the internal VIP address of
    ``kolla_vars``.
    """
    # Static parameters from the config file, plus the generated ones.
    # NOTE(review): config_vars is updated in place, so the caller's dict
    # changes too - confirm that callers expect this.
    config_vars.update(kolla_vars)

    # write to file in the result dir
    globals_path = os.path.join(directory, 'globals.yml')
    with open(globals_path, 'w') as globals_file:
        yaml.dump(config_vars, globals_file, default_flow_style=False)
    logger.info("Wrote " + style.emph(globals_path))

    # copy the passwords file
    passwords_path = os.path.join(directory, 'passwords.yml')
    copy_cmd = "cp %s/passwords.yml %s" % (TEMPLATE_DIR, passwords_path)
    call(copy_cmd, shell=True)
    logger.info("Password file is copied to %s" % (passwords_path))

    # admin openrc
    admin_openrc_path = os.path.join(directory, 'admin-openrc')
    keystone_address = kolla_vars['kolla_internal_vip_address']
    render_template('templates/admin-openrc.jinja2',
                    {'keystone_address': keystone_address},
                    admin_openrc_path)
    logger.info("admin-openrc generated in %s" % (admin_openrc_path))
def run(self):
    """Execute the engine and compute the results.

    Sweeps over every combination of ``self.parameters``. For each one the
    boxes, fluxes and partition coefficient are configured, the model is
    evolved from its initial state and the outputs are written to
    ``<result_dir>/<slugified combination>``. Progress is tracked by a
    ``ParamSweeper`` stored under ``<result_dir>/sweeps``.
    """
    self.parameters = {'delta_diet': arange(-0.1, 1.5, 0.5),
                       'coeff_DP': arange(0.9995, 1.0005, 0.0003),
                       'flux_DP': range(5, 15),
                       'flux_PB': arange(0.01, 0.1, 0.03)}
    sweeps = sweep(self.parameters)
    sweeper = ParamSweeper(path.join(self.result_dir, "sweeps"), sweeps)
    logger.info('Engine will treat %s models',
                style.emph(len(sweeper.get_remaining())))

    total_comb = len(sweeper.get_remaining())
    i_comb = 0
    while len(sweeper.get_remaining()) > 0:
        i_comb += 1
        comb = sweeper.get_next()
        logger.info(style.comb('Combination %s/%s' % (i_comb, total_comb)) +
                    '\n%s', pformat(comb))

        comb_dir = self.result_dir + '/' + slugify(comb)
        try:
            mkdir(comb_dir)
        except OSError:
            # Best effort: typically the directory is left over from a
            # previous run.
            pass

        self.set_boxes(comb['delta_diet'])
        self.set_flux(comb['flux_DP'], comb['flux_PB'])
        self.set_partcoeff(comb['coeff_DP'])

        Delta = self.initial_state(outdir=comb_dir)
        Delta = self.compute_evolution(Delta, outdir=comb_dir)
        self.final_state(Delta[-1, :], outdir=comb_dir)

        sweeper.done(comb)
        logger.info('Combination done\n')

    self.set_boxes(1)
    self.peace_flag('delta_diet', 'flux_DP')
    logger.info('All combinations have been done, result can be found in ' +
                self.result_dir)
def distribute_vms(vms, hosts, distribution='round-robin'):
    """Distribute the virtual machines on the hosts.

    For ``'round-robin'``, ``'concentrated'`` and ``'random'``, each VM is
    placed on the current host as long as that host keeps enough RAM and
    CPU; a host that cannot take the VM is dropped from the candidates. The
    process exits when no candidate host is left. For ``'n_by_hosts'``,
    every host receives the same number of VMs and ``vms`` is truncated in
    place when the division is not exact.

    :param vms: a list of VMs dicts which host key will be updated

    :param hosts: a list of hosts

    :param distribution: a string defining the distribution type:
     'round-robin', 'concentrated', 'n_by_hosts', 'random'
    """
    logger.debug('Initial virtual machines distribution \n%s',
                 "\n".join([vm['id'] + ": " + str(vm['host'])
                            for vm in vms]))

    if distribution in ['round-robin', 'concentrated', 'random']:
        attr = get_CPU_RAM_FLOPS(hosts)
        dist_hosts = hosts[:]
        # next() builtin rather than the Python-2-only .next() method
        iter_hosts = cycle(dist_hosts)
        host = next(iter_hosts)
        for vm in vms:
            remaining = attr[host].copy()
            # Drop hosts that cannot take this VM until one fits
            while remaining['RAM'] - vm['mem'] <= 0 \
                    or remaining['CPU'] - vm['n_cpu'] / 3 <= 0:
                dist_hosts.remove(host)
                if len(dist_hosts) == 0:
                    req_mem = sum([vm['mem'] for vm in vms])
                    req_cpu = sum([vm['n_cpu'] for vm in vms]) / 3
                    logger.error(
                        'Not enough ressources ! \n' + 'RAM'.rjust(20) +
                        'CPU'.rjust(10) + '\n' + 'Needed'.ljust(15) +
                        '%s Mb'.ljust(15) + '%s \n' +
                        'Available'.ljust(15) + '%s Mb'.ljust(15) +
                        '%s \n' + 'Maximum number of VM is %s',
                        req_mem, req_cpu, attr['TOTAL']['RAM'],
                        attr['TOTAL']['CPU'],
                        style.emph(str(get_max_vms(hosts, vm['mem']))))
                    exit()
                # Restart the cycle on the remaining candidates
                iter_hosts = cycle(dist_hosts)
                host = next(iter_hosts)
                remaining = attr[host].copy()

            vm['host'] = host
            remaining['RAM'] -= vm['mem']
            remaining['CPU'] -= vm['n_cpu'] / 3
            attr[host] = remaining.copy()

            # 'concentrated' keeps filling the same host; the two other
            # modes move on before the next VM.
            if distribution == 'round-robin':
                host = next(iter_hosts)
            if distribution == 'random':
                for _ in range(randint(0, len(dist_hosts))):
                    host = next(iter_hosts)

    elif distribution == 'n_by_hosts':
        n_by_host = len(vms) // len(hosts)
        i_vm = 0
        for host in hosts:
            for _ in range(n_by_host):
                vms[i_vm]['host'] = host
                i_vm += 1
        if len(vms) % len(hosts) != 0:
            logger.warning('Reducing number of VMs to have %s by host',
                           style.emph(n_by_host))
            vms[:] = vms[0:n_by_host * len(hosts)]
    else:
        logger.debug('No valid distribution given')

    logger.debug('Final virtual machines distribution \n%s',
                 "\n".join([vm['id'] + ": " + str(vm['host'])
                            for vm in vms]))
def define_vms(vms_id, template=None, ip_mac=None, tap=None, state=None,
               host=None, n_cpu=None, cpusets=None, mem=None, hdd=None,
               backing_file=None, real_file=None):
    """Create a list of virtual machines, where VM parameter is a dict
    similar to
    {'id': None, 'host': None, 'ip': None, 'mac': None,
    'mem': 512, 'n_cpu': 1, 'cpuset': 'auto',
    'hdd': 10, 'backing_file': '/tmp/vm-base.img',
    'state': 'KO'}

    Can be generated from a template or using user defined parameters that
    can be a single element or a list of element. When a template is given,
    the ``state``, ``host``, ``n_cpu``, ``cpusets``, ``mem``, ``hdd``,
    ``backing_file`` and ``real_file`` arguments are ignored and the values
    come from the template attributes (or from ``default_vm`` when an
    attribute is absent).

    :param vms_id: a list of string that will be used as vm id

    :param template: an XML element defining the template of the VM

    :param ip_mac: a list of tuple containing ip, mac correspondance

    :param tap: a single value applied to every VM, or a list with one
      value per VM

    :param state: the state of the VM

    :param host: the host of the VM (string)

    :param n_cpu: the number of virtual CPU of the VMs

    :param cpusets: the cpuset of the VMs (int or string)

    :param mem: the memory of the VMs (int)

    :param hdd: the disk size of the VMs (int)

    :param backing_file: the backing file of the VMs (string)

    :param real_file: boolean to use a real file

    :return: a list of VM dicts, one per element of ``vms_id``
    """
    n_vm = len(vms_id)

    def from_args(value, key, scalar_types):
        """Expand a user value (None, scalar or list) to one per VM."""
        if value is None:
            return [default_vm[key]] * n_vm
        if isinstance(value, scalar_types):
            return [value] * n_vm
        return value

    def from_template(key, convert=None):
        """Read one template attribute and repeat it for every VM."""
        if key not in template.attrib:
            return [default_vm[key]] * n_vm
        value = template.get(key)
        if convert is not None:
            value = convert(value)
        return [value] * n_vm

    if template is None:
        n_cpu = from_args(n_cpu, 'n_cpu', int)
        # A cpuset may be a string such as 'auto': treat it as a scalar too,
        # otherwise it would be indexed character by character below.
        cpusets = from_args(cpusets, 'cpuset', (int, str))
        mem = from_args(mem, 'mem', int)
        hdd = from_args(hdd, 'hdd', int)
        backing_file = from_args(backing_file, 'backing_file', str)
        real_file = from_args(real_file, 'real_file', bool)
        state = from_args(state, 'state', str)
        host = from_args(host, 'host', str)
    else:
        n_cpu = from_template('n_cpu', int)
        cpusets = from_template('cpuset')
        mem = from_template('mem', int)
        hdd = from_template('hdd', int)
        backing_file = from_template('backing_file')
        real_file = from_template('real_file')
        state = from_template('state')
        host = from_template('host')

    ip_mac = [(None, None)] * n_vm if ip_mac is None else ip_mac
    tap = [tap] * n_vm if not isinstance(tap, list) else tap

    vms = [{'id': vms_id[i], 'mem': mem[i], 'n_cpu': n_cpu[i],
            'cpuset': cpusets[i], 'hdd': hdd[i], 'host': host[i],
            'backing_file': backing_file[i], 'real_file': real_file[i],
            'state': state[i], 'tap': tap[i],
            'ip': ip_mac[i][0], 'mac': ip_mac[i][1]}
           for i in range(n_vm)]

    logger.debug('VM parameters have been defined:\n%s',
                 ' '.join([style.emph(param['id']) for param in vms]))
    return vms
def setup_host(self):
    """Deploy the node, make sure Rally is installed and register the cloud.

    The environment ``self.config['env-name']`` is deployed on ``self.host``
    (with ``self.config['env-user']`` when set and non-empty). Rally is
    installed unless ``rally version`` already succeeds on the host. A
    deployment file is then rendered from the configuration, uploaded and
    registered with ``rally deployment create``. Sets
    ``self.rally_deployed`` to True at the end.
    """
    forced_note = ' (forced)' if self.options.force_deploy else ''
    logger.info('Deploying environment %s on %s' %
                (style.emph(self.config['env-name']), self.host) +
                forced_note)

    # The deployment user is only passed when one is configured
    deploy_args = {'hosts': [self.host],
                   'env_name': self.config['env-name']}
    if 'env-user' in self.config and self.config['env-user'] != '':
        deploy_args['user'] = self.config['env-user']
    deployment = EX5.Deployment(**deploy_args)

    deployed_hosts, _ = EX5.deploy(
        deployment,
        check_deployed_command=not self.options.force_deploy)

    # Test if rally is installed
    rally_check = EX.SshProcess('rally version', self.host,
                                {'user': '******'})
    rally_check.ignore_exit_code = True
    rally_check.nolog_exit_code = True
    rally_check.run()

    if rally_check.exit_code == 0:
        logger.info("Rally %s is already installed" %
                    rally_check.stdout.rstrip())
    else:
        # Install rally
        self._run_or_abort(
            "curl -sO %s" % RALLY_INSTALL_URL,
            self.host,
            "Could not download Rally install script from %s" %
            RALLY_INSTALL_URL,
            conn_params={'user': '******'})

        logger.info("Installing dependencies on deployed host")
        dependency_steps = (
            ('apt-get update && apt-get -y update',
             'Could not update packages on host'),
            ('apt-get -y install python-pip',
             'Could not install pip on host'),
            ('pip install --upgrade setuptools',
             'Could not upgrade setuptools'),
        )
        for command, failure_msg in dependency_steps:
            self._run_or_abort(command, self.host, failure_msg,
                               conn_params={'user': '******'})

        logger.info("Installing rally from %s" %
                    style.emph(self.config['rally-git']))
        self._run_or_abort(
            "bash install_rally.sh -y --url %s" % self.config['rally-git'],
            self.host,
            'Could not install Rally on host',
            conn_params={'user': '******'})

    # Setup the deployment file
    auth = self.config['authentication']
    deployment_vars = {
        "controller": self.config['os-services']['controller']}
    for var_name, config_key in (("os_region", 'os-region'),
                                 ("os_username", 'os-username'),
                                 ("os_password", 'os-password'),
                                 ("os_tenant", 'os-tenant'),
                                 ("os_user_domain", 'os-user-domain'),
                                 ("os_project_domain",
                                  'os-project-domain')):
        deployment_vars[var_name] = auth[config_key]

    rally_deployment = self._render_template(
        'templates/deployment_existing.json', deployment_vars)
    EX.Put([self.host], [rally_deployment],
           remote_location='deployment_existing.json',
           connection_params={'user': '******'}).run()

    # Create a Rally deployment
    self._run_or_abort(
        "rally deployment create --filename deployment_existing.json "
        "--name %s" % self.config['deployment-name'],
        self.host,
        'Could not create the Rally deployment',
        conn_params={'user': '******'})
    self.rally_deployed = True

    logger.info("Rally has been deployed correctly")
def main():
    """Deploy the nodes of the given OAR jobs and prepare a sloth run.

    Resolves the jobs (from ``--job_ids`` style ``site:id`` entries or from
    an oargrid job id), deploys ``jessie-x64-nfs`` on their nodes, installs
    packages and system limits on them, copies the SLOTH and injector trees
    to the frontends, and writes ``./hosts.info``, ``./peers.info`` and
    ``./service_node.info``. The last node is kept for the injector.
    """
    copy_outputs('config.log', 'config.log')
    args = parser.parse_args()
    whoami = os.getlogin()
    logger.info('whoami: %s', whoami)

    if args.grid_job_id is None:
        jobids = args.job_ids
    else:
        grid_job_id = int(args.grid_job_id[0])
        jobids = ["%s:%d" % (site, job_id)
                  for job_id, site in get_oargrid_job_oar_jobs(grid_job_id)]
    logger.info('Using jobs %s', style.emph(' '.join(jobids)))

    # Each job id has the form "<site>:<oar job id>"
    sites = [j.strip().split(':')[0] for j in jobids]
    frontends = [str('frontend.' + s) for s in sites]
    jobids_list = [(int(j.strip().split(':')[1]),
                    str(j.strip().split(':')[0])) for j in jobids]

    logger.info("Get list of associated nodes")
    nodes = [job_nodes for job in jobids_list
             for job_nodes in get_oar_job_nodes(*job)]
    logger.info('%s', nodes)

    logger.info("Deploying %i nodes" % (len(nodes),))
    deployed, undeployed = deploy(
        Deployment(nodes, env_name="jessie-x64-nfs"))
    logger.info("%i deployed, %i undeployed" %
                (len(deployed), len(undeployed)))

    ## Configure Host OSes
    logger.info('Finalize node customization')
    # use root to connect on the host
    default_connection_params['user'] = '******'

    ## Copy local .ssh to remote nodes:
    logger.info('Copy ssh entries into root of each node')
    Put(nodes,
        ['/home/' + whoami + '/.ssh/id_rsa',
         '/home/' + whoami + '/.ssh/id_rsa.pub'],
        '.ssh/.').run()

    ## Install missing packages
    logger.info('| - Install Packages')
    # NOTE(review): DEBIAN_MASTER and "Dpkgtions::" look like typos for
    # DEBIAN_FRONTEND and "Dpkg::Options::" - confirm before changing the
    # command, it is left untouched here.
    TaktukRemote(
        'export DEBIAN_MASTER=noninteractive ; '
        'export https_proxy="https://proxy:3128"; '
        'apt-get -o Acquire::Check-Valid-Until=false update && '
        'apt-get install -y --force-yes python-pip lynx openjdk-8-jdk '
        'uuid-runtime cpufrequtils kanif '
        '-o Acquire::Check-Valid-Until=false '
        '-o Dpkgtions::="--force-confdef" '
        '-o Dpkgtions::="--force-confold" ; '
        'pip install tabview', nodes).run()

    ## Fix ulimit and related stuffs
    logger.info('| - set limit related stuffs')
    cmd = ('ulimit -c unlimited; sysctl -w vm.max_map_count=331072 ; '
           'echo 120000 > /proc/sys/kernel/threads-max ; '
           'echo 600000 > /proc/sys/vm/max_map_count ; '
           'echo 200000 > /proc/sys/kernel/pid_max')
    TaktukRemote(cmd, nodes).run()

    ## Copy the DHT-EXP hierarchy to the remote site
    logger.info('Copy sloth and injector files on each NFS server '
                'involved in the experiment')
    TaktukRemote('mkdir -p ~/SLOTH-EXP-TMP/', frontends,
                 connection_params={'user': str(whoami)}).run()
    TaktukPut(frontends, ['./SLOTH_HOME'], './SLOTH-EXP-TMP/.',
              connection_params={'user': str(whoami)}).run()
    TaktukPut(frontends, ['./INJECTOR_HOME'], './SLOTH-EXP-TMP/.',
              connection_params={'user': str(whoami)}).run()

    ## Prepare the address file for the sloth peers (please remind that
    ## the last node is dedicated for the injector
    logger.info('Prepare the peers list')
    logger.info('enumerate %s', enumerate(nodes[:-1]))
    # i counts the peers: one per hardware thread of every peer node
    i = 0
    # `with` closes both files even if a host attribute lookup fails
    with open('./hosts.info', 'w') as f1, open('./peers.info', 'w') as f2:
        for node in nodes[:-1]:
            f1.write("%s\n" % (node.address))
            smt_size = get_host_attributes(node)['architecture']['smt_size']
            for _ in range(smt_size):
                f2.write("%s:%d:%d\n" % (node.address, 3000 + i, 8000 + i))
                i = i + 1

    with open('./service_node.info', 'w') as f:
        f.write("%s" % nodes[-1].address)

    logger.info("Nodes are now ready, you should launch "
                "./runExperiment.sh ... from the lyon frontend")
    logger.info("The list of sloth peers is in ./peers.info")
    logger.info("The service node is in ./service_node.info")
    logger.info("The injector will run on %s" % nodes[-1].address)
    logger.info("The usual(max) command should be : ./runExperiment.sh "
                "in_vivo %d %s/peers.info %s" %
                (i, os.getcwd(), nodes[-1].address))
def draw_gantt(planning, colors=None, show=False, save=True, outfile=None):
    """ Draw the hosts planning for the elements you ask (requires Matplotlib)

    One subplot is drawn per site that has at least one host; inside it,
    every host gets a horizontal band in which its ``'free'`` and ``'busy'``
    slots are drawn as bars between their start and end dates.

    :param planning: the dict of elements planning; it is walked here as
      nested dicts ending in lists of slots, where ``slot[0]`` is used as
      the start timestamp and ``slot[1]`` as the end timestamp

    :param colors: a dict to define element coloring ``{'element': (255., 122., 122.)}``;
      indexed by cluster name and by ``'busy'``

    :param show: display the Gantt diagram

    :param save: save the Gantt diagram to outfile

    :param outfile: specify the output file; defaults to ``gantt_`` followed
      by the keys of ``planning`` and the formatted start date"""
    if colors is None:
        colors = _set_colors()
    n_sites = len(planning)
    # Find the time window covered by the planning.
    # NOTE(review): the nesting of the two early `break`s below was
    # reconstructed from a source without indentation - confirm against the
    # upstream file.
    startstamp = None
    endstamp = None
    for clusters_hosts in planning.values():
        for hosts_kinds in clusters_hosts.values():
            for kinds_slots in hosts_kinds.values():
                for slots in kinds_slots.values():
                    for slot in slots:
                        if startstamp == None or slot[0] < startstamp:
                            startstamp = slot[0]
                        if endstamp == None or slot[1] > endstamp:
                            endstamp = slot[1]
                # stop scanning as soon as both bounds are known
                if startstamp and endstamp:
                    break
            if startstamp and endstamp:
                break
    # `slots` is whatever list the scan above visited last
    for slot in slots:
        if slot[0] < startstamp:
            startstamp = slot[0]
        if slot[1] > endstamp:
            endstamp = slot[1]
    if outfile is None:
        outfile = 'gantt_' + "_".join([site for site in planning]) \
            + '_' + format_date(startstamp)
    logger.info('Saving Gantt chart to %s', style.emph(outfile))
    # Subplot grid: two columns as soon as there is more than one site
    n_col = 2 if n_sites > 1 else 1
    n_row = int(ceil(float(n_sites) / float(n_col)))
    x_major_locator = MD.AutoDateLocator()
    xfmt = MD.DateFormatter('%d %b, %H:%M ')
    PLT.ioff()
    fig = PLT.figure(figsize=(15, 5 * n_row), dpi=80)
    i_site = 1
    for site, clusters in planning.items():
        n_hosts = 0
        for hosts in clusters.values():
            n_hosts += len(hosts)
        # sites without any host get no subplot
        if n_hosts == 0:
            continue
        ax = fig.add_subplot(n_row, n_col, i_site, title=site.title())
        ax.title.set_fontsize(18)
        ax.xaxis_date()
        ax.set_xlim(unixts_to_datetime(startstamp), unixts_to_datetime(endstamp))
        ax.xaxis.set_major_formatter(xfmt)
        ax.xaxis.set_major_locator(x_major_locator)
        ax.xaxis.grid(color='black', linestyle='dashed')
        PLT.xticks(rotation=15)
        ax.set_ylim(0, 1)
        ax.get_yaxis().set_ticks([])
        ax.yaxis.label.set_fontsize(16)
        # The y axis spans [0, 1]; each host advances `pos` by `inc`
        pos = 0.0
        inc = 1.0 / n_hosts
        ylabel = ''
        for cluster, hosts in clusters.items():
            ylabel += cluster + ' '
            i_host = 0
            # Hosts are sorted on the part of their name before the first
            # '.', split on '-': first piece as text, second piece as int
            for key in sorted(list(hosts), key=lambda name: (name.split('.', 1)[0].split('-')[0], int(name.split('.', 1)[0].split('-')[1]))):
                slots = hosts[key]
                i_host += 1
                cl_colors = {'free': colors[cluster], 'busy': colors['busy']}
                for kind in cl_colors:
                    for freeslot in slots[kind]:
                        # bar from the slot start (bdate) to its end (edate)
                        edate, bdate = [ MD.date2num(item) for item in (unixts_to_datetime(freeslot[1]), unixts_to_datetime(freeslot[0])) ]
                        ax.barh(pos, edate - bdate, 1, left=bdate, color=cl_colors[kind], edgecolor='none')
                pos += inc
                # horizontal line after the last host of the cluster
                if i_host == len(hosts):
                    ax.axhline(y=pos, color=cl_colors['busy'], linestyle='-', linewidth=1)
        ax.set_ylabel(ylabel)
        i_site += 1
    fig.tight_layout()
    if show:
        PLT.show()
    if save:
        logger.debug('Saving file %s ...', outfile)
        PLT.savefig(outfile, dpi=300)
if resources[c] > 1: wanted = {c: 1} break jobs_specs = get_jobs_specs(wanted, name=job_name) for sub, frontend in jobs_specs: sub.walltime = walltime sub.job_type = "deploy" job = oarsub(jobs_specs)[0] nodes = get_oar_job_nodes(job[0], job[1]) logger.info('Deploying host %s', nodes[0].address) deployed, undeployed = deploy(Deployment(nodes, env_name="jessie-x64-base")) execware_host = list(deployed)[0] logger.info('Installing required packages %s', style.emph(packages)) install_packages = SshProcess('apt-get update && apt-get install -y ' + packages, execware_host).run() logger.info('Copying files to host') put_files = Put(execware_host, [source_code], remote_location="/tmp").run() xml_file = """ <settings> <proxies> <proxy> <id>g5k-proxy</id> <active>true</active> <protocol>http</protocol> <host>proxy</host> <port>3128</port> </proxy>
def deploy(deployment,
           check_deployed_command=True,
           node_connection_params={'user': '******'},
           num_tries=1,
           check_enough_func=None,
           frontend_connection_params=None,
           deploy_timeout=None,
           check_timeout=30,
           stdout_handlers=None,
           stderr_handlers=None):
    """Deploy nodes, many times if needed, checking which of these nodes
    are already deployed with a user-supplied command. If no command
    given for checking if nodes deployed, rely on kadeploy to know
    which nodes are deployed.

    - loop `num_tries` times:

      - if ``check_deployed_command`` given, try to connect to these
        hosts using the supplied `node_connection_params` (or the
        default ones), and to execute ``check_deployed_command``. If
        connection succeeds and the command returns 0, the host is
        assumed to be deployed, else it is assumed to be undeployed.

      - optionally call user-supplied ``check_enough_func``, passing
        to it the list of deployed and undeployed hosts, to let user
        code decide if enough nodes deployed. Otherwise, try as long
        as there are undeployed nodes.

      - deploy the undeployed nodes

    returns a tuple with the list of deployed hosts and the list of
    undeployed hosts.

    When checking correctly deployed nodes with
    ``check_deployed_command``, and if the deployment is using the
    kavlan option, this function will try to contact the nodes using
    the appropriate DNS hostnames in the new vlan.

    :param deployment: instance of `execo.kadeploy.Deployment` class
      describing the intended kadeployment.

    :param check_deployed_command: command to perform remotely to
      check node deployment. May be a String, True, False or None. If
      String: the actual command to be used (This command should
      return 0 if the node is correctly deployed, or another value
      otherwise). If True, the default command value will be used
      (from `execo_g5k.config.g5k_configuration`). If None or False,
      no check is made and deployed/undeployed status will be taken
      from kadeploy's output.

    :param node_connection_params: a dict similar to
      `execo.config.default_connection_params` whose values will
      override those in `execo.config.default_connection_params` when
      connecting to check node deployment with
      ``check_deployed_command`` (see below).

    :param num_tries: number of deploy tries

    :param check_enough_func: a function taking as parameter a list of
      deployed hosts and a list of undeployed hosts, which will be
      called at each deployment iteration end, and that should return
      a boolean indicating if there is already enough nodes (in this
      case, no further deployment will be attempted).

    :param frontend_connection_params: connection params for
      connecting to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param deploy_timeout: timeout for deployment. Default is None,
      which means no timeout.

    :param check_timeout: timeout for node deployment checks. Default
      is 30 seconds.

    :param stdout_handlers: iterable of `ProcessOutputHandlers`
      which will be passed to the actual deploy processes.

    :param stderr_handlers: iterable of `ProcessOutputHandlers`
      which will be passed to the actual deploy processes.
    """

    if check_enough_func is None:
        check_enough_func = lambda deployed, undeployed: len(undeployed) == 0

    if check_deployed_command == True:
        check_deployed_command = g5k_configuration.get(
            'check_deployed_command')

    def check_update_deployed(undeployed_hosts, check_deployed_command,
                              node_connection_params, vlan):  #IGNORE:W0613
        """Run the check command on the hosts and return those that pass."""
        logger.debug(
            style.emph("check which hosts are already deployed among:") +
            " %s", undeployed_hosts)
        # Map the name actually contacted (vlan name if any) to the host
        deployment_hostnames_mapping = dict()
        if vlan:
            for host in undeployed_hosts:
                deployment_hostnames_mapping[
                    get_kavlan_host_name(host, vlan)] = host
        else:
            for host in undeployed_hosts:
                deployment_hostnames_mapping[host] = host
        deployed_check = get_remote(
            check_deployed_command,
            list(deployment_hostnames_mapping),
            connection_params=node_connection_params)
        # A failing check is an expected outcome here: do not log it
        for p in deployed_check.processes:
            p.nolog_exit_code = True
            p.nolog_timeout = True
            p.nolog_error = True
            p.timeout = check_timeout
        deployed_check.run()
        newly_deployed = list()
        for process in deployed_check.processes:
            logger.debug(
                style.emph("check on %s:" % (process.host, )) +
                " %s\n" % (process, ) +
                style.emph("stdout:") + "\n%s\n" % (process.stdout) +
                style.emph("stderr:") + "\n%s\n" % (process.stderr))
            if (process.ok):
                newly_deployed.append(
                    deployment_hostnames_mapping[process.host.address])
                logger.debug(
                    "OK %s",
                    deployment_hostnames_mapping[process.host.address])
            else:
                logger.debug(
                    "KO %s",
                    deployment_hostnames_mapping[process.host.address])
        return newly_deployed

    start_time = time.time()
    deployed_hosts = set()
    undeployed_hosts = set([Host(host).address for host in deployment.hosts])
    my_newly_deployed = []
    if check_deployed_command:
        my_newly_deployed = check_update_deployed(undeployed_hosts,
                                                  check_deployed_command,
                                                  node_connection_params,
                                                  deployment.vlan)
        deployed_hosts.update(my_newly_deployed)
        undeployed_hosts.difference_update(my_newly_deployed)
    num_tries_done = 0
    elapsed = time.time() - start_time
    last_time = time.time()
    deploy_stats = list()  # contains tuples ( timestamp,
                           #                   num attempted deploys,
                           #                   len(kadeployer.deployed_hosts),
                           #                   len(my_newly_deployed),
                           #                   len(deployed_hosts),
                           #                   len(undeployed_hosts )
    deploy_stats.append((elapsed, None, None, len(my_newly_deployed),
                         len(deployed_hosts), len(undeployed_hosts)))
    while (not check_enough_func(deployed_hosts, undeployed_hosts)
           and num_tries_done < num_tries):
        num_tries_done += 1
        logger.debug(
            style.emph("try %i, deploying on:" % (num_tries_done, )) +
            " %s", undeployed_hosts)
        tmp_deployment = copy.copy(deployment)
        tmp_deployment.hosts = undeployed_hosts
        # Record the attempt size now: tmp_deployment.hosts is the very set
        # that is shrunk below, so its length afterwards would be the number
        # of hosts still undeployed, not the number attempted.
        num_attempted = len(undeployed_hosts)
        kadeployer = Kadeployer(
            tmp_deployment,
            frontend_connection_params=frontend_connection_params,
            stdout_handlers=stdout_handlers,
            stderr_handlers=stderr_handlers)
        kadeployer.timeout = deploy_timeout
        kadeployer.run()
        my_newly_deployed = []
        if check_deployed_command:
            my_newly_deployed = check_update_deployed(
                undeployed_hosts, check_deployed_command,
                node_connection_params, deployment.vlan)
            deployed_hosts.update(my_newly_deployed)
            undeployed_hosts.difference_update(my_newly_deployed)
        else:
            deployed_hosts.update(kadeployer.deployed_hosts)
            undeployed_hosts.difference_update(kadeployer.deployed_hosts)
        logger.debug(
            style.emph("kadeploy reported newly deployed hosts:") + " %s",
            kadeployer.deployed_hosts)
        logger.debug(
            style.emph("check reported newly deployed hosts:") + " %s",
            my_newly_deployed)
        logger.debug(
            style.emph("all deployed hosts:") + " %s", deployed_hosts)
        logger.debug(
            style.emph("still undeployed hosts:") + " %s", undeployed_hosts)
        elapsed = time.time() - last_time
        last_time = time.time()
        deploy_stats.append(
            (elapsed, num_attempted, len(kadeployer.deployed_hosts),
             len(my_newly_deployed), len(deployed_hosts),
             len(undeployed_hosts)))

    logger.detail(
        style.emph("deploy finished") + " in %i tries, %s", num_tries_done,
        format_seconds(time.time() - start_time))
    logger.detail(
        "deploy duration attempted deployed deployed total total"
    )
    logger.detail(
        " deploys as reported as reported already still"
    )
    logger.detail(
        " by kadeploy by check deployed undeployed"
    )
    logger.detail(
        "---------------------------------------------------------------------------"
    )
    for (deploy_index, deploy_stat) in enumerate(deploy_stats):
        logger.detail(
            "#%-5.5s %-8.8s %-9.9s %-11.11s %-11.11s %-8.8s %-10.10s",
            deploy_index, format_seconds(deploy_stat[0]), deploy_stat[1],
            deploy_stat[2], deploy_stat[3], deploy_stat[4], deploy_stat[5])
    logger.debug(style.emph("deployed hosts:") + " %s", deployed_hosts)
    logger.debug(style.emph("undeployed hosts:") + " %s", undeployed_hosts)

    return (deployed_hosts, undeployed_hosts)
def define_vms(vms_id, template=None, ip_mac=None, tap=None, state=None,
               host=None, n_cpu=None, cpusets=None, mem=None, hdd=None,
               backing_file=None, real_file=None):
    """Create a list of virtual machines, where VM parameter is a dict
    similar to
    {'id': None, 'host': None, 'ip': None, 'mac': None,
    'mem': 512, 'n_cpu': 1, 'cpuset': 'auto',
    'hdd': 10, 'backing_file': '/tmp/vm-base.img',
    'state': 'KO'}

    Can be generated from a template or using user defined parameters that
    can be a single element or a list of elements. When ``template`` is
    given, ``n_cpu``, ``cpusets``, ``mem``, ``hdd``, ``backing_file``,
    ``real_file``, ``state`` and ``host`` arguments are ignored: each value
    is read from the template attributes, falling back to ``default_vm``.

    :param vms_id: a list of string that will be used as vm id

    :param template: an XML element defining the template of the VM

    :param ip_mac: a list of tuple containing ip, mac correspondence, one
      tuple per VM (defaults to ``(None, None)`` for every VM)

    :param tap: a single value applied to every VM, or a list with one
      value per VM

    :param state: the state of the VM (string, or one per VM)

    :param host: the host of the VM (string, or one per VM)

    :param n_cpu: the number of virtual CPU of the VMs (int, or one per VM)

    :param cpusets: the cpuset of the VMs (int or string, or one per VM)

    :param mem: the memory of the VMs (int, or one per VM)

    :param hdd: the disk size of the VMs (int, or one per VM)

    :param backing_file: the backing file of the VMs (string, or one per VM)

    :param real_file: boolean to use a real file (or one per VM)

    :return: a list of dicts, one per entry of ``vms_id``
    """
    n_vm = len(vms_id)

    def _from_args(value, key, scalar_types):
        # Broadcast the default or a single scalar to every VM; any other
        # value is taken to be a per-VM sequence and used as is.
        if value is None:
            return [default_vm[key]] * n_vm
        if isinstance(value, scalar_types):
            return [value] * n_vm
        return value

    def _from_template(key, convert=None):
        # Read one attribute from the template (optionally converted),
        # falling back to the default, and apply it to every VM.
        if key not in template.attrib:
            return [default_vm[key]] * n_vm
        raw = template.get(key)
        return [raw if convert is None else convert(raw)] * n_vm

    if template is None:
        n_cpu = _from_args(n_cpu, 'n_cpu', int)
        # A cpuset is normally a string ('auto', '0-3', ...): it must be
        # broadcast like a scalar, not indexed character by character.
        cpusets = _from_args(cpusets, 'cpuset', (int, str))
        mem = _from_args(mem, 'mem', int)
        hdd = _from_args(hdd, 'hdd', int)
        backing_file = _from_args(backing_file, 'backing_file', str)
        real_file = _from_args(real_file, 'real_file', bool)
        state = _from_args(state, 'state', str)
        host = _from_args(host, 'host', str)
    else:
        n_cpu = _from_template('n_cpu', int)
        cpusets = _from_template('cpuset')
        mem = _from_template('mem', int)
        hdd = _from_template('hdd', int)
        backing_file = _from_template('backing_file')
        real_file = _from_template('real_file')
        state = _from_template('state')
        host = _from_template('host')

    ip_mac = [(None, None)] * n_vm if ip_mac is None else ip_mac
    tap = [tap] * n_vm if not isinstance(tap, list) else tap

    vms = [{'id': vms_id[i], 'mem': mem[i], 'n_cpu': n_cpu[i],
            'cpuset': cpusets[i], 'hdd': hdd[i], 'host': host[i],
            'backing_file': backing_file[i], 'real_file': real_file[i],
            'state': state[i], 'tap': tap[i],
            'ip': ip_mac[i][0], 'mac': ip_mac[i][1]}
           for i in range(n_vm)]

    logger.debug('VM parameters have been defined:\n%s',
                 ' '.join([style.emph(param['id']) for param in vms]))
    return vms
def distribute_vms(vms, hosts, distribution='round-robin'):
    """Distribute the virtual machines on the hosts.

    The ``host`` key of every VM dict is updated in place; nothing is
    returned.

    :param vms: a list of VMs dicts which host key will be updated

    :param hosts: a list of hosts

    :param distribution: a string defining the distribution type:

      - 'round-robin': move to the next host after every VM
      - 'concentrated': stay on the same host until it has no room left
      - 'random': skip a random number of hosts after every VM
      - 'n_by_hosts': put ``len(vms) / len(hosts)`` VMs on every host; if
        the division is not exact, ``vms`` is truncated in place

      Any other value only logs a debug message and leaves the VMs
      untouched.

    For the first three modes, a host is considered full when placing the
    VM would leave no RAM (``mem``) or no CPU (``n_cpu / 3``) available,
    according to ``get_CPU_RAM_FLOPS``. When every host is full, an error
    is logged and the program exits.
    """
    logger.debug('Initial virtual machines distribution \n%s',
                 "\n".join([vm['id'] + ": " + str(vm['host'])
                            for vm in vms]))

    if distribution in ['round-robin', 'concentrated', 'random']:
        # attr is indexed by host (and by 'TOTAL') and holds the 'RAM' and
        # 'CPU' amounts; it is updated as VMs are placed.
        attr = get_CPU_RAM_FLOPS(hosts)
        # Work on a copy: full hosts are removed from dist_hosts.
        dist_hosts = hosts[:]
        # NOTE(review): .next() is the Python 2 iterator method.
        iter_hosts = cycle(dist_hosts)
        host = iter_hosts.next()
        for vm in vms:
            remaining = attr[host].copy()
            # Drop hosts until one can hold this VM.
            while remaining['RAM'] - vm['mem'] <= 0 \
                    or remaining['CPU'] - vm['n_cpu'] / 3 <= 0:
                dist_hosts.remove(host)
                if len(dist_hosts) == 0:
                    # No host left: report the total requirement against
                    # the total capacity, then stop the program.
                    req_mem = sum([vm['mem'] for vm in vms])
                    req_cpu = sum([vm['n_cpu'] for vm in vms]) / 3
                    logger.error('Not enough ressources ! \n' + 'RAM'.rjust(20)
                                 + 'CPU'.rjust(10) + '\n' +
                                 'Needed'.ljust(15) + '%s Mb'.ljust(15) +
                                 '%s \n' + 'Available'.ljust(15) +
                                 '%s Mb'.ljust(15) + '%s \n' +
                                 'Maximum number of VM is %s',
                                 req_mem, req_cpu, attr['TOTAL']['RAM'],
                                 attr['TOTAL']['CPU'],
                                 style.emph(str(get_max_vms(hosts, vm['mem']))))
                    exit()

                # The cycle is rebuilt because dist_hosts has changed.
                iter_hosts = cycle(dist_hosts)
                host = iter_hosts.next()
                remaining = attr[host].copy()

            # Place the VM and record what is left on the chosen host.
            vm['host'] = host
            remaining['RAM'] -= vm['mem']
            remaining['CPU'] -= vm['n_cpu'] / 3
            attr[host] = remaining.copy()
            # 'concentrated' matches neither test below, so it keeps the
            # current host for the next VM.
            if distribution == 'round-robin':
                host = iter_hosts.next()
                remaining = attr[host].copy()
            if distribution == 'random':
                for i in range(randint(0, len(dist_hosts))):
                    host = iter_hosts.next()
                    remaining = attr[host].copy()

    elif distribution == 'n_by_hosts':
        n_by_host = int(len(vms) / len(hosts))
        i_vm = 0
        for host in hosts:
            for i in range(n_by_host):
                vms[i_vm]['host'] = host
                i_vm += 1
        if len(vms) % len(hosts) != 0:
            logger.warning('Reducing number of VMs to have %s by host',
                           style.emph(n_by_host))
            # Slice assignment so that the caller's list is shortened too.
            vms[:] = vms[0:n_by_host * len(hosts)]
    else:
        logger.debug('No valid distribution given')
    logger.debug('Final virtual machines distribution \n%s',
                 "\n".join([vm['id'] + ": " + str(vm['host'])
                            for vm in vms]))
def run_ansible(playbooks, inventory_path, extra_vars={}, tags=None):
    """Run every playbook of `playbooks`, one after the other.

    For each playbook a summary dict (return code, per-host stats and
    playbook path) is printed, then the hosts reporting failures and the
    unreachable hosts are logged as errors. Nothing is returned.

    :param playbooks: iterable of playbook paths

    :param inventory_path: passed as ``host_list`` to the Ansible inventory

    :param extra_vars: variables set on the variable manager when not empty

    :param tags: value of the ``tags`` option given to the executor
    """
    variable_manager = VariableManager()
    loader = DataLoader()
    inventory = Inventory(loader=loader,
                          variable_manager=variable_manager,
                          host_list=inventory_path)
    variable_manager.set_inventory(inventory)
    if extra_vars:
        variable_manager.extra_vars = extra_vars

    passwords = {}

    # Executor options as ordered (field, value) pairs; the namedtuple is
    # built from the fields and instantiated with the values.
    option_pairs = [
        ('listtags', False),
        ('listtasks', False),
        ('listhosts', False),
        ('syntax', False),
        ('connection', 'ssh'),
        ('module_path', None),
        ('forks', 100),
        ('private_key_file', None),
        ('ssh_common_args', None),
        ('ssh_extra_args', None),
        ('sftp_extra_args', None),
        ('scp_extra_args', None),
        ('become', False),
        ('become_method', None),
        ('become_user', None),
        ('remote_user', None),
        ('verbosity', None),
        ('check', False),
        ('tags', tags),
    ]
    Options = namedtuple('Options', [field for field, _ in option_pairs])
    options = Options(*[value for _, value in option_pairs])

    for path in playbooks:
        logger.info("Running playbook %s with vars:\n%s"
                    % (style.emph(path), extra_vars))

        executor = PlaybookExecutor(
            playbooks=[path],
            inventory=inventory,
            variable_manager=variable_manager,
            loader=loader,
            options=options,
            passwords=passwords
        )
        code = executor.run()

        stats = executor._tqm._stats
        hosts = stats.processed.keys()
        result = [{h: stats.summarize(h)} for h in hosts]
        results = {'code': code, 'result': result, 'playbook': path}
        print(results)

        # Second pass over the stats to sort out the problematic hosts.
        summaries = [(h, stats.summarize(h)) for h in hosts]
        failed_hosts = [h for h, summary in summaries
                        if summary['failures'] > 0]
        unreachable_hosts = [h for h, summary in summaries
                             if summary['unreachable'] > 0]

        if failed_hosts:
            logger.error("Failed hosts: %s" % failed_hosts)
        if unreachable_hosts:
            logger.error("Unreachable hosts: %s" % unreachable_hosts)
for c in filter(lambda x: x in get_g5k_clusters(), resources.keys()): if resources[c] > 1: wanted = {c: 1} break jobs_specs = get_jobs_specs(wanted, name=job_name) for sub, frontend in jobs_specs: sub.walltime = walltime sub.job_type = "deploy" job = oarsub(jobs_specs)[0] nodes = get_oar_job_nodes(job[0], job[1]) logger.info('Deploying host %s', nodes[0].address) deployed, undeployed = deploy(Deployment(nodes, env_name="jessie-x64-base")) execware_host = list(deployed)[0] logger.info('Installing required packages %s', style.emph(packages)) install_packages = SshProcess( 'apt-get update && apt-get install -y ' + packages, execware_host).run() logger.info('Copying files to host') put_files = Put(execware_host, [source_code], remote_location="/tmp").run() xml_file = """ <settings> <proxies> <proxy> <id>g5k-proxy</id> <active>true</active> <protocol>http</protocol> <host>proxy</host> <port>3128</port> </proxy>
#!/usr/bin/env python
# Example script: retrieve the hosts of some OAR jobs, build the topological
# graph of these hosts and show the links between two hosts picked at random.
from execo_g5k.topology import g5k_graph, treemap
from execo.log import logger, style
from execo_g5k.oar import get_oar_job_nodes
from execo_g5k.utils import hosts_list
from networkx.algorithms.shortest_paths.generic import shortest_path
from execo_g5k.api_utils import get_host_shortname
from random import randrange, uniform

# (job id, site) of the jobs whose hosts are used
jobs = [(1696863, 'grenoble'), (1502558, 'lille'), (74715, 'luxembourg')]

logger.info('Retrieving hosts used for jobs %s',
            ', '.join([style.host(site) + ':' + style.emph(job_id)
                       for job_id, site in jobs]))
hosts = [get_host_shortname(h) for job_id, site in jobs
         for h in get_oar_job_nodes(job_id, site)]
logger.info(hosts_list(hosts))

logger.info('Creating topological graph')
g = g5k_graph(elements=hosts)

# randrange() covers every valid index, including 0, and never returns
# len(hosts); int(uniform(1, len(hosts))) skipped the first host and was
# out of range when there was a single host.
i, j = randrange(len(hosts)), randrange(len(hosts))

path = shortest_path(g, hosts[i], hosts[j])
logger.info('Communication between %s and %s go through '
            'the following links: \n%s',
            style.host(hosts[i]),
            style.host(hosts[j]),
            ' -> '.join(path))
def deploy(deployment,
           check_deployed_command = True,
           node_connection_params = {'user': '******'},
           num_tries = 1,
           check_enough_func = None,
           frontend_connection_params = None,
           deploy_timeout = None,
           check_timeout = 30,
           stdout_handlers = None,
           stderr_handlers = None):
    """Deploy nodes, many times if needed, checking which of these nodes are already deployed with a user-supplied command. If no command given for checking if nodes deployed, rely on kadeploy to know which nodes are deployed.

    - loop `num_tries` times:

      - if ``check_deployed_command`` given, try to connect to these
        hosts using the supplied `node_connection_params` (or the
        default ones), and to execute ``check_deployed_command``. If
        connection succeeds and the command returns 0, the host is
        assumed to be deployed, else it is assumed to be undeployed.

      - optionally call user-supplied ``check_enough_func``, passing
        to it the list of deployed and undeployed hosts, to let user
        code decide if enough nodes deployed. Otherwise, try as long
        as there are undeployed nodes.

      - deploy the undeployed nodes

    returns a tuple with the set of deployed hosts and the set of
    undeployed hosts.

    When checking correctly deployed nodes with
    ``check_deployed_command``, and if the deployment is using the
    kavlan option, this function will try to contact the nodes using
    the appropriate DNS hostnames in the new vlan.

    :param deployment: instance of `execo.kadeploy.Deployment` class
      describing the intended kadeployment.

    :param check_deployed_command: command to perform remotely to
      check node deployment. May be a String, True, False or None. If
      String: the actual command to be used (This command should
      return 0 if the node is correctly deployed, or another value
      otherwise). If True, the default command value will be used
      (from `execo_g5k.config.g5k_configuration`). If None or False,
      no check is made and deployed/undeployed status will be taken
      from kadeploy's output.

    :param node_connection_params: a dict similar to
      `execo.config.default_connection_params` whose values will
      override those in `execo.config.default_connection_params` when
      connecting to check node deployment with
      ``check_deployed_command`` (see below).

    :param num_tries: number of deploy tries

    :param check_enough_func: a function taking as parameter a list of
      deployed hosts and a list of undeployed hosts, which will be
      called at each deployment iteration end, and that should return
      a boolean indicating if there is already enough nodes (in this
      case, no further deployment will be attempted).

    :param frontend_connection_params: connection params for
      connecting to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param deploy_timeout: timeout for deployment. Default is None,
      which means no timeout.

    :param check_timeout: timeout for node deployment checks. Default
      is 30 seconds.

    :param stdout_handlers: iterable of `ProcessOutputHandlers`
      which will be passed to the actual deploy processes.

    :param stderr_handlers: iterable of `ProcessOutputHandlers`
      which will be passed to the actual deploy processes.
    """

    if check_enough_func is None:
        check_enough_func = lambda deployed, undeployed: len(undeployed) == 0

    # Equality (not identity) on purpose: a command string never compares
    # equal to True, so only True selects the configured default command.
    if check_deployed_command == True:
        check_deployed_command = g5k_configuration.get('check_deployed_command')

    def check_update_deployed(undeployed_hosts, check_deployed_command, node_connection_params, vlan): #IGNORE:W0613
        # Run the check command on every given host and return the list of
        # hosts on which it succeeded.
        logger.debug(style.emph("check which hosts are already deployed among:") + " %s", undeployed_hosts)
        # Maps the name used to contact a host (its kavlan name when a vlan
        # is used) to the host as it is known in undeployed_hosts.
        deployment_hostnames_mapping = dict()
        if vlan:
            for host in undeployed_hosts:
                deployment_hostnames_mapping[get_kavlan_host_name(host, vlan)] = host
        else:
            for host in undeployed_hosts:
                deployment_hostnames_mapping[host] = host
        deployed_check = get_remote(check_deployed_command,
                                    list(deployment_hostnames_mapping),
                                    connection_params = node_connection_params)
        # A failing or timing out check only means "not deployed": keep it
        # out of the logs.
        for p in deployed_check.processes:
            p.nolog_exit_code = True
            p.nolog_timeout = True
            p.nolog_error = True
            p.timeout = check_timeout
        deployed_check.run()
        newly_deployed = list()
        for process in deployed_check.processes:
            logger.debug(style.emph("check on %s:" % (process.host,))
                         + " %s\n" % (process,)
                         + style.emph("stdout:") + "\n%s\n" % (process.stdout)
                         + style.emph("stderr:") + "\n%s\n" % (process.stderr))
            if (process.ok):
                newly_deployed.append(deployment_hostnames_mapping[process.host.address])
                logger.debug("OK %s", deployment_hostnames_mapping[process.host.address])
            else:
                logger.debug("KO %s", deployment_hostnames_mapping[process.host.address])
        return newly_deployed

    start_time = time.time()
    deployed_hosts = set()
    undeployed_hosts = set([ Host(host).address for host in deployment.hosts ])
    my_newly_deployed = []
    # Initial check, before any deployment: some hosts may already be ready.
    if check_deployed_command:
        my_newly_deployed = check_update_deployed(undeployed_hosts, check_deployed_command, node_connection_params, deployment.vlan)
        deployed_hosts.update(my_newly_deployed)
        undeployed_hosts.difference_update(my_newly_deployed)
    num_tries_done = 0
    elapsed = time.time() - start_time
    last_time = time.time()
    deploy_stats = list()   # contains tuples ( elapsed time,
                            #                   num attempted deploys,
                            #                   len(kadeployer.deployed_hosts),
                            #                   len(my_newly_deployed),
                            #                   len(deployed_hosts),
                            #                   len(undeployed_hosts) )
    deploy_stats.append((elapsed, None, None, len(my_newly_deployed), len(deployed_hosts), len(undeployed_hosts)))
    while (not check_enough_func(deployed_hosts, undeployed_hosts)
           and num_tries_done < num_tries):
        num_tries_done += 1
        logger.debug(style.emph("try %i, deploying on:" % (num_tries_done,)) + " %s", undeployed_hosts)
        # Snapshot taken now: tmp_deployment.hosts below is the very same
        # set as undeployed_hosts, which shrinks once the results are known.
        num_attempted = len(undeployed_hosts)
        tmp_deployment = copy.copy(deployment)
        tmp_deployment.hosts = undeployed_hosts
        kadeployer = Kadeployer(tmp_deployment,
                                frontend_connection_params = frontend_connection_params,
                                stdout_handlers = stdout_handlers,
                                stderr_handlers = stderr_handlers)
        kadeployer.timeout = deploy_timeout
        kadeployer.run()
        my_newly_deployed = []
        if check_deployed_command:
            my_newly_deployed = check_update_deployed(undeployed_hosts, check_deployed_command, node_connection_params, deployment.vlan)
            deployed_hosts.update(my_newly_deployed)
            undeployed_hosts.difference_update(my_newly_deployed)
        else:
            deployed_hosts.update(kadeployer.deployed_hosts)
            undeployed_hosts.difference_update(kadeployer.deployed_hosts)
        logger.debug(style.emph("kadeploy reported newly deployed hosts:") + " %s", kadeployer.deployed_hosts)
        logger.debug(style.emph("check reported newly deployed hosts:") + " %s", my_newly_deployed)
        logger.debug(style.emph("all deployed hosts:") + " %s", deployed_hosts)
        logger.debug(style.emph("still undeployed hosts:") + " %s", undeployed_hosts)
        elapsed = time.time() - last_time
        last_time = time.time()
        deploy_stats.append((elapsed,
                             num_attempted,
                             len(kadeployer.deployed_hosts),
                             len(my_newly_deployed),
                             len(deployed_hosts),
                             len(undeployed_hosts)))

    logger.detail(style.emph("deploy finished") + " in %i tries, %s", num_tries_done, format_seconds(time.time() - start_time))
    # The header uses the same column widths as the rows below, so that the
    # titles line up with the values.
    header_format = "%-6.6s %-8.8s %-9.9s %-11.11s %-11.11s %-8.8s %-10.10s"
    header_rows = (
        ("deploy", "duration", "attempted", "deployed", "deployed", "total", "total"),
        ("", "", "deploys", "as reported", "as reported", "already", "still"),
        ("", "", "", "by kadeploy", "by check", "deployed", "undeployed"),
    )
    for header_row in header_rows:
        logger.detail(header_format, *header_row)
    logger.detail("---------------------------------------------------------------------------")
    for (deploy_index, deploy_stat) in enumerate(deploy_stats):
        logger.detail("#%-5.5s %-8.8s %-9.9s %-11.11s %-11.11s %-8.8s %-10.10s",
            deploy_index,
            format_seconds(deploy_stat[0]),
            deploy_stat[1],
            deploy_stat[2],
            deploy_stat[3],
            deploy_stat[4],
            deploy_stat[5])
    logger.debug(style.emph("deployed hosts:") + " %s", deployed_hosts)
    logger.debug(style.emph("undeployed hosts:") + " %s", undeployed_hosts)

    return (deployed_hosts, undeployed_hosts)
def draw_slots(slots, colors=None, show=False, save=True, outfile=None):
    """Draw the number of nodes available for the clusters (requires
    Matplotlib >= 1.2.0)

    Each slot is read as ``slot[0]`` (start), ``slot[1]`` (end) and
    ``slot[2]`` (a dict giving the number of nodes per element). The
    clusters are drawn as a stacked plot, and the highest ``'grid5000'``
    value found in the slots is drawn as a horizontal line.

    :param slots: a list of slot, as returned by ``compute_slots``
      (must not be empty)

    :param colors: a dict to define element coloring ``{'element': (255., 122., 122.)}``

    :param show: display the slots versus time

    :param save: save the plot to outfile

    :param outfile: specify the output file (defaults to ``'slots_'``
      followed by the formatted start date)"""

    startstamp = slots[0][0]
    endstamp = slots[-1][1]
    if outfile is None:
        outfile = 'slots_' + format_date(startstamp)

    logger.info('Saving slots diagram to %s', style.emph(outfile))

    if colors is None:
        colors = _set_colors()

    xfmt = MD.DateFormatter('%d %b, %H:%M ')

    # Fewer ticks as the plotted period gets longer.
    duration = endstamp - startstamp
    if duration <= timedelta_to_seconds(timedelta(days=7)):
        x_major_locator = MD.HourLocator(byhour=[9, 19])
    elif duration <= timedelta_to_seconds(timedelta(days=17)):
        x_major_locator = MD.HourLocator(byhour=[9])
    else:
        x_major_locator = MD.AutoDateLocator()

    # The list of clusters does not depend on the slot: fetch it once
    # instead of once per element of every slot.
    clusters = set(get_g5k_clusters(queues=None))

    max_nodes = {}
    total_nodes = 0
    slot_limits = []
    for i_slot, slot in enumerate(slots):
        # Every slot contributes its start and the start of the next slot,
        # so each value below is recorded twice (once per limit).
        slot_limits.append(slot[0])
        if i_slot + 1 < len(slots):
            slot_limits.append(slots[i_slot + 1][0])

        for element, n_nodes in slot[2].items():
            if element in clusters:
                max_nodes.setdefault(element, []).extend((n_nodes, n_nodes))
            if element == 'grid5000':
                if n_nodes > total_nodes:
                    total_nodes = n_nodes

    slot_limits.append(endstamp)
    slot_limits.sort()

    dates = [unixts_to_datetime(ts) for ts in slot_limits]
    datenums = MD.date2num(dates)

    PLT.figure(figsize=(15, 10), dpi=80)

    ax = PLT.subplot(111)
    ax.xaxis_date()
    # Shift the axes to the left to leave room for the legend.
    box = ax.get_position()
    ax.set_position([box.x0 - 0.07, box.y0, box.width, box.height])
    ax.set_xlim(unixts_to_datetime(startstamp), unixts_to_datetime(endstamp))
    ax.set_xlabel('Time')
    ax.set_ylabel('Nodes available')
    ax.set_ylim(0, total_nodes * 1.1)
    ax.axhline(y=total_nodes, color='#000000', linestyle='-', linewidth=2,
               label='ABSOLUTE MAXIMUM')
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.xaxis.set_major_formatter(xfmt)
    ax.xaxis.set_major_locator(x_major_locator)
    PLT.xticks(rotation=15)

    max_nodes_list = []
    p_legend = []
    p_rects = []
    p_colors = []
    for key, value in sorted(max_nodes.items()):
        if key != 'grid5000':
            max_nodes_list.append(value)
            p_legend.append(key)
            # Proxy rectangle used only as a legend handle.
            p_rects.append(PLT.Rectangle((0, 0), 1, 1, fc=colors[key]))
            p_colors.append(colors[key])

    PLT.stackplot(datenums, max_nodes_list, colors=p_colors)
    PLT.legend(p_rects, p_legend, loc='center right', ncol=1, shadow=True,
               bbox_to_anchor=(1.2, 0.5))

    if show:
        PLT.show()
    if save:
        logger.debug('Saving file %s ...', outfile)
        PLT.savefig(outfile, dpi=300)