def start_all_vm(self):
    """Boot ``args.nb_vm`` KVM guests on every host of ``self.vm_hosts``.

    The first ``nb_vm * len(vm_hosts)`` (IP, MAC) pairs of
    ``self.subnet_ip_mac`` are recorded in ``self.vm_ips`` / ``self.vm_macs``
    and the MAC addresses are handed out to the hosts in consecutive groups
    of ``nb_vm``.

    The call does not wait for the guests: the remote action is only
    started, so the caller must wait for the VMs to be up before using them.

    Returns:
        The started remote action that runs the VMs.
    """
    assert (len(self.vm_hosts) > 0)

    vms_per_host = self.args.nb_vm
    ips, mac_addresses = zip(*self.subnet_ip_mac)
    self.vm_macs = mac_addresses[:self.args.nb_vm * len(self.vm_hosts)]
    self.vm_ips = ips[:self.args.nb_vm * len(self.vm_hosts)]
    logger.debug("VMs IP: {}".format(' '.join(self.vm_ips)))

    # One slice of MAC addresses per physical host.
    # NOTE: this local is never read by Python code below; it is only named
    # by the "{{...}}" placeholder that survives .format() in the script --
    # presumably expanded per host by execo, so keep the name as is.
    macs_per_host = []
    for host_index, _ in enumerate(self.vm_hosts):
        first = host_index * vms_per_host
        macs_per_host.append(self.vm_macs[first:(host_index + 1) * vms_per_host])

    # The quadruple braces collapse to double braces once .format() has run.
    script = """\
for mac in {{{{[' '.join(macs) for macs in macs_per_host]}}}}
do
  iface=$(tunctl -b)
  brctl addif br0 "$iface"
  ip link set "$iface" up
  kvm -m {memory} -smp cores={cores},threads=1,sockets=1 -nographic -localtime -enable-kvm -drive file="{image}",if=virtio,media=disk -snapshot -net nic,model=virtio,macaddr="$mac" -net tap,ifname="$iface",script=no &
done
wait
""".format(memory=self.args.memory, cores=1, image=self.args.vm_image)

    return execo.Remote(script,
                        self.vm_hosts,
                        connection_params=self.server_conn_params,
                        name="Run VM on all hosts").start()
def default(self, line):
    """Replace the currently running remote command with ``line``.

    The action held in the module-level ``workers`` is killed, then, after a
    one second pause, ``line`` is started on every entry of the module-level
    ``cores`` and the new action is stored back into ``workers``.  The
    module-level ``interrupted`` flag is reset to False.

    Presumably this is the fallback hook of a ``cmd.Cmd``-style shell, i.e.
    it receives any input line that is not a known command -- confirm
    against the enclosing class.

    Args:
        line: command line to execute on the workers.
    """
    global interrupted, workers, cores
    interrupted = False

    # Single-argument print(...) prints the same text on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print('interrupting previous command')
    workers.kill()
    execo.sleep(1)

    print('sending command: ' + line)
    workers = execo.Remote(line, cores).start()
def start_dns_server(self):
    """Configure and start the selected DNS resolver on ``self.server``.

    Steps, in order:
      1. build the resolver parameters from ``self.args``;
      2. run the resolver-specific configuration command on the server;
      3. in ``tls`` mode, generate a self-signed certificate and key under
         ``/tmp`` with openssl;
      4. start the resolver process without waiting for it.

    Returns:
        The started remote action running the resolver process.

    Raises:
        ValueError: if ``self.args.resolver`` is not a supported resolver.
            The original code only failed later, with an UnboundLocalError.
    """
    # Command line used to run each supported resolver; placeholders are
    # filled from resolver_params below.
    run_commands = {
        'unbound': "pkill unbound; sleep 3; /root/unbound/unbound -d -v -c /tmp/unbound.conf",
        'bind9': "/root/bind9/bin/named/named -c /tmp/named.conf -g -n {nb_threads} -U {nb_threads} -S {maxsockets}",
        'knot-resolver': "LD_LIBRARY_PATH=/usr/local/lib /usr/local/sbin/kresd -f {nb_threads} -c /tmp/knot-resolver.conf",
    }
    resolver = self.args.resolver
    # Fail before touching the server rather than half-way through the setup.
    if resolver not in run_commands:
        raise ValueError("Unsupported resolver {!r} (expected one of: {})".format(
            resolver, ', '.join(sorted(run_commands))))

    resolver_params = {
        "resolver": self.args.resolver,
        "buffer_size": 4096,
        "nb_threads": self.args.server_threads,
        "max_tcp_clients_per_thread": self.args.resolver_slots_per_thread,
        # Only used by bind9: according to the documentation, reserved-sockets can be at most maxsockets - 128
        "maxsockets": self.args.resolver_slots_per_thread + 128,
        "mode": self.args.mode,
        "port": 853 if self.args.mode == 'tls' else 53,
    }
    max_clients = self.args.server_threads * self.args.resolver_slots_per_thread
    logger.debug(
        "{resolver} in {mode} mode using {nb_threads} threads, {max_tcp_clients_per_thread} max TCP/TLS clients per thread, {buffer_size}b buffer size"
        .format(**resolver_params))
    logger.debug("Max TCP/TLS clients: {}".format(max_clients))

    # Build the resolver-specific configuration command and run it to completion.
    if resolver == 'unbound':
        resolver_config = self.configure_unbound(resolver_params)
    elif resolver == 'bind9':
        resolver_config = self.configure_bind9(resolver_params)
    else:  # 'knot-resolver', guaranteed by the check above
        resolver_config = self.configure_knot(resolver_params)
    execo.Remote(resolver_config, [self.server],
                 connection_params=self.server_conn_params,
                 name="Configure resolver").run()

    # Generate TLS key and self-signed certificate
    if self.args.mode == 'tls':
        generate_tls = "openssl req -x509 -subj '/CN=localhost' -nodes -newkey {} -keyout /tmp/resolver.key -out /tmp/resolver.cert -days 365".format(
            self.args.tls_keytype)
        execo.Remote(generate_tls, [self.server],
                     connection_params=self.server_conn_params,
                     name="Generate TLS key and cert").run()

    # Run resolver (started only; the caller owns the returned task)
    resolver_cmd = run_commands[resolver]
    task = execo.Remote(resolver_cmd.format(**resolver_params),
                        [self.server],
                        connection_params=self.server_conn_params,
                        name="Resolver process").start()
    return task
def start_client_vm(self):
    """Launch the query client on every VM and return the started task.

    ``self.args.mode`` selects the binary: ``tcpclient`` for 'tcp',
    ``tcpclient --tls`` for 'tls' and ``udpclient`` for anything else.
    The command line depends on which of ``self.simple_queryrate``,
    ``self.stdin_queryrate`` or ``self.stdin_queryratelinear`` is set.  In
    the two stdin modes the rate schedule is then written to the standard
    input of all processes: first the number of entries, then one
    "<duration_ms> <rate>" line per entry.

    Returns:
        The started remote action running the clients.
    """
    # Anything that is neither TCP nor TLS falls back to the UDP client.
    client = {'tcp': 'tcpclient', 'tls': 'tcpclient --tls'}.get(self.args.mode, 'udpclient')

    # One seed per VM, derived deterministically from the global seed.
    # NOTE: this local is never read by Python code below; it is only named
    # by the "{{random_seed}}" placeholder left in the command line after
    # .format() -- presumably expanded per host by execo, so keep the name.
    random_seed = [self.args.random_seed + index for index, _ in enumerate(self.vm)]

    # Connection parameters without PTY allocation.
    conn_params = deepcopy(execo.default_connection_params)
    utils.disable_pty(conn_params)

    if self.simple_queryrate:
        script = ("/root/tcpscaler/{client} -s {{{{random_seed}}}} -t {duration} -R -p {port} "
                  "-r {rate} -c {connections} -n {connection_rate} {server}").format(
                      client=client,
                      duration=self.args.client_duration,
                      port=self.server_port,
                      rate=self.args.client_query_rate,
                      connections=self.args.client_connections,
                      connection_rate=self.args.client_connection_rate,
                      server=self.server.address)
    elif self.stdin_queryrate:
        script = ("/root/tcpscaler/{client} -s {{{{random_seed}}}} --stdin -R -p {port} "
                  "-c {connections} -n {connection_rate} {server}").format(
                      client=client,
                      port=self.server_port,
                      connections=self.args.client_connections,
                      connection_rate=self.args.client_connection_rate,
                      server=self.server.address)
    elif self.stdin_queryratelinear:
        script = ("/root/tcpscaler/{client} -s {{{{random_seed}}}} --stdin-rateslope -R -p {port} "
                  "-r {rate} -c {connections} -n {connection_rate} {server}").format(
                      client=client,
                      port=self.server_port,
                      rate=self.args.client_query_rate,
                      connections=self.args.client_connections,
                      connection_rate=self.args.client_connection_rate,
                      server=self.server.address)

    task = execo.Remote(script, self.vm, name=client,
                        connection_params=conn_params).start()

    def feed_schedule(steps):
        # Entry count first, then one "<duration_ms> <rate>" line per entry.
        task.write("{}\n".format(len(steps)).encode())
        for step in steps:
            task.write("{} {}\n".format(step.duration_ms, step.rate).encode())

    if self.stdin_queryrate:
        # Desired query rate sequence
        feed_schedule(self.args.client_query_rate)
    if self.stdin_queryratelinear:
        # Desired query rate increase/decrease sequence
        feed_schedule(self.args.client_query_rate_linear)
    return task
def install_devstack():
    """Run devstack's ``stack.sh`` on the controller, then on the compute hosts.

    ``hosts[0]`` (module-level list) is treated as the controller and is
    installed first; ``stack.sh`` is then run on all remaining hosts, whose
    standard output is printed, followed by the host list itself.
    """
    # Controller first; its result object was never used, so it is not kept.
    EX.SshProcess('cd ~/devstack;./stack.sh', hosts[0],
                  connexion_params={'user': '******'}, pty=True).run()

    install_stack_compute = EX.Remote('cd ~/devstack;./stack.sh', hosts[1:],
                                      connexion_params={'user': '******'},
                                      pty=True).run()

    # Single-argument print(...) prints the same text on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    for p in install_stack_compute.processes():
        print(p.stdout())
    print(hosts)
def prepare_vmhosts(self):
    """Start, on every VM host, a script that disables connection tracking.

    The script adds NOTRACK rules for TCP and UDP to the PREROUTING and
    OUTPUT chains of the iptables ``raw`` table.

    Returns:
        The started (not awaited) remote action.
    """
    notrack_script = """\
# Avoid conntrack on all machines
iptables -t raw -A PREROUTING -p tcp -j NOTRACK
iptables -t raw -A PREROUTING -p udp -j NOTRACK
iptables -t raw -A OUTPUT -p tcp -j NOTRACK
iptables -t raw -A OUTPUT -p udp -j NOTRACK
"""
    return execo.Remote(notrack_script,
                        self.vm_hosts,
                        connection_params=self.server_conn_params).start()
def _get_logs(self, bench_file):
    """Fetch the Rally HTML report and JSON metrics for one benchmark.

    Both outputs are stored under ``<result_dir>/rally`` and named after
    ``bench_file`` with its extension replaced by ``.html`` / ``.json``.
    A failure of either rally command is logged (with its stderr when
    there is any) and does not stop the other step.

    Args:
        bench_file: name of the benchmark description file.
    """
    logger.info("Getting the results into " + self.result_dir)
    stem = os.path.splitext(bench_file)[0]
    rally_dir = os.path.join(self.result_dir, 'rally')

    # --- HTML report: generated on the host, then downloaded ---
    html_file = stem + '.html'
    dest = os.path.join(rally_dir, html_file)
    report = EX.Remote("rally task report --out=" + html_file, [self.host],
                       {'user': '******'})
    report.run()
    report_process = report.processes[0]
    if report_process.exit_code == 0:
        EX.Get(self.host, [html_file], local_location=dest,
               connection_params={'user': '******'}).run()
        logger.info("Wrote " + dest)
    else:
        logger.error("Could not generate the HTML result file")
        if report_process.stderr:
            logger.error(report_process.stderr)

    # --- Metrics: the JSON is the standard output of the rally command ---
    metrics = EX.Remote("rally task results", [self.host], {'user': '******'})
    metrics_file = os.path.join(rally_dir, stem + '.json')
    metrics.run()
    metrics_process = metrics.processes[0]
    if metrics_process.exit_code == 0:
        with open(metrics_file, 'w') as f:
            f.write(metrics_process.stdout)
        logger.info("Wrote " + metrics_file)
    else:
        logger.error("Could not get the metrics back")
        if metrics_process.stderr:
            logger.error(metrics_process.stderr)
def start_cpunetlog(self, hosts, conn_params=None):
    """Start CPUnetLOG on ``hosts`` and return the started task.

    Args:
        hosts: hosts on which to run the logger.
        conn_params: connection parameters to use; when None, execo's
            default connection parameters are used.  In both cases a deep
            copy is modified, so the object passed in is left untouched.

    Returns:
        The started (not awaited) remote action.
    """
    script = """/root/CPUnetLOG/__init__.py --stdout -i {}""".format(
        self.args.cpunetlog_interval)
    # Identity test: "== None" would go through a custom __eq__ if the
    # parameter object defined one.
    if conn_params is None:
        conn_params = execo.default_connection_params
    conn_params = deepcopy(conn_params)
    utils.disable_pty(conn_params)
    task = execo.Remote(script, hosts, connection_params=conn_params,
                        name="CPUnetLOG").start()
    return task
def _exec_command_on_nodes(self, nodes, cmd, label, conn_params=None):
    """Run ``cmd`` on one node or on a list of nodes, exiting on failure.

    Args:
        nodes: a list of nodes; anything that is not a list is treated as a
            single node.
        cmd: command to run remotely.
        label: message logged at INFO level before the command is run.
        conn_params: connection parameters; ``DEFAULT_CONN_PARAMS`` if None.

    The interpreter is terminated with exit status 31 when the remote
    action does not finish successfully.
    """
    targets = nodes if isinstance(nodes, list) else [nodes]
    params = DEFAULT_CONN_PARAMS if conn_params is None else conn_params

    logging.info(label)
    action = EX.Remote(cmd, targets, params)
    action.run()
    if action.finished_ok:
        return
    sys.exit(31)
def _exec_command_on_nodes(self, nodes, cmd, label, conn_params=None):
    """Run ``cmd`` on one node or on a set of nodes, raising on failure.

    Args:
        nodes: a collection of nodes, or a single node given as a string.
        cmd: command to run remotely.
        label: message logged at INFO level before the command is run.
        conn_params: connection parameters; ``DEFAULT_CONN_PARAMS`` if None.

    Raises:
        Exception: when the remote action does not finish successfully.
    """
    # A bare string is a single node, not an iterable of nodes.
    targets = [nodes] if isinstance(nodes, basestring) else nodes
    params = DEFAULT_CONN_PARAMS if conn_params is None else conn_params

    logging.info(label)
    action = EX.Remote(cmd, targets, params)
    action.run()
    if action.finished_ok:
        return
    raise Exception('An error occcured during remote execution')
def prepare_vm(self):
    """Start the provisioning script on every VM in ``self.vm``.

    The shell script installs build dependencies, updates and rebuilds
    /root/tcpscaler, tries to add a direct route to the server, raises the
    outgoing-connection limits, disables connection tracking and installs
    CPUnetLOG.  Failures are accumulated in the shell variable ``rc``, which
    becomes the script's exit status; only the route addition is allowed to
    fail without affecting it.

    Returns:
        The started (not awaited) remote action.
    """
    # {server_name} is the only placeholder substituted by .format() below.
    script = """\
    rc=0
    # Install dependencies
    apt-get update || rc=$?
    apt-get --yes install libssl-dev || rc=$?
    # Update git repository for tcpclient.
    cd /root/tcpscaler || rc=$?
    git pull || rc=$?
    make || rc=$?
    # Add direct route to server (but don't fail if it fails).
    # We use the old-style "route" because it can resolve DNS names, unlike "ip route"
    route add {server_name} eth0 || echo "Failed to add route to {server_name}" >&2
    # Increase max number of outgoing connections
    sysctl net.ipv4.ip_local_port_range="1024 65535" || rc=$?
    sysctl net.ipv4.tcp_tw_reuse=1 || rc=$?
    # No connection tracking
    iptables -t raw -A PREROUTING -p tcp -j NOTRACK || rc=$?
    iptables -t raw -A PREROUTING -p udp -j NOTRACK || rc=$?
    iptables -t raw -A OUTPUT -p tcp -j NOTRACK || rc=$?
    iptables -t raw -A OUTPUT -p udp -j NOTRACK || rc=$?
    # Install CPUNetLog
    cd /root/
    apt-get --yes install python3 python3-psutil python3-netifaces || rc=$?
    [ -d "CPUnetLOG" ] || git clone https://github.com/jonglezb/CPUnetLOG || rc=$?
    cd CPUnetLOG || rc=$?
    git pull || rc=$?
    exit $rc
    """.format(server_name=self.server.address)
    # Work on a private copy of execo's defaults with PTY allocation disabled.
    conn_params = deepcopy(execo.default_connection_params)
    utils.disable_pty(conn_params)
    task = execo.Remote(script, self.vm, name="Setup VM", connection_params=conn_params).start()
    return task
def exec_command_on_nodes(nodes, cmd, label, conn_params=None):
    """Execute a command on a node (id or hostname) or on a set of nodes.

    The outcome of the remote action is not checked here and nothing is
    returned; the call only blocks until the action has run.

    :param nodes:       list of targets of the command cmd. Each must be an
                        execo.Host. A single string is wrapped in a list.
    :param cmd:         string representing the command to run on the
                        remote nodes.
    :param label:       string for debugging purpose.
    :param conn_params: connection parameters passed to the execo.Remote
                        function; DEFAULT_CONN_PARAMS when None.
    """
    if isinstance(nodes, basestring):
        nodes = [nodes]

    if conn_params is None:
        conn_params = DEFAULT_CONN_PARAMS

    # Let logging do the %-formatting: same message, but nothing is formatted
    # when DEBUG records are filtered out.
    logging.debug("Running %s on %s ", label, nodes)
    remote = ex.Remote(cmd, nodes, conn_params)
    remote.run()
def exec_command_on_nodes(nodes, cmd, label, conn_params=None):
    """Run ``cmd`` on a node (id or hostname) or on a set of nodes.

    :param nodes:       targets of the command; each must be an execo.Host.
                        A single string is treated as one target.
    :param cmd:         command line to run on the remote nodes.
    :param label:       short description, used only in the debug log.
    :param conn_params: connection parameters handed to execo.Remote;
                        DEFAULT_CONN_PARAMS when None.
    :raises Exception:  if the remote action did not finish successfully.
    """
    targets = [nodes] if isinstance(nodes, BASESTRING) else nodes
    params = DEFAULT_CONN_PARAMS if conn_params is None else conn_params

    logger.debug("Running %s on %s ", label, targets)
    action = ex.Remote(cmd, targets, params)
    action.run()
    if action.finished_ok:
        return
    raise Exception('An error occcured during remote execution')
def run(self):
    """Sweep the client counts and run the client/server benchmark on Grid'5000.

    For each ``n_clients`` value taken from the sweeper, an OAR job is
    submitted on the site of the ``griffon`` cluster with enough nodes for
    that many client processes plus one server node.  The first reserved
    node runs the Active Data server; client processes are started through
    TakTuk.  On success the result files are fetched into
    ``self.result_dir`` and the combination is marked done; if any client
    fails, every client's output is printed and the combination is skipped.
    The job is deleted once no combination with the same ``n_clients``
    remains.
    """
    # Defining experiment parameters
    self.parameters = {
        'n_clients': [400, 450, 500, 550, 600],
        'n_transitions': [10000]
    }
    cluster = 'griffon'
    sweeps = sweep(self.parameters)
    sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
    server_out_path = os.path.join(self.result_dir, "server.out")

    self._updateStat(sweeper.stats())

    # Loop on the number of nodes
    while True:
        # Taking the next parameter combinations
        comb = sweeper.get_next()
        if not comb:
            break

        # Performing the submission on G5K
        site = get_cluster_site(cluster)
        self._log("Output will go to " + self.result_dir)

        # One client per hardware thread, plus one node for the server.
        n_nodes = int(
            math.ceil(
                float(comb['n_clients']) / EX5.get_host_attributes(
                    cluster + '-1')['architecture']['smt_size'])) + 1
        self._log("Reserving {0} nodes on {1}".format(n_nodes, site))

        resources = "{cluster=\\'" + cluster + "\\'}/nodes=" + str(n_nodes)
        submission = EX5.OarSubmission(resources=resources,
                                       job_type='allow_classic_ssh',
                                       walltime='00:10:00')

        job = EX5.oarsub([(submission, site)])
        self.__class__._job = job

        # Sometimes oarsub fails silently
        if job[0][0] is None:
            print("\nError: no job was created")
            sys.exit(1)

        # Wait for the job to start
        self._log(
            "Waiting for job {0} to start...\n".format(BOLD_MAGENTA +
                                                       str(job[0][0]) +
                                                       NORMAL))
        EX5.wait_oar_job_start(job[0][0],
                               job[0][1],
                               prediction_callback=prediction)
        nodes = EX5.get_oar_job_nodes(job[0][0], job[0][1])

        # Deploying nodes
        #deployment = EX5.Deployment(hosts = nodes, env_file='path_to_env_file')
        #run_deploy = EX5.deploy(deployment)
        #nodes_deployed = run_deploy.hosts[0]

        # Copying active_data program on all deployed hosts
        EX.Put([nodes[0]],
               '../dist/active-data-lib-0.1.2.jar',
               connexion_params={
                   'user': '******'
               }).run()
        EX.Put([nodes[0]],
               '../server.policy',
               connexion_params={
                   'user': '******'
               }).run()

        # Loop on the number of requests per client process
        while True:
            # Split the nodes
            clients = nodes[1:]
            server = nodes[0]

            self._log(
                "Running experiment with {0} nodes and {1} transitions per client"
                .format(len(clients), comb['n_transitions']))

            # Launching Server on one node
            out_handler = FileOutputHandler(server_out_path)
            launch_server = EX.Remote(
                'java -jar active-data-lib-0.1.2.jar', [server],
                stdout_handler=out_handler,
                stderr_handler=out_handler).start()
            self._log("Server started on " + server.address)
            time.sleep(2)

            # Launching clients
            n_cores = EX5.get_host_attributes(
                clients[0])['architecture']['smt_size']
            # NOTE(review): this repeats every reserved node, the server node
            # included, not just `clients` -- confirm that this is intended.
            cores = nodes * n_cores
            cores = cores[
                0:comb['n_clients']]  # Cut out the additional cores

            client_connection_params = {
                'taktuk_gateway': 'lyon.grid5000.fr',
                'host_rewrite_func': None
            }

            self._log("Launching {0} clients...".format(len(cores)))

            # "{{range(len(cores))}}" is passed to .format() as an argument,
            # so it reaches the command line unchanged; it is presumably
            # expanded per process by execo, which is why `cores` must stay a
            # local of this function.
            client_cmd = "/usr/bin/env java -cp /home/ansimonet/active-data-lib-0.1.2.jar org.inria.activedata.examples.perf.TransitionsPerSecond " + \
                "{0} {1} {2} {3} {4}".format(server.address, 1200, "{{range(len(cores))}}", len(cores), comb['n_transitions'])
            client_out_handler = FileOutputHandler(
                os.path.join(self.result_dir, "clients.out"))
            client_request = EX.TaktukRemote(client_cmd, cores, connexion_params = client_connection_params, \
                stdout_handler = client_out_handler, stderr_handler = client_out_handler)

            client_request.run()

            if not client_request.ok():
                # Some client failed, please panic
                self._log(
                    "One or more client process failed. Enjoy reading their outputs."
                )
                self._log(
                    "OUTPUT STARTS -------------------------------------------------\n"
                )

                for process in client_request.processes():
                    print("----- {0} returned {1}".format(
                        process.host().address, process.exit_code()))
                    if not process.stdout() == "":
                        print(GREEN + process.stdout() + NORMAL)
                    if not process.stderr() == "":
                        print(RED + process.stderr() + NORMAL)
                    print("")

                self._log(
                    "OUTPUT ENDS ---------------------------------------------------\n"
                )

                sweeper.skip(comb)
                launch_server.kill()
                launch_server.wait()
            else:
                # Waiting for server to end
                launch_server.wait()

                # Getting log files
                distant_path = OUT_FILE_FORMAT.format(
                    len(cores), comb['n_transitions'])
                local_path = distant_path

                EX.Get([server], distant_path).run()
                EX.Local('mv ' + distant_path + ' ' +
                         os.path.join(self.result_dir, local_path)).run()

                # These two actions used to be built but never executed, so
                # the per-client files were neither fetched nor cleaned up.
                EX.Get([server], 'client_*.out',
                       local_location=self.result_dir).run()
                EX.Remote('rm -f client_*.out', [server]).run()

                self._log(
                    "Finishing experiment with {0} clients and {1} transitions per client"
                    .format(comb['n_clients'], comb['n_transitions']))

                sweeper.done(comb)

            # Stay on the same reservation while combinations with the same
            # number of clients remain.
            sub_comb = sweeper.get_next(filtr=lambda r: filter(
                lambda s: s["n_clients"] == comb['n_clients'], r))
            self._updateStat(sweeper.stats())

            if not sub_comb:
                # Killing job
                EX5.oar.oardel(job)
                self.__class__._job = None
                break
            else:
                comb = sub_comb

    # NOTE(review): the original nesting of this statement was lost with the
    # file's indentation; it is placed after the sweep loop -- confirm.
    # print("") prints the same blank line on Python 2 and Python 3.
    print("")
def uninstall_devstack():
    """Run devstack's ``unstack.sh`` on every host of the module-level ``hosts``."""
    unstack = EX.Remote('cd ~/devstack;./unstack.sh',
                        hosts,
                        connexion_params={'user': '******'},
                        pty=True)
    unstack.run()
def configure_devstack():
    """Prepare every host in the module-level ``hosts`` for a devstack install.

    In order: enable the havana cloud archive, create a ``stack`` user with
    passwordless sudo and SSH keys, append proxy settings to
    /etc/environment, resolve each host's address, append PATH/no_proxy
    settings, clone devstack, and write ``localrc`` for the controller
    (``hosts[0]``) and for the compute nodes (``hosts[1:]``).
    """
    # Package sources: add the Ubuntu cloud archive for the havana release.
    update = EX.Remote('apt-get update;apt-get install python-software-properties -y;add-apt-repository cloud-archive:havana -y; apt-get update;', hosts, connexion_params = {'user': '******'}).run()
    # Create the stack user (home /opt/stack), grant it passwordless sudo and
    # authorise SSH keys.
    add_stack_user = EX.Remote('apt-get install -y git sudo;groupadd stack;useradd -g stack -s /bin/bash -d /opt/stack -m stack;'+
        'echo "stack ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers;'+
        ' mkdir /opt/stack/.ssh/; cp /root/.ssh/authorized_keys /opt/stack/.ssh/;'+
        ' chmod 700 ~/.ssh;'+
        'echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCyYjfgyPazTvGpd8OaAvtU2utL8W6gWC4JdRS1J95GhNNfQd657yO6s1AH5KYQWktcE6FO/xNUC2reEXSGC7ezy+sGO1kj9Limv5vrvNHvF1+wts0Cmyx61D2nQw35/Qz8BvpdJANL7VwP/cFI/p3yhvx2lsnjFE3hN8xRB2LtLUopUSVdBwACOVUmH2G+2BWMJDjVINd2DPqRIA4Zhy09KJ3O1Joabr0XpQL0yt/I9x8BVHdAx6l9U0tMg9dj5+tAjZvMAFfye3PJcYwwsfJoFxC8w/SLtqlFX7Ehw++8RtvomvuipLdmWCy+T9hIkl+gHYE4cS3OIqXH7f49jdJf [email protected]" >> ~/.ssh/authorized_keys', hosts, connexion_params = {'user': '******'}).run()
    # Append the HTTP(S) proxy and the address of interface br100 (as "ip")
    # to /etc/environment on each host.
    for host in hosts:
        proxy_config = EX.SshProcess("export ip=`/sbin/ifconfig br100 | sed '/inet\ /!d;s/.*r://g;s/\ .*//g'`;"+'echo -e "http_proxy=http://proxy.reims.grid5000.fr:3128/\nhttps_proxy=http://proxy.reims.grid5000.fr:3128/\nip=$ip">> /etc/environment;'
            ,host, connexion_params = {'user': '******'},pty = True).run()
        #reset = EX.SshProcess('echo -e "PATH=\\\"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games\\\"" > /etc/environment;'
        #,host, connexion_params = {'user': '******'},pty = True).run()
    # Run `host <name>` for every host; "{{{host}}}" is presumably replaced by
    # execo with each target's name -- confirm against the execo documentation.
    ip_process = EX.Remote('host {{{host}}}', hosts).run()
    i=0
    hs=[]
    # Pair each host with the 4th space-separated field of its `host` output
    # (presumably the address in "<name> has address <ip>" -- TODO confirm).
    # Processes are assumed to come back in the same order as `hosts`.
    for p in ip_process.processes():
        ip = p.stdout().split(' ')[3]
        ip=str(ip)
        ip=ip.replace('\r\n', '')
        hs=hs+[{'host':hosts[i],'ip':ip}]
        i=i+1
    # Only referenced by the commented-out FLOATING_RANGE lines below.
    ip_split=hs[0]['ip'].split('.')
    # Append PATH and a no_proxy list containing the host's own address.
    for host in hs:
        no_proxy_config= EX.SshProcess('echo -e "PATH=\\\"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games\\\"\nno_proxy=\\\"localhost,127.0.0.0,"'+host['ip']+'"\\\"" >> /etc/environment',host['host'],
            connexion_params = {'user': '******'},pty = True).run()
        print no_proxy_config.stderr()
    download_devstack = EX.Remote('cd ~;git clone https://github.com/openstack-dev/devstack;', hosts, connexion_params = {'user': '******'}).run()
    # Controller localrc: the n-cpu service is disabled on hosts[0].
    config_controller=EX.SshProcess('echo -e "HOST_IP=$ip\n"'+
        '"FLAT_INTERFACE=eth0\n"'+
        '"FIXED_RANGE=10.4.128.0/20\n"'+
        '"FIXED_NETWORK_SIZE=4096\n"'+
        #'"FLOATING_RANGE="'+ip_split[0]+'"."'+ip_split[1]+'"."'+ip_split[2]+'".128/25\n"'+
        '"FLOATING_RANGE=10.36.66.0/26\n"'+
        '"MULTI_HOST=1\n"'+
        '"LOGFILE=/opt/stack/logs/stack.sh.log\n"'+
        '"GIT_BASE=https://github.com\n"'+
        '"ADMIN_PASSWORD=pass\n"'+
        '"MYSQL_PASSWORD=pass\n"'+
        '"RABBIT_PASSWORD=pass\n"'+
        '"SERVICE_PASSWORD=pass\n"'+
        '"SERVICE_TOKEN=s4c\n"'+
        '"DISABLE_SERVICE=n-cpu" > /opt/stack/devstack/localrc',
        hosts[0],connexion_params = {'user': '******'}).run()
    # Compute localrc: service, MySQL, RabbitMQ and Glance endpoints all point
    # at the first host's address.
    config_compute=EX.Remote('echo -e "HOST_IP=$ip\n"'+
        '"FLAT_INTERFACE=eth0\n"'+
        '"FIXED_RANGE=10.4.128.0/20\n"'+
        '"FIXED_NETWORK_SIZE=4096\n"'+
        #'"FLOATING_RANGE="'+ip_split[0]+'"."'+ip_split[1]+'"."'+ip_split[2]+'".128/25\n"'+
        '"FLOATING_RANGE=10.36.66.0/26\n"'+
        '"MULTI_HOST=1\n"'+
        '"LOGFILE=/opt/stack/logs/stack.sh.log\n"'+
        '"GIT_BASE=https://github.com\n"'+
        '"ADMIN_PASSWORD=pass\n"'+
        '"MYSQL_PASSWORD=pass\n"'+
        '"RABBIT_PASSWORD=pass\n"'+
        '"SERVICE_PASSWORD=pass\n"'+
        '"SERVICE_TOKEN=s4c\n"'+
        '"DATABASE_TYPE=mysql\n"'+
        '"SERVICE_HOST="'+hs[0]['ip']+'"\n"'+
        '"MYSQL_HOST="'+hs[0]['ip']+'"\n"'+
        '"RABBIT_HOST="'+hs[0]['ip']+'"\n"'+
        '"GLANCE_HOSTPORT="'+hs[0]['ip']+'":9292\n"'+
        '"ENABLED_SERVICES=n-cpu,n-net,n-api,rabbit,c-sch,c-api,c-vol\n" > /opt/stack/devstack/localrc',
        hosts[1:],connexion_params = {'user': '******'}).run()
def check_hosts_up(hosts,
                   timeout=None,
                   connection_params=None,
                   polling_interval=5):
    """Check that a list of host are joinable with ssh.

    Checks that all hosts of the list are joinable with ssh. Retry
    continuously to connect to them every <polling_interval> seconds,
    until either all are reachable or the timeout is reached. Returns
    the list of hosts which are joinable.

    :param hosts: list of hosts

    :param timeout: timeout of the checks. No timeout if None.

    :param connection_params: to connect to the hosts. The
      ``ssh_options`` entry is overridden for the checks; this is done
      on a copy, so the dict passed by the caller is left untouched.

    :param polling_interval: tries to connect each
      <polling_interval> seconds.

    :returns: list of joinable hosts
    """
    start_ts = time.time()
    completion_ts = None if timeout is None else start_ts + timeout
    remaining_hosts = set(hosts)

    # Shallow copy: only the top-level 'ssh_options' key is replaced below,
    # and that change must not leak into the caller's dictionary.
    if connection_params is not None:
        real_connection_params = dict(connection_params)
    else:
        real_connection_params = {}

    while remaining_hosts and (completion_ts is None
                               or time.time() <= completion_ts):
        # Never poll past the global deadline.
        next_poll_ts = time.time() + polling_interval
        if completion_ts is not None:
            next_poll_ts = min(next_poll_ts, completion_ts)
        poll_timeout = max(0, next_poll_ts - time.time())

        # Non-interactive ssh options, with the connection attempt bounded
        # by the time left in this polling round.
        real_connection_params.update({
            'ssh_options':
            ('-tt', '-o', 'BatchMode=yes', '-o', 'PasswordAuthentication=no',
             '-o', 'StrictHostKeyChecking=no', '-o',
             'UserKnownHostsFile=/dev/null', '-o',
             'ConnectTimeout=%s' % (int(poll_timeout), ))
        })
        check = execo.Remote('true',
                             remaining_hosts,
                             connection_params=real_connection_params,
                             process_args={
                                 'timeout': poll_timeout,
                                 'nolog_exit_code': True,
                                 'nolog_timeout': True
                             }).run()
        hosts_up = [p.host for p in check.processes if p.finished_ok]
        remaining_hosts = remaining_hosts.difference(hosts_up)

        # Wait out the rest of the polling interval before retrying.
        if remaining_hosts:
            execo.sleep(max(0, next_poll_ts - time.time()))

    return list(set(hosts).difference(remaining_hosts))
def run(self):
    """Perform experiment.

    ``self.args[0]`` is the JSON configuration file and ``self.args[1:]``
    are the Rally benchmark description files.  After loading the
    configuration and filling in defaults, a host is obtained and set up.
    Each benchmark file is then uploaded and run with an idle period of
    ``idle_time`` seconds before and after it, and the timestamps are
    recorded.  ``self.tear_down()`` is always called.  When Rally was
    deployed, a summary is written to ``<result_dir>/experiment.json``.

    The function always ends by calling ``exit()``: status 1 for bad usage
    or when no host could be obtained, 3 when the configuration file cannot
    be read.
    """
    logger.detail(self.options)

    # Checking the options
    if len(self.args) < 2:
        self.options_parser.print_help()
        exit(1)

    # Load the configuration file
    try:
        with open(self.args[0]) as config_file:
            self.config = json.load(config_file)
    except Exception:
        # Was a bare "except:", which also intercepted KeyboardInterrupt
        # and SystemExit.
        logger.error("Error reading configuration file")
        t, value, tb = sys.exc_info()
        print(str(t) + " " + str(value))
        exit(3)

    # Put default values
    for key in defaults:
        if not key in self.config['authentication'] or self.config[
                'authentication'][key] == "":
            self.config['authentication'][key] = defaults[key]
            logger.info("Using default value '%s' for '%s'" %
                        (self.config['authentication'][key], key))

    if not 'rally-git' in self.config or self.config['rally-git'] == '':
        self.config['rally-git'] = DEFAULT_RALLY_GIT
        logger.info("Using default Git for Rally: %s " %
                    self.config['rally-git'])

    # Created before the try block: the report written at the end reads both
    # names, and an early failure used to leave them undefined (NameError).
    experiment = {}
    benchmarks = {}

    try:
        self.rally_deployed = False

        # Retrieving the host for the experiment
        self.host = self.get_host()

        if self.host is None:
            logger.error("Cannot get host for request")
            exit(1)

        # Deploying the host and Rally
        self.setup_host()

        # This will be useful in a bit
        os.mkdir(os.path.join(self.result_dir, 'rally'))
        os.mkdir(os.path.join(self.result_dir, 'energy'))

        experiment['start'] = int(time.time())

        # Launch the benchmarks
        n_benchmarks = len(self.args[1:])
        i_benchmark = 0
        for bench_file in self.args[1:]:
            if not os.path.isfile(bench_file):
                logger.warn("Ignoring %s which is not a file" % bench_file)
                continue

            i_benchmark += 1
            logger.info("[%d/%d] Preparing benchmark %s" %
                        (i_benchmark, n_benchmarks, bench_file))

            # Send the benchmark description file to the host
            EX.Put(self.host, [bench_file],
                   connection_params={
                       'user': '******'
                   }).run()

            v = ''
            if self.options.verbose:
                v = '-d'
            cmd = "rally %s task start %s" % (v, os.path.basename(bench_file))

            # If necessary, send the rally task args
            if self.options.rally_args is not None:
                filename = os.path.basename(self.options.rally_args)
                cmd = cmd + ' --task-args-file ' + filename
                EX.Put([self.host], [self.options.rally_args],
                       remote_location=filename,
                       connection_params={
                           'user': '******'
                       }).run()

            rally_task = EX.Remote(cmd, [self.host], {'user': '******'})

            logger.info("[%d/%d] Runing benchmark %s" %
                        (i_benchmark, n_benchmarks, bench_file))

            bench_basename = os.path.basename(bench_file)
            benchmarks[bench_basename] = {}

            # Idle period before the run, timestamped for later correlation.
            benchmarks[bench_basename]['idle_start'] = int(time.time())
            time.sleep(idle_time)
            benchmarks[bench_basename]['run_start'] = int(time.time())

            # This is it
            rally_task.run()

            benchmarks[bench_basename]['run_end'] = int(time.time())
            time.sleep(idle_time)
            benchmarks[bench_basename]['idle_end'] = int(time.time())

            if not rally_task.finished_ok:
                logger.error("Error while running benchmark")
                benchmarks[bench_basename]['error'] = ''

                if rally_task.processes[0].stderr is not None:
                    logger.error(rally_task.processes[0].stderr)

                # Try to find the reason
                lines = rally_task.processes[0].stdout.splitlines(True)
                for i, text in enumerate(lines):
                    if 'Task config is invalid' in text:
                        benchmarks[bench_basename]['error'] += text.strip()
                    # The reason is on the line after "Reason:"; that line
                    # may be missing when "Reason:" ends the output.
                    if 'Reason:' in text and i + 1 < len(lines):
                        benchmarks[bench_basename]['error'] += lines[
                            i + 1].strip()

                continue
            else:
                # Getting the results back
                self._get_logs(bench_basename)

            logger.info('----------------------------------------')
    except Exception:
        t, value, tb = sys.exc_info()
        print(str(t) + " " + str(value))
        traceback.print_tb(tb)
    finally:
        self.tear_down()

    # Write info about the benchmarks to experiment.json
    if self.rally_deployed:
        out_path = os.path.join(self.result_dir, 'experiment.json')
        experiment['nodes'] = {}
        experiment['nodes']['services'] = self.config['os-services']
        experiment['nodes']['computes'] = self.config['os-computes']
        experiment['end'] = int(time.time())
        experiment['benchmarks'] = benchmarks

        with open(out_path, 'w') as f:
            f.write(json.dumps(experiment, indent=3))
        logger.info("Wrote " + out_path)

    exit()
def prepare_server(self):
    """Start the provisioning script on ``self.server``.

    The shell script adds a route to the VM subnet, raises the kernel limits
    for incoming connections, then builds the resolver selected by
    ``self.args.resolver`` (unbound, bind9 or knot-resolver) and installs
    CPUnetLOG.  Most steps record their failure in the shell variable
    ``rc``, which becomes the script's exit status.

    Returns:
        The started (not awaited) remote action.
    """
    # At this point, the server is already deployed
    # Names passed to `git checkout`: "v" + the version, with dots replaced
    # by underscores for bind9.
    bind_version = "v" + self.args.bind9_version.replace(".", "_")
    knot_version = "v" + self.args.knot_version
    # Doubled braces are literal shell braces after .format(); single-brace
    # fields are the keyword arguments passed to .format() below.
    script = """\
    function repeat() {{ count=$1; shift; for i in $(seq 1 $count); do "$@" && return; sleep 2; done; }}
    rc=0
    # Add direct route to VM network
    ip route replace {vm_subnet} dev eth0 || rc=$?
    # Increase max number of incoming connections
    sysctl net.ipv4.tcp_syncookies=0 || rc=$?
    sysctl net.core.somaxconn=100000 || rc=$?
    sysctl net.ipv4.tcp_max_syn_backlog=100000 || rc=$?
    sysctl fs.file-max=20000000 || rc=$?
    echo 20000000 > /proc/sys/fs/nr_open || rc=$?
    [ "{resolver}" = "unbound" ] && {{
    # Update git repository for unbound.
    cd /root/unbound || rc=$?
    repeat 3 git pull || rc=$?
    make -j8 || rc=$?
    }}
    [ "{resolver}" = "bind9" ] && {{
    # Install bind
    cd /root/
    [ -d "bind9" ] || repeat 3 git clone https://gitlab.isc.org/isc-projects/bind9.git || rc=$?
    cd bind9 || rc=$?
    repeat 3 git pull || rc=$?
    git checkout {bind_version}
    # Perf tuning: https://kb.isc.org/docs/aa-01314
    # We can't tune the buffer size, it is unconditionally set to 16 MB by --with-tuning=large.
    # -DRCVBUFSIZE=4194304
    ./configure --with-tuning=large --enable-largefile --enable-shared --enable-static --with-openssl=/usr --with-gnu-ld --with-atf=no --disable-linux-caps 'CFLAGS=-O2 -fstack-protector-strong -Wformat -Werror=format-security -fno-strict-aliasing -fno-delete-null-pointer-checks -DNO_VERSION_DATE -DDIG_SIGCHASE' 'LDFLAGS=-Wl,-z,relro -Wl,-z,now' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' || rc=$?
    make -j32 || rc=$?
    }}
    [ "{resolver}" = "knot-resolver" ] && {{
    # Install knot-resolver
    apt-get update
    cd /root/
    [ -d "knot-resolver" ] || repeat 5 git clone https://gitlab.labs.nic.cz/knot/knot-resolver.git || rc=$?
    cd knot-resolver || rc=$?
    repeat 5 git pull || rc=$?
    git checkout {knot_version} || rc=$?
    # Tweak timers
    sed -i -e 's#net->tcp.in_idle_timeout = 10000;#net->tcp.in_idle_timeout = 3600000;#' daemon/network.c || rc=$?
    sed -i -e 's#KR_CONN_RTT_MAX 2000#KR_CONN_RTT_MAX 3600000#' lib/defines.h || rc=$?
    git submodule update --init --recursive || rc=$?
    apt-get --yes install -t stretch-backports libknot-dev || rc=$?
    apt-get --yes build-dep -t stretch-backports knot-resolver || rc=$?
    make -j32 CFLAGS="-DNDEBUG" daemon modules || rc=$?
    make install || rc=$?
    }}
    # Install CPUNetLog
    apt-get --yes install python3 python3-psutil python3-netifaces
    cd /root/
    [ -d "CPUnetLOG" ] || repeat 3 git clone https://github.com/jonglezb/CPUnetLOG || rc=$?
    cd CPUnetLOG || rc=$?
    repeat 3 git pull || rc=$?
    exit $rc
    """.format(vm_subnet=self.subnet, resolver=self.args.resolver, bind_version=bind_version, knot_version=knot_version)
    # Started only; the caller is expected to wait on the returned task.
    task = execo.Remote(script, [self.server], connection_params=self.server_conn_params, name="Setup server").start()
    return task
def kill_all_vm(self):
    """Kill every ``qemu-system-x86_64`` process on all hosts of ``self.vm_hosts``.

    The remote command ends with ``|| true``, so a host without a matching
    process does not make it fail.  The call blocks until the action has
    run on all hosts.
    """
    execo.Remote("killall qemu-system-x86_64 || true",
                 self.vm_hosts,
                 connection_params=g5k.default_oarsh_oarcp_params,
                 name="Kill all VMs").run()
execo.sleep(1) print 'sending command: ' + line workers = execo.Remote(line, cores).start() app = App() if jobid: try: print 'Waiting for job to start' execo_g5k.wait_oar_job_start(jobid, site) print 'Retrieving nodes' nodes = execo_g5k.get_oar_job_nodes(jobid, site) # Setup nodes print 'Preparing workers with cmd: ' + setup_cmd workers = execo.Remote(setup_cmd, nodes).start() workers.expect('Worker Setup Completed') workers.kill() # Possibly open more than one connection per machine cores = nodes * args.nb_cores print cores print 'Example cmd: %s' % (workers_cmd) app.prompt = '%s (%d node(s), %d core(s)/node)> ' % ( site, args.volunteers, args.nb_cores) app.cmdloop() # execo.sleep(600) # print 'Workers done' finally: execo_g5k.oardel([(jobid, site)])