def run(self):
    """Benchmark Dropbox transfer times over a sweep of file sizes.

    For each (size, db_if) combination: generate a file of the requested
    size, upload it through the Dropbox SDK, download it back into the
    combination's directory, and append ``start_date size up_time dl_time``
    to ``results.txt`` under ``self.result_dir``.  The 'rest' interface is
    not implemented and those combinations are skipped.
    """
    # SECURITY: hard-coded OAuth token committed to source — it should be
    # revoked and loaded from the environment or a config file instead.
    token = 'bRIJb9jp5igAAAAAAAAACc5QzQ619Vp0pYa2PdIrt0q2y0qFyJgwrKvtzuTp3Sz_'
    client = dropbox.client.DropboxClient(token)
    parameters = {'size': igeom(128, 2048, 5), 'db_if': ['rest', 'sdk']}
    combs = sweep(parameters)
    sweeper = ParamSweeper(self.result_dir + "/sweeps", combs)
    # `with` guarantees the results file is flushed and closed even when a
    # transfer raises (the original leaked the handle on any exception).
    with open(self.result_dir + '/results.txt', 'w') as f:
        while len(sweeper.get_remaining()) > 0:
            comb = sweeper.get_next()
            logger.info('Treating combination %s', pformat(comb))
            comb_dir = self.result_dir + '/' + slugify(comb)
            # Create the directory only when missing instead of hiding
            # every error behind a bare `except: pass`.
            if not os.path.isdir(comb_dir):
                os.mkdir(comb_dir)
            fname = self.create_file(comb['size'])
            timer = Timer()
            if comb['db_if'] == 'sdk':
                self.upload_file_sdk(client, fname, fname.split('/')[-1])
                up_time = timer.elapsed()
                # BUGFIX: the download target previously lacked the '/'
                # separator (comb_dir + fname...) and so wrote the file
                # next to comb_dir instead of inside it.
                self.download_file_sdk(client, fname.split('/')[-1],
                                       comb_dir + '/' + fname.split('/')[-1])
                dl_time = timer.elapsed() - up_time
                sweeper.done(comb)
            elif comb['db_if'] == 'rest':
                logger.warning('REST interface not implemented')
                sweeper.skip(comb)
                # BUGFIX: remove the generated file on the skip path too;
                # it previously leaked because `continue` bypassed cleanup.
                os.remove(fname)
                continue
            os.remove(fname)
            f.write("%f %i %f %f \n" % (timer.start_date(), comb['size'],
                                        up_time, dl_time))
def run(self):
    """ run method from engine in order to do our workflow """
    # Results are persisted both to a per-run text file and to MongoDB.
    mongo = ClientMongo()
    # NOTE(review): this binds the builtin `dict` type as a placeholder;
    # every branch below reassigns `size`, so it is harmless, but `None`
    # would state the intent more clearly.
    size = dict
    if not self.options.file:
        if not self.options.only:
            # Sweep 1 byte plus 25/50/75/100 % of the requested size.
            size = {
                1,
                long(self.options.size * 0.25),
                long(self.options.size * 0.5),
                long(self.options.size * 0.75),
                long(self.options.size)
            }
        else:
            # Single size: exactly the requested one.
            size = {long(self.options.size)}
    else:
        if self.OnlyDownload:
            # Benchmark an existing file: use its on-disk size.
            size = getFilSize(self.options.file)
        else:
            size = {0}
    # The drive list can be overridden from the command line.
    drive = None
    if self.options.drive:
        drive = self.options.drive
    else:
        drive = self.drive
    interface = ['rest', 'sdk']
    parameters = {
        'size': size,
        'if': interface,
        'drive': drive,
        'transfert': self.transfert
    }
    p = None
    # Repeat the whole sweep `ntest` times (one "round" per iteration).
    for n in range(0, int(self.options.ntest), 1):
        logger.info('---------------------')
        logger.info('Round %i', n + 1)
        combs = sweep(parameters)
        # Each round writes into its own timestamped Results/Bench<date> dir.
        date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        pathResults = os.getcwd() + '/Results/Bench' + date
        sweeper = ParamSweeper(pathResults + "/sweeps", combs)
        f = open(pathResults + '/results.txt', 'w')
        while len(sweeper.get_remaining()) > 0:
            # sort the parameters: iterate interfaces/drives/sizes in a
            # fixed order and pull the matching combination from the sweeper.
            for i in interface:
                for dr in drive:
                    for s in size:
                        comb = sweeper.get_next(filtr=lambda r: filter(
                            lambda x: x['drive'] == dr and x['size'] == s
                            and x['if'] == i, r))
                        if not comb:
                            continue
                        # start of the workflow: pick the provider object
                        # matching the combination's drive.
                        if comb['drive'] == 'amazon':
                            p = providerS3.ProviderS3()
                        elif comb['drive'] == 'dropbox':
                            p = providerDB.ProviderDB()
                        else:
                            p = providerGD.ProviderGD()
                        logger.info('Treating combination %s', pformat(comb))
                        comb_dir = pathResults + '/' + slugify(comb)
                        if not os.path.isdir(comb_dir):
                            os.mkdir(comb_dir)
                        # Either generate a file of the swept size or reuse
                        # the user-supplied one.
                        if not self.options.file:
                            fname = self.create_file(comb['size'])
                        else:
                            fname = self.options.file
                        timer = Timer()
                        up_time = 0
                        dl_time = 0
                        start_date = datetime.datetime.now()
                        if comb['if'] == 'sdk':
                            if p.provider_name == "amazon":
                                # AMAZON
                                clientAmz = p.getConnexion()
                                if self.OnlyDownload:
                                    p.bucketKey += fname
                                else:
                                    p.bucketKey += '/' + fname
                                if comb['transfert'] == "upload" or comb[
                                        'transfert'] == 'upDown':
                                    p.upload_file_sdk(
                                        clientAmz.get_bucket(p.bucketName),
                                        p.bucketKey, fname)
                                    up_time = timer.elapsed()
                                if comb['transfert'] == "download" or comb[
                                        'transfert'] == 'upDown':
                                    p.download_file_sdk(
                                        clientAmz.get_bucket(p.bucketName),
                                        p.bucketKey,
                                        comb_dir + '/' + fname.split('/')[-1])
                                    dl_time = timer.elapsed() - up_time
                                if not self.OnlyDownload:
                                    p.delete_file_sdk(
                                        clientAmz.get_bucket(p.bucketName),
                                        p.bucketKey)
                            elif p.provider_name == "dropbox":
                                # DROPBOX
                                client = p.getToken()
                                if comb['transfert'] == "upload" or comb[
                                        'transfert'] == 'upDown':
                                    p.upload_file_sdk(
                                        client, fname, fname.split('/')[-1])
                                    up_time = timer.elapsed()
                                if comb['transfert'] == "download" or comb[
                                        'transfert'] == 'upDown':
                                    p.download_file_sdk(
                                        client, fname.split('/')[-1],
                                        comb_dir + '/' + fname.split('/')[-1])
                                    dl_time = timer.elapsed() - up_time
                                if not self.OnlyDownload:
                                    p.delete_file(client,
                                                  fname.split('/')[-1])
                            elif p.provider_name == "googledrive":
                                # GOOGLEDRIVE
                                drive_service = p.getConnexion()
                                new_file = None
                                if comb['transfert'] == 'upload' or comb[
                                        'transfert'] == 'upDown':
                                    new_file = p.upload_file_sdk(
                                        drive_service, fname,
                                        fname.split('/')[-1], 'text/plain')
                                    up_time = timer.elapsed()
                                if comb['transfert'] == 'download' or comb[
                                        'transfert'] == 'upDown':
                                    p.download_file_sdk(
                                        drive_service, new_file,
                                        comb_dir + '/' + fname.split('/')[-1])
                                    dl_time = timer.elapsed() - up_time
                                if not self.OnlyDownload:
                                    p.delete_file_sdk(
                                        drive_service, new_file['id'])
                            sweeper.done(comb)
                        elif comb['if'] == 'rest':
                            # REST path is a stub: mark skipped and clean up.
                            logger.warning(
                                'REST interface not implemented')
                            sweeper.skip(comb)
                            if not self.OnlyDownload:
                                # logger.info('delete de '+fname)
                                if os.path.isfile(fname):
                                    os.remove(fname)
                                # delete only if rest is implmented
                                # os.remove(comb_dir + '/' + fname.split('/')[-1])
                            continue
                        # Record the upload measurement (file + MongoDB).
                        if comb['transfert'] == "upload" or comb[
                                'transfert'] == "upDown":
                            f.write("%s %s %s %s %s %s %s %f %i %s %f\n" %
                                    (self.localisation['ip'],
                                     self.localisation['lat'],
                                     self.localisation['lon'],
                                     self.localisation['city'],
                                     self.localisation['country'],
                                     comb['drive'], comb['if'],
                                     timer.start_date(), comb['size'],
                                     "upload", up_time))
                            mongo.collection.insert({
                                'ip': self.localisation['ip'],
                                'latitude': self.localisation['lat'],
                                'longitude': self.localisation['lon'],
                                'city': self.localisation['city'],
                                'country': self.localisation['country'],
                                'drive': comb['drive'],
                                'interface': comb['if'],
                                'start_date': start_date,
                                'size': comb['size'],
                                'transfert': 'upload',
                                'time': up_time
                            })
                        # Record the download measurement (file + MongoDB).
                        if comb['transfert'] == "download" or comb[
                                'transfert'] == "upDown":
                            f.write("%s %s %s %s %s %s %s %f %i %s %f\n" %
                                    (self.localisation['ip'],
                                     self.localisation['lat'],
                                     self.localisation['lon'],
                                     self.localisation['city'],
                                     self.localisation['country'],
                                     comb['drive'], comb['if'],
                                     timer.start_date(), comb['size'],
                                     "download", dl_time))
                            mongo.collection.insert({
                                'ip': self.localisation['ip'],
                                'latitude': self.localisation['lat'],
                                'longitude': self.localisation['lon'],
                                'city': self.localisation['city'],
                                'country': self.localisation['country'],
                                'drive': comb['drive'],
                                'interface': comb['if'],
                                'start_date': start_date,
                                'size': comb['size'],
                                'transfert': 'download',
                                'time': dl_time
                            })
                        if not self.OnlyDownload:
                            # logger.info('delete de '+fname)
                            if os.path.isfile(fname):
                                os.remove(fname)
                            # NOTE(review): the existence check appends the
                            # full `fname` to comb_dir while the removal uses
                            # only the basename — one of the two paths looks
                            # wrong; confirm which file is meant.
                            if os.path.isfile(comb_dir + '/' + fname):
                                os.remove(comb_dir + '/' +
                                          fname.split('/')[-1])
        f.close()
    # delete the Bench Folder
    # NOTE(review): os.rmdir only succeeds on an empty directory; this
    # assumes the engine-created result_dir was never written to.
    os.rmdir(self.result_dir)
    logger.info("---------------------------------------")
    # Print previously recorded experiments matching these options.
    for t in check_Exp_database(self.options,
                                self.localisation)['result']:
        logger.info(t)
def run(self):
    """Scalability experiment: for each client count, reserve Grid'5000
    nodes, start one server and n_clients client processes, then collect
    their outputs into ``self.result_dir``."""
    # Defining experiment parameters
    self.parameters = {
        'n_clients': [400, 450, 500, 550, 600],
        'n_transitions': [10000]
    }
    cluster = 'griffon'
    sweeps = sweep(self.parameters)
    sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
    server_out_path = os.path.join(self.result_dir, "server.out")
    self._updateStat(sweeper.stats())
    # Loop on the number of nodes
    while True:
        # Taking the next parameter combinations
        comb = sweeper.get_next()
        if not comb:
            break
        # Performing the submission on G5K
        site = get_cluster_site(cluster)
        self._log("Output will go to " + self.result_dir)
        # One core per client, plus one whole node for the server.
        n_nodes = int(math.ceil(
            float(comb['n_clients']) /
            EX5.get_host_attributes(
                cluster + '-1')['architecture']['smt_size'])) + 1
        self._log("Reserving {0} nodes on {1}".format(n_nodes, site))
        resources = "{cluster=\\'" + cluster + "\\'}/nodes=" + str(n_nodes)
        submission = EX5.OarSubmission(resources = resources,
                                       job_type = 'allow_classic_ssh',
                                       walltime ='00:10:00')
        job = EX5.oarsub([(submission, site)])
        # Remember the job on the class so cleanup code can delete it.
        self.__class__._job = job
        # Sometimes oarsub fails silently
        if job[0][0] is None:
            print("\nError: no job was created")
            sys.exit(1)
        # Wait for the job to start
        self._log("Waiting for job {0} to start...\n".format(
            BOLD_MAGENTA + str(job[0][0]) + NORMAL))
        EX5.wait_oar_job_start(job[0][0], job[0][1],
                               prediction_callback = prediction)
        nodes = EX5.get_oar_job_nodes(job[0][0], job[0][1])
        # Deploying nodes
        #deployment = EX5.Deployment(hosts = nodes, env_file='path_to_env_file')
        #run_deploy = EX5.deploy(deployment)
        #nodes_deployed = run_deploy.hosts[0]
        # Copying active_data program on all deployed hosts
        EX.Put([nodes[0]], '../dist/active-data-lib-0.1.2.jar',
               connexion_params = {'user': '******'}).run()
        EX.Put([nodes[0]], '../server.policy',
               connexion_params = {'user': '******'}).run()
        # Loop on the number of requests per client process
        while True:
            # Split the nodes: first node is the server, the rest clients.
            clients = nodes[1:]
            server = nodes[0]
            self._log("Running experiment with {0} nodes and {1} "
                      "transitions per client".format(
                          len(clients), comb['n_transitions']))
            # Launching Server on one node
            out_handler = FileOutputHandler(server_out_path)
            launch_server = EX.Remote('java -jar active-data-lib-0.1.2.jar',
                                      [server],
                                      stdout_handler = out_handler,
                                      stderr_handler = out_handler).start()
            self._log("Server started on " + server.address)
            # Give the server a moment to bind before clients connect.
            time.sleep(2)
            # Launching clients
            # NOTE(review): `rank` is never used afterwards.
            rank=0
            n_cores = EX5.get_host_attributes(
                clients[0])['architecture']['smt_size'];
            # NOTE(review): this multiplies *all* nodes (server included)
            # by the core count, then truncates — confirm the server node
            # is meant to host client processes too.
            cores = nodes * n_cores
            cores = cores[0:comb['n_clients']] # Cut out the additional cores
            client_connection_params = {
                'taktuk_gateway': 'lyon.grid5000.fr',
                'host_rewrite_func': None
            }
            self._log("Launching {0} clients...".format(len(cores)))
            client_cmd = "/usr/bin/env java -cp /home/ansimonet/active-data-lib-0.1.2.jar org.inria.activedata.examples.perf.TransitionsPerSecond " + \
                "{0} {1} {2} {3} {4}".format(server.address, 1200,
                                             "{{range(len(cores))}}",
                                             len(cores),
                                             comb['n_transitions'])
            client_out_handler = FileOutputHandler(
                os.path.join(self.result_dir, "clients.out"))
            client_request = EX.TaktukRemote(client_cmd, cores,
                                             connexion_params = client_connection_params, \
                                             stdout_handler = client_out_handler,
                                             stderr_handler = client_out_handler)
            client_request.run()
            if not client_request.ok():
                # Some client failed, please panic
                self._log("One or more client process failed. "
                          "Enjoy reading their outputs.")
                self._log("OUTPUT STARTS -------------------------------------------------\n")
                for process in client_request.processes():
                    print("----- {0} returned {1}".format(
                        process.host().address, process.exit_code()))
                    if not process.stdout() == "":
                        print(GREEN + process.stdout() + NORMAL)
                    if not process.stderr() == "":
                        print(RED + process.stderr() + NORMAL)
                    print("")
                self._log("OUTPUT ENDS ---------------------------------------------------\n")
                sweeper.skip(comb)
                launch_server.kill()
                launch_server.wait()
            else:
                # Waiting for server to end
                launch_server.wait()
                # Getting log files
                distant_path = OUT_FILE_FORMAT.format(
                    len(cores), comb['n_transitions'])
                local_path = distant_path
                EX.Get([server], distant_path).run()
                EX.Local('mv ' + distant_path + ' ' +
                         os.path.join(self.result_dir, local_path)).run()
                # NOTE(review): these two actions are constructed but
                # .run() is never called, so nothing is fetched/removed —
                # confirm whether that is intentional.
                EX.Get([server], 'client_*.out',
                       local_location = self.result_dir)
                EX.Remote('rm -f client_*.out', [server])
                self._log("Finishing experiment with {0} clients and {1} "
                          "transitions per client".format(
                              comb['n_clients'], comb['n_transitions']))
                sweeper.done(comb)
            # Continue with the next combination that has the same
            # n_clients (same reservation), or release the job.
            sub_comb = sweeper.get_next (filtr = lambda r: filter(
                lambda s: s["n_clients"] == comb['n_clients'], r))
            self._updateStat(sweeper.stats())
            if not sub_comb:
                # Killing job
                EX5.oar.oardel(job)
                self.__class__._job = None
                break
            else:
                comb = sub_comb
    # Python 2 print statement (blank line at end of run).
    print ""
class mpi_bench(Engine):
    """execo engine running the NAS LU MPI benchmark over a sweep of
    (cluster, core count, problem size) combinations on Grid'5000."""

    def run(self):
        """Inherited method, put here the code for running the engine"""
        self.define_parameters()
        if self.prepare_bench():
            logger.info('Bench prepared on all frontends')
            self.run_xp()

    def define_parameters(self):
        """Create the iterator on the parameters combinations to be explored"""
        # fixed number of nodes
        self.n_nodes = 4
        # choose a list of clusters
        clusters = ['graphene', 'petitprince', 'edel', 'paradent', 'stremi']
        #clusters = ['petitprince', 'paradent']
        # compute the maximum number of cores among all clusters
        max_core = self.n_nodes * max([
            get_host_attributes(cluster + '-1')['architecture']['smt_size']
            for cluster in clusters])
        # define the parameters: n_core sweeps powers of two between
        # n_nodes and the largest core count available on any cluster.
        self.parameters = {
            'cluster' : clusters,
            'n_core': filter(lambda i: i >= self.n_nodes,
                             list(takewhile(lambda i: i<max_core,
                                            (2**i for i in count(0, 1))))),
            'size' : ['A', 'B', 'C']
        }
        logger.info(self.parameters)
        # define the iterator over the parameters combinations
        self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"),
                                    sweep(self.parameters))
        logger.info('Number of parameters combinations %s' %
                    len(self.sweeper.get_remaining()))

    def prepare_bench(self):
        """bench configuration and compilation, copy binaries to frontends

        return True if preparation is ok
        """
        logger.info("preparation: configure and compile benchmark")
        # the involved sites. We will do the compilation on the first of these.
        sites = list(set(map(get_cluster_site, self.parameters['cluster'])))
        # generate the bench compilation configuration
        bench_list = '\n'.join([
            'lu\t%s\t%s' % (size, n_core)
            for n_core in self.parameters['n_core']
            for size in self.parameters['size']])
        # Reserving a node because compiling on the frontend is forbidden
        # and because we need mpif77
        jobs = oarsub([(OarSubmission(resources = "nodes=1",
                                      job_type = 'allow_classic_ssh',
                                      walltime ='0:10:00'), sites[0])])
        if jobs[0][0]:
            try:
                logger.info("copying bench archive to %s" % (sites[0],))
                copy_bench = Put([sites[0]], ['NPB3.3-MPI.tar.bz2']).run()
                logger.info("extracting bench archive on %s" % (sites[0],))
                extract_bench = Remote('tar -xjf NPB3.3-MPI.tar.bz2',
                                       [sites[0]]).run()
                logger.info("waiting job start %s" % (jobs[0],))
                wait_oar_job_start(*jobs[0], prediction_callback = pred_cb)
                logger.info("getting nodes of %s" % (jobs[0],))
                nodes = get_oar_job_nodes(*jobs[0])
                logger.info("configure bench compilation")
                conf_bench = Remote(
                    'echo "%s" > ~/NPB3.3-MPI/config/suite.def' % bench_list,
                    nodes).run()
                logger.info("compil bench")
                compilation = Remote(
                    'cd NPB3.3-MPI && make clean && make suite',
                    nodes).run()
                logger.info("compil finished")
            # NOTE(review): bare except also hides KeyboardInterrupt etc.;
            # and if the failure happens before `compilation` is assigned,
            # returning False here is what prevents a NameError below.
            except:
                logger.error("unable to compile bench")
                return False
            finally:
                # Always release the compilation reservation.
                oardel(jobs)
            # Copying binaries to all other frontends
            frontends = sites[1:]
            rsync = Remote('rsync -avuP ~/NPB3.3-MPI/ {{frontends}}:NPB3.3-MPI',
                           [get_host_site(nodes[0])] * len(frontends))
            rsync.run()
            return compilation.ok and rsync.ok

    def run_xp(self):
        """Iterate over the parameters and execute the bench"""
        while len(self.sweeper.get_remaining()) > 0:
            comb = self.sweeper.get_next()
            # Skip combinations requesting more cores than the cluster
            # can provide with n_nodes nodes.
            if comb['n_core'] > get_host_attributes(
                    comb['cluster']+'-1')['architecture']['smt_size'] * self.n_nodes:
                self.sweeper.skip(comb)
                continue
            logger.info('Processing new combination %s' % (comb,))
            site = get_cluster_site(comb['cluster'])
            jobs = oarsub([(OarSubmission(
                resources = "{cluster='" + comb['cluster']+"'}/nodes=" +
                            str(self.n_nodes),
                job_type = 'allow_classic_ssh',
                walltime ='0:10:00'), site)])
            if jobs[0][0]:
                try:
                    wait_oar_job_start(*jobs[0])
                    nodes = get_oar_job_nodes(*jobs[0])
                    bench_cmd = 'mpirun -H %s -n %i %s ~/NPB3.3-MPI/bin/lu.%s.%i' % (
                        ",".join([node.address for node in nodes]),
                        comb['n_core'],
                        get_mpi_opts(comb['cluster']),
                        comb['size'],
                        comb['n_core'])
                    lu_bench = SshProcess(bench_cmd, nodes[0])
                    lu_bench.stdout_handlers.append(
                        self.result_dir + '/' + slugify(comb) + '.out')
                    lu_bench.run()
                    if lu_bench.ok:
                        logger.info("comb ok: %s" % (comb,))
                        self.sweeper.done(comb)
                        # `continue` still runs the finally clause, so the
                        # job is released before the next combination.
                        continue
                finally:
                    oardel(jobs)
            # Reached on submission failure or unsuccessful bench run;
            # cancel so the combination can be retried later.
            logger.info("comb NOT ok: %s" % (comb,))
            self.sweeper.cancel(comb)
def run(self):
    """Scalability experiment: for each client count, reserve Grid'5000
    nodes, start one server and n_clients client processes, then collect
    their outputs into ``self.result_dir``."""
    # Defining experiment parameters
    self.parameters = {
        'n_clients': [400, 450, 500, 550, 600],
        'n_transitions': [10000]
    }
    cluster = 'griffon'
    sweeps = sweep(self.parameters)
    sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
    server_out_path = os.path.join(self.result_dir, "server.out")
    self._updateStat(sweeper.stats())
    # Loop on the number of nodes
    while True:
        # Taking the next parameter combinations
        comb = sweeper.get_next()
        if not comb:
            break
        # Performing the submission on G5K
        site = get_cluster_site(cluster)
        self._log("Output will go to " + self.result_dir)
        # One core per client, plus one whole node for the server.
        n_nodes = int(
            math.ceil(
                float(comb['n_clients']) / EX5.get_host_attributes(
                    cluster + '-1')['architecture']['smt_size'])) + 1
        self._log("Reserving {0} nodes on {1}".format(n_nodes, site))
        resources = "{cluster=\\'" + cluster + "\\'}/nodes=" + str(n_nodes)
        submission = EX5.OarSubmission(resources=resources,
                                       job_type='allow_classic_ssh',
                                       walltime='00:10:00')
        job = EX5.oarsub([(submission, site)])
        # Remember the job on the class so cleanup code can delete it.
        self.__class__._job = job
        # Sometimes oarsub fails silently
        if job[0][0] is None:
            print("\nError: no job was created")
            sys.exit(1)
        # Wait for the job to start
        self._log(
            "Waiting for job {0} to start...\n".format(BOLD_MAGENTA +
                                                       str(job[0][0]) +
                                                       NORMAL))
        EX5.wait_oar_job_start(job[0][0],
                               job[0][1],
                               prediction_callback=prediction)
        nodes = EX5.get_oar_job_nodes(job[0][0], job[0][1])
        # Deploying nodes
        #deployment = EX5.Deployment(hosts = nodes, env_file='path_to_env_file')
        #run_deploy = EX5.deploy(deployment)
        #nodes_deployed = run_deploy.hosts[0]
        # Copying active_data program on all deployed hosts
        EX.Put([nodes[0]],
               '../dist/active-data-lib-0.1.2.jar',
               connexion_params={
                   'user': '******'
               }).run()
        EX.Put([nodes[0]], '../server.policy',
               connexion_params={
                   'user': '******'
               }).run()
        # Loop on the number of requests per client process
        while True:
            # Split the nodes: first node is the server, the rest clients.
            clients = nodes[1:]
            server = nodes[0]
            self._log(
                "Running experiment with {0} nodes and {1} transitions per client"
                .format(len(clients), comb['n_transitions']))
            # Launching Server on one node
            out_handler = FileOutputHandler(server_out_path)
            launch_server = EX.Remote(
                'java -jar active-data-lib-0.1.2.jar', [server],
                stdout_handler=out_handler,
                stderr_handler=out_handler).start()
            self._log("Server started on " + server.address)
            # Give the server a moment to bind before clients connect.
            time.sleep(2)
            # Launching clients
            # NOTE(review): `rank` is never used afterwards.
            rank = 0
            n_cores = EX5.get_host_attributes(
                clients[0])['architecture']['smt_size']
            # NOTE(review): this multiplies *all* nodes (server included)
            # by the core count, then truncates — confirm the server node
            # is meant to host client processes too.
            cores = nodes * n_cores
            cores = cores[
                0:comb['n_clients']]  # Cut out the additional cores
            client_connection_params = {
                'taktuk_gateway': 'lyon.grid5000.fr',
                'host_rewrite_func': None
            }
            self._log("Launching {0} clients...".format(len(cores)))
            client_cmd = "/usr/bin/env java -cp /home/ansimonet/active-data-lib-0.1.2.jar org.inria.activedata.examples.perf.TransitionsPerSecond " + \
                "{0} {1} {2} {3} {4}".format(server.address, 1200,
                                             "{{range(len(cores))}}",
                                             len(cores),
                                             comb['n_transitions'])
            client_out_handler = FileOutputHandler(
                os.path.join(self.result_dir, "clients.out"))
            client_request = EX.TaktukRemote(client_cmd, cores,
                                             connexion_params = client_connection_params, \
                                             stdout_handler = client_out_handler,
                                             stderr_handler = client_out_handler)
            client_request.run()
            if not client_request.ok():
                # Some client failed, please panic
                self._log(
                    "One or more client process failed. Enjoy reading their outputs."
                )
                self._log(
                    "OUTPUT STARTS -------------------------------------------------\n"
                )
                for process in client_request.processes():
                    print("----- {0} returned {1}".format(
                        process.host().address, process.exit_code()))
                    if not process.stdout() == "":
                        print(GREEN + process.stdout() + NORMAL)
                    if not process.stderr() == "":
                        print(RED + process.stderr() + NORMAL)
                    print("")
                self._log(
                    "OUTPUT ENDS ---------------------------------------------------\n"
                )
                sweeper.skip(comb)
                launch_server.kill()
                launch_server.wait()
            else:
                # Waiting for server to end
                launch_server.wait()
                # Getting log files
                distant_path = OUT_FILE_FORMAT.format(
                    len(cores), comb['n_transitions'])
                local_path = distant_path
                EX.Get([server], distant_path).run()
                EX.Local('mv ' + distant_path + ' ' +
                         os.path.join(self.result_dir, local_path)).run()
                # NOTE(review): these two actions are constructed but
                # .run() is never called, so nothing is fetched/removed —
                # confirm whether that is intentional.
                EX.Get([server],
                       'client_*.out',
                       local_location=self.result_dir)
                EX.Remote('rm -f client_*.out', [server])
                self._log(
                    "Finishing experiment with {0} clients and {1} transitions per client"
                    .format(comb['n_clients'], comb['n_transitions']))
                sweeper.done(comb)
            # Continue with the next combination that has the same
            # n_clients (same reservation), or release the job.
            sub_comb = sweeper.get_next(filtr=lambda r: filter(
                lambda s: s["n_clients"] == comb['n_clients'], r))
            self._updateStat(sweeper.stats())
            if not sub_comb:
                # Killing job
                EX5.oar.oardel(job)
                self.__class__._job = None
                break
            else:
                comb = sub_comb
    # Python 2 print statement (blank line at end of run).
    print ""
@enostask()
def backup(env=None):
    """Log the backup step for the roles of the current deployment."""
    LOG.info(f"Running backup on {env['roles']}")


@enostask()
def destroy(env=None):
    """Log the destroy step for the roles of the current deployment."""
    LOG.info(f"Running destroy on {env['roles']}")


# Explore every combination of the experiment parameters, persisting
# sweep state under ./sweeps so an interrupted run can be resumed.
parameters = {"param1": [1, 4], "param2": ["a", "b"]}
sweeps = sweep(parameters)
sweeper = ParamSweeper(
    persistence_dir=str(Path("sweeps")),
    sweeps=sweeps,
    save_sweeps=True,
)
while True:
    parameter = sweeper.get_next()
    if not parameter:
        break
    try:
        # Deploy, run the bench for this combination, then archive results.
        deploy()
        bench(parameter)
        backup()
        sweeper.done(parameter)
    except Exception:
        # A failed combination is recorded as skipped, not fatal.
        traceback.print_exc()
        sweeper.skip(parameter)
    finally:
        # Always tear the deployment down before the next combination.
        destroy()