def _copy_and_aggregate_other_cluster(job, reference_digests, aggregation=False, output=None):
    """
    Merge dirs, both for aggregation and for a normal merge.

    :param job: (Job) job structure
    :param reference_digests: digests of reference
    :param aggregation: (boolean) whether the data should be aggregated
    :param output: (Queue) optional queue that receives the result when run in a thread
    :return: (list) list of clusters that contain the data
    """
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    if not aggregation:
        new_clusters = mergeDirs_reexecution([job.output_path])
        # save progress of the job
        update_json_file(filename, 1)
    else:
        new_clusters = aggregationMergeDirs(job, reference_digests)
        # save progress of the job
        update_json_file(filename, 4)

    if output is not None:
        output.put(new_clusters)
        return

    return new_clusters
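# The progress steps above are persisted with update_json_file, which is defined
# elsewhere in the repo. The sketch below only illustrates the assumed behaviour
# (record the current step in job_progress_log.json); it is not the real helper.
def _update_json_file_sketch(filename, step):
    progress = {"step": step}
    try:
        with open(filename, 'r') as f:
            progress = json.load(f)
            progress["step"] = step
    except (IOError, ValueError):
        # no previous progress file, start a new one
        pass
    with open(filename, 'w') as f:
        json.dump(progress, f, indent=2)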
def run_verification(job_output, aggregation):
    """
    Check the digests of the set of jobs that have run.

    :param job_output: (list) list of outputs of the jobs (json)
    :param aggregation: (boolean) is it the aggregation phase?
    :return: (tuple) result of the validation (True|False) and the selected digest
    """
    result, selected_digest = parse_digests(job_output)

    if settings.medusa_settings.faults_left > 0:
        # while there are faults left to inject, force the verification to fail
        result = False
        settings.medusa_settings.faults_left -= 1

    if result:
        filename = settings.get_temp_dir() + "/job_progress_log.json"
        step = 3 if aggregation == 0 else 6
        update_json_file(filename, step)
        # reset the flag used when it is necessary to execute in another cloud
        save_reexecute_another_cloud(False)
        return result, selected_digest

    return False, None
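# run_verification delegates the comparison to parse_digests, defined elsewhere.
# A minimal sketch of the assumed idea, majority voting over the digests reported
# by each cluster; the entry structure and field names are assumptions:
def _parse_digests_sketch(job_output):
    votes = {}
    for entry in job_output:
        digest = json.dumps(entry.get("digests"), sort_keys=True)
        votes[digest] = votes.get(digest, 0) + 1
    digest, count = max(votes.items(), key=lambda kv: kv[1])
    if count > len(job_output) / 2:
        # a strict majority of the clusters agrees on the same digest
        return True, json.loads(digest)
    return False, None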
def save_prediction(job_params):
    """
    Save the prediction values of the job into a file.

    :param job_params: (string) data to be saved
    """
    prediction_file = "%s/prediction.json" % settings.get_temp_dir()
    write_data(prediction_file, job_params)
def read_remote_job_data():
    """
    Read job data.

    :return: output of the command
    """
    command = lcat(settings.get_temp_dir() + "/" + "job_log.json")
    output = medusa.execution.local_execute_command(command)
    return output
def writeJobRunning(job_output):
    """
    Append job execution results to a file.

    :param job_output: (string) job output data
    """
    path = settings.get_temp_dir() + "/job_log.json"
    job_remote_dataset = json.loads(read_data_oneline(path))
    job_remote_dataset["data"].append(json.loads(job_output))
    write_data(path, json.dumps(job_remote_dataset, indent=2))
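# writeJobRunning assumes job_log.json already holds a JSON object with a "data"
# list on a single line. An illustrative payload (not taken from the repo):
# {"data": [{"job": "wordcount", "digest": "e3b0c442..."}]}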
def load_penalization():
    """
    Load penalization values.

    :return: penalization data read from the file
    """
    prediction_file = "%s/penalization.json" % settings.get_temp_dir()
    data = read_data(prediction_file)[0]
    print "----"
    print data
    print "----"
    return data
def load_prediction():
    """
    Load the prediction values of the job from the file.

    :return: prediction data read from the file
    """
    prediction_file = "%s/prediction.json" % settings.get_temp_dir()
    data = read_data(prediction_file)[0]
    return data
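# Example round trip of save_prediction/load_prediction; the payload shape is an
# assumption for illustration, the real parameters come from the prediction step:
# save_prediction(json.dumps({"job_id": "j1", "predicted_runtime": 42.0}))
# params = json.loads(load_prediction())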
def read_remote_network_data(from_cluster, to_cluster, packet_size):
    """
    Read network data.

    :param from_cluster: (string) from cluster
    :param to_cluster: (string) to cluster
    :param packet_size: (string) size of the packet
    :return: output of the command
    """
    command = lcat(settings.get_temp_dir() + "/" + from_cluster + "-" +
                   to_cluster + "-" + packet_size + ".json")
    output = local_execute_command(command)
    return output
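# Both read_remote_job_data and read_remote_network_data build their command with
# lcat, which is defined elsewhere in the repo. The assumed behaviour is simply to
# produce a local `cat` of the given path; a minimal sketch, not the real helper:
def _lcat_sketch(path):
    # hypothetical stand-in for the real lcat helper
    return "cat %s" % path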
def test_update_json(self):
    """ Update the step of an existing job entry in job_log.json """
    path = settings.get_temp_dir() + "/job_log.json"
    content = read_data(path)
    content = ''.join(content)
    content = content.replace("\n", "")
    content = content.strip()
    content = json.loads(content)
    gid = "3"
    for x in content["jobs"]["job"]:
        if x["gid"] == gid:
            x["step"] = "20"
    write_data(path, json.dumps(content))
def test_add_entry_json(self):
    """ Update an existing job entry and append a new one to job_log.json """
    path = settings.get_temp_dir() + "/job_log.json"
    content = read_data(path)
    content = ''.join(content)
    content = content.replace("\n", "")
    content = content.strip()
    content = json.loads(content)
    gid = "40"
    for x in content["jobs"]["job"]:
        if x["gid"] == gid:
            x["step"] = "20"
    content["jobs"]["job"].append({"gid": "40", "command": "test", "step": "1"})
    write_data(path, json.dumps(content))
def run_verification(job_output):
    """
    Check the digests of the set of jobs that have run.

    :param job_output: (list) list of outputs of the jobs (json)
    :return: (tuple) result of the validation (True|False) and the selected digest
    """
    result, selected_digest = parse_digests(job_output)

    if settings.medusa_settings.faults_left > 0:
        result = False
        settings.medusa_settings.faults_left -= 1

    if result:
        filename = settings.get_temp_dir() + "/job_progress_log.json"
        step = 3
        update_json_file(filename, step)
        return result, selected_digest

    return False, None
def run_execution_threads(faults, jobs, aggregation, reference_digests):
    """
    Execute jobs in parallel, one thread per job.

    :param faults: (int) number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests: digests of reference
    :return: (list) matrix with the selected digests of the verified jobs
    """
    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []

    # Setup a list of threads that we want to run
    output = mp.Queue()
    processes = [
        Thread(target=_copy_and_aggregate, args=(job, reference_digests, aggregation, output))
        for job in jobs
    ]

    # Run and wait for the copy/aggregation threads
    [p.start() for p in processes]
    [p.join() for p in processes]

    # Get the results from the output queue
    list_clusters = [output.get() for _ in processes]

    for clusters_to_launch_job, job in zip(list_clusters, jobs):
        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(
            Thread(target=run_job, args=(execution_parameters, output,)))

    # Run the job threads
    [p.start() for p in processes]
    [p.join() for p in processes]

    logging.info("Run_job took %s" % str(time.time() - seffective_job_runtime))

    job_output_list = []
    _output_list = [output.get() for _ in processes]
    _job_output = [_output for _output in _output_list[0]]
    for _output in _job_output:
        job_output_list.append(parse_data(_output))

    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list, aggregation)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch the job in the same cloud
                path_to_remove = os.path.dirname(execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters, path_to_remove)
            else:
                logging.debug("Re-launching job %s" % execution_parameters.command)
                save_reexecute_another_cloud(True)
                execution_parameters = _relaunch_job_other_cluster(
                    execution_parameters, jobs, reference_digests, aggregation)
                _job_output = run_job(execution_parameters)
                for _output in _job_output:
                    job_output_list.append(parse_data(_output))
        else:
            digests_matrix.append(digests)
            break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    # The total time that it took to execute all jobs
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
def run_execution_serial(faults, jobs, aggregation, reference_digests):
    """
    Execute jobs in serial.

    :param faults: (int) number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests: (RefDigests) digests of reference
    :return: list with the result of the selected digest. Ex:
             (True, {u'/aggregate-output/part-r-00000': u'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'})
    """
    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []
    for job in jobs:
        clusters_to_launch_job = _copy_and_aggregate(job, reference_digests, aggregation)
        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    digests_matrix = []
    for execution_parameters in job_args:
        _job_output_list = []
        while True:
            # run the job in the set of clusters
            _job_output = run_job(execution_parameters)
            for _output in _job_output:
                _job_output_list.append(parse_data(_output))

            successful, digests = run_verification(_job_output_list, aggregation)
            if not successful:
                if medusa_settings.relaunch_job_same_cluster:
                    # relaunch the job in the same cloud
                    path_to_remove = os.path.dirname(execution_parameters.output_path)
                    _relaunch_job_same_cluster(execution_parameters, path_to_remove)
                else:
                    logging.debug("Re-launching job %s" % execution_parameters.command)
                    save_reexecute_another_cloud(True)
                    execution_parameters = _relaunch_job_other_cluster(
                        execution_parameters, jobs, reference_digests, aggregation)
            else:
                digests_matrix.append(digests)
                break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    # The total time that it took to execute all jobs
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
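# Typical call site for the serial path; the Job fields and values below are
# assumptions for illustration only, not taken from the repo:
# jobs = [Job(id=1, command="hadoop jar wordcount.jar ...", output_path="/wordcount-output")]
# digests_matrix = run_execution_serial(faults=1, jobs=jobs, aggregation=False,
#                                       reference_digests=reference_digests)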
def save_penalization(penalization_values):
    """
    Save penalization values.

    :param penalization_values: (string) data to be saved
    """
    prediction_file = "%s/penalization.json" % settings.get_temp_dir()
    with open(prediction_file, 'w') as the_file:
        the_file.write(penalization_values)
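# Example pairing with load_penalization above; the payload is illustrative only:
# save_penalization(json.dumps({"cluster-a": 0.3, "cluster-b": 0.0}))
# penalization = json.loads(load_penalization())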
def run_execution_threads(faults, jobs):
    """
    Execute jobs in parallel, one thread per job.

    :param faults: (int) number of faults to tolerate
    :param jobs: (list) list of Job structures
    :return: (list) matrix with the selected digests of the verified jobs
    """
    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    # Setup a queue that collects the results of the job threads
    output = mp.Queue()

    clusters_to_launch_job = pick_up_clusters(0)

    job_args = []
    for job in jobs:
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(Thread(target=run_job, args=(execution_parameters, output,)))

    # Run the job threads
    [p.start() for p in processes]
    [p.join() for p in processes]

    _output_list = output.get()
    logging.info("Run_job took %s" % str(time.time() - seffective_job_runtime))

    spart = time.time()
    _job_output = []
    for _output in _output_list:
        _job_output += _output

    job_output_list = [parse_data(_joutput) for _joutput in _job_output]
    logging.info("Parse_data took %s" % str(time.time() - spart))

    srverification = time.time()
    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch the job in the same cloud
                path_to_remove = os.path.dirname(execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters, path_to_remove)
            else:
                logging.debug("Re-launching job %s" % execution_parameters.command)
                execution_parameters = _relaunch_job_other_cluster(execution_parameters, jobs)
                _job_output = run_job(execution_parameters)
                for _output in _job_output:
                    job_output_list.append(parse_data(_output[0]))
        else:
            digests_matrix.append(digests)
            break

    logging.info("Run_verification took %s" % str(time.time() - srverification))

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    # The total time that it took to execute all jobs
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix