def test_is_it_ready_to_process_delta(self):
    """A DeltaExperimentDefinition is ready to process only once all of
    its subtraces have completed simulation."""
    ed = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    ed.create_table(self._db)
    t1 = ExperimentDefinition()
    id1 = t1.store(self._db)
    t2 = ExperimentDefinition()
    id2 = t2.store(self._db)
    t3 = DeltaExperimentDefinition(subtraces=[id1, id2])
    t3.store(self._db)
    self.assertFalse(
        t3.is_it_ready_to_process(self._db), "The subtraces"
        " are still pending, it should not be possible to"
        " process it.")
    t1.mark_simulation_done(self._db)
    self.assertFalse(
        t3.is_it_ready_to_process(self._db), "One subtrace"
        " is still pending, it should not be possible to"
        " process it.")
    t2.mark_simulation_done(self._db)
    # Fix: corrected typo "genreated" -> "generated" in the failure
    # message.
    self.assertTrue(
        t3.is_it_ready_to_process(self._db), "Subtraces "
        "are generated, t3, should be ready to run.")
def test_is_it_ready_to_process(self):
    """A GroupExperimentDefinition becomes processable only after every
    subtrace has been both simulated and analyzed."""
    table_owner = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    table_owner.create_table(self._db)
    sub_one = ExperimentDefinition()
    sub_one_id = sub_one.store(self._db)
    sub_two = ExperimentDefinition()
    sub_two_id = sub_two.store(self._db)
    group = GroupExperimentDefinition(subtraces=[sub_one_id, sub_two_id])
    group.store(self._db)
    self.assertFalse(
        group.is_it_ready_to_process(self._db), "The subtraces"
        " are still pending, it should not be possible to"
        " process it.")
    sub_one.mark_simulation_done(self._db)
    self.assertFalse(
        group.is_it_ready_to_process(self._db), "One subtrace"
        " is still pending, it should not be possible to"
        " process it.")
    sub_two.mark_simulation_done(self._db)
    # Simulation alone is not enough for a grouped experiment.
    self.assertFalse(
        group.is_it_ready_to_process(self._db), "Subtraces "
        "have to be analyzed for this the grouped to be "
        "ready")
    sub_one.mark_analysis_done(self._db)
    sub_two.mark_analysis_done(self._db)
    self.assertTrue(group.is_it_ready_to_process(self._db), "Subtraces "
                    "are analyzed. It should be ready")
def test_store_load(self):
    """Stores a fully-populated ExperimentDefinition, loads it back and
    verifies every persisted field plus the derived experiment-set and
    name identifier strings.
    """
    # Definition with every field set to a distinctive, non-default
    # value so any store/load mismatch is detectable.
    ed_old = ExperimentDefinition(seed="seeeed",
                                  machine="machine",
                                  trace_type="double",
                                  manifest_list=[{
                                      "share": 0.2,
                                      "manifest": "man1.json"
                                  }, {
                                      "share": 0.8,
                                      "manifest": "man2.json"
                                  }],
                                  workflow_policy="period",
                                  workflow_period_s=20,
                                  workflow_share=30.0,
                                  workflow_handling="manifest",
                                  subtraces=[100002, 10003],
                                  preload_time_s=3600 * 24 * 3,
                                  workload_duration_s=3600 * 24 * 8,
                                  work_state="fresher",
                                  analysis_state="1",
                                  overload_target=2.0,
                                  conf_file="my.conf")
    ed = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    ed.create_table(self._db)
    trace_id = ed_old.store(self._db)
    ed.load(self._db, trace_id)
    # The derived identifier strings encode most of the configuration.
    self.assertEqual(
        ed._experiment_set, "machine-double-m[0.2|man1.json,"
        "0.8|man2.json]-period-p20-%30.0-manifest-"
        "t[100002,10003]"
        "-3d-8d-O2.0-my.conf")
    self.assertEqual(
        ed._name, "machine-double-m[0.2|man1.json,"
        "0.8|man2.json]"
        "-period-p20-%30.0-manifest-t[100002,10003]-3d-8d-O2.0"
        "-my.conf-s[seeeed]")
    # Every stored field must round-trip unchanged.
    self.assertEqual(ed._seed, "seeeed")
    self.assertEqual(ed._machine, "machine")
    self.assertEqual(ed._trace_type, "double")
    self.assertEqual(ed._manifest_list, [
        dict(share=0.2, manifest="man1.json"),
        dict(share=0.8, manifest="man2.json")
    ])
    self.assertEqual(ed._workflow_policy, "period")
    self.assertEqual(ed._workflow_period_s, 20)
    self.assertEqual(ed._workflow_share, 30.0)
    self.assertEqual(ed._workflow_handling, "manifest")
    self.assertEqual(ed._subtraces, [100002, 10003])
    self.assertEqual(ed._preload_time_s, 3 * 24 * 3600)
    self.assertEqual(ed._workload_duration_s, 8 * 24 * 3600)
    self.assertEqual(ed._work_state, "fresher")
    self.assertEqual(ed._analysis_state, "1")
    self.assertEqual(ed._table_name, "experiment")
    self.assertEqual(ed._overload_target, 2.0)
    self.assertEqual(ed._conf_file, "my.conf")
def test_mark_simulation_failed(self):
    """mark_simulation_failed must set the failed work state and stamp
    the simulation end time close to 'now'."""
    definition = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    definition.create_table(self._db)
    stored_id = definition.store(self._db)
    definition.mark_simulation_failed(self._db)
    check_time = datetime.datetime.now()
    reloaded = ExperimentDefinition()
    reloaded.load(self._db, stored_id)
    self.assertEqual(reloaded._work_state, "simulation_failed")
    # End timestamp should be within ten days of "now" (generous bound).
    self.assertLess(check_time - reloaded._simulating_end,
                    datetime.timedelta(10))
def test_load_next_ready_for_pass_error(self):
    """load_next_ready_for_pass must skip the first candidate group
    (whose second trace uses "multi" instead of "single") and select
    the second, well-formed one."""
    ed = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    ed.create_table(self._db)
    # First candidate group: malformed (manifest/multi/multi) plus a
    # fresh filler definition; all of these should be skipped.
    first_group = [ExperimentDefinition() for _ in range(3)]
    for definition, handling in zip(first_group,
                                    ["manifest", "multi", "multi"]):
        definition._workflow_handling = handling
        definition._work_state = "analysis_done"
    filler = ExperimentDefinition()
    target_trace_id = first_group[0].store(self._db)
    first_group[1].store(self._db)
    first_group[2].store(self._db)
    filler.store(self._db)
    # Second candidate group: well formed (manifest/single/multi).
    second_group = [ExperimentDefinition() for _ in range(3)]
    for definition, handling in zip(second_group,
                                    ["manifest", "single", "multi"]):
        definition._workflow_handling = handling
        definition._work_state = "analysis_done"
    target_trace_id_b = second_group[0].store(self._db)
    second_group[1].store(self._db)
    second_group[2].store(self._db)
    ed.load_next_ready_for_pass(self._db)
    self.assertEqual(target_trace_id_b, ed._trace_id)
def test_get_fresh(self):
    """load_fresh must hand out pending experiments in trace_id order."""
    table_owner = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    table_owner.create_table(self._db)
    table_owner.store(self._db)
    ExperimentDefinition().store(self._db)
    # Each load_fresh call claims the next pending experiment.
    for expected_id in (1, 2):
        fresh = ExperimentDefinition()
        fresh.load_fresh(self._db)
        self.assertEqual(fresh._trace_id, expected_id)
def test_get_file_names(self):
    """The trace, qos and users file names must share one base name and
    differ only in their extension."""
    ed = ExperimentDefinition(seed="seeeed",
                              machine="machine",
                              trace_type="double",
                              manifest_list=[{
                                  "share": 0.2,
                                  "manifest": "man1.json"
                              }, {
                                  "share": 0.8,
                                  "manifest": "man2.json"
                              }],
                              workflow_policy="period",
                              workflow_period_s=20,
                              workflow_share=30.0,
                              workflow_handling="manifest",
                              subtraces=[100002, 10003],
                              preload_time_s=3600 * 24 * 3,
                              workload_duration_s=3600 * 24 * 8,
                              work_state="fresher",
                              analysis_state="1")
    expected_base = ("machine-double-m0.2man1.json"
                     "0.8man2.json"
                     "-period-p20-30.0-manifest-t10000210003-3d-8d-O0.0"
                     "-sseeeed")
    self.assertEqual(ed.get_trace_file_name(), expected_base + ".trace")
    self.assertEqual(ed.get_qos_file_name(), expected_base + ".qos")
    self.assertEqual(ed.get_users_file_name(), expected_base + ".users")
def extract_usage(db_obj, trace_id_rows, fill_none=True, factor=1.0,
                  mean=False):
    """Map a grid of trace ids onto a grid of utilization results.

    Args:
    - db_obj: DBManager object connected to a db where the results will
        be pulled from.
    - trace_id_rows: list of lists of trace ids.
    - fill_none: accepted for API symmetry with the other extract_*
        helpers; currently unused here.
    - factor: multiplier applied to every result via apply_factor.
    - mean: if True, pull the "usage_mean" result instead of "usage".

    Returns: a list of lists with the same shape as trace_id_rows; each
    element is a utilization result object. Experiments whose analysis
    is not done yield results zeroed out explicitly.
    """
    res_type = "usage_mean" if mean else "usage"
    trace_template = ResultTrace()
    exp_rows = []
    for id_row in trace_id_rows:
        result_row = []
        exp_rows.append(result_row)
        for trace_id in id_row:
            definition = ExperimentDefinition()
            definition.load(db_obj, trace_id)
            result = trace_template._get_utilization_result()
            if definition.is_analysis_done():
                result.load(db_obj, trace_id, res_type)
            else:
                # No analysis yet: report explicit zeros.
                result._set("utilization", 0)
                result._set("waste", 0)
                result._set("corrected_utilization", 0)
            result.apply_factor(factor)
            result_row.append(result)
    return exp_rows
def test_get_corrected_start_times(self):
    """Jobs whose start time is 0 but that have ended must get a start
    time derived from their end time and expected runtime."""
    self._create_tables()
    source_trace = ResultTrace()
    source_trace._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "a3"],
        "cpus_req": [48, 96, 96],
        "cpus_alloc": [48, 96, 96],
        "job_name": [
            "wf_synthLongWide.json-1_S0",
            "wf_synthLongWide.json-1_S1_dS0",
            "wf_synthLongWide.json-2_S1_dS0"
        ],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 3],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 4],
        "partition": ["partition1", "partition2", "partition2"],
        "priority": [99, 199, 210],
        "state": [3, 3, 3],
        "timelimit": [100, 200, 300],
        "time_submit": [3000, 3003, 3004],
        "time_start": [0, 20000, 0],
        "time_end": [20000, 25000, 30000]
    }
    trace_id = 1
    source_trace.store_trace(self._db, trace_id)
    corrector = StartTimeCorrector()
    corrector._experiment = ExperimentDefinition()
    corrector._experiment._trace_id = trace_id
    corrector._trace = ResultTrace()
    corrector._trace.load_trace(self._db, trace_id)
    corrected = corrector.get_corrected_start_times("multi")
    # Jobs 1 and 3 had time_start == 0; job 2 is untouched.
    self.assertEqual(corrected, {1: 20000 - 14340, 3: 30000 - 3540})
def test_load_trace(self):
    """AnalysisRunnerSingle.load_trace must return the same start and
    submit lists that were stored for the trace."""
    definition = ExperimentDefinition()
    definition._trace_id = 1
    runner = AnalysisRunnerSingle(definition)
    loaded = runner.load_trace(self._db)
    self.assertEqual(self._rt._lists_start, loaded._lists_start)
    self.assertEqual(self._rt._lists_submit, loaded._lists_submit)
def test_generate_trace_files(self):
    """Generates the trace/qos/users files and sanity-checks the trace
    content: submit-time span and number of workflows."""
    ExperimentRunner.configure("tmp/trace_folder", "tmp", True, "myhost",
                               "myUser", drain_time=0)
    self.assertEqual(ExperimentRunner._trace_folder, "tmp/trace_folder")
    self.assertEqual(ExperimentRunner._trace_generation_folder, "tmp")
    self.assertEqual(ExperimentRunner._local, True)
    ed = ExperimentDefinition(seed="seeeed",
                              machine="edison",
                              trace_type="single",
                              manifest_list=[{
                                  "share": 1.0,
                                  "manifest": "manifestSim.json"
                              }],
                              workflow_policy="period",
                              workflow_period_s=5,
                              workflow_handling="single",
                              preload_time_s=20,
                              start_date=datetime(2016, 1, 1),
                              workload_duration_s=400)
    er = ExperimentRunner(ed)
    er._generate_trace_files(ed)
    self.assertTrue(
        os.path.exists("tmp/edison-single-m1.0manifestSim.json"
                       "-period-p5-0.0-single-t-0d-0d-O0.0"
                       "-sseeeed.trace"))
    self.assertTrue(
        os.path.exists("tmp/edison-single-m1.0manifestSim.json"
                       "-period-p5-0.0-single-t-0d-0d-O0.0"
                       "-sseeeed.qos"))
    self.assertTrue(
        os.path.exists("tmp/edison-single-m1.0manifestSim.json"
                       "-period-p5-0.0-single-t-0d-0d-O0.0"
                       "-sseeeed.users"))
    records = trace_gen.extract_records(
        file_name="tmp/edison-single-m1.0manifestSim.json"
        "-period-p5-0.0-single-t-0d-0d-O0.0"
        "-sseeeed.trace",
        list_trace_location="../bin/list_trace")
    man_count = 0
    self.assertGreater(
        int(records[-1]["SUBMIT"]) - int(records[0]["SUBMIT"]), 320)
    self.assertLess(
        int(records[-1]["SUBMIT"]) - int(records[0]["SUBMIT"]), 1500)
    for rec in records:
        if rec["WF"].split("-")[0] == "manifestSim.json":
            man_count += 1
    # Fix: the failure messages now match the asserted bounds (64 and
    # 104); previously both messages claimed "at least 80".
    self.assertGreaterEqual(
        man_count, 64, "There should be at least 64"
        " workflows in the "
        "trace, found: {0}".format(man_count))
    self.assertLessEqual(
        man_count, 104, "There should be at most 104"
        " workflows in the "
        "trace, found: {0}".format(man_count))
def test_run_simulation(self):
    """End-to-end smoke test: create a trace file, run the simulation on
    the worker, stop it and verify it completed."""
    ExperimentRunner.configure(trace_folder="/tmp/",
                               trace_generation_folder="tmp",
                               local=False,
                               run_hostname=self._vm_ip,
                               run_user=None,
                               scheduler_conf_dir="/scsf/slurm_conf",
                               local_conf_dir="configs/",
                               scheduler_folder="/scsf/",
                               drain_time=100)
    ensureDir("tmp")
    definition = ExperimentDefinition(seed="seeeed",
                                      machine="edison",
                                      trace_type="single",
                                      manifest_list=[{
                                          "share": 1.0,
                                          "manifest": "manifestSim.json"
                                      }],
                                      workflow_policy="period",
                                      workflow_period_s=5,
                                      workflow_handling="single",
                                      preload_time_s=60,
                                      start_date=datetime(2016, 1, 1),
                                      workload_duration_s=3600)
    runner = ExperimentRunner(definition)
    runner.create_trace_file()
    runner._run_simulation()
    runner.stop_simulation()
    self.assertTrue(runner.is_simulation_done())
def test_place_trace_files_remote_and_clean(self):
    """Generates trace files, places the trace and users files into the
    destination folders, then verifies clean_trace_file removes them.
    """
    ExperimentRunner.configure("/tmp/tests/tmp/dest",
                               "/tmp/tests/tmp/orig",
                               True,
                               "locahost",
                               None,
                               scheduler_folder="/tmp/tests/tmp/sched",
                               scheduler_conf_dir="/tmp/tests/tmp/conf",
                               manifest_folder="manifests")
    self.assertEqual(ExperimentRunner._trace_folder, "/tmp/tests/tmp/dest")
    self.assertEqual(ExperimentRunner._trace_generation_folder,
                     "/tmp/tests/tmp/orig")
    self.assertEqual(ExperimentRunner._local, True)
    # The runner expects all four working folders to exist up-front.
    ensureDir("/tmp/tests/tmp/dest")
    ensureDir("/tmp/tests/tmp/orig")
    ensureDir("/tmp/tests/tmp/sched")
    ensureDir("/tmp/tests/tmp/conf")
    ed = ExperimentDefinition(seed="seeeed",
                              machine="edison",
                              trace_type="single",
                              manifest_list=[{
                                  "share": 1.0,
                                  "manifest": "manifestSim.json"
                              }],
                              workflow_policy="period",
                              workflow_period_s=5,
                              workflow_handling="single",
                              preload_time_s=20,
                              start_date=datetime(2016, 1, 1),
                              workload_duration_s=41,
                              overload_target=1.1)
    er = ExperimentRunner(ed)
    filenames = er._generate_trace_files(ed)
    # Place the trace (filenames[0]) and users (filenames[2]) files.
    er._place_trace_file(filenames[0])
    er._place_users_file(filenames[2])
    self.assertTrue(
        os.path.exists(
            "/tmp/tests/tmp/dest/edison-single-m1.0manifestSim.json"
            "-period-p5-0.0-single-t-0d-0d-O1.1"
            "-sseeeed.trace"))
    self.assertTrue(os.path.exists("/tmp/tests/tmp/conf/users.sim"))
    # Placing moves (not copies) the trace out of the generation folder.
    self.assertFalse(
        os.path.exists(
            "/tmp/tests/tmp/orig/edison-single-m1.0manifestSim.json"
            "-period-p5-0.0-single-t-0d-0d-O1.1"
            "-sseeeed.trace"))
    er.clean_trace_file()
    # After cleaning, neither placed file should remain.
    self.assertFalse(
        os.path.exists(
            "/tmp/tests/tmp/dest/edison-single-m1.0manifestSim.json"
            "-period-p5-0.0-single-t-0d-0d-O1.1"
            "-sseeeed.trace"))
    self.assertFalse(
        os.path.exists(
            "/tmp/tests/tmp/dest/edison-single-m1.0manifestSim.json"
            "-period-p5-0.0-single-t-0d-0d-O1.1"
            "-sseeeed.users"))
def test_sustained_levels(self):
    """The generated workload must sustain the configured 1.5 overload
    pressure across the whole trace."""
    definition = ExperimentDefinition(seed="AAAAA",
                                      machine="edison",
                                      trace_type="single",
                                      manifest_list=[],
                                      workflow_policy="no",
                                      workflow_period_s=0,
                                      workflow_handling="single",
                                      preload_time_s=3600 * 4,
                                      workload_duration_s=3600 * 1,
                                      overload_target=1.5)
    ExperimentRunner.configure("tmp/trace_folder", "tmp", True, "myhost",
                               "myUser")
    generator = MyTraceGen()
    machine = definition.get_machine()
    runner = ExperimentRunner(definition)
    runner._generate_trace_files(definition, trace_generator=generator)
    acc_cores, period = generator.check_pressure(
        machine.get_total_cores(), 3600, 1.5, self, 1.0)
    total_pressure = float(acc_cores) / float(
        period * machine.get_total_cores())
    print(total_pressure)
    self.assertAlmostEqual(total_pressure, 1.5, delta=0.01)
    self.assertLess(total_pressure, 1.8)
def rescue_exp(self, central_db_obj, sched_db_obj, trace_id=None):
    """Retrieves the job trace from the database of an experiment worker
    and stores it in the central db.

    Args:
    - central_db_obj: DB object configured to access the analysis
        database.
    - sched_db_obj: DB object configured to access the slurm database of
        an experiment worker.
    - trace_id: trace_id of the experiment to which the rescued trace
        corresponds. If set, only that experiment is rescued.
    """
    keep_going = True
    while keep_going:
        definition = ExperimentDefinition()
        if trace_id:
            definition.load(central_db_obj, trace_id)
            definition.mark_simulation_done(central_db_obj)
        else:
            # Claim the next failed experiment, flipping it to done.
            keep_going = definition.load_next_state("simulation_failed",
                                                    "simulation_done")
        if keep_going:
            print(("About to run resque({0}):{1}".format(
                definition._trace_id, definition._name)))
            runner = ExperimentRunner(definition)
            if runner.check_trace_and_store(sched_db_obj, central_db_obj):
                runner.clean_trace_file()
                print(("Exp({0}) Done".format(definition._trace_id)))
            else:
                print(("Exp({0}) Error!".format(definition._trace_id)))
        if trace_id:
            break
def test_generate_trace_files_special(self):
    """An sp-sat-* policy must generate exactly eight jobs in two bursts
    with fixed size, duration and wallclock limit."""
    ExperimentRunner.configure("tmp/trace_folder", "tmp", True, "myhost",
                               "myUser")
    definition = ExperimentDefinition(
        seed="AAAA",
        machine="edison",
        trace_type="single",
        manifest_list=[],
        workflow_policy="sp-sat-p2-c24-r36000-t4-b100",
        workflow_period_s=0,
        workflow_handling="single",
        preload_time_s=0,
        start_date=datetime(2016, 1, 1),
        workload_duration_s=120,
        overload_target=1.2)
    runner = ExperimentRunner(definition)
    runner._generate_trace_files(definition)
    trace_file_route = "tmp/{0}".format(definition.get_trace_file_name())
    self.assertTrue(os.path.exists(trace_file_route))
    records = trace_gen.extract_records(
        file_name=trace_file_route,
        list_trace_location="../bin/list_trace")
    self.assertEqual(len(records), 8)
    # Two bursts of four jobs, two seconds apart, 100s between bursts.
    first_submit = int(records[0]["SUBMIT"])
    expected_submits = [
        first_submit + offset
        for offset in (0, 2, 4, 6, 100, 102, 104, 106)
    ]
    for (record, expected_submit) in zip(records, expected_submits):
        self.assertEqual(int(record["SUBMIT"]), expected_submit)
        self.assertEqual(
            int(record["NUM_TASKS"]) * int(record["CORES_PER_TASK"]), 24)
        self.assertEqual(int(record["DURATION"]), 36000)
        self.assertEqual(int(record["WCLIMIT"]), 601)
def do_work(self, central_db_obj, sched_db_obj, trace_id=None):
    """Run pending experiments until none are left (or run just one).

    Args:
    - central_db_obj: DB object configured to access the analysis
        database.
    - sched_db_obj: DB object configured to access the slurm database of
        an experiment worker.
    - trace_id: If set to an experiment valid trace_id, it runs only the
        experiment identified by trace_id.
    """
    keep_going = True
    while keep_going:
        definition = ExperimentDefinition()
        if trace_id:
            definition.load(central_db_obj, trace_id)
            definition.mark_pre_simulating(central_db_obj)
        else:
            # Claim the next fresh experiment, if any.
            keep_going = definition.load_fresh(central_db_obj)
        if keep_going:
            print(("About to run exp({0}):{1}".format(
                definition._trace_id, definition._name)))
            runner = ExperimentRunner(definition)
            if runner.do_full_run(sched_db_obj, central_db_obj):
                print(("Exp({0}) Done".format(definition._trace_id)))
            else:
                print(("Exp({0}) Error!".format(definition._trace_id)))
        if trace_id:
            break
def test_reset_simulating_time(self):
    """reset_simulating_time must null out both simulation timestamps."""
    definition = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    definition.create_table(self._db)
    stored_id = definition.store(self._db)
    definition.update_simulating_start(self._db)
    definition.update_simulating_end(self._db)
    reloaded = ExperimentDefinition()
    reloaded.load(self._db, stored_id)
    # Both timestamps were just set, so neither may be None.
    self.assertNotEqual(reloaded._simulating_end, None)
    self.assertNotEqual(reloaded._simulating_start, None)
    definition.reset_simulating_time(self._db)
    reloaded.load(self._db, stored_id)
    self.assertEqual(reloaded._simulating_end, None)
    self.assertEqual(reloaded._simulating_start, None)
def produce_plot_config(db_obj, trace_id_rows_colors):
    """Produce the color and hatch matrices for a matrix-style plot.

    Connects to a database and, depending on the workflow handling used
    by each experiment, chooses a corresponding color and hatch.

    Args:
    - db_obj: DBManager object connected to a db where the results will
        be pulled from.
    - trace_id_rows_colors: list of lists of integers as trace_ids of
        experiments.

    Returns:
    - color_rows: list of lists of matplotlib colors corresponding to
        each experiment subplot.
    - hatches_rows: list of lists of the hatches to be used in each
        experiment subplot.
    - legend: legend list of the format ("series names", "color"),
        listing the scheduling algorithms present in the experiments.
    """
    colors_dic = {
        "no": "white",
        "manifest": "lightgreen",
        "single": "lightblue",
        "multi": "pink",
        "": "white"
    }
    hatches_dic = {
        "no": None,
        "manifest": "-",
        "single": "\\",
        "multi": "/",
        "": None
    }
    detected_handling = {}
    color_rows = []
    hatches_rows = []
    for id_row in trace_id_rows_colors:
        row_colors = []
        row_hatches = []
        color_rows.append(row_colors)
        hatches_rows.append(row_hatches)
        for trace_id in id_row:
            definition = ExperimentDefinition()
            definition.load(db_obj, trace_id)
            handling = definition.get_true_workflow_handling()
            detected_handling[handling] = 1
            row_colors.append(get_dic_val(colors_dic, handling))
            row_hatches.append(get_dic_val(hatches_dic, handling))
    legend = [("n/a", "white", "no", None),
              ("aware", "lightgreen", "manifest", "-"),
              ("waste", "lightblue", "single", "\\"),
              ("wait", "pink", "multi", "/")]
    # Keep only the legend entries whose handling actually appeared.
    new_legend = [item for item in legend
                  if item[2] in detected_handling]
    return color_rows, hatches_rows, new_legend
def test_do_full_analysis(self):
    """Smoke test: a full single-trace analysis run completes without
    raising."""
    definition = ExperimentDefinition()
    definition._trace_id = 1
    definition._start_date = datetime.datetime(1969, 1, 1)
    definition._workload_duration_s = 365 * 24 * 3600
    definition._preload_time_s = 0
    AnalysisRunnerSingle(definition).do_full_analysis(self._db)
def _create_tables(self):
    """Create the import, trace and experiment tables, registering a
    cleanup to drop each of them."""
    trace = ResultTrace()
    for table_name in ("import_table", "traces", "experiment"):
        self.addCleanup(self._del_table, table_name)
    trace.create_import_table(self._db, "import_table")
    trace.create_trace_table(self._db, "traces")
    ExperimentDefinition().create_table(self._db)
def test_get_fresh_concurrent(self):
    """load_fresh must never hand the same experiment to two concurrent
    readers: ids claimed here must be disjoint from the ids claimed by
    the fresh_reader.py subprocess."""
    ed = ExperimentDefinition()
    self.addCleanup(self._del_table, "experiment")
    ed.create_table(self._db)
    for _ in range(200):
        ed.store(self._db)
    if os.path.exists("./out.file"):
        os.remove("./out.file")
    # Fix: both file handles are now managed by context managers (the
    # read handle was previously never closed), and we wait() for the
    # subprocess so its output is fully flushed before reading.
    with open("./out.file", "w") as out:
        reader_proc = subprocess.Popen(["python", "./fresh_reader.py"],
                                       stdout=out)
        ids = []
        there_are_more = True
        while there_are_more:
            ed_f = ExperimentDefinition()
            there_are_more = ed_f.load_fresh(self._db)
            if there_are_more:
                ids.append(ed_f._trace_id)
        time.sleep(5)
        reader_proc.wait()
        out.flush()
    other_ids = []
    with open("./out.file", "r") as out:
        for line in out.readlines():
            if "END2" in line:
                print("")
                text_list = line.split("END2: [")[1]
                text_list = text_list.split("]")[0]
                other_ids = [int(x) for x in text_list.split(",")]
    self.assertGreater(len(ids), 0)
    self.assertGreater(len(other_ids), 0)
    # Fix: renamed loop variable "id" (shadowed the builtin).
    for claimed_id in ids:
        self.assertNotIn(claimed_id, other_ids)
    print(("IDs", ids, other_ids))
def extract_grouped_results(db_obj, trace_id_rows_colors, edges,
                            result_type):
    """Takes a list of lists of trace_ids and produces a dictionary of
    lists of lists of results corresponding to them.

    Args:
    - db_obj: DBManager object connected to a db where the results will
        be pulled from.
    - trace_id_rows_colors: list of lists of integers as trace_ids of
        experiments.
    - edges: if set to [""], it has no effect and the function extracts
        results of the type result_type. If set to a list of items,
        results will be pulled for each element as:
        "g" + str(edge) + "_" + str(result_type).
    - result_type: string identifying which type of result we are
        pulling. It corresponds to the type of the NumericStats stored
        in db_obj.

    Returns: a dictionary indexed by edges. Each element is a list of
    lists with the same dimensions as trace_id_rows_colors, each element
    a NumericStats object corresponding to the result of that component.
    """
    # Fix: removed the large unreachable block that previously followed
    # the return statement (dead code left by an earlier refactor).
    return {
        edge: extract_results(
            db_obj, trace_id_rows_colors,
            ResultTrace.get_result_type_edge(edge, result_type))
        for edge in edges
    }
def correct_times(self, db_obj, trace_id):
    """Load an experiment and its trace, compute corrected start times
    for jobs that ended with a zero start time, and write them back."""
    self._experiment = ExperimentDefinition()
    self._experiment.load(db_obj, trace_id)
    self._trace = ResultTrace()
    print("Loading trace {0}".format(trace_id))
    self._trace.load_trace(db_obj, trace_id)
    # The correction strategy depends on the workflow handling mode.
    handling = self._experiment._workflow_handling
    print(
        "Calculating corrected start times for trace {0}".format(trace_id))
    corrected_start_times = self.get_corrected_start_times(handling)
    print(("Found {0} jobs which start time was 0, but had ended.".format(
        len(corrected_start_times))))
    print("About to update times")
    self.apply_new_times(db_obj, corrected_start_times)
def compare_traces_jobs(db_obj, t1, t2, num_jobs, workflows=False):
    """Compare the first num_jobs jobs of two traces.

    Args:
    - db_obj: DB object to pull the job lists from.
    - t1, t2: trace_ids of the two experiments to compare.
    - num_jobs: number of leading jobs to compare.
    - workflows: if True, compare workflow jobs instead of regular jobs.

    Returns: False when both traces have identical leading jobs despite
    using different seeds (a suspicious duplicate); True otherwise.
    """
    if workflows:
        jobs1 = get_workflow_jobs(db_obj, t1, num_jobs)
        jobs2 = get_workflow_jobs(db_obj, t2, num_jobs)
    else:
        jobs1 = get_jobs(db_obj, t1, num_jobs)
        jobs2 = get_jobs(db_obj, t2, num_jobs)
    exp1 = ExperimentDefinition()
    exp1.load(db_obj, t1)
    exp2 = ExperimentDefinition()
    exp2.load(db_obj, t2)
    different = False
    for (job1, job2) in zip(jobs1, jobs2):
        if job1 != job2:
            different = True
            break
    if not different and exp1._seed != exp2._seed:
        # Fix: the message used format index {3} with only three
        # arguments, which raised IndexError; num_jobs is argument {2}.
        print(
            "Exps({0},{1}) have the exact same first {2} jobs with different"
            " seeds.".format(t1, t2, num_jobs))
        return False
    return True
def test_get_fresh_pending(self):
    """load_pending must only pick grouped experiments once all of their
    subtraces reached analysis_done, smallest group first."""
    self.addCleanup(self._del_table, "experiment")
    ExperimentDefinition().create_table(self._db)
    sub_one = ExperimentDefinition(
        start_date=datetime.datetime(2019, 1, 1))
    sub_one_id = sub_one.store(self._db)
    sub_two = ExperimentDefinition()
    sub_two_id = sub_two.store(self._db)
    # Group over both subtraces.
    group_both = GroupExperimentDefinition(machine="kkk")
    group_both.add_sub_trace(sub_one_id)
    group_both.add_sub_trace(sub_two_id)
    group_both.store(self._db)
    # Group over only the first subtrace.
    group_single = GroupExperimentDefinition()
    print(group_single._subtraces)
    group_single.add_sub_trace(sub_one_id)
    group_single.store(self._db)
    candidate = GroupExperimentDefinition()
    self.assertTrue(candidate.load_pending(self._db))
    self.assertNotEqual(candidate._work_state, "pre_analyzing")
    # Once sub_one is analyzed, only group_single becomes ready.
    sub_one.upate_state(self._db, "analysis_done")
    self.assertTrue(candidate.load_pending(self._db))
    self.assertEqual(candidate._work_state, "pre_analyzing")
    self.assertEqual(candidate._trace_id, group_single._trace_id)
    candidate = GroupExperimentDefinition()
    self.assertTrue(candidate.load_pending(self._db))
    # Once sub_two is analyzed as well, group_both becomes ready too.
    sub_two.upate_state(self._db, "analysis_done")
    self.assertTrue(candidate.load_pending(self._db))
    self.assertEqual(candidate._work_state, "pre_analyzing")
    self.assertEqual(candidate._trace_id, group_both._trace_id)
def test_generate_trace_files_overload(self):
    """For several seeds, a trace generated with overload_target=1.2
    must request between 1.1x and 1.5x the machine's core-seconds
    capacity over the workload duration.
    """
    for seed_string in [
            "seeeed", "asdsa", "asdasdasd", "asdasdasdas",
            "asdasdlkjlkjl", "eworiuwioejrewk", "asjdlkasdlas"
    ]:
        ExperimentRunner.configure("tmp/trace_folder", "tmp", True,
                                   "myhost", "myUser")
        self.assertEqual(ExperimentRunner._trace_folder,
                         "tmp/trace_folder")
        self.assertEqual(ExperimentRunner._trace_generation_folder,
                         "tmp")
        self.assertEqual(ExperimentRunner._local, True)
        workload_duration = 4 * 3600
        m = Edison2015()
        total_cores = m.get_total_cores()
        ed = ExperimentDefinition(seed=seed_string,
                                  machine="edison",
                                  trace_type="single",
                                  manifest_list=[],
                                  workflow_policy="no",
                                  workflow_period_s=0,
                                  workflow_handling="single",
                                  preload_time_s=0,
                                  start_date=datetime(2016, 1, 1),
                                  workload_duration_s=workload_duration,
                                  overload_target=1.2)
        er = ExperimentRunner(ed)
        er._generate_trace_files(ed)
        trace_file_route = ("tmp/{0}".format(ed.get_trace_file_name()))
        self.assertTrue(os.path.exists(trace_file_route))
        records = trace_gen.extract_records(
            file_name=trace_file_route,
            list_trace_location="../bin/list_trace")
        # Accumulate requested core-seconds over every job in the trace.
        acc_core_hours = 0
        for rec in records:
            acc_core_hours += (int(rec["NUM_TASKS"]) *
                               int(rec["CORES_PER_TASK"]) *
                               int(rec["DURATION"]))
        print("pressure Index:",
              (float(acc_core_hours) /
               float(total_cores * workload_duration)))
        # Target is 1.2; the 1.1-1.5 window allows for the generator's
        # randomness across seeds.
        self.assertGreater(acc_core_hours,
                           1.1 * total_cores * workload_duration)
        self.assertLess(acc_core_hours,
                        1.5 * total_cores * workload_duration)
def do_full_analysis(self, db_obj):
    """Runs the grouped analysis: concatenates all subtraces, fills the
    job and workflow values per subtrace window, then stores the
    aggregate results under this definition's trace_id and marks the
    analysis as done.

    Args:
    - db_obj: DB object configured to access the analysis database.
    """
    result_trace = self.load_trace(db_obj)
    first = True
    last = False
    for trace_id in self._definition._subtraces:
        # "last" triggers the final grouped-core-seconds computation on
        # the closing subtrace.
        last = trace_id == self._definition._subtraces[-1]
        result_trace.load_trace(db_obj, trace_id)
        # append=not first: accumulate results across subtraces instead
        # of overwriting them.
        result_trace.do_workflow_pre_processing(append=not first)
        one_definition = ExperimentDefinition()
        one_definition.load(db_obj, trace_id)
        # Each subtrace is evaluated only within its own epoch window.
        result_trace.fill_job_values(
            start=one_definition.get_start_epoch(),
            stop=one_definition.get_end_epoch(),
            append=not first)
        result_trace.fill_workflow_values(
            start=one_definition.get_start_epoch(),
            stop=one_definition.get_end_epoch(),
            append=not first)
        result_trace.calculate_job_results_grouped_core_seconds(
            one_definition.get_machine().get_core_seconds_edges(),
            last,
            db_obj,
            self._definition._trace_id,
            start=one_definition.get_start_epoch(),
            stop=one_definition.get_end_epoch(),
            append=not first)
        first = False
    # Store all aggregate results under the group's own trace_id.
    result_trace.calculate_and_store_job_results(
        store=True, db_obj=db_obj, trace_id=self._definition._trace_id)
    result_trace._wf_extractor.calculate_and_store_overall_results(
        store=True, db_obj=db_obj, trace_id=self._definition._trace_id)
    result_trace._wf_extractor.calculate_and_store_per_manifest_results(
        store=True, db_obj=db_obj, trace_id=self._definition._trace_id)
    result_trace.calculate_utilization_median_result(
        self._definition._subtraces,
        store=True,
        db_obj=db_obj,
        trace_id=self._definition._trace_id)
    result_trace.calculate_utilization_mean_result(
        self._definition._subtraces,
        store=True,
        db_obj=db_obj,
        trace_id=self._definition._trace_id)
    self._definition.mark_analysis_done(db_obj)
def test_correct_times(self):
    """correct_times must rewrite zero start times in the target trace
    while leaving other traces untouched."""
    self._create_tables()
    experiment = ExperimentDefinition(workflow_handling="manifest")
    trace_id = experiment.store(self._db)
    source_trace = ResultTrace()
    source_trace._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "a3"],
        "cpus_req": [48, 96, 96],
        "cpus_alloc": [48, 96, 96],
        "job_name": [
            "wf_synthLongWide.json-1_S0",
            "wf_synthLongWide.json-1_S1_dS0",
            "wf_synthLongWide.json-2"
        ],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 3],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 4],
        "partition": ["partition1", "partition2", "partition2"],
        "priority": [99, 199, 210],
        "state": [3, 3, 3],
        "timelimit": [100, 200, 300],
        "time_submit": [3000, 3003, 3004],
        "time_start": [0, 20000, 0],
        "time_end": [20000, 25000, 30000]
    }
    # Store the same trace twice: only the first one gets corrected.
    source_trace.store_trace(self._db, trace_id)
    source_trace.store_trace(self._db, trace_id + 1)
    corrector = StartTimeCorrector()
    corrector.correct_times(self._db, trace_id)
    corrected_trace = ResultTrace()
    corrected_trace.load_trace(self._db, trace_id)
    self.assertEqual(corrected_trace._lists_submit["time_start"],
                     [20000 - 14340, 20000, 30000])
    untouched_trace = ResultTrace()
    untouched_trace.load_trace(self._db, trace_id + 1)
    self.assertEqual(untouched_trace._lists_submit["time_start"],
                     [0, 20000, 0])
def extract_results(db_obj, trace_id_rows_colors, result_type,
                    factor=None, fill_none=True, second_pass=False):
    """Map a grid of trace ids onto a grid of NumericStats results.

    Args:
    - db_obj: DBManager object connected to a db where the results will
        be pulled from.
    - trace_id_rows_colors: list of lists of integers as trace_ids of
        experiments.
    - result_type: string identifying which type of result we are
        pulling. It corresponds to the type of the NumericStats stored
        in db_obj.
    - factor: if set, multiplier applied to every loaded result.
    - fill_none: if True, results with a None median are replaced by a
        zeroed NumericStats object.
    - second_pass: forwarded to is_analysis_done to check the second
        analysis pass instead of the first.

    Returns: a list of lists with the same dimensions as
    trace_id_rows_colors, each element a NumericStats object
    corresponding to the result of that component.
    """
    exp_rows = []
    for id_row in trace_id_rows_colors:
        result_row = []
        exp_rows.append(result_row)
        for trace_id in id_row:
            definition = ExperimentDefinition()
            definition.load(db_obj, trace_id)
            if definition.is_analysis_done(second_pass=second_pass):
                result = NumericStats()
                result.load(db_obj, trace_id, result_type + "_stats")
                if factor:
                    result.apply_factor(factor)
            else:
                # Analysis pending: substitute an all-zero result.
                result = NumericStats()
                result.calculate([0, 0, 0])
            if fill_none and result._get("median") is None:
                # Empty result sets also degrade to an all-zero result.
                result = NumericStats()
                result.calculate([0, 0, 0])
            result_row.append(result)
    return exp_rows