def _ReadStoredLog():
  """Read the cached Cassandra Mutant log for the current experiment.

  Returns the log lines as a list of stripped strings, or None when no
  experiment start time is set or no stored (plain or 7z) log file exists.
  """
  if Conf.ExpStartTime() is None:
    return None
  dn = "%s/work/mutant/misc/logs/cassandra" % os.path.expanduser("~")
  fn = "%s/system-%s" % (dn, Conf.ExpStartTime())
  if not os.path.isfile(fn):
    # If there is a 7z file, uncompress it
    fn_7z = "%s.7z" % fn
    if os.path.isfile(fn_7z):
      with Cons.MT("Found a 7z file. Uncompressing"):
        Util.RunSubp("7z e -o%s %s" % (dn, fn_7z))
    else:
      return None
  with Cons.MT("Reading the stored Cassandra Mutant log file %s" % fn, print_time=False):
    lines = []
    with open(fn) as fo:
      for line in fo.readlines():
        lines.append(line.strip())
        # Stop after reading n lines for testing
        if 0 < Conf.MaxCassLogLines():
          if Conf.MaxCassLogLines() < len(lines):
            break
    #Cons.P(len(lines))
    return lines
def Get1minAvgFn():
  """Build (or reuse) the 1-min-average memory usage comparison data file.

  Merges per-minute average memory usage of the two experiments named in
  Conf.Get(0) and Conf.Get(1) into one gnuplot-ready file; returns its name.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Extract the experiment datetime from the log path, e.g. .../180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    # Cached from a previous run
    return fn_out
  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        # i * 30: offsets the second experiment's records — presumably to
        # interleave the two series in the plot; confirm against _RecordMemAvg.
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        # Repeat the header every 40 rows for readability
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def main(argv):
  """Entry point: parse arguments, initialize the simulated clock and the
  result directory, then generate the plots."""
  Conf.ParseArgs()
  sim_time_begin = Conf.Get("simulation_time_begin")
  SimTime.Init(sim_time_begin)
  Util.MkDirs(Conf.dn_result)
  Plot.Plot()
def _GetFnCpuOverhead():
  """Build (or reuse) the hourly CPU overhead comparison data file.

  Compares hourly CPU stats of the unmodified DB (u) against the DB with
  SSTable access monitoring and placement computation (c); returns the
  output filename.
  """
  fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  dn_base = Conf.GetDir("dn_base")
  fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
  fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))
  hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
  hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
  #Cons.P(hour_cpustat_0)
  #Cons.P(hour_cpustat_1)
  with open(fn_out, "w") as fo:
    fo.write("# u: unmodified\n")
    fo.write("# c: with SSTable access monitoring and SSTable placement computation\n")
    fo.write("#\n")
    fmt = "%2d" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
    fo.write(Util.BuildHeader(fmt, "hour" \
        " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
        " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
        ) + "\n")
    # Assumes both runs cover the same set of hours; raises KeyError otherwise.
    for h, s0 in sorted(hour_cpustat_0.iteritems()):
      s1 = hour_cpustat_1[h]
      fo.write((fmt + "\n") % (h
        , s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max
        , s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetHourlyFn():
  """Build (or reuse) the hourly CPU usage comparison data file for plotting.

  Writes per-hour CPU stats of experiment 0 and experiment 1 side by side
  and returns the output filename.
  """
  fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for cpu usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
    hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
    #Cons.P(hour_cpustat_0)
    #Cons.P(hour_cpustat_1)
    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # Assumes both runs cover the same set of hours; raises KeyError otherwise.
      for h, s0 in sorted(hour_cpustat_0.iteritems()):
        s1 = hour_cpustat_1[h]
        fo.write((fmt + "\n") % (h
          , s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max
          , s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  """Build (or reuse) the 1-min-average CPU usage comparison data file.

  Merges per-minute average CPU usage of the two experiments named in
  Conf.Get(0) and Conf.Get(1) into one gnuplot-ready file; returns its name.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # computation/180126-142513/ycsb/180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    # Cached from a previous run
    return fn_out
  with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_cpu = _GetHmCpu(fn_ycsb_log)
      for hm, cpu in hm_cpu.iteritems():
        # i * 30: offsets the second experiment's records — presumably to
        # interleave the two series in the plot; confirm against _RecordCpuAvg.
        records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        # Repeat the header every 40 rows for readability
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def main(argv):
  """Generate reduced-size USA maps at several clustering thresholds and
  plot them all in one pdf with gnuplot."""
  Conf.ParseArgs()
  Util.MkDirs(Conf.GetDir("output_dir"))
  # Squared-distance thresholds used to filter near-duplicate points
  dist_sq_thresholds = [0, 0.008, 0.01, 0.02, 0.05]
  #dist_sq_thresholds = [0.02]
  dist_sq_thresholds_str = []
  for d in dist_sq_thresholds:
    dist_sq_thresholds_str.append(_NoTrailing0s(d))
  reduced_files = []
  reduced_file_sizes = []
  with Cons.MT("Generating reduced size usa map ..."):
    for d in dist_sq_thresholds_str:
      fn_co_loc = "filter-out-almost-duplicate-points/.output/usa-map-smallsize-%s" % d
      reduced_files.append(fn_co_loc)
      if not os.path.exists(fn_co_loc):
        # Build and run the point-filtering tool to produce the reduced map
        cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
        Util.RunSubp(cmd)
      reduced_file_sizes.append(os.path.getsize(fn_co_loc))
  dn_out = "%s/.output" % os.path.dirname(__file__)
  fn_out = "%s/usa-map.pdf" % dn_out
  with Cons.MT("Plotting ..."):
    # Inputs are handed to gnuplot through environment variables
    env = os.environ.copy()
    env["IN_FNS"] = " ".join(reduced_files)
    env["IN_FN_SIZES"] = " ".join(str(s) for s in reduced_file_sizes)
    env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/usa-map.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def main(argv):
  """Plot per-storage-device results by time, then the cost-vs-latency summary."""
  Util.MkDirs(Conf.GetOutDir())
  exp_set_id = "171013-134330"
  #exp_set_id = "171022-160102"
  conf_exp_set = Conf.Get(exp_set_id)
  if True:
    parallel_processing = True
    if parallel_processing:
      params = []
      for stg_dev, v in conf_exp_set.iteritems():
        params.append((exp_set_id, stg_dev, v))
      # Plot each storage device in its own process
      p = multiprocessing.Pool()
      p.map(PlotByTime, params)
    else:
      for stg_dev, v in conf_exp_set.iteritems():
        PlotByTime((exp_set_id, stg_dev, v))
  # Plot (cost vs latency) by storage devices
  # Latency in avg and tail latencies
  #
  # The goal:
  #   to show there are limited options
  #   and show the baseline performances.
  #
  # Finish this and show that this was not a fair comparison.
  if True:
    PlotCostLatency(exp_set_id)
def GetNumAccessesStat():
  """Build (or reuse) the CDF stat file of YouTube accesses per CO.

  Each data line in the input has the form "co_id lat lon n" and is followed
  by n per-access lines, which are skipped. Returns the stat filename.
  """
  fn_out = "%s/cdf-youtube-accesses-per-co" % Conf.DnOut()
  if os.path.exists(fn_out):
    return fn_out
  num_accesses = []
  fn_in = Conf.GetFn("video_accesses_by_COs")
  with open(fn_in) as fo:
    while True:
      line = fo.readline()
      if len(line) == 0:
        # EOF
        break
      line = line.strip()
      if len(line) == 0:
        continue
      if line[0] == "#":
        continue
      # 4 34.3305 -111.091 13
      t = line.split(" ")
      if len(t) != 4:
        raise RuntimeError("Unexpected: [%s]" % line)
      n = int(t[3])
      #Cons.P(n)
      num_accesses.append(n)
      # Skip the n per-access lines that follow; EOF here means the file is truncated
      for j in range(n):
        if len(fo.readline()) == 0:
          raise RuntimeError("Unexpected")
  r = Stat.Gen(num_accesses, fn_out)
  #Cons.P(r)
  return fn_out
def GenDataFilesForGnuplot():
  """Generate RocksDB metric-by-time files for the baseline and Mutant runs.

  Returns (fn_metrics_by_time_baseline, fn_metrics_by_time_mutant,
  fn_compaction_migration_stat).
  """
  dn_base = Conf.GetDir("dn_base")
  # Analyze the number of compactions and migrations with
  # (a) an unmodified DB as a baseline
  # and (b) Mutant
  log_readers = []
  for db_type in ["unmodified_db", "io_overhead"]:
    fn_ycsb = "%s/%s" % (dn_base, Conf.Get(db_type))
    # Path shape: .../(job_id)/ycsb/(exp_dt)..., e.g. .../180126-142513/ycsb/180126-193525.769-d
    mo = re.match(r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+", fn_ycsb)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")
    #Cons.P(dn_log)
    #Cons.P(job_id)
    #Cons.P(exp_dt)
    dn_log_job = "%s/%s" % (dn_log, job_id)
    log_readers.append(RocksdbLogReader(dn_log_job, exp_dt))
  fn_metrics_by_time_0 = log_readers[0].FnMetricByTime()
  fn_metrics_by_time_1 = log_readers[1].FnMetricByTime()
  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time_0, fn_metrics_by_time_1)
  return (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr)
def PlotCompareTwo(): (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot() #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn() fn_cpu_1min_avg = CompareCpu.Get1minAvgFn() fn_mem_stat_by_time = CompareMem.GetHourlyFn() fn_mem_1min_avg = CompareMem.Get1minAvgFn() #time_max = "09:00:00" #time_max = "08:00:00" time_max = "07:50:00" exp_dts = [] for i in range(2): mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i)) exp_dts.append(mo.group("exp_dt")) fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(), "-".join(exp_dts)) with Cons.MT("Plotting ..."): env = os.environ.copy() env["TIME_MAX"] = str(time_max) #env["CPU_STAT"] = fn_cpu_stat_by_time env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg #env["MEM_STAT"] = fn_mem_stat_by_time env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg env["ROCKSDB0"] = fns_rocksdb[0] env["ROCKSDB1"] = fns_rocksdb[1] env["OUT_FN"] = fn_out Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def SstInfo():
  """Build (or reuse) the SSTable info-by-time-by-levels data file; return its name."""
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()
  fn = "%s/sst-info-by-time-by-levels-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn):
    return fn
  # memt_lives is unused here; only the SSTable lives are written out
  (sst_lives, memt_lives) = MemtSstLife.Get()
  with Cons.MT("Generating Sst info by time by levels data file ..."):
    #with open(fn_m, "w") as fo:
    #  fo.write("%s\n" % Memt.Header())
    #  for addr, l in sorted(_memt_lives.iteritems()):
    #    fo.write("%s\n" % l)
    #Cons.P("Created %s %d" % (fn_m, os.path.getsize(fn_m)))
    with open(fn, "w") as fo:
      fo.write("%s\n" % MemtSstLife.SstLife.Header())
      # One line per SSTable, ordered by generation number
      for sst_gen, l in sorted(sst_lives.iteritems()):
        fo.write("%s\n" % l)
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GenDataFileForGnuplot():
  """Build (or reuse) the gnuplot data file parsed from the dstat csv.

  Relies on _Parse() to populate the module-level _header_idx and _body_rows.
  Returns the data filename.
  """
  dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
  Util.MkDirs(dn)
  fn = "%s/data" % dn
  if os.path.isfile(fn):
    return fn
  with Cons.MT("Generating data file for plot ..."):
    _Parse()
    # Column formats must line up with the sorted _header_idx keys below
    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn, "w") as fo:
      i = 0
      for r in _body_rows:
        # Repeat the header every 50 rows for readability
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def PlotSstAccfreqByAgeIndividual():
  """Plot one access-frequency-by-age pdf per SSTable with gnuplot."""
  with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
    dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn_out)
    # Inputs are handed to gnuplot through environment variables
    env = os.environ.copy()
    sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles()
    for sl in sst_lives:
      env["IN_FN"] = "%s/%s/sst-age-accfreq-data/%d" \
          % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"), sl.Id())
      env["LEVEL"] = str(sl.Level())
      # Every SSTable is expected to have a deletion timestamp by now
      if sl.TsDeleted() is None:
        raise RuntimeError("Unexpected")
      env["AGE_DELETED"] = str(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
      out_fn = "%s/L%d-%d.pdf" % (dn_out, sl.Level(), sl.Id())
      env["OUT_FN"] = out_fn
      # Time each gnuplot invocation for the progress report below
      start_time = time.time()
      Util.RunSubp("gnuplot %s/sst-accfreq-by-age-individual.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
      dur = time.time() - start_time
      Cons.P("Created %s %d in %.0f ms" % (out_fn, os.path.getsize(out_fn), dur * 1000.0))
def main(argv):
  """Entry point: load config, create the result directory, initialize the
  simulated clock, then generate the plots."""
  Conf.ParseArgs()
  result_dn = Conf.Get("dn_result")
  Util.MkDirs(result_dn)
  SimTime.Init(Conf.Get("simulation_time_begin"))
  Plot.Plot()
def SstLevelInfo():
  """Build (or reuse) the level-separator data file for the by-level plot.

  Writes, per level, the y-coordinate for the level label (midpoint between
  separators) and the lower separator y-coordinate. Returns the filename.
  """
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()
  fn = "%s/sst-info-by-time-by-levels-level-seps-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn):
    return fn
  sst_y_cord_level_sep_highs = SstYCord.LevelSepHigh()
  with Cons.MT("Generating Sst info by time by levels: level separators data file ..."):
    with open(fn, "w") as fo:
      fmt = "%1d %10d %10s"
      fo.write("%s\n" % Util.BuildHeader(fmt, "level level_mid_for_labels level_low_for_separators"))
      lh_prev = 0
      for l, lh in sorted(sst_y_cord_level_sep_highs.iteritems()):
        # Label sits midway between this separator and the previous one.
        # NOTE: this is integer division under Python 2 when both operands are ints.
        lm = (lh + lh_prev) / 2
        fo.write((fmt + "\n") % (l, lm, lh_prev))
        lh_prev = lh
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GetTemporalDist():
  """Return (data_fn, weekly_max) for the weekly temporal access distribution.

  The plot-data file is generated by the helper shell script unless it is
  already cached on disk.
  """
  fn_in = Conf.GetFn("youtube_workload")
  fn_out = "%s/%s-temporal-dist-weekly" % (Conf.DnOut(), os.path.basename(fn_in))
  if not os.path.isfile(fn_out):
    # The script writes its output to fn_out
    Util.RunSubp("%s/_gen-plot-data.sh --youtube_workload=%s --out_fn=%s"
        % (os.path.dirname(__file__), fn_in, fn_out))
  return (fn_out, _GetWeeklyMax(fn_out))
def SstHeatAtLastTime():
  """Build (or reuse) the per-SSTable heat snapshot near the experiment end.

  Samples each SSTable's heat at the last of num_times evenly spaced instants
  and writes (sst_gen, level, heat) sorted hottest-first. Returns the filename.
  """
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()
  fn_hlt = "%s/sst-heat-last-time-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn_hlt):
    return fn_hlt
  sst_lives = MemtSstLife.GetSstLives()
  with Cons.MT("Generating Sst heats at the last time ..."):
    # Gather heat info at n different times
    num_times = Conf.heatmap_by_time_num_times
    if Conf.ExpFinishTime() is None:
      MemtSstLife.SetExpEndTimeFromSstLives()
    # Earliest SSTable open time across all lives
    min_sst_opened = None
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      min_sst_opened = sl.Opened() if min_sst_opened is None else min(min_sst_opened, sl.Opened())
    # Start time is when the first Sstable is opened, not the experiment start
    # time, when no SSTable exists yet.
    #   Exp start time:          160927-143257.395
    #   First Sstable open time: 160927-143411.273
    st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
    et = datetime.datetime.strptime(Conf.ExpFinishTime(), "%y%m%d-%H%M%S.%f")
    dur = (et - st).total_seconds()
    sstgen_heat = []
    # NOTE(review): time_offset_in_sec is not defined in this function —
    # presumably a module-level constant; confirm it exists at module scope.
    t = st + datetime.timedelta(seconds=(float(dur) * (num_times - 1) / num_times + time_offset_in_sec))
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      h = sl.HeatAtTime(t)
      if h is None:
        continue
      sstgen_heat.append((sst_gen, h))
    # Hottest first
    sstgen_heat.sort(key=lambda sh: sh[1], reverse=True)
    # Note: Don't bother with the width proportional to the tablet size for now
    fmt = "%4d %1d %8.3f"
    with open(fn_hlt, "w") as fo:
      # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
      fo.write("%s\n" % Util.BuildHeader(fmt, "sst_gen level heat"))
      for sh in sstgen_heat:
        sst_gen = sh[0]
        heat = sh[1]
        fo.write((fmt + "\n") % (sst_gen, sst_lives[sst_gen].level, heat))
    Cons.P("Created %s %d" % (fn_hlt, os.path.getsize(fn_hlt)))
  return fn_hlt
def LoadSstLivesFromPlotDataFiles():
  """Load cached SstLife objects from the two plot data files, if both exist.

  Populates and returns the module-level _sst_lives dict {sst_gen: SstLife},
  or None when either data file is missing. Subsequent calls return the
  already-loaded dict.
  """
  global _sst_lives
  if _sst_lives is not None:
    return _sst_lives
  fn_sst_info_by_time = "%s/sst-info-by-time-by-levels-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if not os.path.isfile(fn_sst_info_by_time):
    return None
  fn_sst_heat_by_time = "%s/sst-heat-by-time-by-levels-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if not os.path.isfile(fn_sst_heat_by_time):
    return None
  # This is not needed for the by-levels-with-heat plot
  #   "%s/sst-info-by-time-by-levels-%s" % (Conf.dn_result, Conf.ExpStartTime())
  with Cons.MT("Loading Sst lives from (%s, %s) ..." % (fn_sst_info_by_time, fn_sst_heat_by_time)):
    _sst_lives = {}
    with open(fn_sst_info_by_time) as fo:
      for line in fo.readlines():
        if len(line) == 0:
          continue
        if line[0] == "#":
          # Header/comment line
          continue
        line = line.strip()
        #Cons.P(line)
        t = re.split(r" +", line)
        sst_gen = int(t[0])
        sl = SstLife(sst_gen)
        sl.SetInfoFromPlotData(t)
        _sst_lives[sst_gen] = sl
    with open(fn_sst_heat_by_time) as fo:
      # Group the heat lines by sst_gen before handing them to each SstLife
      sstgen_lines_tokens = {}
      for line in fo.readlines():
        if len(line) == 0:
          continue
        if line[0] == "#":
          continue
        line = line.strip()
        if len(line) == 0:
          continue
        t = re.split(r" +", line)
        if len(t) == 0:
          continue
        sst_gen = int(t[0])
        if sst_gen not in sstgen_lines_tokens:
          sstgen_lines_tokens[sst_gen] = []
        sstgen_lines_tokens[sst_gen].append(t)
      for sstgen, lt in sstgen_lines_tokens.iteritems():
        _sst_lives[sstgen].SetHeatFromPlotData(lt)
  return _sst_lives
def Plot():
  """Run each plot type that is enabled in the configuration."""
  plot_jobs = [
      (Conf.PlotSstByTimeByLevelsWithHeat, PlotTabletByTimeByLevelWithHeat),
      (Conf.PlotSstByLevelsWithHeatAtSpecificTimes, PlotSstAccDistAtSpecificTimes),
      (Conf.PlotSstHeatmapByTime, PlotSstHeatmapByTime),
      (Conf.PlotSstHeatAtLastTime, PlotSstHeatAtLastTime),
      ]
  for enabled, plot_fn in plot_jobs:
    if enabled():
      plot_fn()
def _ReadAndCacheCassLog():
  """Read the Cassandra system log back to the last Mutant ResetMon event.

  Scans system.log first; if no ResetMon is found there, keeps reading older
  zipped logs (system.log.1.zip, ...) and prepends their lines. Sets
  Conf.ExpStartTime() from the ResetMon timestamp, caches the collected
  lines to a per-experiment file, and returns them as a list.
  """
  with Cons.MT("Reading Cassandra log ..."):
    lines = []
    found_reset = False
    lines1 = []
    # WARN  [main] 2016-09-20 02:20:39,250 MemSsTableAccessMon.java:115 - Mutant: ResetMon
    pattern = re.compile(r"WARN \[(main|MigrationStage:\d+)\]" \
        " (?P<datetime>\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d,\d\d\d)" \
        " MemSsTableAccessMon.java:\d+ -" \
        " Mutant: (?P<event>ResetMon)")
    # Note: s0 only for now.
    dn = "%s/work/mutant/log/%s/s0/cassandra" % (os.path.expanduser("~"), Util0.JobId())
    fn = "%s/system.log" % dn
    Cons.P("fn=%s" % fn)
    with open(fn) as fo:
      for line in fo.readlines():
        line = line.strip()
        mo = pattern.match(line)
        if mo is not None:
          # A new ResetMon restarts the collection: drop everything before it
          found_reset = True
          Conf.SetExpStartTime(Util0.ShortDateTime(mo.group("datetime")))
          del lines[:]
        lines.append(line)
    # Keep reading zipped files like system.log.1.zip, until ResetMon is found
    i = 1
    while found_reset == False:
      fn = "%s/system.log.%d.zip" % (dn, i)
      Cons.P("ResetMon not found. Reading more from file %s ..." % fn)
      with zipfile.ZipFile(fn, "r") as z:
        for fn1 in z.namelist():
          #Cons.P(fn1)
          for line in z.read(fn1).split("\n"):
            line = line.strip()
            #Cons.P(line)
            mo = pattern.match(line)
            if mo is not None:
              found_reset = True
              Conf.SetExpStartTime(Util0.ShortDateTime(mo.group("datetime")))
              del lines1[:]
            lines1.append(line)
      if len(lines1) != 0:
        # Older log lines go in front of what we already collected
        lines1.extend(lines)
        lines = list(lines1)
        del lines1[:]
      i += 1
    # Cache the collected log so later runs can skip this scan
    fn = "%s/work/mutant/misc/logs/cassandra/system-%s" \
        % (os.path.expanduser("~"), Conf.ExpStartTime())
    with open(fn, "w") as fo:
      for line in lines:
        fo.write("%s\n" % line)
    Cons.P("Created a Cassandra log file %s %d" % (fn, os.path.getsize(fn)))
    return lines
def PlotSstAccfreqByAgeIndividualMultiplot():
  """Plot per-level multiplots of SSTable access frequencies by age.

  For each level (until a level with no SSTables is found), builds one pdf
  containing all of that level's SSTables, passing the data to the gnuplot
  script through environment variables.
  """
  with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
    dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn_out)
    env = os.environ.copy()
    dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    env["IN_DN"] = dn
    # Plot for all levels. Stop when there is no sstable at a level.
    level = 0
    while True:
      env["LEVEL"] = str(level)
      sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
      if len(sst_lives) == 0:
        break
      env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)
      age_deleted = []
      for sl in sst_lives:
        age_deleted.append(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
      env["AGE_DELETED"] = " ".join(str(i) for i in age_deleted)
      # Age deleted max. Round up with an hour granularity.
      age_deleted_max = max(age_deleted)
      age_deleted_max = math.ceil(age_deleted_max / 3600.0) * 3600
      env["AGE_DELETED_MAX"] = str(age_deleted_max)
      # Per-level and per-SSTable maxima of access frequency and temperature
      accfreq_max_all_sst_in_level = 0.0
      temp_max_all_sst_in_level = 0.0
      accfreq_max_list = []
      temp_max_list = []
      for sl in sst_lives:
        accfreq_max = 0.0
        temp_max = 0.0
        for accfreq in sl.AgeAccfreq():
          accfreq_max_all_sst_in_level = max(accfreq_max_all_sst_in_level, accfreq[4])
          temp_max_all_sst_in_level = max(temp_max_all_sst_in_level, accfreq[5])
          accfreq_max = max(accfreq_max, accfreq[4])
          temp_max = max(temp_max, accfreq[5])
        accfreq_max_list.append(accfreq_max)
        temp_max_list.append(temp_max)
      # BUG FIX: this env key used to be "ACCFREQ_MAX_ALL_SST_IN LEVEL" (space
      # instead of underscore), which the gnuplot script cannot reference;
      # it now matches TEMP_MAX_ALL_SST_IN_LEVEL below.
      env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
      env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
      env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
      env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)
      out_fn = "%s/L%d.pdf" % (dn_out, level)
      env["OUT_FN"] = out_fn
      with Cons.MT("Plotting level %d ..." % level):
        Util.RunSubp("gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
        Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
      level += 1
def _Parse():
  """Parse the dstat csv into the module-level _header_idx and _body_rows.

  Idempotent: returns immediately once _parsed has been set by a prior call.
  """
  global _parsed
  if _parsed:
    return
  fn = "%s/dstat/%s.csv" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
  with Cons.MT("Parsing %s ..." % fn):
    header_rows = []
    global _body_rows
    _body_rows = []
    with open(fn, "rb") as f:
      header_detected = False
      reader = csv.reader(f)
      for row in reader:
        # Header rows start with "system" or "time"; everything after them is data
        if (len(row) > 0) and (row[0] in ["system", "time"]):
          header_rows.append(row)
          header_detected = True
        elif header_detected:
          _body_rows.append(BodyRow(row))
      #Cons.P(pprint.pformat(header_rows))
    # Make sure the rows are all the same size
    num_cols = None
    for r in header_rows:
      if num_cols is None:
        num_cols = len(r)
      else:
        if num_cols != len(r):
          raise RuntimeError("Unexpected")
    for r in _body_rows:
      if num_cols != r.NumCols():
        raise RuntimeError("Unexpected")
    # Get column headers. Keys are "group:column"; an empty first-row cell
    # means the column belongs to the previous (carried-over) group.
    global _header_idx
    _header_idx = {}
    header_rows_0_prev = None
    for i in range(num_cols):
      if len(header_rows[0][i]) > 0:
        #Cons.P("%s, %s" % (header_rows[0][i], header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows[0][i].replace(" ", "_"), header_rows[1][i].replace(" ", "_"))] = i
        header_rows_0_prev = header_rows[0][i].replace(" ", "_")
      else:
        #Cons.P("%s, %s" % (header_rows_0_prev, header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows_0_prev.replace(" ", "_"), header_rows[1][i].replace(" ", "_"))] = i
    #Cons.P(pprint.pformat(_header_idx))
    # Sort the data in the header order and convert strings to numbers
    for b in _body_rows:
      b.PrepareData()
  _parsed = True
def slotAddGroove(self):
  """
  Called when the NewGroovePart's add button is clicked.

  Appends the new groove (name, path, tempo) to the GROOVES config entry
  and emits the 'grooveAdded' signal for listeners.
  """
  tempo = self.newGroovePart.tempoComboBox.getValue()
  path = self.newGroovePart.dirPart.getLastSelectedPath()
  # The groove's name is the last path component of the selected directory
  name = os.path.basename(path)
  # GROOVES is a colon-separated list of "name,path,tempo" entries
  grooves = Conf.get('GROOVES')
  grooves = "%s:%s,%s,%i" % (grooves, name, path, tempo)
  Conf.set('GROOVES', grooves)
  self.emit(PYSIGNAL('grooveAdded'), (name, path, tempo))
def GetObjPopDist():
  """Return the filename of the object popularity distribution data.

  Generates it from the YouTube workload file via the helper script when the
  cached output is absent.
  """
  fn_in = Conf.GetFn("youtube_workload")
  fn_out = "%s/%s-obj-pop-dist" % (Conf.DnOut(), os.path.basename(fn_in))
  if not os.path.isfile(fn_out):
    # The script derives the output name from the input name
    Util.RunSubp("%s/_gen-plot-data.sh --youtube_workload=%s" % (os.path.dirname(__file__), fn_in))
  return fn_out
def GetHourlyFn():
  """Build (or reuse) the hourly memory usage comparison data file.

  Writes per-hour memory stats of both experiments side by side, scaled by
  1024^3 (presumably bytes to GB — confirm with _GetMemStatByHour).
  Returns the output filename.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Extract the experiment datetime from the log path
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)
    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # Assumes both runs cover the same set of hours; raises KeyError otherwise
      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        fo.write((fmt + "\n") % (h
          , (float(s0.avg) / 1024 / 1024 / 1024)
          , (float(s0.min) / 1024 / 1024 / 1024)
          , (float(s0._1 ) / 1024 / 1024 / 1024)
          , (float(s0._25) / 1024 / 1024 / 1024)
          , (float(s0._50) / 1024 / 1024 / 1024)
          , (float(s0._75) / 1024 / 1024 / 1024)
          , (float(s0._99) / 1024 / 1024 / 1024)
          , (float(s0.max) / 1024 / 1024 / 1024)
          , (float(s1.avg) / 1024 / 1024 / 1024)
          , (float(s1.min) / 1024 / 1024 / 1024)
          , (float(s1._1 ) / 1024 / 1024 / 1024)
          , (float(s1._25) / 1024 / 1024 / 1024)
          , (float(s1._50) / 1024 / 1024 / 1024)
          , (float(s1._75) / 1024 / 1024 / 1024)
          , (float(s1._99) / 1024 / 1024 / 1024)
          , (float(s1.max) / 1024 / 1024 / 1024)
          ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _Cpu():
  """Plot CPU usage with gnuplot from the dstat-derived data file."""
  with Cons.MT("Plotting cpu ..."):
    out_dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
    out_fn = "%s/cpu.pdf" % out_dn
    # Hand the input and output filenames to gnuplot via the environment
    env = os.environ.copy()
    env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
    env["FN_OUT"] = out_fn
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def GenDataThrpVsLat():
  """Build (or reuse) the throughput-vs-latency data file by storage device.

  Reads YCSB logs for each (storage device, target IOPS) pair configured in
  Conf and writes one row per pair with read/write latency percentiles.
  Returns the output filename.
  """
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")
    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}
    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        # v["time"] has the form "begin-end"
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        # "overloaded" is optional in the config
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(fn, time_begin, time_end, overloaded)
    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
            stgdev
            , tio
            , (1 if ylr.overloaded else 0)
            , ylr.db_iops_stat.avg
            , ylr.r_avg, ylr.r_90, ylr.r_99, ylr.r_999, ylr.r_9999
            , ylr.w_avg, ylr.w_90, ylr.w_99, ylr.w_999, ylr.w_9999
            ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetFnCompareTwo():
  """Generate the time-vs-metrics files for the two experiments being compared.

  Returns ([fn_metrics_exp0, fn_metrics_exp1], fn_compaction_migration_stat).
  """
  dn_base = Conf.GetDir("dn_base")
  # Analyze the number of compactions and migrations with
  # (a) an unmodified DB as a baseline and (b) Mutant
  fn_metrics_by_time = [
      GetFnTimeVsMetrics("%s/%s" % (dn_base, Conf.Get(i))) for i in range(2)]
  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time[0], fn_metrics_by_time[1])
  return (fn_metrics_by_time, fn_rdb_compmigr)
def GetClusteredPoints():
  """Return the file of workload points clustered by the squared-distance
  threshold from Conf, running the clustering script when not cached."""
  dist_sq_threshold = Conf.Get("dist_sq_threshold")
  fn_in = Conf.GetFn("youtube_workload")
  fn_out = "%s/%s-clustered-with-dist-sq-%s" % (Conf.DnOut(), os.path.basename(fn_in), dist_sq_threshold)
  #Cons.P(fn_out)
  if not os.path.isfile(fn_out):
    Util.RunSubp("%s/_cluster.sh --youtube_workload=%s --dist_sq_threshold=%s"
        % (os.path.dirname(__file__), fn_in, dist_sq_threshold))
  return fn_out
def Plot():
  """Plot CloudFront edge and Whole Foods store locations with gnuplot."""
  fn_cf = Conf.Get("cf_locs")
  fn_wf = GetWfLocFile()
  fn_out = "%s/cloudfront-wholefoods-locations.pdf" % Conf.DnOut()
  with Cons.MT("Plotting Whole Foods store locations ..."):
    # Hand the filenames to the gnuplot script via the environment
    env = os.environ.copy()
    env.update({"FN_CF": fn_cf, "FN_WF": fn_wf, "FN_OUT": fn_out})
    Util.RunSubp("gnuplot %s/edge-server-locs.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def main():
  """Launch the simulator, test, and mission processes described by a config file.

  Usage: python run.py <confFile>
  """
  if len(sys.argv) != 2:
    # Typo fixed ("arguement" -> "argument"); exit with a nonzero status so
    # callers can detect the failure (quit() is meant for interactive use).
    print("ERROR: expected 1 command line argument")
    print("ERROR: run as python run.py <confFile>")
    sys.exit(1)
  ents, tests, miss = Conf.readFile(sys.argv[1])
  # Making the simulator process
  simProcess = Process(target=simMain, args=(ents, ))
  simProcess.start()
  # Making the test process
  testProcess = Process(target=runTest, args=(tests, ))
  testProcess.start()
  # This sleep is to make sure the simulator has connected to svr before the
  # mission tries to
  time.sleep(3)
  # Making the mission running process
  missionProcess = Process(target=runMissions, args=(miss, ))
  missionProcess.start()
  # Wait until the mission is done
  missionProcess.join()
  # Wait for the test process to be done
  testProcess.join()
  raw_input("press enter to stop sim")
  # Killing the simulator
  simProcess.terminate()
def start_smr(self, quiet=False):
  """Start the SMR process, creating it lazily; discard its stdout when quiet."""
  self.lazy_create_smr()
  sink = open(os.devnull, "wb") if quiet else None
  self.smr.start(Conf.get_smr_args(self), sout=sink)
def setup():
  """Parse the config file named on the command line and return the result.

  Raises Exception when the argument count is wrong.
  """
  args = sys.argv
  if len(args) == 2:
    return Conf.readFile(args[1])
  raise Exception("Run as: python2.7 sim.py file.conf")
def __init__(self, configpath=os.path.join(os.path.dirname(__file__), "BibleBot.conf")):
  """Load the channel list from the bot configuration file.

  configpath: path to the configuration file; defaults to BibleBot.conf
  located next to this module.
  """
  self.channels = Conf.read_channels_from_configfile(configpath)
  logging.info("Loaded configuration from %s" % configpath)
# Smoke-test script: parse the sample config file and print the parsed result.
import Conf
print Conf.readFile('./test.conf')
def start_be(self, quiet=False):
  """Start the backend process, creating it lazily; discard its stdout when quiet."""
  self.lazy_create_be()
  sink = open(os.devnull, 'wb') if quiet else None
  self.be.start(Conf.get_be_args(self), sout=sink)
def __init__(self):
  """Spawn the client subprocess with piped stdin/stdout for later exchanges."""
  self._proc = subprocess.Popen(Conf.get_client_args(), stdin=subprocess.PIPE, stdout=subprocess.PIPE)
#!/usr/bin/python import Conf import main import sys import argparse if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-a",action="store") cmdArg=parser.parse_args() config=Conf.conf("./conf.info") m=main.mainClass(config) m.makeConnectionToReg(config) m.updateRegionsList(config,"") print m.connToReg['us-west-2'].insts m.CreateActiveInstListOfAllRegions() print "The active list is %s" % str(m.runningInstList) if cmdArg.a == "info" : m.updateRegionsOtherResource(config,"") m.dumpAwsResourceInfo() else : if config.checkIfMultiProcessingIsReq() == False: for i in m.connToReg['us-west-2'].insts[16] : i.actionWithInInst() m.dumpExceptionList("./abc") m.dumpResultSet() else : main.createWorkers(m) m.uploadFinalResultToS3()