def PlotByTime(p): (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(p) #Cons.P("%s\n%s\n%s" % (fn_ycsb, time_max, params1)) params_formatted = str(p).replace("_", "\\\\_").replace( " ", "\\ ").replace("\n", "\\n").replace("{", "\{").replace("}", "\}") #Cons.P(params_formatted) fn_dstat = DstatLog.GenDataFileForGnuplot(p) fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(p) fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(), p.exp_dt) with Cons.MT("Plotting ..."): env = os.environ.copy() env["PARAMS"] = params_formatted env["TIME_MAX"] = str(time_max) env["IN_FN_DSTAT"] = fn_dstat env["IN_FN_YCSB"] = fn_ycsb env["IN_FN_ROCKSDB"] = fn_rocksdb env["OUT_FN"] = fn_out Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GetHourlyFn():
  """Generate (or return the cached) hourly CPU-usage comparison data file.

  Compares two experiments (indices 0 and 1 in Conf) and writes one row per
  hour with avg/min/percentile/max CPU stats for each. Returns the file path.
  """
  fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for cpu usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
    hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
    #Cons.P(hour_cpustat_0)
    #Cons.P(hour_cpustat_1)
    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # NOTE(review): assumes both experiments cover the same set of hours;
      # a missing hour in experiment 1 would raise KeyError.
      for h, s0 in sorted(hour_cpustat_0.iteritems()):
        s1 = hour_cpustat_1[h]
        fo.write((fmt + "\n") % (h,
          s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
          s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  """Generate (or return the cached) 1-minute-average memory usage file.

  Interleaves records from two experiments (Conf indices 0 and 1), sorted by
  timestamp, for side-by-side plotting. Returns the file path.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Extract the experiment datetime, e.g. .../180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        # i * 30: offsets the second experiment's points by 30 sec so the two
        # series don't overlap on the plot — presumably; verify against plot.
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      # Repeat the header every 40 rows for readability.
      for r in records:
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  """Generate (or return the cached) 1-minute-average CPU usage file.

  Interleaves records from two experiments (Conf indices 0 and 1), sorted by
  timestamp, for side-by-side plotting. Returns the file path.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # computation/180126-142513/ycsb/180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_cpu = _GetHmCpu(fn_ycsb_log)
      for hm, cpu in hm_cpu.iteritems():
        # i * 30: offsets the second experiment's points by 30 sec so the two
        # series don't overlap on the plot — presumably; verify against plot.
        records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      # Repeat the header every 40 rows for readability.
      for r in records:
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def main(argv):
  """Entry point: plot per-experiment metrics-by-time, then cost-vs-latency."""
  Util.MkDirs(Conf.GetOutDir())
  # Experiment set to plot; older set kept commented for reference.
  exp_set_id = "171013-134330"
  #exp_set_id = "171022-160102"
  conf_exp_set = Conf.Get(exp_set_id)
  if True:
    parallel_processing = True
    if parallel_processing:
      # Fan out one PlotByTime call per storage device.
      params = []
      for stg_dev, v in conf_exp_set.iteritems():
        params.append((exp_set_id, stg_dev, v))
      p = multiprocessing.Pool()
      p.map(PlotByTime, params)
    else:
      for stg_dev, v in conf_exp_set.iteritems():
        PlotByTime((exp_set_id, stg_dev, v))
  # Plot (cost vs latency) by storage devices
  # Latency in avg and tail latencies
  #
  # The goal:
  #   to show there are limited options
  #   and show the baseline performances.
  #
  # Finish this and show that this was not a fair comparison.
  if True:
    PlotCostLatency(exp_set_id)
def PlotCompareTwo(): (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot() #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn() fn_cpu_1min_avg = CompareCpu.Get1minAvgFn() fn_mem_stat_by_time = CompareMem.GetHourlyFn() fn_mem_1min_avg = CompareMem.Get1minAvgFn() #time_max = "09:00:00" #time_max = "08:00:00" time_max = "07:50:00" exp_dts = [] for i in range(2): mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i)) exp_dts.append(mo.group("exp_dt")) fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(), "-".join(exp_dts)) with Cons.MT("Plotting ..."): env = os.environ.copy() env["TIME_MAX"] = str(time_max) #env["CPU_STAT"] = fn_cpu_stat_by_time env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg #env["MEM_STAT"] = fn_mem_stat_by_time env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg env["ROCKSDB0"] = fns_rocksdb[0] env["ROCKSDB1"] = fns_rocksdb[1] env["OUT_FN"] = fn_out Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _GetFnCpuOverhead():
  """Generate (or return the cached) hourly CPU-overhead comparison file.

  Compares the unmodified DB ("u") against the one with SSTable access
  monitoring and placement computation ("c"). Returns the file path.
  """
  fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  dn_base = Conf.GetDir("dn_base")
  fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
  fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))
  hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
  hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
  #Cons.P(hour_cpustat_0)
  #Cons.P(hour_cpustat_1)
  with open(fn_out, "w") as fo:
    fo.write("# u: unmodified\n")
    fo.write("# c: with SSTable access monitoring and SSTable placement computation\n")
    fo.write("#\n")
    fmt = "%2d" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
    fo.write(Util.BuildHeader(fmt, "hour" \
        " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
        " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
        ) + "\n")
    # NOTE(review): assumes both experiments cover the same set of hours;
    # a missing hour in experiment 1 would raise KeyError.
    for h, s0 in sorted(hour_cpustat_0.iteritems()):
      s1 = hour_cpustat_1[h]
      fo.write((fmt + "\n") % (h,
        s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
        s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _GenDB(fn0, fn1):
  """Build a SQLite DB of SSTable-creation events from two log files.

  fn0: RocksDB log data file; fn1: Mutant log data file. Rebuilds the DB from
  scratch each call and returns an open sqlite3 connection (caller closes it).
  """
  with Cons.MT("Building a stat DB ..."):
    # Put the SSTable creation info in a DB and generate statistics
    fn_db = "%s/sst-creation-info.db" % Conf.GetOutDir()
    # Start fresh; ignore "file not found", re-raise anything else.
    try:
      os.remove(fn_db)
    except OSError as e:
      if e.errno != errno.ENOENT:
        raise e
    table_schema = """ CREATE TABLE IF NOT EXISTS sst_creation_info ( fn text NOT NULL , db_type text NOT NULL , hour integer NOT NULL , sst_id integer NOT NULL , sst_size integer NOT NULL , job_id integer NOT NULL , creation_reason text NOT NULL , temp_triggered_single_sst_migr BOOLEAN , migr_dirc text NOT NULL ); """
    conn = sqlite3.connect(fn_db)
    # NOTE(review): sqlite3.connect raises on failure rather than returning
    # None, so this guard is effectively dead — kept as-is.
    if conn is None:
      raise RuntimeError("Error! cannot create the database connection.")
    cur = conn.cursor()
    cur.execute(table_schema)
    q = """INSERT INTO sst_creation_info (fn, db_type, hour, sst_id, sst_size, job_id, creation_reason, temp_triggered_single_sst_migr, migr_dirc) VALUES (?,?,?,?,?,?,?,?,?)"""
    for db_type in ["RocksDB", "Mutant"]:
      fn = fn0 if db_type == "RocksDB" else fn1
      with open(fn) as fo:
        for line in fo:
          # Skip comment lines.
          if line.startswith("#"):
            continue
          line = line.strip()
          # Whitespace-separated columns; layout assumed from the generating
          # code elsewhere — TODO confirm column meanings.
          t = re.split(r" +", line)
          hour = int(t[1].split(":")[0])
          sst_id = t[6]
          # Ignore when end sst_id is -, which means an sstable was deleted.
          if sst_id == "-":
            continue
          sst_id = int(sst_id)
          sst_size = int(t[5])
          job_id = int(t[7])
          # Creation reason: R, F, C, -
          cr = t[8]
          temp_triggered_single_sst_migr = (t[9] == "T")
          migr_dirc = t[10]
          cur.execute(q, (fn, db_type, hour, sst_id, sst_size, job_id, cr, temp_triggered_single_sst_migr, migr_dirc))
    conn.commit()
    cur.close()
  return conn
def _PlotTimeVsAllMetrics(fn_ycsb_log):
  """Plot time vs all metrics for one experiment, given its YCSB log path.

  fn_ycsb_log: path like .../171121-194901/ycsb/171122-010708.903-d; the job
  id and experiment datetime are parsed out of it. Skips plotting when the
  output PDF already exists.
  """
  # 171121-194901/ycsb/171122-010708.903-d
  mo = re.match(r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+", fn_ycsb_log)
  dn_log = mo.group("dn_log")
  job_id = mo.group("job_id")
  exp_dt = mo.group("exp_dt")
  #Cons.P(dn_log)
  #Cons.P(job_id)
  #Cons.P(exp_dt)
  fn_out = "%s/time-vs-all-metrics-%s.pdf" % (Conf.GetOutDir(), exp_dt)
  if os.path.exists(fn_out):
    Cons.P("%s %d already exists." % (fn_out, os.path.getsize(fn_out)))
    return
  (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)
  #Cons.P("%s\n%s\n%s" % (fn_ycsb, time_max, params1))
  # Override the computed time range with a fixed cutoff.
  time_max = "01:00:00"
  params_formatted = fn_ycsb_log + "\n" + pprint.pformat(params1[0]) + "\n" + pprint.pformat(params1[1])
  # No idea how to put spaces for the indentations. It used to work.
  # Neither replace(" ", "\ ") or replace(" ", "\\ ") worked when a line starts
  # with spaces followed by digits or [. Works when it is followed by u.
  # I guess regular characters.
  # Escape for gnuplot label text: underscores, newlines, and braces.
  params_formatted = params_formatted.replace("_", "\\\\_").replace("\n", "\\n").replace("{", "\{").replace("}", "\}")
  #Cons.P(params_formatted)
  dn_log_job = "%s/%s" % (dn_log, job_id)
  (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn1(dn_log_job, exp_dt)
  (fn_rocksdb, target_cost_changes) = RocksdbLog.GetFnTimeVsMetrics(fn_ycsb_log)
  #Cons.P(target_cost_changes)
  fn_cpu_avg = CpuAvg.GetFnForPlot(fn_ycsb_log)
  fn_mem_usage = ProcMemLog.GetFnForPlot(dn_log, job_id, exp_dt)
  with Cons.MT("Plotting ..."):
    # The gnuplot script reads its inputs from environment variables.
    env = os.environ.copy()
    env["PARAMS"] = params_formatted
    env["NUM_STGDEVS"] = str(num_stgdevs)
    env["TIME_MAX"] = str(time_max)
    env["IN_FN_DSTAT"] = fn_dstat
    env["IN_FN_YCSB"] = fn_ycsb
    env["IN_FN_ROCKSDB"] = fn_rocksdb
    env["IN_FN_CPU_AVG"] = fn_cpu_avg
    env["IN_FN_MEM"] = fn_mem_usage
    env["TARGET_COST_CHANGES_TIME"] = target_cost_changes[0]
    env["TARGET_COST_CHANGES_COST"] = target_cost_changes[1]
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/time-vs-all-metrics.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GetFnForPlot(fn_ycsb_log):
  """Generate (or return the cached) per-minute average CPU usage file.

  Derives CPU usage (100 - idle%) from the dstat data file of the experiment
  identified by fn_ycsb_log, averaged per hour:minute bucket. Returns the
  output file path.
  """
  # 171121-194901/ycsb/171122-010708.903-d
  mo = re.match(r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+", fn_ycsb_log)
  exp_dt = mo.group("exp_dt")
  fn_out = "%s/cpu-avg-%s" % (Conf.GetOutDir(), exp_dt)
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg cpu usage file for plotting ..."):
    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn(fn_ycsb_log)
    # 1-based column numbers in the dstat data file; the cpu sys/user/iowait
    # columns are computed but unused here — kept for documentation value.
    col_time = 17
    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4
    # {hour_minute: [cpu_usage]}
    hm_cpu = {}
    with open(fn_dstat) as fo:
      for line in fo:
        if line.startswith("#"):
          continue
        line = line.strip()
        t = re.split(r" +", line)
        # Parse these cause some hours and mins don't have left padding 0s.
        mo = re.match(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)", t[col_time - 1])
        hour = int(mo.group("h"))
        minute = int(mo.group("m"))
        hour_minute = "%02d:%02d" % (hour, minute)
        # CPU busy = 100 - idle.
        cpu = 100.0 - float(t[col_cpu_idle - 1])
        if hour_minute not in hm_cpu:
          hm_cpu[hour_minute] = []
        hm_cpu[hour_minute].append(cpu)
    fmt = "%5s %6.2f"
    header = Util.BuildHeader(fmt, "hour_min cpu_avg")
    with open(fn_out, "w") as fo:
      i = 0
      # Repeat the header every 40 rows for readability.
      for hm, v in sorted(hm_cpu.iteritems()):
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        l = len(v)
        avg = 0 if l == 0 else (float(sum(v)) / l)
        fo.write((fmt + "\n") % (hm, avg))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetFnForPlot(dn_log, job_id, exp_dt):
  """Generate (or return the cached) RSS memory usage file for plotting.

  Reads the procmon log (unzipping the .bz2 if needed), converts RSS pages to
  GB and timestamps to time relative to experiment start. Returns the output
  file path.
  """
  fn_out = "%s/mem-%s" % (Conf.GetOutDir(), exp_dt)
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating memory usage file for plotting ..."):
    fn = "%s/%s/procmon/%s" % (dn_log, job_id, exp_dt)
    if not os.path.exists(fn):
      # Fall back to the bzip2-compressed log and decompress it in place.
      fn_zipped = "%s.bz2" % fn
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn)
      Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null" % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
    if not os.path.exists(fn):
      raise RuntimeError("Unexpected")
    exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
    # man proc. statm
    # {relative_time_str: rss_in_gb}
    dt_rss = {}
    with open(fn) as fo:
      for line in fo:
        try:
          t = line.strip().split()
          if len(t) != 8:
            Cons.P("Unexpected format [%s] Ignoring" % line)
            continue
          dt = datetime.datetime.strptime(t[0], "%y%m%d-%H%M%S")
          # statm reports pages; 4096-byte pages assumed — TODO confirm on
          # the target system.
          rss = float(t[2]) * 4096 / 1024 / 1024 / 1024
          #Cons.P("%s %d" % (dt, rss))
          # Convert to relative time
          rel_dt = dt - exp_begin_dt
          totalSeconds = rel_dt.seconds
          hours, remainder = divmod(totalSeconds, 3600)
          minutes, seconds = divmod(remainder, 60)
          rel_dt_str = "%02d:%02d:%02d" % (hours, minutes, seconds)
          dt_rss[rel_dt_str] = rss
        except IndexError as e:
          Cons.P("%s: %s [%s]" % (e, fn, line))
          raise e
    with open(fn_out, "w") as fo:
      fmt = "%8s %6.2f"
      header = Util.BuildHeader(fmt, "dt rss_in_gb")
      i = 0
      # Repeat the header every 40 rows for readability.
      for dt, rss in sorted(dt_rss.iteritems()):
        if i % 40 == 0:
          fo.write(header + "\n")
        fo.write((fmt + "\n") % (dt, rss))
        i += 1
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetHourlyFn():
  """Generate (or return the cached) hourly memory-usage comparison file.

  Compares two experiments (Conf indices 0 and 1); all byte values are
  converted to GB. Returns the file path.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)
    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # NOTE(review): assumes both experiments cover the same set of hours.
      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        # Bytes -> GB for every statistic.
        fo.write((fmt + "\n") % (h
          , (float(s0.avg) / 1024 / 1024 / 1024)
          , (float(s0.min) / 1024 / 1024 / 1024)
          , (float(s0._1 ) / 1024 / 1024 / 1024)
          , (float(s0._25) / 1024 / 1024 / 1024)
          , (float(s0._50) / 1024 / 1024 / 1024)
          , (float(s0._75) / 1024 / 1024 / 1024)
          , (float(s0._99) / 1024 / 1024 / 1024)
          , (float(s0.max) / 1024 / 1024 / 1024)
          , (float(s1.avg) / 1024 / 1024 / 1024)
          , (float(s1.min) / 1024 / 1024 / 1024)
          , (float(s1._1 ) / 1024 / 1024 / 1024)
          , (float(s1._25) / 1024 / 1024 / 1024)
          , (float(s1._50) / 1024 / 1024 / 1024)
          , (float(s1._75) / 1024 / 1024 / 1024)
          , (float(s1._99) / 1024 / 1024 / 1024)
          , (float(s1.max) / 1024 / 1024 / 1024)
          ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def PlotThrpLat():
  """Plot YCSB throughput vs latency, grouped by cost SLOs, into a PDF."""
  in_fn_ycsb = YcsbLog.GenDataThrpVsLat()
  out_fn = "%s/mutant-ycsb-thrp-vs-lat-by-costslos.pdf" % Conf.GetOutDir()
  with Cons.MT("Plotting ..."):
    # Hand the input/output file names to gnuplot through the environment.
    plot_env = os.environ.copy()
    plot_env["IN_YCSB"] = in_fn_ycsb
    plot_env["OUT_FN"] = out_fn
    gnuplot_script = "%s/mutant-ycsb-thrp-lat.gnuplot" % os.path.dirname(__file__)
    Util.RunSubp("gnuplot %s" % gnuplot_script, env=plot_env)
    Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def GenDataThrpVsLat():
  """Generate (or return the cached) throughput-vs-latency data file.

  One row per (storage device, target IOPS) pair with read/write average and
  tail latencies. Returns the file path.
  """
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")
    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}
    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        # "time" is a "begin-end" range string.
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        # The "overloaded" flag is optional in the conf.
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(fn, time_begin, time_end, overloaded)
    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
            stgdev
            , tio
            , (1 if ylr.overloaded else 0)
            , ylr.db_iops_stat.avg
            , ylr.r_avg, ylr.r_90, ylr.r_99, ylr.r_999, ylr.r_9999
            , ylr.w_avg, ylr.w_90, ylr.w_99, ylr.w_999, ylr.w_9999
            ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataFileForGnuplot(dn_log_job, exp_dt):
  """Generate (or return the cached) dstat data file for gnuplot.

  Parses the dstat CSV of the experiment (unzipping the .bz2 if needed) into
  the module-global _header_idx/_body_rows via _Parse(), then writes the
  formatted rows. Returns the output file path.

  NOTE(review): relies on module globals (_exp_begin_dt, _header_idx,
  _body_rows, _num_stg_devs) — not safe for concurrent calls in one process.
  """
  fn_out = "%s/dstat-%s" % (Conf.GetOutDir(), exp_dt)
  # Cached: reuse a previously generated file.
  if os.path.isfile(fn_out):
    return fn_out
  global _exp_begin_dt
  _exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
  #Cons.P(_exp_begin_dt)
  with Cons.MT("Generating dstat data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None
    fn_log_dstat = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)
    # Unzip when the file is not there
    if not os.path.exists(fn_log_dstat):
      fn_zipped = "%s.bz2" % fn_log_dstat
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn_log_dstat)
      Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null" % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
    if not os.path.exists(fn_log_dstat):
      raise RuntimeError("Unexpected")
    _Parse(fn_log_dstat)
    # For read and write
    # Per-storage-device read/write columns first, then the fixed columns.
    fmt = " ".join(["%9.0f"] * 2 * _num_stg_devs + ["%6.1f"] * 2 * _num_stg_devs)
    fmt += " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    #Cons.P(header)
    with open(fn_out, "w") as fo:
      i = 0
      # Repeat the header every 50 rows for readability.
      for r in _body_rows:
        if not r.TimeValid():
          continue
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def PlotByTime(params): fn_ycsb_log = params[0] # 171121-194901/ycsb/171122-010708.903-d mo = re.match( r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+", fn_ycsb_log) dn_log = mo.group("dn_log") job_id = mo.group("job_id") exp_dt = mo.group("exp_dt") #Cons.P(dn_log) #Cons.P(job_id) #Cons.P(exp_dt) (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt) #Cons.P("%s\n%s\n%s\n%s" % (fn_ycsb, time_max, params1[0], params1[1])) # For dev #time_max = "00:10:00" #time_max = "03:00:00" params_formatted = fn_ycsb_log + "\n" + pprint.pformat( params1[0]) + "\n" + pprint.pformat(params1[1]) # The last, space substitution doesn't seem to work all of a sudden. Not the highest priority. params_formatted = params_formatted.replace("\n", "\\n").replace( "_", "\\\\_").replace("{", "\{").replace("}", "\}") #.replace(" ", "\\ ") #Cons.P(params_formatted) dn_log_job = "%s/%s" % (dn_log, job_id) (fn_dstat, num_stgdevs) = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt) #Cons.P("%s %s" % (fn_dstat, num_stgdevs)) fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt) fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(), exp_dt) with Cons.MT("Plotting ..."): env = os.environ.copy() env["PARAMS"] = params_formatted env["NUM_STG_DEVS"] = str(num_stgdevs) env["TIME_MAX"] = str(time_max) env["IN_FN_DSTAT"] = fn_dstat env["IN_FN_YCSB"] = fn_ycsb env["IN_FN_ROCKSDB"] = fn_rocksdb env["OUT_FN"] = fn_out Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def PlotCostLatency(exp_set_id):
  """Plot cost vs performance for the given experiment set into a PDF.

  exp_set_id: experiment-set identifier string; also used in the output name.
  """
  in_fn_ycsb = YcsbLog.GenDataCostVsMetrics(exp_set_id)
  out_fn = "%s/rocksdb-ycsb-cost-perf-%s.pdf" % (Conf.GetOutDir(), exp_set_id)
  with Cons.MT("Plotting ..."):
    # Hand the input/output file names to gnuplot through the environment.
    plot_env = os.environ.copy()
    plot_env["IN_YCSB"] = in_fn_ycsb
    plot_env["OUT_FN"] = out_fn
    gnuplot_script = "%s/rocksdb-ycsb-cost-perf.gnuplot" % os.path.dirname(__file__)
    Util.RunSubp("gnuplot %s" % gnuplot_script, env=plot_env)
    Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def PlotCseVsAll():
  """Plot cost-SLO-epsilon vs all metrics into a PDF."""
  # Cost SLO epsilon vs all metrics, plus the fitted linear-regression params.
  (fn_cse_vs_all, linear_reg_params) = RocksdbLog.GetFnCostSloEpsilonVsMetrics()
  #Cons.P(linear_reg_params)
  out_fn = "%s/cost-slo-epsilon-vs-metrics.pdf" % Conf.GetOutDir()
  with Cons.MT("Plotting cost SLO epsilon vs metrics ..."):
    # Hand the inputs to gnuplot through the environment.
    plot_env = os.environ.copy()
    plot_env["FN_CSE_VS_ALL"] = fn_cse_vs_all
    plot_env["LINEAR_REG_PARAMS"] = linear_reg_params
    plot_env["FN_OUT"] = out_fn
    gnuplot_script = "%s/cost-slo-epsilon-vs-metrics.gnuplot" % os.path.dirname(__file__)
    Util.RunSubp("gnuplot %s" % gnuplot_script, env=plot_env)
    Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def GenDataCostVsMetrics(exp_set_id):
  """Generate the cost-vs-performance data file for an experiment set.

  One row per storage device: cost ($/GB/month) plus DB IOPS statistics and
  read/write latency statistics. Returns the file path.

  NOTE(review): unlike the sibling generators, this one has no cache check
  and always rewrites the file.
  """
  fn_out = "%s/rocksdb-ycsb-cost-vs-perf-%s" % (Conf.GetOutDir(), exp_set_id)
  fmt = "%5s %5.3f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f"
  with open(fn_out, "w") as fo:
    fo.write(Util.BuildHeader(fmt, "stg_dev cost_dollar_per_gb_per_month" \
        " db_iops.avg" \
        " db_iops.min" \
        " db_iops.max" \
        " db_iops._25" \
        " db_iops._50" \
        " db_iops._75" \
        " r_avg r_min r_max r_90 r_99 r_999 r_9999" \
        " w_avg w_min w_max w_90 w_99 w_999 w_9999"
        ) + "\n")
    for stg_dev, v in Conf.Get(exp_set_id).iteritems():
      lr = YcsbLogReader(exp_set_id, stg_dev)
      fo.write((fmt + "\n") % (
        stg_dev, float(Conf.Get("stg_cost")[stg_dev])
        , lr.GetStat("db_iops.avg")
        , lr.GetStat("db_iops.min")
        , lr.GetStat("db_iops.max")
        , lr.GetStat("db_iops._25")
        , lr.GetStat("db_iops._50")
        , lr.GetStat("db_iops._75")
        , lr.GetStat("r_avg")
        , lr.GetStat("r_min")
        , lr.GetStat("r_max")
        , lr.GetStat("r_90")
        , lr.GetStat("r_99")
        , lr.GetStat("r_999")
        , lr.GetStat("r_9999")
        , lr.GetStat("w_avg")
        , lr.GetStat("w_min")
        , lr.GetStat("w_max")
        , lr.GetStat("w_90")
        , lr.GetStat("w_99")
        , lr.GetStat("w_999")
        , lr.GetStat("w_9999")
        ))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetOverhead(exp_rocksdb, exp_computation):
  """Read the C:R memory-overhead ratio back out of the 1-min-avg file.

  Scans the comparison file's comment header for the "# C:R = ..." line and
  returns its value as a float (implicitly None when absent).
  """
  pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  # The file name is keyed by both experiments' datetimes.
  exp_dts = [re.match(pattern, e).group("exp_dt") for e in [exp_rocksdb, exp_computation]]
  fn = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  with open(fn) as fo:
    for line in fo:
      # Only the comment header is of interest.
      if not line.startswith("#"):
        continue
      if line.startswith("# C:R = "):
        mo = re.match(r"# C:R = (?P<v>(\d|\.)+)", line)
        return float(mo.group("v"))
def main(argv):
  """Entry point: plot throughput vs latency, then exit."""
  Util.MkDirs(Conf.GetOutDir())
  PlotThrpLat()
  sys.exit(0)
  # NOTE(review): everything below is unreachable dead code; it also
  # references `exp_set_id`, which is not defined in this function.
  # Plot (cost vs latency) by storage devices
  # Latency in avg and tail latencies
  #
  # The goal:
  #   to show there are limited options
  #   and show the baseline performances.
  #
  # Finish this and show that this was not a fair comparison.
  if True:
    PlotCostLatency(exp_set_id)
def main(argv):
  """Entry point: plot metrics-by-time for the three overhead experiments."""
  Util.MkDirs(Conf.GetOutDir())
  dn_base = Conf.GetDir("dn_base")
  # One 1-tuple per experiment, matching PlotByTime's params signature.
  params = []
  for db_type in ["unmodified_db", "computation_overhead", "io_overhead"]:
    fn = "%s/%s" % (dn_base, Conf.Get(db_type))
    params.append((fn, ))
  parallel_processing = True
  if parallel_processing:
    p = multiprocessing.Pool()
    p.map(PlotByTime, params)
  else:
    # Serial fallback, useful for debugging.
    for p in params:
      PlotByTime(p)
def PlotByTime(params):
  """Plot RocksDB/YCSB workload-d metrics by time for one storage device.

  params: (exp_set_id, stg_dev, p0) where p0 is a dict with "jobid_expdt"
  ("job_id/exp_dt" string) and "time_window" (unused here).
  """
  exp_set_id = params[0]
  stg_dev = params[1]
  p0 = params[2]
  jobid_expdt = p0["jobid_expdt"]
  time_window = p0["time_window"]
  (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(exp_set_id, stg_dev)
  #Cons.P(time_max)
  params_formatted = exp_set_id + "\n" + pprint.pformat(params1[0]) + "\n" + pprint.pformat(params1[1])
  # Escape for gnuplot label text: _, space, newline, { and }.
  params_formatted = params_formatted.replace("_", "\\\\_").replace(" ", "\\ ").replace("\n", "\\n").replace("{", "\{").replace("}", "\}")
  #Cons.P(params_formatted)
  # "job_id/exp_dt" -> (job_id, exp_dt)
  t = jobid_expdt.split("/")
  job_id = t[0]
  exp_dt = t[1]
  dn_log = Conf.GetDir("dn")
  dn_log_job = "%s/%s" % (dn_log, job_id)
  fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
  fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
  fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(), stg_dev, exp_dt)
  with Cons.MT("Plotting ..."):
    # The gnuplot script reads its inputs from environment variables.
    env = os.environ.copy()
    env["EXP_SET_ID"] = exp_set_id
    env["PARAMS"] = params_formatted
    env["STG_DEV"] = stg_dev
    env["TIME_MAX"] = str(time_max)
    env["IN_FN_DSTAT"] = fn_dstat
    env["IN_FN_YCSB"] = fn_ycsb
    env["IN_FN_ROCKSDB"] = fn_rocksdb
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Get1minAvgFn(exp_rocksdb, exp_computation):
  """Generate (or return the cached) 1-min-avg memory comparison file.

  exp_rocksdb / exp_computation: YCSB log paths of the baseline RocksDB and
  the with-computation experiments. Also writes the total memory*time sums
  and their C:R ratio into the file's comment header. Returns the file path.
  """
  exp_dts = []
  pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  mo = re.match(pattern, exp_rocksdb)
  exp_dts.append(mo.group("exp_dt"))
  mo = re.match(pattern, exp_computation)
  exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    # {which_exp, [mem_usage]}
    which_memsum = {0: [], 1: []}
    hm_mem = _GetHmMem(exp_rocksdb)
    for hm, mem in hm_mem.iteritems():
      records.append(_RecordMemAvg(hm, 0, mem, 0))
      which_memsum[0].append(mem)
    hm_mem = _GetHmMem(exp_computation)
    for hm, mem in hm_mem.iteritems():
      # 30-sec offset so the two series don't overlap on the plot —
      # presumably; verify against the plot script.
      records.append(_RecordMemAvg(hm, 30, mem, 1))
      which_memsum[1].append(mem)
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      # Header comments: total memory*time for each experiment and the ratio.
      fo.write("# Memory usage * time (B * sec)\n")
      r = sum(which_memsum[0])
      c = sum(which_memsum[1])
      fo.write("# RocksDB: %f\n" % r)
      fo.write("# With computation: %f\n" % c)
      fo.write("# C:R = %f\n" % (c / r))
      fo.write("\n")
      i = 0
      # Repeat the column header every 40 rows for readability.
      for r in records:
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def AddStatToFile(fn):
  """Prepend overall SSTable-creation statistics to an existing data file.

  Builds a temporary SQLite DB from fn, writes the overall stats followed by
  the original contents into fn.tmp, then atomically renames it over fn and
  deletes the temporary DB.
  """
  with Cons.MT("Updating SSTable creation stats ..."):
    mo = re.match(r".*-(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", fn)
    exp_dt = mo.group("exp_dt")
    fn_db = "%s/sst-creation-info-%s.db" % (Conf.GetOutDir(), exp_dt)
    conn = _GenDB(fn, fn_db)
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    fn2 = "%s.tmp" % fn
    with open(fn2, "w") as fo2:
      # Stats first, then the original file body.
      _OverallStat(cur, fo2)
      fo2.write("\n")
      with open(fn) as fo:
        for line in fo:
          fo2.write(line)
    os.rename(fn2, fn)
    Cons.P("Updated %s %d" % (fn, os.path.getsize(fn)))
    # The DB is only an intermediate artifact.
    os.remove(fn_db)
def PlotOverheadByTime():
  """Plot Mutant's CPU/memory overhead over time into a PDF."""
  (fn_rocksdb0, fn_rocksdb1, fn_rocksdb_compmigr_histo) = RocksdbLog.GenDataFilesForGnuplot()
  cpu_stat_fn = _GetFnCpuOverhead()
  mem_stat_fn = _GetFnMemOverhead()
  # Plot x-axis cutoff; earlier value kept commented for reference.
  #time_max = "09:00:00"
  time_max = "08:00:00"
  out_fn = "%s/mutant-overhead.pdf" % Conf.GetOutDir()
  with Cons.MT("Plotting ..."):
    # Hand the inputs to gnuplot through the environment.
    plot_env = os.environ.copy()
    plot_env["TIME_MAX"] = str(time_max)
    plot_env["CPU_STAT"] = cpu_stat_fn
    plot_env["MEM_STAT"] = mem_stat_fn
    plot_env["ROCKSDB0"] = fn_rocksdb0
    plot_env["ROCKSDB1"] = fn_rocksdb1
    plot_env["OUT_FN"] = out_fn
    gnuplot_script = "%s/mutant-overhead-by-time.gnuplot" % os.path.dirname(__file__)
    Util.RunSubp("gnuplot %s" % gnuplot_script, env=plot_env)
    Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def main(argv): Util.MkDirs(Conf.GetOutDir()) # Experiment root #r = Conf.Get("rocksdb-metadata-org") r = Conf.Get("rocksdb-baseline") dn_base = r["dn_base"].replace("~", os.path.expanduser("~")) params = [] #for stgdev in ["ebs-st1", "local-ssd"]: for stgdev in ["local-ssd"]: for target_iops, jobid_expdt in r[stgdev].iteritems(): fn_in = "%s/%s" % (dn_base, jobid_expdt) params.append((fn_in, stgdev)) parallel_processing = True if parallel_processing: p = multiprocessing.Pool() p.map(PlotByTime, params) else: for p in params: PlotByTime(p)
def GetFnStat(fn0, fn1):
  """Generate (or return the cached) SSTable creation statistics file.

  fn0, fn1: per-experiment data files named like
  rocksdb-by-time-180126-193525.769; their experiment datetimes key the
  output file name. Returns the output file path.
  """
  exp_dts = []
  for fn in [fn0, fn1]:
    #Cons.P(fn)
    # rocksdb-by-time-180126-193525.769
    mo = re.match(r".+-(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", fn)
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/sst-creation-stat-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  # Cached: reuse a previously generated file.
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating SSTable creation stats ..."):
    conn = _GenDB2(fn0, fn1)
    # https://docs.python.org/2/library/sqlite3.html#row-objects
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    with open(fn_out, "w") as fo:
      _OverallStat2(cur, fn0, fn1, fo)
      _HourlyStat(cur, fo)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  # BUG FIX: previously the freshly-generated path fell off the end and
  # returned None; only the cache-hit path returned the file name. Callers
  # expect the path in both cases, matching the sibling GetFn*/Gen* helpers.
  return fn_out
def main(argv): Util.MkDirs(Conf.GetOutDir()) # Experiment root r = Conf.Get("cost-vs-perf") dn_base = r["dn_base"].replace("~", os.path.expanduser("~")) params = [] for cost_slo_str, v in r["exps"].iteritems(): t = cost_slo_str.split(", ") cost_slo = float(t[0]) cost_slo_epsilon = float(t[1]) #Cons.P("%f %f" % (cost_slo, cost_slo_epsilon)) for target_iops, fn in v.iteritems(): #Cons.P(" %s %s" % (target_iops, fn)) # 171204-162903/ycsb/171204-214803.510-d mo = re.match( r"(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-(?P<workload>\w)", fn) job_id = mo.group("job_id") exp_dt = mo.group("exp_dt") workload = mo.group("workload") #Cons.P((job_id, exp_dt, workload)) params.append( ExpParam(dn_base, target_iops, cost_slo, cost_slo_epsilon, job_id, exp_dt, workload)) #Cons.P(pprint.pformat(params)) parallel_processing = True if parallel_processing: p = multiprocessing.Pool() p.map(PlotByTime, params) else: for p in params: PlotByTime(p)
def _PlotCompareTwo(params):
  """Plot the computation-overhead comparison of two experiments.

  params: (exp_rocksdb, exp_computation) YCSB log paths. Skips plotting when
  the output PDF already exists.
  """
  exp_rocksdb = params[0]
  exp_computation = params[1]
  exp_dts = []
  pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  mo = re.match(pattern, exp_rocksdb)
  exp_dts.append(mo.group("exp_dt"))
  mo = re.match(pattern, exp_computation)
  exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mutant-computation-overhead-%s.pdf" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    #Cons.P("%s %d already exists" % (fn_out, os.path.getsize(fn_out)))
    return
  # Custom labels only for this one specific experiment pair.
  plot_custom_labels = ("-".join(exp_dts) == "180201-033312.464-180201-033259.439")
  fn_rocksdb = RocksdbLog.GetFnTimeVsMetrics(exp_rocksdb)
  # Plot x-axis cutoff.
  time_max = "07:50:00"
  fn_cpu_1min_avg = CompareCpu.Get1minAvgFn(exp_rocksdb, exp_computation)
  fn_mem_1min_avg = CompareMem.Get1minAvgFn(exp_rocksdb, exp_computation)
  with Cons.MT("Plotting ..."):
    # The gnuplot script reads its inputs from environment variables.
    env = os.environ.copy()
    env["TIME_MAX"] = str(time_max)
    env["FN_ROCKSDB"] = fn_rocksdb
    env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg
    env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg
    env["PLOT_CUSTOM_LABELS"] = "1" if plot_custom_labels else "0"
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))