def _GetHmCpu(fn_ycsb_log):
    """Return the average CPU usage per hour:minute bucket of a dstat file.

    Args:
        fn_ycsb_log: YCSB log path. It is handed to DstatLog.GetPlotFn to
            obtain the dstat data file to read.

    Returns:
        A dict mapping zero-padded "HH:MM" strings to the mean of
        (100 - cpu_idle) over all rows whose timestamp falls in that minute.
    """
    (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb_log)

    # 1-based positions of the fields in a space-separated data row.
    col_time = 17
    col_cpu_idle = 19

    # Some hours and minutes don't have left-padding 0s, so they are parsed as
    # integers and re-formatted rather than taken from the raw text.
    time_pattern = re.compile(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)")

    # {hour_minute: [cpu_usage]}
    hm_cpu = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            t = re.split(r" +", line.strip())
            mo = time_pattern.match(t[col_time - 1])
            hour_minute = "%02d:%02d" % (int(mo.group("h")),
                                         int(mo.group("m")))
            cpu = 100.0 - float(t[col_cpu_idle - 1])
            hm_cpu.setdefault(hour_minute, []).append(cpu)

    # A bucket is only created when a sample is appended to it, so no list is
    # empty here. items() (instead of the Python-2-only iteritems()) keeps the
    # function usable on both Python 2 and Python 3.
    return {hm: sum(v) / len(v) for hm, v in hm_cpu.items()}
def _GetMemStatByHourFromDstat(fn_ycsb):
    """Return per-hour statistics of the memory-buffer column of a dstat file.

    Args:
        fn_ycsb: YCSB log path. It is handed to DstatLog.GetPlotFn to obtain
            the dstat data file to read.

    Returns:
        A dict mapping the integer hour of each row's timestamp to the result
        of Stat.Gen over the memory-buffer values seen in that hour.
    """
    # NOTE(review): the return value is used directly as a file name here.
    # Confirm that GetPlotFn returns a plain path (not a tuple) for the dstat
    # layout this function targets.
    fn_dstat = DstatLog.GetPlotFn(fn_ycsb)

    # 1-based positions of the fields in a space-separated data row.
    col_time = 21
    col_mem_buff = 13

    # Bucketize memory usage
    # {hour: [mem_usage]}
    hour_memusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            t = re.split(r" +", line.strip())
            time0 = t[col_time - 1]
            mem_buff = int(t[col_mem_buff - 1])
            hour = int(time0.split(":")[0])
            hour_memusage.setdefault(hour, []).append(mem_buff)

    # items() (instead of the Python-2-only iteritems()) keeps the function
    # usable on both Python 2 and Python 3.
    return {
        hour: Stat.Gen(mem_usage)
        for hour, mem_usage in hour_memusage.items()
    }
def PlotByTime(p):
    """Plot all RocksDB/YCSB metrics over time for one experiment.

    Generates the YCSB, dstat and RocksDB data files for `p`, then invokes the
    rocksdb-ycsb-all-metrics-by-time.gnuplot script located next to this
    module, passing its inputs through environment variables.

    Args:
        p: Experiment parameter object. It must expose `exp_dt` and is passed
            unchanged to the YcsbLog / DstatLog / RocksdbLog generators.
    """
    (fn_ycsb, time_max, _) = YcsbLog.GenDataMetricsByTime(p)

    # Escape the parameter text before it is exported as PARAMS (presumably it
    # is rendered as a gnuplot label -- confirm in the .gnuplot script).
    # "\\{" / "\\}" spell the backslash explicitly; the former "\{" relied on
    # Python passing unknown escape sequences through, which is deprecated.
    params_formatted = (
        str(p)
        .replace("_", "\\\\_")
        .replace(" ", "\\ ")
        .replace("\n", "\\n")
        .replace("{", "\\{")
        .replace("}", "\\}"))

    fn_dstat = DstatLog.GenDataFileForGnuplot(p)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(p)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             p.exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script receives all of its inputs via the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__), env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _PlotTimeVsAllMetrics(fn_ycsb_log):
    """Plot time vs. all metrics for one YCSB log, unless already plotted.

    The output PDF name is derived from the experiment datetime embedded in
    `fn_ycsb_log`. When that file already exists the function reports it and
    returns without regenerating anything.

    Args:
        fn_ycsb_log: Path of the form <dn_log>/<job_id>/ycsb/<exp_dt>..., for
            example ".../171121-194901/ycsb/171122-010708.903-d".

    Raises:
        AttributeError: If `fn_ycsb_log` does not match the expected layout
            (re.match returns None).
    """
    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")

    fn_out = "%s/time-vs-all-metrics-%s.pdf" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        Cons.P("%s %d already exists." % (fn_out, os.path.getsize(fn_out)))
        return

    (fn_ycsb, _, params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)

    # TIME_MAX is pinned to a fixed value; the one returned by
    # GenDataMetricsByTime is not used.
    time_max = "01:00:00"

    params_formatted = (fn_ycsb_log + "\n" + pprint.pformat(params1[0]) +
                        "\n" + pprint.pformat(params1[1]))
    # Spaces are intentionally left unescaped: per the original author's note,
    # escaping them stopped working when a line starts with spaces followed by
    # digits or "[".
    # "\\{" / "\\}" spell the backslash explicitly; the former "\{" relied on
    # Python passing unknown escape sequences through, which is deprecated.
    params_formatted = (
        params_formatted
        .replace("_", "\\\\_")
        .replace("\n", "\\n")
        .replace("{", "\\{")
        .replace("}", "\\}"))

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn1(dn_log_job, exp_dt)
    (fn_rocksdb, target_cost_changes) = RocksdbLog.GetFnTimeVsMetrics(
        fn_ycsb_log)

    fn_cpu_avg = CpuAvg.GetFnForPlot(fn_ycsb_log)
    fn_mem_usage = ProcMemLog.GetFnForPlot(dn_log, job_id, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script receives all of its inputs via the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STGDEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["IN_FN_CPU_AVG"] = fn_cpu_avg
        env["IN_FN_MEM"] = fn_mem_usage
        env["TARGET_COST_CHANGES_TIME"] = target_cost_changes[0]
        env["TARGET_COST_CHANGES_COST"] = target_cost_changes[1]
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/time-vs-all-metrics.gnuplot" %
                     os.path.dirname(__file__), env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GetFnForPlot(fn_ycsb_log):
    """Return the per-minute average-CPU data file, creating it if needed.

    The file name is derived from the experiment datetime embedded in
    `fn_ycsb_log`. An existing file is reused as is. Otherwise the dstat data
    file is read, CPU usage (100 - cpu_idle) is averaged per "HH:MM" bucket,
    and the result is written sorted by bucket with a header line repeated
    every 40 rows.

    Args:
        fn_ycsb_log: Path of the form <dn_log>/<job_id>/ycsb/<exp_dt>..., for
            example ".../171121-194901/ycsb/171122-010708.903-d".

    Returns:
        The path of the CPU-average data file.

    Raises:
        AttributeError: If `fn_ycsb_log` does not match the expected layout
            (re.match returns None).
    """
    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    exp_dt = mo.group("exp_dt")

    fn_out = "%s/cpu-avg-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg cpu usage file for plotting ..."):
        (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb_log)

        # 1-based positions of the fields in a space-separated data row.
        col_time = 17
        col_cpu_idle = 19

        # Some hours and minutes don't have left-padding 0s, so they are
        # parsed as integers and re-formatted.
        time_pattern = re.compile(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)")

        # {hour_minute: [cpu_usage]}
        hm_cpu = {}
        with open(fn_dstat) as fo:
            for line in fo:
                if line.startswith("#"):
                    continue
                t = re.split(r" +", line.strip())
                mo = time_pattern.match(t[col_time - 1])
                hour_minute = "%02d:%02d" % (int(mo.group("h")),
                                             int(mo.group("m")))
                cpu = 100.0 - float(t[col_cpu_idle - 1])
                hm_cpu.setdefault(hour_minute, []).append(cpu)

        fmt = "%5s %6.2f"
        header = Util.BuildHeader(fmt, "hour_min cpu_avg")
        with open(fn_out, "w") as fo:
            # items() (instead of the Python-2-only iteritems()) keeps the
            # function usable on both Python 2 and Python 3. Buckets are never
            # empty because each is created by appending a sample.
            for i, (hm, v) in enumerate(sorted(hm_cpu.items())):
                if i % 40 == 0:
                    fo.write(header + "\n")
                fo.write((fmt + "\n") % (hm, float(sum(v)) / len(v)))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
def _GenDstat(fn_ycsb_log):
    """Generate the dstat gnuplot data file for a YCSB log and return its path.

    Args:
        fn_ycsb_log: Path of the form <dn_log>/<job_id>/ycsb/<exp_dt>...

    Returns:
        The first element of the pair returned by
        DstatLog.GenDataFileForGnuplot.

    Raises:
        AttributeError: If `fn_ycsb_log` does not match the expected layout
            (re.match returns None).
    """
    path_pattern = (
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+"
    )
    matched = re.match(path_pattern, fn_ycsb_log)

    # The job directory is the log root joined with the job id.
    job_dir = "%s/%s" % (matched.group("dn_log"), matched.group("job_id"))
    experiment_dt = matched.group("exp_dt")

    # Only the file name is needed; the second element of the pair is dropped.
    dstat_fn, _ = DstatLog.GenDataFileForGnuplot(job_dir, experiment_dt)
    return dstat_fn
def PlotByTime(params):
    """Plot all RocksDB/YCSB metrics over time for one YCSB log.

    Generates the YCSB, dstat and RocksDB data files, then invokes the
    rocksdb-ycsb-all-metrics-by-time.gnuplot script located next to this
    module, passing its inputs through environment variables.

    Args:
        params: Indexable whose first element is the YCSB log path, of the
            form <dn_log>/<job_id>/ycsb/<exp_dt>..., for example
            ".../171121-194901/ycsb/171122-010708.903-d".

    Raises:
        AttributeError: If the log path does not match the expected layout
            (re.match returns None).
    """
    fn_ycsb_log = params[0]

    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")

    (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log,
                                                                exp_dt)

    params_formatted = (fn_ycsb_log + "\n" + pprint.pformat(params1[0]) +
                        "\n" + pprint.pformat(params1[1]))
    # Spaces are left unescaped: per the original author's note the space
    # substitution stopped working and was disabled.
    # "\\{" / "\\}" spell the backslash explicitly; the former "\{" relied on
    # Python passing unknown escape sequences through, which is deprecated.
    params_formatted = (
        params_formatted
        .replace("\n", "\\n")
        .replace("_", "\\\\_")
        .replace("{", "\\{")
        .replace("}", "\\}"))

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat, num_stgdevs) = DstatLog.GenDataFileForGnuplot(dn_log_job,
                                                             exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script receives all of its inputs via the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STG_DEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__), env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot(param):
    """Plot the SLA-admin metrics over time for one QuizUp experiment.

    Locates the quizup, rocksdb and dstat logs of the experiment under
    ~/work/mutant/log/quizup/sla-admin/<job_id>, prepares the derived data
    files, and invokes the sla-admin-by-time.gnuplot script located next to
    this module with its inputs passed through environment variables.

    Args:
        param: Indexable; param[0] is the job id and param[1] is the
            experiment datetime used in the log and output file names.
    """
    job_id = param[0]
    exp_dt = param[1]

    home_dir = os.path.expanduser("~")
    job_log_dir = "%s/work/mutant/log/quizup/sla-admin/%s" % (home_dir, job_id)
    quizup_log_fn = "%s/quizup/%s" % (job_log_dir, exp_dt)
    rocksdb_log_fn = "%s/rocksdb/%s" % (job_log_dir, exp_dt)
    dstat_log_fn = "%s/dstat/%s.csv" % (job_log_dir, exp_dt)

    quizup_log = QuizupLog(quizup_log_fn)
    SimTime.Init(
        quizup_log.SimTime("simulated_time_begin"),
        quizup_log.SimTime("simulated_time_end"),
        quizup_log.SimTime("simulation_time_begin"),
        quizup_log.SimTime("simulation_time_end"))

    options = quizup_log.quizup_options
    std_max = _QzSimTimeDur(options["simulation_time_dur_in_sec"])
    options_str = _QuizupOptionsFormattedStr(quizup_log.quizup_options)
    # The ranges are exported space-separated instead of comma-separated.
    adj_ranges = quizup_log.quizup_options["error_adj_ranges"].replace(",", " ")

    (sla_admin_fn, pid_params, num_sla_adj) = RocksdbLog.ParseLog(
        rocksdb_log_fn, exp_dt)
    dstat_data_fn = DstatLog.GenDataFileForGnuplot(dstat_log_fn, exp_dt)

    out_fn = "%s/sla-admin-by-time-%s.pdf" % (Conf.GetDir("output_dir"), exp_dt)

    with Cons.MT("Plotting ..."):
        plot_env = os.environ.copy()
        plot_env["STD_MAX"] = std_max
        plot_env["ERROR_ADJ_RANGES"] = adj_ranges
        plot_env["IN_FN_QZ"] = quizup_log_fn
        # An empty file name is exported when there were no SLA adjustments.
        if num_sla_adj == 0:
            plot_env["IN_FN_SLA_ADMIN"] = ""
        else:
            plot_env["IN_FN_SLA_ADMIN"] = sla_admin_fn
        plot_env["QUIZUP_OPTIONS"] = options_str
        plot_env["PID_PARAMS"] = "%s %s %s %s" % tuple(
            [pid_params[k] for k in ("target_value", "p", "i", "d")])
        plot_env["WORKLOAD_EVENTS"] = " ".join(
            [str(ev) for ev in quizup_log.simulation_time_events])
        plot_env["IN_FN_DS"] = dstat_data_fn
        plot_env["OUT_FN"] = out_fn

        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/sla-admin-by-time.gnuplot" % script_dir,
                     env=plot_env)
        Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def PlotByTime(params):
    """Plot RocksDB/YCSB metrics over time for one experiment set and device.

    Generates the YCSB, dstat and RocksDB data files, then invokes the
    rocksdb-ycsb-by-time.gnuplot script located next to this module, passing
    its inputs through environment variables.

    Args:
        params: Indexable of (exp_set_id, stg_dev, p0). `p0` is a mapping with
            the keys "jobid_expdt" (a "<job_id>/<exp_dt>" string) and
            "time_window".
    """
    exp_set_id = params[0]
    stg_dev = params[1]
    p0 = params[2]

    jobid_expdt = p0["jobid_expdt"]
    # Unused below; the lookup is retained so that a `p0` lacking the key keeps
    # raising KeyError exactly as before.
    time_window = p0["time_window"]

    (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(exp_set_id,
                                                                stg_dev)

    params_formatted = (exp_set_id + "\n" + pprint.pformat(params1[0]) +
                        "\n" + pprint.pformat(params1[1]))
    # Escape the parameter text before it is exported as PARAMS (presumably it
    # is rendered as a gnuplot label -- confirm in the .gnuplot script).
    # "\\{" / "\\}" spell the backslash explicitly; the former "\{" relied on
    # Python passing unknown escape sequences through, which is deprecated.
    params_formatted = (
        params_formatted
        .replace("_", "\\\\_")
        .replace(" ", "\\ ")
        .replace("\n", "\\n")
        .replace("{", "\\{")
        .replace("}", "\\}"))

    t = jobid_expdt.split("/")
    job_id = t[0]
    exp_dt = t[1]

    dn_log = Conf.GetDir("dn")
    dn_log_job = "%s/%s" % (dn_log, job_id)

    fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                      stg_dev, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script receives all of its inputs via the environment.
        env = os.environ.copy()
        env["EXP_SET_ID"] = exp_set_id
        env["PARAMS"] = params_formatted
        env["STG_DEV"] = stg_dev
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" %
                     os.path.dirname(__file__), env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _GetCpuStatByHour(fn_ycsb):
    """Return per-hour statistics of CPU usage from a dstat file.

    Args:
        fn_ycsb: YCSB log path. It is handed to DstatLog.GetPlotFn to obtain
            the dstat data file to read.

    Returns:
        A dict mapping the integer hour of each row's timestamp to the result
        of Stat.Gen over the CPU usage values (100 - cpu_idle) seen in that
        hour.
    """
    (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb)

    # 1-based positions of the fields in a space-separated data row.
    col_time = 17
    col_cpu_idle = 19

    # Bucketize CPU usage
    # {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            t = re.split(r" +", line.strip())
            time0 = t[col_time - 1]
            cpu = 100.0 - float(t[col_cpu_idle - 1])
            hour = int(time0.split(":")[0])
            hour_cpuusage.setdefault(hour, []).append(cpu)

    # items() (instead of the Python-2-only iteritems()) keeps the function
    # usable on both Python 2 and Python 3.
    return {
        hour: Stat.Gen(cpu_usage)
        for hour, cpu_usage in hour_cpuusage.items()
    }
def main(argv):
    """Create the output directory and generate throughput-vs-metrics data.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    out_dir = Conf.GetOutDir()
    Util.MkDirs(out_dir)

    # To have a deeper understanding of what's causing the performance
    # difference between metadata on and off, generate the data for both.
    for setting in ("metadata-caching-on", "metadata-caching-off"):
        DstatLog.GenDataThrpVsAllMetrics(setting)
def main(argv):
    """Create the output directory, plot throughput/latency, generate data.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    out_dir = Conf.GetOutDir()
    Util.MkDirs(out_dir)

    PlotThrpLat()

    # To have a deeper understanding of what's causing the performance
    # difference between metadata on and off. The returned file name is not
    # used further in this function.
    fn_thrp_vs_metrics = DstatLog.GenDataThrpVsAllMetrics()