def _GetFnCpuOverhead():
  # Generate (or return the cached) data file comparing hourly CPU usage stats
  # of the unmodified DB ("u") against the DB with SSTable access monitoring
  # and placement computation ("c").
  #
  # Returns: path of the generated data file.
  fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    # Cached from a previous run.
    return fn_out

  dn_base = Conf.GetDir("dn_base")
  fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
  fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))

  # {hour: cpu stat object with avg/min/percentile/max attributes}
  hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
  hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
  #Cons.P(hour_cpustat_0)
  #Cons.P(hour_cpustat_1)

  with open(fn_out, "w") as fo:
    fo.write("# u: unmodified\n")
    fo.write(
        "# c: with SSTable access monitoring and SSTable placement computation\n"
    )
    fo.write("#\n")
    # One row per hour: hour, then 8 stats for each of the two experiments.
    fmt = "%2d" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
    fo.write(Util.BuildHeader(fmt, "hour" \
        " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
        " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
        ) + "\n")
    # Assumes every hour present in experiment 0 is present in experiment 1;
    # a missing hour would raise KeyError.
    for h, s0 in sorted(hour_cpustat_0.iteritems()):
      s1 = hour_cpustat_1[h]
      fo.write(
          (fmt + "\n") %
          (h, s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
           s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  # Build (or reuse) the per-minute average CPU usage comparison file for the
  # two experiments configured at Conf.Get(0) and Conf.Get(1).
  #
  # Returns: path of the plot data file.
  dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  # e.g. computation/180126-142513/ycsb/180126-193525.769-d
  exp_dts = [re.match(dt_pattern, Conf.Get(j)).group("exp_dt") for j in range(2)]
  fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
    dn_base = Conf.GetDir("dn_base")
    records = []
    for exp_type in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(exp_type))
      # Offset the second experiment's records by 30 (presumably seconds, so
      # the two series don't overlap in the plot) — TODO confirm in _RecordCpuAvg.
      for hm, cpu in _GetHmCpu(fn_ycsb_log).iteritems():
        records.append(_RecordCpuAvg(hm, exp_type * 30, cpu, exp_type))
    records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
      for cnt, rec in enumerate(records):
        # Repeat the header every 40 rows for readability.
        if cnt % 40 == 0:
          fo.write(header + "\n")
        fo.write("%s\n" % rec.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  # Build (or reuse) the per-minute average memory usage comparison file for
  # the two experiments configured at Conf.Get(0) and Conf.Get(1).
  #
  # Returns: path of the plot data file.
  dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  exp_dts = [re.match(dt_pattern, Conf.Get(j)).group("exp_dt") for j in range(2)]
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    dn_base = Conf.GetDir("dn_base")
    records = []
    for exp_type in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(exp_type))
      # Offset the second experiment's records by 30 (presumably seconds, so
      # the two series don't overlap in the plot) — TODO confirm in _RecordMemAvg.
      for hm, mem in _GetHmMem(fn_ycsb_log).iteritems():
        records.append(_RecordMemAvg(hm, exp_type * 30, mem, exp_type))
    records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      for cnt, rec in enumerate(records):
        # Repeat the header every 40 rows for readability.
        if cnt % 40 == 0:
          fo.write(header + "\n")
        fo.write("%s\n" % rec.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataFileForGnuplot(dt):
  # Generate (or return the cached) dstat data file for gnuplot for the
  # experiment identified by dt (a datetime-like id string).
  #
  # Side effects: resets and repopulates the module-level _header_idx and
  # _body_rows via _Parse(); creates the per-experiment output directory.
  #
  # Returns: path of the generated data file.
  SimTime.Init(dt)
  dn = "%s/%s" % (Conf.GetDir("output_dir"), dt)
  Util.MkDirs(dn)
  fn = "%s/dstat-data" % dn
  if os.path.isfile(fn):
    return fn
  with Cons.MT("Generating data file for plot ..."):
    global _header_idx
    global _body_rows
    # Reset module state before parsing; _Parse() fills these in.
    _header_idx = None
    _body_rows = None
    _Parse(dt)
    # Per-column printf formats; must match the column order produced by
    # sorted(_header_idx) and the tuple layout of r.Prepared().
    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(
        fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn, "w") as fo:
      i = 0
      for r in _body_rows:
        # Repeat the header every 50 rows for readability.
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GetHourlyFn():
  # Generate (or return the cached) hourly CPU usage comparison file between
  # the two experiments configured at Conf.Get(0) and Conf.Get(1).
  #
  # Returns: path of the generated data file.
  fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for cpu usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    # {hour: cpu stat object with avg/min/percentile/max attributes}
    hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
    hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
    #Cons.P(hour_cpustat_0)
    #Cons.P(hour_cpustat_1)
    with open(fn_out, "w") as fo:
      # Record which log file each column group came from.
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # Assumes both experiments cover the same set of hours (KeyError otherwise).
      for h, s0 in sorted(hour_cpustat_0.iteritems()):
        s1 = hour_cpustat_1[h]
        fo.write((fmt + "\n") %
                 (h, s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99,
                  s0.max, s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75,
                  s1._99, s1.max))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def RunTermInst(tags): threads = [] sys.stdout.write("Terminating running instances:") sys.stdout.flush() tis = [] for r in Ec2Region.All(): tis.append(TermInst(r, tags)) for ti in tis: t = threading.Thread(target=ti.Run) t.daemon = True threads.append(t) t.start() for t in threads: t.join() print "" Cons.P( Util.BuildHeader(_fmt, "Region" " InstanceId" " PrevState" " CurrState")) for ti in tis: ti.PrintResult()
def SstLevelInfo():
  # Generate (or reuse) the data file holding, per LSM level, the y-coordinate
  # of the level separator line and the midpoint used for level labels.
  #
  # Returns: path of the generated data file.

  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()

  fn = "%s/sst-info-by-time-by-levels-level-seps-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn):
    return fn

  # {level: high y-coordinate of the level's band}
  level_sep_highs = SstYCord.LevelSepHigh()
  with Cons.MT("Generating Sst info by time by levels: level separators data file ..."):
    with open(fn, "w") as fo:
      row_fmt = "%1d %10d %10s"
      fo.write("%s\n" % Util.BuildHeader(
          row_fmt, "level level_mid_for_labels level_low_for_separators"))
      prev_high = 0
      for level, high in sorted(level_sep_highs.iteritems()):
        # Label sits at the middle of the band; separator at its low edge.
        mid = (high + prev_high) / 2
        fo.write((row_fmt + "\n") % (level, mid, prev_high))
        prev_high = high
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def _DescInsts(self):
  # Print a table with the most recent describe-instance response per region,
  # including the current spot price of the instance's availability zone.
  fmt = "%-15s %19s %10s %13s %15s %10s %6.4f"
  Cons.P(
      Util.BuildHeader(
          fmt, "Placement:AvailabilityZone"
          " InstanceId"
          " InstanceType"
          " LaunchTime"
          #" PrivateIpAddress"
          " PublicIpAddress"
          " State:Name"
          " CurSpotPrice"))
  with self.status_by_regions_lock:
    for region, status in sorted(self.status_by_regions.iteritems()):
      # Walk the status list backwards so the first DescInstResp found is the
      # most recent one for this region.
      for s in reversed(status):
        if isinstance(s, DescInstResp):
          # Print only the last desc instance response per region
          # Assumes exactly one reservation/instance per response — TODO confirm.
          r = s.r["Reservations"][0]["Instances"][0]
          az = _Value(_Value(r, "Placement"), "AvailabilityZone")
          Cons.P(fmt % (
            az,
            _Value(r, "InstanceId"),
            _Value(r, "InstanceType"),
            _Value(r, "LaunchTime").strftime("%y%m%d-%H%M%S")
            #, _Value(r, "PrivateIpAddress")
            , _Value(r, "PublicIpAddress"),
            _Value(_Value(r, "State"), "Name"),
            SpotPrice.GetCurPrice(az)))
          break
def GenDataThrpVsAllMetrics(root_str):
  # Print a table of throughput (target IOPS) vs dstat metrics (disk
  # throughput, disk IOPS, memory usage) per storage device.
  #
  # Note: output-file caching is disabled; the rows go to the console only.
  #fn_out = "%s/rocksdb-ycsb-thrp-vs-dstat-metrics-by-stgdevs" % Conf.GetOutDir()
  #if os.path.exists(fn_out):
  #  return fn_out
  root = Conf.Get(root_str)
  with Cons.MT("Generating thrp vs dstat data: %s ..." % root_str):
    dn_base = root["dn_base"].replace("~", os.path.expanduser("~"))
    # {stg_dev: {target_iops: DstatLogReader}}
    stgdev_tio_dlr = {}
    for stgdev in ["local-ssd", "ebs-st1"]:
      if stgdev not in stgdev_tio_dlr:
        stgdev_tio_dlr[stgdev] = {}
      for target_iops, v in sorted(root[stgdev].iteritems()):
        # 171125-110758/ycsb/171125-161934.339-d
        mo = re.match(
            r"(?P<dn>.+)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).*",
            v["fn"])
        dn = "%s/%s" % (dn_base, mo.group("dn"))
        exp_dt = mo.group("exp_dt")
        #Cons.P("%s %s" % (dn, exp_dt))
        # "time" is a "begin-end" range string.
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        # Fast enough. Takes about 5 secs. No need for a parallelization.
        stgdev_tio_dlr[stgdev][target_iops] = DstatLogReader(
            dn, exp_dt, time_begin, time_end, overloaded)
    # 4 id columns, 6 disk-throughput, 6 disk-IOPS, 4 memory columns.
    fmt = "%17s %9s %6.0f %1d" \
        " %6.0f %4.0f %9.0f %8.0f %7.0f %6.0f" \
        " %6.3f %6.3f %9.3f %8.3f %8.3f %8.3f" \
        " %8.3f %8.3f %9.3f %8.3f"
    Cons.P("%s" % Util.BuildHeader(fmt, "exp_dt stg_dev target_iops overloaded" \
        " dsk/xvda:read dsk/xvda:writ dsk/xvdc:read dsk/xvdc:writ dsk/xvde:read dsk/xvde:writ" \
        " io/xvda:read io/xvda:writ io/xvdc:read io/xvdc:writ io/xvde:read io/xvde:writ" \
        " memory_usage:buff memory_usage:cach memory_usage:free memory_usage:used"
        ))
    for stgdev, v in sorted(stgdev_tio_dlr.iteritems()):
      for tio, dlr in sorted(v.iteritems()):
        # Memory averages divided by 1024*1024: presumably bytes -> MB (or
        # KB -> GB) — TODO confirm against DstatLogReader units.
        Cons.P(
            fmt %
            (dlr.exp_dt, stgdev, tio, (1 if dlr.overloaded else 0),
             dlr.GetAvg("dsk/xvda:read"), dlr.GetAvg("dsk/xvda:writ"),
             dlr.GetAvg("dsk/xvdc:read"), dlr.GetAvg("dsk/xvdc:writ"),
             dlr.GetAvg("dsk/xvde:read"), dlr.GetAvg("dsk/xvde:writ"),
             dlr.GetAvg("io/xvda:read"), dlr.GetAvg("io/xvda:writ"),
             dlr.GetAvg("io/xvdc:read"), dlr.GetAvg("io/xvdc:writ"),
             dlr.GetAvg("io/xvde:read"),
             dlr.GetAvg("io/xvde:writ"),
             dlr.GetAvg("memory_usage:buff") / (1024 * 1024),
             dlr.GetAvg("memory_usage:cach") / (1024 * 1024),
             dlr.GetAvg("memory_usage:free") / (1024 * 1024),
             dlr.GetAvg("memory_usage:used") / (1024 * 1024)))
def WriteHeader(fo):
  # Write the column header line for per-second stat rows to the open file fo.
  columns = ("timestamp_in_sec"
             " read_iops"
             " read_lat_avg_in_us"
             " ins_iops"
             " ins_lat_avg_in_us"
             " iops")
  fo.write("%s\n" % Util.BuildHeader(StatPerSec.fmt, columns))
def SstHeatAtLastTime():
  # Generate (or reuse) the data file with per-SSTable "heat" sampled at the
  # last of the heatmap sampling times, sorted by heat descending.
  #
  # Returns: path of the generated data file.

  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()

  fn_hlt = "%s/sst-heat-last-time-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn_hlt):
    return fn_hlt

  sst_lives = MemtSstLife.GetSstLives()

  with Cons.MT("Generating Sst heats at the last time ..."):
    # Gather heat info at n different times
    num_times = Conf.heatmap_by_time_num_times

    if Conf.ExpFinishTime() is None:
      MemtSstLife.SetExpEndTimeFromSstLives()

    # Earliest open time among all SSTables.
    min_sst_opened = None
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      min_sst_opened = sl.Opened() if min_sst_opened is None else min(
          min_sst_opened, sl.Opened())

    # Start time is when the first Sstable is opened, not the experiment start
    # time, when no SSTable exists yet.
    #   Exp start time:          160927-143257.395
    #   First Sstable open time: 160927-143411.273
    st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
    et = datetime.datetime.strptime(Conf.ExpFinishTime(), "%y%m%d-%H%M%S.%f")
    dur = (et - st).total_seconds()

    sstgen_heat = []
    # Sample at the last of num_times evenly spaced points, shifted by the
    # module-level time_offset_in_sec.
    t = st + datetime.timedelta(seconds=(float(dur) * (num_times - 1) / num_times
                                         + time_offset_in_sec))
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      h = sl.HeatAtTime(t)
      if h is None:
        # SSTable wasn't alive (or has no heat) at time t.
        continue
      sstgen_heat.append((sst_gen, h))
    sstgen_heat.sort(key=lambda sh: sh[1], reverse=True)

    # Note: Don't bother with the width proportional to the tablet size for now

    fmt = "%4d %1d %8.3f"
    with open(fn_hlt, "w") as fo:
      # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
      fo.write("%s\n" % Util.BuildHeader(fmt, "sst_gen level heat"))
      for sh in sstgen_heat:
        sst_gen = sh[0]
        heat = sh[1]
        fo.write((fmt + "\n") % (sst_gen, sst_lives[sst_gen].level, heat))
    Cons.P("Created %s %d" % (fn_hlt, os.path.getsize(fn_hlt)))
  return fn_hlt
def __str__(self):
  # Render a table of spot prices per availability zone:
  # current price, 2-day average, and 2-day max.
  row_fmt = "%-15s %6.4f %6.4f %6.4f"
  lines = [Util.BuildHeader(row_fmt, "Az cur 2d_avg 2d_max")]
  for az, prices in sorted(self.az_price.iteritems()):
    lines.append(row_fmt % (az, prices[0], prices[1], prices[2]))
  return "\n".join(lines)
def GetFnForPlot(fn_ycsb_log):
  # Generate (or return the cached) per-minute average CPU usage file for the
  # experiment whose YCSB log path is fn_ycsb_log.
  #
  # Returns: path of the generated data file.

  # 171121-194901/ycsb/171122-010708.903-d
  mo = re.match(
      r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
      fn_ycsb_log)
  exp_dt = mo.group("exp_dt")
  fn_out = "%s/cpu-avg-%s" % (Conf.GetOutDir(), exp_dt)
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg cpu usage file for plotting ..."):
    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn(fn_ycsb_log)
    # 1-based column positions in the dstat plot file. Only time and idle are
    # used below; sys/user/iowait are computed for reference.
    col_time = 17
    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4
    # {hour_minute: [cpu_usage]}
    hm_cpu = {}
    with open(fn_dstat) as fo:
      for line in fo:
        if line.startswith("#"):
          # Header/comment row.
          continue
        line = line.strip()
        t = re.split(r" +", line)
        # Parse these cause some hours and mins don't have left padding 0s.
        mo = re.match(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)", t[col_time - 1])
        hour = int(mo.group("h"))
        minute = int(mo.group("m"))
        hour_minute = "%02d:%02d" % (hour, minute)
        # CPU usage = 100 - idle.
        cpu = 100.0 - float(t[col_cpu_idle - 1])
        if hour_minute not in hm_cpu:
          hm_cpu[hour_minute] = []
        hm_cpu[hour_minute].append(cpu)
    fmt = "%5s %6.2f"
    header = Util.BuildHeader(fmt, "hour_min cpu_avg")
    with open(fn_out, "w") as fo:
      i = 0
      for hm, v in sorted(hm_cpu.iteritems()):
        # Repeat the header every 40 rows for readability.
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        l = len(v)
        avg = 0 if l == 0 else (float(sum(v)) / l)
        fo.write((fmt + "\n") % (hm, avg))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Header():
  # Column header line for SstLife rows formatted with SstLife._fmt.
  column_names = ("sst_gen"
                  " time_open_early time_open_normal time_open_min time_deleted"
                  " size level"
                  " ts_min ts_max"
                  " ks_first ks_last"
                  " y_cord_low"
                  " how_created")
  return Util.BuildHeader(SstLife._fmt, column_names)
def GetHourlyFn():
  # Generate (or return the cached) hourly memory usage comparison file
  # between the two experiments at Conf.Get(0) and Conf.Get(1), in GB.
  #
  # Returns: path of the generated data file.
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    # {hour: memory stat object with avg/min/percentile/max attributes, in bytes}
    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)
    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      # Each stat is converted from bytes to GB.
      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        fo.write((fmt + "\n") % (h
          , (float(s0.avg) / 1024 / 1024 / 1024)
          , (float(s0.min) / 1024 / 1024 / 1024)
          , (float(s0._1 ) / 1024 / 1024 / 1024)
          , (float(s0._25) / 1024 / 1024 / 1024)
          , (float(s0._50) / 1024 / 1024 / 1024)
          , (float(s0._75) / 1024 / 1024 / 1024)
          , (float(s0._99) / 1024 / 1024 / 1024)
          , (float(s0.max) / 1024 / 1024 / 1024)
          , (float(s1.avg) / 1024 / 1024 / 1024)
          , (float(s1.min) / 1024 / 1024 / 1024)
          , (float(s1._1 ) / 1024 / 1024 / 1024)
          , (float(s1._25) / 1024 / 1024 / 1024)
          , (float(s1._50) / 1024 / 1024 / 1024)
          , (float(s1._75) / 1024 / 1024 / 1024)
          , (float(s1._99) / 1024 / 1024 / 1024)
          , (float(s1.max) / 1024 / 1024 / 1024)
          ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetFnForPlot(dn_log, job_id, exp_dt):
  # Generate (or return the cached) memory-usage-over-time data file for
  # plotting. Reads the procmon log of the experiment identified by
  # (dn_log, job_id, exp_dt), converts each sample's RSS to GB, and writes
  # rows of (relative HH:MM:SS, rss_in_gb).
  #
  # Raises RuntimeError when neither the log file nor its .bz2 exists.
  # Returns: path of the generated data file.
  fn_out = "%s/mem-%s" % (Conf.GetOutDir(), exp_dt)
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating memory usage file for plotting ..."):
    fn = "%s/%s/procmon/%s" % (dn_log, job_id, exp_dt)
    if not os.path.exists(fn):
      # Decompress the .bz2 next to it, keeping the original (-k).
      fn_zipped = "%s.bz2" % fn
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn)
      Util.RunSubp(
          "cd %s && bzip2 -dk %s > /dev/null" %
          (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
      if not os.path.exists(fn):
        raise RuntimeError("Unexpected")
    exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
    # man proc. statm: the resident field is in pages; assumes 4 KB pages.
    # {relative "HH:MM:SS": rss_in_gb} — later samples in the same second win.
    dt_rss = {}
    with open(fn) as fo:
      for line in fo:
        try:
          t = line.strip().split()
          if len(t) != 8:
            Cons.P("Unexpected format [%s] Ignoring" % line)
            continue
          dt = datetime.datetime.strptime(t[0], "%y%m%d-%H%M%S")
          rss = float(t[2]) * 4096 / 1024 / 1024 / 1024
          #Cons.P("%s %d" % (dt, rss))
          # Convert to relative time.
          # Bug fix: timedelta.seconds discards the .days component, which
          # wrapped relative timestamps for runs longer than a day.
          # total_seconds() accounts for days too.
          total_seconds = int((dt - exp_begin_dt).total_seconds())
          hours, remainder = divmod(total_seconds, 3600)
          minutes, seconds = divmod(remainder, 60)
          rel_dt_str = "%02d:%02d:%02d" % (hours, minutes, seconds)
          dt_rss[rel_dt_str] = rss
        except IndexError as e:
          Cons.P("%s: %s [%s]" % (e, fn, line))
          raise e
    with open(fn_out, "w") as fo:
      fmt = "%8s %6.2f"
      header = Util.BuildHeader(fmt, "dt rss_in_gb")
      i = 0
      for dt, rss in sorted(dt_rss.iteritems()):
        # Repeat the header every 40 rows for readability.
        if i % 40 == 0:
          fo.write(header + "\n")
        fo.write((fmt + "\n") % (dt, rss))
        i += 1
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataThrpVsLat():
  # Generate (or return the cached) throughput-vs-latency data file per
  # storage device for plotting.
  #
  # Returns: path of the generated data file.
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")
    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}
    # local-ssd currently disabled; only ebs-st1 is processed.
    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        # "time" is a "begin-end" range string.
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(
            fn, time_begin, time_end, overloaded)
    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
            stgdev
            , tio
            , (1 if ylr.overloaded else 0)
            , ylr.db_iops_stat.avg
            , ylr.r_avg
            , ylr.r_90
            , ylr.r_99
            , ylr.r_999
            , ylr.r_9999
            , ylr.w_avg
            , ylr.w_90
            , ylr.w_99
            , ylr.w_999
            , ylr.w_9999
            ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataFileForGnuplot(dn_log_job, exp_dt):
  # Generate (or return the cached) dstat data file for gnuplot from the
  # dstat csv log of the experiment (dn_log_job, exp_dt).
  #
  # Side effects: sets module-level _exp_begin_dt, resets/repopulates
  # _header_idx and _body_rows via _Parse(); may decompress the .bz2 log.
  #
  # Returns: path of the generated data file.
  fn_out = "%s/dstat-%s" % (Conf.GetOutDir(), exp_dt)
  if os.path.isfile(fn_out):
    return fn_out

  global _exp_begin_dt
  _exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
  #Cons.P(_exp_begin_dt)

  with Cons.MT("Generating dstat data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None

    fn_log_dstat = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)
    # Unzip when the file is not there
    if not os.path.exists(fn_log_dstat):
      fn_zipped = "%s.bz2" % fn_log_dstat
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn_log_dstat)
      Util.RunSubp(
          "cd %s && bzip2 -dk %s > /dev/null" %
          (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
    if not os.path.exists(fn_log_dstat):
      raise RuntimeError("Unexpected")

    _Parse(fn_log_dstat)

    # For read and write: one pair of columns per storage device.
    fmt = " ".join(["%9.0f"] * 2 * _num_stg_devs
                   + ["%6.1f"] * 2 * _num_stg_devs)
    fmt += " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(
        fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    #Cons.P(header)
    with open(fn_out, "w") as fo:
      i = 0
      for r in _body_rows:
        # Skip rows outside the experiment's time window.
        if not r.TimeValid():
          continue
        # Repeat the header every 50 rows for readability.
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataCostVsMetrics(exp_set_id):
  # Generate the storage-cost vs performance data file for the experiment set.
  # Note: unlike the sibling generators, there is no cached-file early return;
  # the file is regenerated on every call.
  #
  # Returns: path of the generated data file.
  fn_out = "%s/rocksdb-ycsb-cost-vs-perf-%s" % (Conf.GetOutDir(), exp_set_id)
  fmt = "%5s %5.3f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f"
  with open(fn_out, "w") as fo:
    fo.write(Util.BuildHeader(fmt, "stg_dev cost_dollar_per_gb_per_month" \
        " db_iops.avg" \
        " db_iops.min" \
        " db_iops.max" \
        " db_iops._25" \
        " db_iops._50" \
        " db_iops._75" \
        " r_avg r_min r_max r_90 r_99 r_999 r_9999" \
        " w_avg w_min w_max w_90 w_99 w_999 w_9999"
        ) + "\n")
    # v (the per-device conf entry) is unused; the reader takes the ids.
    for stg_dev, v in Conf.Get(exp_set_id).iteritems():
      lr = YcsbLogReader(exp_set_id, stg_dev)
      fo.write((fmt + "\n") % (
        stg_dev, float(Conf.Get("stg_cost")[stg_dev])
        , lr.GetStat("db_iops.avg")
        , lr.GetStat("db_iops.min")
        , lr.GetStat("db_iops.max")
        , lr.GetStat("db_iops._25")
        , lr.GetStat("db_iops._50")
        , lr.GetStat("db_iops._75")
        , lr.GetStat("r_avg")
        , lr.GetStat("r_min")
        , lr.GetStat("r_max")
        , lr.GetStat("r_90")
        , lr.GetStat("r_99")
        , lr.GetStat("r_999")
        , lr.GetStat("r_9999")
        , lr.GetStat("w_avg")
        , lr.GetStat("w_min")
        , lr.GetStat("w_max")
        , lr.GetStat("w_90")
        , lr.GetStat("w_99")
        , lr.GetStat("w_999")
        , lr.GetStat("w_9999")
        ))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def main(argv):
  # Read the experiment manifest and print, grouped by experiment description,
  # a table of the options of each experiment run.
  dn = "%s/work/mutant/misc/rocksdb/log" % os.path.expanduser("~")
  fn = "%s/manifest.yaml" % dn
  doc = None
  with open(fn) as fo:
    # NOTE(review): yaml.load() without an explicit Loader is unsafe on
    # untrusted input; the manifest is local, but yaml.safe_load would be safer.
    doc = yaml.load(fo)
  #Cons.P(pprint.pformat(doc["new_exps"]))

  # {exp_desc: ClientLogReader()}
  expdesc_log = {}
  for simulation_time_begin in doc["New exps"]:
    log = ClientLogReader.ClientLogReader(simulation_time_begin)
    if log.options["exp_desc"] not in expdesc_log:
      expdesc_log[log.options["exp_desc"]] = []
    expdesc_log[log.options["exp_desc"]].append(log)

  for exp_desc, logs in sorted(expdesc_log.iteritems()):
    Cons.P("# Desc: %s" % exp_desc)
    fmt = "%17s %10s %7s %1s %4.0f" \
        " %1s %1s %1s %1s %5s %5s %5s %17s"
    Cons.P(Util.BuildHeader(fmt, "simulation_time_begin" \
        " fast_dev_path" \
        " slow_dev1_path" \
        " init_db_to_90p_loaded" \
        " memory_limit_in_mb" \
        \
        " mutant_enabled(for_old_exps)" \
        " cache_filter_index_at_all_levels" \
        " monitor_temp migrate_sstables" \
        " workload_start_from" \
        " workload_stop_at" \
        " simulation_time_dur_in_sec" \
        " sst_migration_temperature_threshold" \
        ))
    for l in logs:
      # V_bool presumably renders the option as a 1-char flag — see helper.
      Cons.P(fmt % (l.simulation_time_begin,
                    l.options["fast_dev_path"],
                    l.options["slow_dev1_path"],
                    V_bool(l.options, "init_db_to_90p_loaded"),
                    float(l.options["memory_limit_in_mb"]),
                    V_bool(l.options, "mutant_enabled"),
                    V_bool(l.options, "cache_filter_index_at_all_levels"),
                    V_bool(l.options, "monitor_temp"),
                    V_bool(l.options, "migrate_sstables"),
                    l.options["workload_start_from"],
                    l.options["workload_stop_at"],
                    l.options["simulation_time_dur_in_sec"],
                    l.options["sst_migration_temperature_threshold"]))
    Cons.P("")
def GetSstAccFreqAtSpecificTime(at_simulated_time):
  # Generate (or return the cached) snapshot of per-SSTable access frequency
  # at a specific point of simulated time (at_simulated_time is a fraction of
  # the simulation, e.g. 0.5 = halfway).
  #
  # Returns: path of the generated data file.
  fn = "%s/sst-accfreq-%s-at-%.04f" % (Conf.Get("dn_result"),
                                       Conf.Get("simulation_time_begin"),
                                       at_simulated_time)
  if os.path.isfile(fn):
    return fn

  # t0: a specific time when we take the snapshot
  t0 = SimTime.SimulatedTimeAt(at_simulated_time)
  Cons.P("t0 (time of snapshot): %s" % t0)

  sst_lives = GetSstLives()
  with open(fn, "w") as fo:
    fmt = "%4d %13s %13s %7.3f %1d %5.2f"
    fo.write("# t0 (time of snapshot): %s\n" % t0)
    fo.write("%s\n" % Util.BuildHeader(
        fmt,
        "sst_id ts_before ts_after reads_per_64MB_per_sec level age_in_day"
    ))
    for sst_id, sl in sst_lives.iteritems():
      # Skip SSTables not alive at t0.
      if t0 < SimTime.ToSimulatedTime(sl.ts_created):
        continue
      if (sl.ts_deleted is not None) and (SimTime.ToSimulatedTime(
          sl.ts_deleted) < t0):
        continue
      # Find the access-count interval [ts_prev, ts] that straddles t0 and
      # report the read rate of that interval.
      ts_prev = None
      for ts, v in sorted(sl.ts_acccnt.iteritems()):
        ts_simulated = SimTime.ToSimulatedTime(ts)
        if ts_simulated < t0:
          ts_prev = ts_simulated
          continue
        #Cons.P("ts_simulated: %s" % ts_simulated)
        if ts_prev is not None:
          #cnt = v[0]
          cnt_per_64MB_per_sec = v[1]
          #temp = v[2]
          # Bug fix: the strftime format was "%y%d%m-%H%M%S" (day before
          # month), inconsistent with the "%y%m%d-%H%M%S" timestamps used
          # everywhere else and ambiguous to read.
          fo.write(
              (fmt + "\n") %
              (sst_id, ts_prev.strftime("%y%m%d-%H%M%S"),
               ts_simulated.strftime("%y%m%d-%H%M%S"), cnt_per_64MB_per_sec,
               sl.Level(), ((t0 - SimTime.ToSimulatedTime(
                   sl.TsCreated())).total_seconds() / 3600.0 / 24)))
        break
  Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def Write(fn):
  # Write the SSTable event timeline to fn: one row per event timestamp with
  # the interval [prev, cur], SSTable counts/sizes at the interval edges, and
  # details of the SSTable created at the interval's end (if any).
  fmt = "%12s %12s %7.3f %4d %4d %12d %4s %4s %1s %1s %1s"
  with open(fn, "w") as fo:
    fo.write(Util.BuildHeader(fmt, "rel_ts_HHMMSS_begin" \
        " rel_ts_HHMMSS_end" \
        " ts_dur" \
        " num_sstables_begin" \
        " num_sstables_end" \
        " sstable_size_sum_end" \
        " end_sst_id" \
        " end_sst_creation_jobid" \
        " end_sst_creation_reason" \
        " end_sst_temp_triggered_single_migr" \
        " end_sst_migration_direction") + "\n")
    # Running state of the previous row; the first interval starts at 0.
    ts_prev = datetime.timedelta(0)
    ts_str_prev = "00:00:00.000"
    num_ssts_prev = 0
    total_sst_size_prev = 0
    for ts, num_ssts in sorted(SstEvents.ts_numssts.iteritems()):
      ts_str = _ToStr(ts)
      # Defaults ("-") when no SSTable was created at this timestamp.
      sst_id = "-"
      job_id = "-"
      creation_reason = "-"
      # Temperature-triggered single-sstable migration
      temp_triggered_migr = "-"
      migr_dirc = "-"
      if ts in SstEvents.createts_sstid:
        sst_id = SstEvents.createts_sstid[ts]
        hc = HowCreated.Get(sst_id)
        job_id = hc.JobId()
        creation_reason = hc.Reason()
        temp_triggered_migr = "T" if CompInfo.TempTriggeredSingleSstMigr(
            job_id) else "-"
        if SstEvents.migrate_sstables:
          # Migration direction only applies to compaction-created SSTables.
          if creation_reason == "C":
            migr_dirc = CompInfo.MigrDirc(job_id, sst_id)
      fo.write(
          (fmt + "\n") %
          (ts_str_prev, ts_str,
           (ts.total_seconds() - ts_prev.total_seconds()), num_ssts_prev,
           num_ssts, total_sst_size_prev, sst_id, job_id, creation_reason,
           temp_triggered_migr, migr_dirc))
      ts_str_prev = ts_str
      ts_prev = ts
      num_ssts_prev = num_ssts
      total_sst_size_prev = SstEvents.ts_sstsize[ts]
  Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
def GetRocksDbCostLatencyDataFile():
  # Generate (or return the cached) cost vs put/get latency data file for
  # unmodified RocksDB on the two storage devices.
  #
  # Returns: path of the generated data file.
  dn = "%s/.output" % os.path.dirname(__file__)
  fn = "%s/rocksdb-cost-latency" % dn
  if os.path.exists(fn):
    return fn

  rocksdb_stgsizetimecost = StgSizeCost.StgSizetimeCostRocksDb()
  #Cons.P(rocksdb_stgsizetimecost)

  # { stg_dev: Latency() }
  rocksdb_dev_latexp = {}
  exps_root = Conf.Manifest.Get(
      "2-level Mutant latencies by SSTable migration temperature thresholds")
  for stg_dev, exps in {"local-ssd1": exps_root["UnmodifiedRocksDB"]["LocalSsd1"] \
      , "ebs-st1": exps_root["UnmodifiedRocksDB"]["EbsSt1"] \
      }.iteritems():
    if stg_dev not in rocksdb_dev_latexp:
      rocksdb_dev_latexp[stg_dev] = Latency()
    for simulation_time_begin in exps:
      rocksdb_dev_latexp[stg_dev].Add(
          ClientLogReader.ClientLogReader(simulation_time_begin))

  with open(fn, "w") as fo:
    fmt = "%10s" \
        " %8.5f" \
        " %10.3f %10.3f %10.3f" \
        " %10.3f %10.3f %10.3f"
    fo.write("%s\n" % Util.BuildHeader(fmt, \
        "stg_dev" \
        " cost_$" \
        " lat_put_avg_avg_in_ms lat_put_avg_min_in_ms lat_put_avg_max_in_ms" \
        " lat_get_avg_avg_in_ms lat_get_avg_min_in_ms lat_get_avg_max_in_ms"
        ))
    #for stg_dev, exps in reversed(sorted(rocksdb_dev_latexp.iteritems())):
    for stg_dev, exps in rocksdb_dev_latexp.iteritems():
      if stg_dev == "local-ssd1":
        cost = rocksdb_stgsizetimecost.Cost("Sum")
      elif stg_dev == "ebs-st1":
        # The experiments ran on local SSD; scale the cost by the price ratio
        # to estimate what it would cost on ebs-st1.
        cost = rocksdb_stgsizetimecost.Cost("Sum") * Conf.StgCost(
            "ebs-st1") / Conf.StgCost("local-ssd1")
      fo.write((fmt + "\n") % (stg_dev, cost,
                               exps.PutAvgAvg(), exps.PutAvgMin(),
                               exps.PutAvgMax(), exps.GetAvgAvg(),
                               exps.GetAvgMin(), exps.GetAvgMax()))
  Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def Run(tags=None): sys.stdout.write("desc_instances:") sys.stdout.flush() diprs = [] for r in Ec2Region.All(): diprs.append(DescInstPerRegion(r, tags)) threads = [] for dipr in diprs: t = threading.Thread(target=dipr.Run) threads.append(t) t.daemon = True t.start() for t in threads: t.join() print "" num_insts = 0 for dipr in diprs: num_insts += dipr.NumInsts() if num_insts == 0: Cons.P("No instances found.") return print "" Cons.P( Util.BuildHeader( _fmt, "job_id" " Placement:AvailabilityZone" " InstanceId" #" InstanceType" #" LaunchTime" #" PrivateIpAddress" " PublicIpAddress" " State:Name" #" Tag:Name" )) results = [] for dipr in diprs: results += dipr.GetResults() for r in sorted(results): Cons.P(r)
def Get1minAvgFn(exp_rocksdb, exp_computation):
  # Build (or reuse) the per-minute average memory usage comparison file for
  # an unmodified-RocksDB experiment and a with-computation experiment, and
  # record the memory*time sums and their ratio in the file header.
  #
  # Returns: path of the plot data file.
  pat = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  exp_dts = [re.match(pat, e).group("exp_dt")
             for e in (exp_rocksdb, exp_computation)]
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    # {exp_type: [mem_usage]} — exp_type 0 = rocksdb, 1 = with computation.
    mem_samples = {0: [], 1: []}
    for exp_type, exp in enumerate((exp_rocksdb, exp_computation)):
      # Offset the second experiment's records by 30 so the two series don't
      # overlap in the plot.
      for hm, mem in _GetHmMem(exp).iteritems():
        records.append(_RecordMemAvg(hm, exp_type * 30, mem, exp_type))
        mem_samples[exp_type].append(mem)
    records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      fo.write("# Memory usage * time (B * sec)\n")
      sum_rocksdb = sum(mem_samples[0])
      sum_computation = sum(mem_samples[1])
      fo.write("# RocksDB: %f\n" % sum_rocksdb)
      fo.write("# With computation: %f\n" % sum_computation)
      fo.write("# C:R = %f\n" % (sum_computation / sum_rocksdb))
      fo.write("\n")
      for cnt, rec in enumerate(records):
        # Repeat the header every 40 rows for readability.
        if cnt % 40 == 0:
          fo.write(header + "\n")
        fo.write("%s\n" % rec.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _GenSstAccFreqByAgeDataFiles():
  # Generate one access-frequency-by-age data file per SSTable, once per
  # process (guarded by the module-level flag).
  global _gen_sst_accfreq_by_age_data_files
  if _gen_sst_accfreq_by_age_data_files:
    # Already generated in this process.
    return
  _BuildMemtSstLives()
  #_CheckSizes()
  with Cons.MT("Generating Sst acc freq by age files ..."):
    dn0 = "%s/%s" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    dn1 = "%s/sst-age-accfreq-data" % dn0; Util.MkDirs(dn1)
    fmt = "%10.3f %10.3f %10.3f" \
        " %10.3f %10.3f %10.3f" \
        " %6.2f %7s"
    header = Util.BuildHeader(fmt, \
        "age_begin_simulation_time_in_sec age_end_simulation_time_in_sec age_dur_simulation_time_in_sec" \
        " age_begin_simulated_time_in_sec age_end_simulated_time_in_sec age_dur_simulated_time_in_sec" \
        " reads_per_64MB_per_sec temperature")
    i = 0
    for sst_id, sl in _sst_lives.iteritems():
      # TODO: why?
      # Exclude sstables without any accesses. None seems to be such though.
      #if sl.LenTsAcccnt() == 0:
      #  continue

      # TODO: Why do you want to exclude live SSTables? I don't see any reason.
      # Exclude ones without deleted ts. Those are current SSTables.
      #if sl.TsDeleted() is None:
      #  continue

      # One file per SSTable, named by its id.
      fn = "%s/%d" % (dn1, sst_id)
      with open(fn, "w") as fo:
        fo.write("# sst_id: %d\n" % sst_id)
        fo.write("# level: %d\n" % sl.Level())
        fo.write("# size: %d\n" % sl.Size())
        fo.write("#\n")
        fo.write("%s\n" % header)
        sl.WriteAgeAccfreq(fo, fmt)
      #Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
      i += 1
    Cons.P("Created %d files in %s" % (i, dn1))
  _gen_sst_accfreq_by_age_data_files = True
def GenDataFileForGnuplot(fn, dt):
  # Generate (or return the cached) dstat data file for gnuplot from the
  # dstat log fn of the experiment identified by dt.
  #
  # Side effects: resets and repopulates module-level _header_idx and
  # _body_rows via _Parse(); may extract the .7z archive of the log.
  #
  # Returns: path of the generated data file.
  fn_out = "%s/dstat-%s" % (Conf.GetDir("output_dir"), dt)
  if os.path.isfile(fn_out):
    return fn_out
  with Cons.MT("Generating dstat data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None

    # Unzip when the file is not there
    if not os.path.exists(fn):
      fn_zipped = "%s.7z" % fn
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn)
      Util.RunSubp(
          "cd %s && 7z e %s > /dev/null" %
          (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
    if not os.path.exists(fn):
      raise RuntimeError("Unexpected")

    _Parse(fn)

    # Per-column printf formats; must match the column order produced by
    # sorted(_header_idx) and the tuple layout of r.Prepared().
    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(
        fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn_out, "w") as fo:
      i = 0
      for r in _body_rows:
        # Repeat the header every 50 rows for readability.
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def DescInsts():
  # Print a table of the most recent describe-instance response per launch,
  # sorted by availability zone.
  row_fmt = "%-15s %19s %10s %13s %15s %10s"
  Cons.P(Util.BuildHeader(row_fmt,
    "Placement:AvailabilityZone"
    " InstanceId"
    " InstanceType"
    " LaunchTime"
    " PublicIpAddress"
    " State:Name"))

  rows = []
  for k, v in InstLaunchProgMon.progress.iteritems():
    if len(v.responses) == 0:
      continue
    # Only the latest response matters.
    inst = v.responses[-1]["Reservations"][0]["Instances"][0]
    # Tags are collected but not shown; the Tags column is currently disabled.
    tags = {}
    if "Tags" in inst:
      for t in inst["Tags"]:
        tags[t["Key"]] = t["Value"]
    rows.append(row_fmt % (
      _Value(_Value(inst, "Placement"), "AvailabilityZone"),
      _Value(inst, "InstanceId"),
      _Value(inst, "InstanceType"),
      _Value(inst, "LaunchTime").strftime("%y%m%d-%H%M%S"),
      _Value(inst, "PublicIpAddress"),
      _Value(_Value(inst, "State"), "Name")))

  for row in sorted(rows):
    Cons.P(row)
def GetPlotData():
  # Generate (or return the cached) YCSB workload-d plot data file:
  # one row per target IOPS with read/write latency percentiles.
  #
  # Returns: path of the generated data file.
  fn_out = "%s/ycsb-d-by-iops" % _dn_output
  if os.path.isfile(fn_out):
    return fn_out

  with Cons.MT("Generating plot data ..."):
    # {target_iops: YcsbLog}
    ycsb_logs = {}
    fns_log = Conf.Get("RocksDB with local SSD")
    #Cons.P(pprint.pformat(fns_log))
    for fn in fns_log:
      fn = fn.replace("~", os.path.expanduser("~"))
      if not os.path.isfile(fn):
        if not os.path.isfile("%s.bz2" % fn):
          raise RuntimeError("Unexpected")
        # Bug fix: the format string was "cd && pbzip2 -d %s.bz2" — one %s
        # placeholder for two arguments, which raised TypeError and never
        # changed into the log's directory. "cd %s" was clearly intended.
        Util.RunSubp("cd %s && pbzip2 -d %s.bz2" % (os.path.dirname(fn), fn))
      ycsb_log = YcsbLog(fn)
      #Cons.P(ycsb_log)
      ycsb_logs[ycsb_log.target_iops] = ycsb_log
    #Cons.P(pprint.pformat(ycsb_logs))

    fmt = "%6d %10.3f" \
        " %8.3f %3.0f %5.0f %5.0f %5.0f %6.0f %6.0f %6.0f %6.0f" \
        " %8.3f %3.0f %5.0f %5.0f %5.0f %6.0f %6.0f %6.0f %6.0f"
    with open(fn_out, "w") as fo:
      fo.write("# Latency in us\n")
      fo.write("%s\n" % Util.BuildHeader(fmt, "target_iops iops" \
          " r_avg r_1 r_25 r_50 r_75 r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_1 w_25 w_50 w_75 w_90 w_99 w_99.9 w_99.99"))
      for ti, v in sorted(ycsb_logs.iteritems()):
        r = v.ReadLat()
        w = v.WriteLat()
        fo.write((fmt + "\n") %
                 (ti, v.op_sec, r.avg, r._1, r._25, r._50, r._75, r._90,
                  r._99, r._999, r._9999, w.avg, w._1, w._25, w._50, w._75,
                  w._90, w._99, w._999, w._9999))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def CalcCompareTwo():
  # Compare every (rocksdb, computation) experiment pair, compute each pair's
  # CPU and memory overhead, and print the pairs where the computation run is
  # at least as expensive as the rocksdb run.
  #
  # Returns: list of ExpTuple(o_cpu, o_mem, (fn_rocksdb, fn_computation)).
  with Cons.MT("Calculating the overhead of pairs ..."):
    dn_base = Conf.GetDir("dn_base")
    exps_rocksdb = Conf.Get("rocksdb")
    exps_computation = Conf.Get("computation")
    # Cartesian product: every rocksdb run paired with every computation run.
    params = []
    for r in exps_rocksdb:
      for c in exps_computation:
        params.append(("%s/%s" % (dn_base, r), "%s/%s" % (dn_base, c)))
    parallel_processing = True
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_CalcCompareTwo, params)
    else:
      for p in params:
        _CalcCompareTwo(p)

    # Find the closest pair
    # You want the computation overhead one has the minimal overhead, but no
    # smaller than the rocksdb one.
    exp_tuples = []
    for p in params:
      o_cpu = CompareCpu.GetOverhead(p[0], p[1])
      o_mem = CompareMem.GetOverhead(p[0], p[1])
      # Overhead < 1.0 means the computation run used less than the rocksdb
      # run; those pairs are discarded.
      if (o_cpu < 1.0) or (o_mem < 1.0):
        continue
      exp_tuples.append(ExpTuple(o_cpu, o_mem, p))
    fmt = "%8.6f %8.6f %17s %17s"
    Cons.P(
        Util.BuildHeader(
            fmt, "cpu_overhead mem_overhead expdt_rocksdb expdt_computation"))
    for e in sorted(exp_tuples):
      Cons.P(fmt % (e.o_cpu, e.o_mem, e.GetExpDt("r"), e.GetExpDt("c")))
    return exp_tuples