示例#1
0
def _GetHmCpu(fn_ycsb_log):
    """Return the per-minute average CPU usage read from the dstat plot file.

    The file name comes from DstatLog.GetPlotFn(fn_ycsb_log). Lines that
    start with "#" and blank lines are skipped; every other line is split on
    runs of spaces.

    Args:
        fn_ycsb_log: value handed to DstatLog.GetPlotFn to locate the file.

    Returns:
        A dict mapping "HH:MM" (zero-padded) to the mean of (100 - idle) over
        all samples that fall in that minute.

    Raises:
        IndexError: a data line has fewer fields than the columns read here.
        AttributeError: the time field does not start with
            digits:digits:digits (re.match returns None).
        ValueError: the idle field cannot be converted to float.
    """
    (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb_log)

    # 1-based column numbers in the dstat plot file.
    col_time = 17
    col_cpu_idle = 19

    # {hour_minute: [cpu_usage]}
    hm_cpu = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            if not line:
                # Nothing to parse; indexing the columns below would raise.
                continue
            t = re.split(r" +", line)

            # Parse these because some hours and minutes lack a leading 0.
            mo = re.match(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)", t[col_time - 1])
            hour_minute = "%02d:%02d" % (int(mo.group("h")),
                                         int(mo.group("m")))

            cpu = 100.0 - float(t[col_cpu_idle - 1])
            hm_cpu.setdefault(hour_minute, []).append(cpu)

    # items() rather than the Python-2-only iteritems(), so this runs on both
    # Python 2 and 3. Every bucket holds at least one sample by construction.
    hm_cpuavg = {}
    for hm, v in hm_cpu.items():
        hm_cpuavg[hm] = sum(v) / len(v)
    return hm_cpuavg
示例#2
0
def _GetMemStatByHourFromDstat(fn_ycsb):
  """Return per-hour statistics of the memory column of the dstat plot file.

  Lines starting with "#" and blank lines are skipped; every other line is
  split on runs of spaces. The samples are bucketed by the hour part of the
  time column and each bucket is summarized with Stat.Gen.

  Args:
    fn_ycsb: value handed to DstatLog.GetPlotFn to locate the dstat file.

  Returns:
    A dict mapping the integer hour to whatever Stat.Gen returns for the list
    of integer samples seen in that hour.

  Raises:
    IndexError: a data line has fewer fields than the columns read here.
    ValueError: the memory field or the hour is not an integer.
  """
  # NOTE(review): other callers of DstatLog.GetPlotFn unpack a
  # (file name, num_stgdevs) pair; confirm the version used here returns a
  # bare file name.
  fn_dstat = DstatLog.GetPlotFn(fn_ycsb)

  # 1-based column numbers in the dstat plot file.
  col_time = 21
  col_mem_buff = 13

  # Bucketize memory usage
  #   {hour: [mem_usage]}
  hour_memusage = {}
  with open(fn_dstat) as fo:
    for line in fo:
      if line.startswith("#"):
        continue
      line = line.strip()
      if not line:
        # Nothing to parse; indexing the columns below would raise.
        continue
      t = re.split(r" +", line)
      time0 = t[col_time - 1]
      mem_buff = int(t[col_mem_buff - 1])
      hour = int(time0.split(":")[0])
      hour_memusage.setdefault(hour, []).append(mem_buff)

  # items() rather than the Python-2-only iteritems(), so this runs on both
  # Python 2 and 3.
  hour_memstat = {}
  for hour, mem_usage in hour_memusage.items():
    hour_memstat[hour] = Stat.Gen(mem_usage)
  return hour_memstat
示例#3
0
def PlotByTime(p):
    """Run the rocksdb-ycsb-all-metrics-by-time gnuplot script for one experiment.

    Generates the YCSB, dstat and RocksDB data files for p, passes their names
    and the escaped parameter text to gnuplot through environment variables,
    and prints the name and size of the resulting PDF.

    Args:
        p: experiment parameters; str(p) becomes the PARAMS text and
            p.exp_dt is used in the output file name.
    """
    (fn_ycsb, time_max, _) = YcsbLog.GenDataMetricsByTime(p)

    # Backslash-escape characters for the gnuplot script (presumably for its
    # enhanced text mode; confirm against the .gnuplot file). "\\{" and "\\}"
    # produce the same backslash + brace strings the former "\{" and "\}"
    # did, without relying on an invalid escape sequence.
    params_formatted = str(p).replace("_", "\\\\_").replace(
        " ", "\\ ").replace("\n", "\\n").replace("{", "\\{").replace("}", "\\}")

    fn_dstat = DstatLog.GenDataFileForGnuplot(p)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(p)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             p.exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
示例#4
0
def _PlotTimeVsAllMetrics(fn_ycsb_log):
    """Plot all metrics over time for one YCSB log with gnuplot.

    Does nothing except print a message when the output PDF already exists.
    Otherwise it gathers the YCSB, dstat, RocksDB, CPU-average and memory data
    file names, hands them to the time-vs-all-metrics gnuplot script through
    environment variables, and prints the name and size of the created PDF.

    Args:
        fn_ycsb_log: path shaped like
            <dn_log>/<job_id>/ycsb/<exp_dt>..., for example
            .../171121-194901/ycsb/171122-010708.903-d

    Raises:
        AttributeError: fn_ycsb_log does not match that shape (re.match
            returns None).
    """
    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")

    fn_out = "%s/time-vs-all-metrics-%s.pdf" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        Cons.P("%s %d already exists." % (fn_out, os.path.getsize(fn_out)))
        return

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)
    # The value computed above is deliberately replaced by a fixed one-hour
    # range for the plot.
    time_max = "01:00:00"

    params_formatted = fn_ycsb_log + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # No idea how to put spaces for the indentations. It used to work.
    #   Neither replace(" ", "\ ") nor replace(" ", "\\ ") worked when a line
    #   starts with spaces followed by digits or [. It works when followed by
    #   u, so probably regular characters.
    # "\\{" and "\\}" produce the same backslash + brace strings the former
    # "\{" and "\}" did, without relying on an invalid escape sequence.
    params_formatted = params_formatted.replace("_", "\\\\_").replace(
        "\n", "\\n").replace("{", "\\{").replace("}", "\\}")

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn1(dn_log_job, exp_dt)
    (fn_rocksdb,
     target_cost_changes) = RocksdbLog.GetFnTimeVsMetrics(fn_ycsb_log)

    fn_cpu_avg = CpuAvg.GetFnForPlot(fn_ycsb_log)
    fn_mem_usage = ProcMemLog.GetFnForPlot(dn_log, job_id, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STGDEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["IN_FN_CPU_AVG"] = fn_cpu_avg
        env["IN_FN_MEM"] = fn_mem_usage
        env["TARGET_COST_CHANGES_TIME"] = target_cost_changes[0]
        env["TARGET_COST_CHANGES_COST"] = target_cost_changes[1]
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/time-vs-all-metrics.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
示例#5
0
def GetFnForPlot(fn_ycsb_log):
    """Return the name of the per-minute average CPU usage file, creating it if needed.

    The output file is <out dir>/cpu-avg-<exp_dt>. When it already exists its
    name is returned without regenerating it. Otherwise the dstat plot file
    located by DstatLog.GetPlotFn is read (lines starting with "#" and blank
    lines skipped), samples of (100 - idle) are averaged per "HH:MM", and the
    result is written sorted by "HH:MM" with the header repeated every 40
    data lines.

    Args:
        fn_ycsb_log: path shaped like
            <dn_log>/<job_id>/ycsb/<exp_dt>..., for example
            .../171121-194901/ycsb/171122-010708.903-d

    Returns:
        The output file name.

    Raises:
        AttributeError: fn_ycsb_log, or a time field in the dstat file, does
            not match the expected pattern (re.match returns None).
        IndexError: a data line has fewer fields than the columns read here.
        ValueError: the idle field cannot be converted to float.
    """
    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    exp_dt = mo.group("exp_dt")

    fn_out = "%s/cpu-avg-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg cpu usage file for plotting ..."):
        (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb_log)

        # 1-based column numbers in the dstat plot file.
        col_time = 17
        col_cpu_idle = 19

        # {hour_minute: [cpu_usage]}
        hm_cpu = {}
        with open(fn_dstat) as fo:
            for line in fo:
                if line.startswith("#"):
                    continue
                line = line.strip()
                if not line:
                    # Nothing to parse; indexing the columns would raise.
                    continue
                t = re.split(r" +", line)

                # Parse these because some hours and minutes lack a leading 0.
                mo_time = re.match(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)",
                                   t[col_time - 1])
                hour_minute = "%02d:%02d" % (int(mo_time.group("h")),
                                             int(mo_time.group("m")))

                cpu = 100.0 - float(t[col_cpu_idle - 1])
                hm_cpu.setdefault(hour_minute, []).append(cpu)

        fmt = "%5s %6.2f"
        header = Util.BuildHeader(fmt, "hour_min cpu_avg")

        with open(fn_out, "w") as fo:
            # items() rather than the Python-2-only iteritems(), so this runs
            # on both Python 2 and 3. Keys are unique, so sorting the pairs
            # orders them by "HH:MM" alone.
            for i, (hm, v) in enumerate(sorted(hm_cpu.items())):
                if i % 40 == 0:
                    fo.write(header + "\n")
                # Every bucket holds at least one sample by construction.
                avg = float(sum(v)) / len(v)
                fo.write((fmt + "\n") % (hm, avg))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
示例#6
0
def _GenDstat(fn_ycsb_log):
    """Generate the dstat gnuplot data file for a YCSB log and return its name.

    The log path is expected to look like <dn_log>/<job_id>/ycsb/<exp_dt>...;
    the job directory and experiment datetime taken from it are passed to
    DstatLog.GenDataFileForGnuplot. Only the first element of the returned
    pair is handed back to the caller.
    """
    parts = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    log_root, job, experiment = parts.group("dn_log", "job_id", "exp_dt")

    job_dir = "%s/%s" % (log_root, job)
    # The second element of the pair is not needed here.
    dstat_fn, _ = DstatLog.GenDataFileForGnuplot(job_dir, experiment)
    return dstat_fn
示例#7
0
def PlotByTime(params):
    """Run the rocksdb-ycsb-all-metrics-by-time gnuplot script for one YCSB log.

    Generates the YCSB, dstat and RocksDB data files for the experiment named
    by the log path, passes their names and the escaped parameter text to
    gnuplot through environment variables, and prints the name and size of
    the resulting PDF.

    Args:
        params: sequence whose first element is the YCSB log path, shaped
            like <dn_log>/<job_id>/ycsb/<exp_dt>..., for example
            .../171121-194901/ycsb/171122-010708.903-d

    Raises:
        AttributeError: the log path does not match that shape (re.match
            returns None).
    """
    fn_ycsb_log = params[0]

    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)

    params_formatted = fn_ycsb_log + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # The space substitution (.replace(" ", "\\ ")) stopped working at some
    # point and is left out; not the highest priority.
    # "\\{" and "\\}" produce the same backslash + brace strings the former
    # "\{" and "\}" did, without relying on an invalid escape sequence.
    params_formatted = params_formatted.replace("\n", "\\n").replace(
        "_", "\\\\_").replace("{", "\\{").replace("}", "\\}")

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat,
     num_stgdevs) = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STG_DEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
示例#8
0
def Plot(param):
    """Plot the SLA-admin metrics over time for one QuizUp experiment.

    param is a pair (job_id, exp_dt). The QuizUp, RocksDB and dstat log names
    are derived from them under ~/work/mutant/log/quizup/sla-admin/<job_id>,
    the simulation time is initialized from the QuizUp log, and the
    sla-admin-by-time gnuplot script is run with its inputs passed through
    environment variables. The name and size of the created PDF are printed.
    """
    job_id, exp_dt = param[0], param[1]
    home = os.path.expanduser("~")
    dn_log_job = "%s/work/mutant/log/quizup/sla-admin/%s" % (home, job_id)

    fn_log_quizup = "%s/quizup/%s" % (dn_log_job, exp_dt)
    fn_log_rocksdb = "%s/rocksdb/%s" % (dn_log_job, exp_dt)
    fn_log_dstat = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)

    quizup = QuizupLog(fn_log_quizup)
    SimTime.Init(quizup.SimTime("simulated_time_begin"),
                 quizup.SimTime("simulated_time_end"),
                 quizup.SimTime("simulation_time_begin"),
                 quizup.SimTime("simulation_time_end"))

    options = quizup.quizup_options
    std_max = _QzSimTimeDur(options["simulation_time_dur_in_sec"])
    options_text = _QuizupOptionsFormattedStr(quizup.quizup_options)
    # Space-separated so the gnuplot script can split the ranges.
    adj_ranges = quizup.quizup_options["error_adj_ranges"].replace(",", " ")

    parsed = RocksdbLog.ParseLog(fn_log_rocksdb, exp_dt)
    (fn_sla_admin, pid_params, num_sla_adj) = parsed

    fn_dstat = DstatLog.GenDataFileForGnuplot(fn_log_dstat, exp_dt)

    out_dir = Conf.GetDir("output_dir")
    fn_out = "%s/sla-admin-by-time-%s.pdf" % (out_dir, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads its inputs from the environment.
        env = os.environ.copy()
        env.update({
            "STD_MAX": std_max,
            "ERROR_ADJ_RANGES": adj_ranges,
            "IN_FN_QZ": fn_log_quizup,
            # An empty name is passed when there were no SLA adjustments.
            "IN_FN_SLA_ADMIN": fn_sla_admin if num_sla_adj != 0 else "",
            "QUIZUP_OPTIONS": options_text,
            "PID_PARAMS": "%s %s %s %s" % (pid_params["target_value"],
                                           pid_params["p"], pid_params["i"],
                                           pid_params["d"]),
            "WORKLOAD_EVENTS": " ".join(
                str(ev) for ev in quizup.simulation_time_events),
            "IN_FN_DS": fn_dstat,
            "OUT_FN": fn_out,
        })
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/sla-admin-by-time.gnuplot" % script_dir,
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
示例#9
0
def PlotByTime(params):
    """Run the rocksdb-ycsb-by-time gnuplot script for one experiment set and device.

    Generates the YCSB, dstat and RocksDB data files, passes their names and
    the escaped parameter text to gnuplot through environment variables, and
    prints the name and size of the resulting PDF.

    Args:
        params: sequence of (exp_set_id, stg_dev, p0) where p0 is a mapping
            with the keys "jobid_expdt" ("<job_id>/<exp_dt>") and
            "time_window".

    Raises:
        KeyError: p0 lacks "jobid_expdt" or "time_window".
        IndexError: "jobid_expdt" contains no "/".
    """
    exp_set_id = params[0]
    stg_dev = params[1]
    p0 = params[2]

    jobid_expdt = p0["jobid_expdt"]
    # Not used below; the lookup is kept so a missing key still raises.
    time_window = p0["time_window"]

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(exp_set_id, stg_dev)

    params_formatted = exp_set_id + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # "\\{" and "\\}" produce the same backslash + brace strings the former
    # "\{" and "\}" did, without relying on an invalid escape sequence.
    params_formatted = params_formatted.replace("_", "\\\\_").replace(
        " ", "\\ ").replace("\n", "\\n").replace("{", "\\{").replace("}", "\\}")

    t = jobid_expdt.split("/")
    job_id = t[0]
    exp_dt = t[1]

    dn_log = Conf.GetDir("dn")
    dn_log_job = "%s/%s" % (dn_log, job_id)

    fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                      stg_dev, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads its inputs from the environment.
        env = os.environ.copy()
        env["EXP_SET_ID"] = exp_set_id
        env["PARAMS"] = params_formatted
        env["STG_DEV"] = stg_dev
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
示例#10
0
def _GetCpuStatByHour(fn_ycsb):
    """Return per-hour statistics of CPU usage read from the dstat plot file.

    Lines starting with "#" and blank lines are skipped; every other line is
    split on runs of spaces. Each sample is 100 minus the idle column; the
    samples are bucketed by the hour part of the time column and each bucket
    is summarized with Stat.Gen.

    Args:
        fn_ycsb: value handed to DstatLog.GetPlotFn to locate the dstat file.

    Returns:
        A dict mapping the integer hour to whatever Stat.Gen returns for the
        list of samples seen in that hour.

    Raises:
        IndexError: a data line has fewer fields than the columns read here.
        ValueError: the idle field is not a number or the hour is not an
            integer.
    """
    (fn_dstat, _) = DstatLog.GetPlotFn(fn_ycsb)

    # 1-based column numbers in the dstat plot file.
    col_time = 17
    col_cpu_idle = 19

    # Bucketize CPU usage
    #   {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            if not line:
                # Nothing to parse; indexing the columns below would raise.
                continue
            t = re.split(r" +", line)
            time0 = t[col_time - 1]

            cpu = 100.0 - float(t[col_cpu_idle - 1])

            hour = int(time0.split(":")[0])
            hour_cpuusage.setdefault(hour, []).append(cpu)

    # items() rather than the Python-2-only iteritems(), so this runs on both
    # Python 2 and 3.
    hour_cpustat = {}
    for hour, cpu_usage in hour_cpuusage.items():
        hour_cpustat[hour] = Stat.Gen(cpu_usage)
    return hour_cpustat
示例#11
0
def main(argv):
    """Create the output directory and generate throughput-vs-metrics data.

    The data is generated once with metadata caching on and once with it off,
    to help understand what causes the performance difference between the
    two. argv is not used.
    """
    out_dir = Conf.GetOutDir()
    Util.MkDirs(out_dir)

    for caching_mode in ("metadata-caching-on", "metadata-caching-off"):
        DstatLog.GenDataThrpVsAllMetrics(caching_mode)
示例#12
0
def main(argv):
    """Create the output directory, plot throughput/latency, and generate dstat data.

    The throughput-vs-all-metrics data is generated to help understand what
    causes the performance difference between metadata caching on and off.
    argv is not used.
    """
    out_dir = Conf.GetOutDir()
    Util.MkDirs(out_dir)
    PlotThrpLat()

    # The returned file name is not needed here.
    DstatLog.GenDataThrpVsAllMetrics()