Пример #1
0
def PlotByTime(p):
    """Plot YCSB, dstat and RocksDB metrics over time for one experiment.

    Generates the three gnuplot input files from p and runs
    rocksdb-ycsb-all-metrics-by-time.gnuplot (looked up next to this file),
    passing file names and parameters through environment variables.

    Args:
      p: experiment parameters. p is forwarded to the YcsbLog, DstatLog and
        RocksdbLog data-file generators, str(p) is exported as PARAMS, and
        p.exp_dt is used in the output file name.
    """
    # The third element of the result is not needed for this plot.
    (fn_ycsb, time_max, _) = YcsbLog.GenDataMetricsByTime(p)
    #Cons.P("%s\n%s\n%s" % (fn_ycsb, time_max, params1))

    # Backslash-escape characters before handing the text to gnuplot
    # (presumably for its enhanced text mode -- confirm in the .gnuplot file).
    # The replacements are applied in this order on purpose: later ones must
    # not touch the backslashes inserted by earlier ones. Every escape is
    # spelled with an explicit "\\" so no literal relies on an invalid escape
    # sequence such as "\{".
    params_formatted = str(p)
    for plain, escaped in (("_", "\\\\_"), (" ", "\\ "), ("\n", "\\n"),
                           ("{", "\\{"), ("}", "\\}")):
        params_formatted = params_formatted.replace(plain, escaped)
    #Cons.P(params_formatted)

    fn_dstat = DstatLog.GenDataFileForGnuplot(p)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(p)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             p.exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads all of its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Пример #2
0
def GetHourlyFn():
    """Return the path of the hourly CPU usage comparison file.

    The file is created on first use and reused afterwards. Each row holds
    the hour followed by eight statistics (avg, min, _1, _25, _50, _75, _99,
    max) for experiment 0 and the same eight for experiment 1.
    """
    out_path = "%s/cpu-hourly-usage" % Conf.GetOutDir()
    if os.path.exists(out_path):
        return out_path

    # Attributes read from each per-hour stat object, in column order.
    stat_attrs = ("avg", "min", "_1", "_25", "_50", "_75", "_99", "max")

    with Cons.MT("Generating file for cpu usage comparison ..."):
        base_dir = Conf.GetDir("dn_base")
        log_0 = "%s/%s" % (base_dir, Conf.Get(0))
        log_1 = "%s/%s" % (base_dir, Conf.Get(1))

        stats_by_hour_0 = _GetCpuStatByHour(log_0)
        stats_by_hour_1 = _GetCpuStatByHour(log_1)

        row_fmt = ("%2d"
                   " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
                   " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f")
        column_names = ("hour"
                        " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max"
                        " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max")

        with open(out_path, "w") as out:
            out.write("# 0: %s\n" % log_0)
            out.write("# 1: %s\n" % log_1)
            out.write("#\n")
            out.write(Util.BuildHeader(row_fmt, column_names) + "\n")
            for hour, stat_0 in sorted(stats_by_hour_0.iteritems()):
                # Every hour of experiment 0 must also exist in experiment 1.
                stat_1 = stats_by_hour_1[hour]
                row = [hour]
                row.extend(getattr(stat_0, a) for a in stat_attrs)
                row.extend(getattr(stat_1, a) for a in stat_attrs)
                out.write((row_fmt + "\n") % tuple(row))
        Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
        return out_path
Пример #3
0
def Get1minAvgFn():
  """Return the path of the 1-minute-average memory comparison file.

  The file name embeds the experiment datetimes parsed from Conf.Get(0) and
  Conf.Get(1). An existing file is reused. Otherwise the per-minute memory
  values of both experiments are merged, sorted by timestamp and written with
  a column header repeated every 40 rows.
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        # The second argument is i * 30; presumably a per-experiment offset
        # in seconds so the two series do not share timestamps -- confirm
        # against _RecordMemAvg.
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))

  fmt = "%8s %6.3f %1d"
  header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
  with open(fn_out, "w") as fo:
    # enumerate advances the row counter on every record. The previous code
    # only incremented it inside the header branch, so the counter stuck at 1
    # and the header was never repeated.
    for i, r in enumerate(records):
      if i % 40 == 0:
        fo.write(header + "\n")
      fo.write("%s\n" % r.ToStr(fmt))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Пример #4
0
def Get1minAvgFn():
    """Return the path of the 1-minute-average CPU comparison file.

    The file name embeds the experiment datetimes parsed from Conf.Get(0)
    and Conf.Get(1). An existing file is reused. Otherwise the per-minute CPU
    values of both experiments are merged, sorted by timestamp and written
    with a column header repeated every 40 rows.
    """
    exp_dts = []
    for i in range(2):
        #Cons.P(Conf.Get(i))
        # computation/180126-142513/ycsb/180126-193525.769-d
        mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d",
                      Conf.Get(i))
        exp_dts.append(mo.group("exp_dt"))
    fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
        records = []
        dn_base = Conf.GetDir("dn_base")
        for i in range(2):
            fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
            hm_cpu = _GetHmCpu(fn_ycsb_log)
            for hm, cpu in hm_cpu.iteritems():
                # The second argument is i * 30; presumably a per-experiment
                # offset in seconds so the two series do not share
                # timestamps -- confirm against _RecordCpuAvg.
                records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
        records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
        # enumerate advances the row counter on every record. The previous
        # code only incremented it inside the header branch, so the counter
        # stuck at 1 and the header was never repeated.
        for i, r in enumerate(records):
            if i % 40 == 0:
                fo.write(header + "\n")
            fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Пример #5
0
def main(argv):
    """Plot per-device time series and the cost-vs-latency chart.

    Args:
      argv: command line arguments; not used.
    """
    Util.MkDirs(Conf.GetOutDir())

    exp_set_id = "171013-134330"
    #exp_set_id = "171022-160102"
    conf_exp_set = Conf.Get(exp_set_id)

    # One PlotByTime job per storage device of the experiment set.
    params = [(exp_set_id, stg_dev, v)
              for stg_dev, v in conf_exp_set.iteritems()]

    parallel_processing = True
    if parallel_processing:
        pool = multiprocessing.Pool()
        try:
            pool.map(PlotByTime, params)
        finally:
            # Always release the worker processes, also when a job failed.
            pool.close()
            pool.join()
    else:
        for job in params:
            PlotByTime(job)

    # Plot (cost vs latency) by storage devices
    #   Latency in avg and tail latencies
    #
    # The goal:
    #   to show there are limited options
    #   and show the baseline performances.
    #
    # Finish this and show that this was not a fair comparison.
    PlotCostLatency(exp_set_id)
Пример #6
0
def PlotCompareTwo():
    """Plot the comparison of the two experiments named by Conf.Get(0/1).

    Prepares the RocksDB, CPU and memory data files, then runs
    compare-two-exps.gnuplot (looked up next to this file) with the inputs
    passed through environment variables.
    """
    # The SSTable creation stat file name is returned but not plotted here.
    (rocksdb_fns, _) = RocksdbLog.GenDataFilesForGnuplot()
    #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn()
    cpu_1min_avg_fn = CompareCpu.Get1minAvgFn()
    # Called for its side effect only; the result is not passed to gnuplot.
    CompareMem.GetHourlyFn()
    mem_1min_avg_fn = CompareMem.Get1minAvgFn()
    #time_max = "09:00:00"
    #time_max = "08:00:00"
    time_max = "07:50:00"

    exp_dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
    exp_dts = [
        re.match(exp_dt_pattern, Conf.Get(idx)).group("exp_dt")
        for idx in range(2)
    ]
    out_pdf = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(),
                                             "-".join(exp_dts))

    with Cons.MT("Plotting ..."):
        plot_env = os.environ.copy()
        plot_env.update({
            "TIME_MAX": str(time_max),
            #"CPU_STAT": fn_cpu_stat_by_time,
            "FN_CPU_1MIN_AVG": cpu_1min_avg_fn,
            #"MEM_STAT": fn_mem_stat_by_time,
            "FN_MEM_1MIN_AVG": mem_1min_avg_fn,
            "ROCKSDB0": rocksdb_fns[0],
            "ROCKSDB1": rocksdb_fns[1],
            "OUT_FN": out_pdf,
        })
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % script_dir,
                     env=plot_env)
        Cons.P("Created %s %d" % (out_pdf, os.path.getsize(out_pdf)))
Пример #7
0
def _GetFnCpuOverhead():
    """Return the path of the hourly CPU overhead file, creating it if needed.

    Each row holds the hour followed by eight statistics (avg, min, _1, _25,
    _50, _75, _99, max) for the "unmodified_db" experiment (u_ columns) and
    the same eight for the "computation_overhead" experiment (c_ columns).
    """
    out_path = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
    if os.path.exists(out_path):
        return out_path

    base_dir = Conf.GetDir("dn_base")
    log_unmodified = "%s/%s" % (base_dir, Conf.Get("unmodified_db"))
    log_computation = "%s/%s" % (base_dir, Conf.Get("computation_overhead"))

    stats_by_hour_u = _GetCpuStatByHour(log_unmodified)
    stats_by_hour_c = _GetCpuStatByHour(log_computation)

    # Attributes read from each per-hour stat object, in column order.
    stat_attrs = ("avg", "min", "_1", "_25", "_50", "_75", "_99", "max")
    row_fmt = ("%2d"
               " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
               " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f")
    column_names = ("hour"
                    " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max"
                    " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max")

    with open(out_path, "w") as out:
        out.write("# u: unmodified\n")
        out.write(
            "# c: with SSTable access monitoring and SSTable placement computation\n"
        )
        out.write("#\n")
        out.write(Util.BuildHeader(row_fmt, column_names) + "\n")
        for hour, stat_u in sorted(stats_by_hour_u.iteritems()):
            # Every hour of the unmodified run must exist in the other run.
            stat_c = stats_by_hour_c[hour]
            row = [hour]
            row.extend(getattr(stat_u, a) for a in stat_attrs)
            row.extend(getattr(stat_c, a) for a in stat_attrs)
            out.write((row_fmt + "\n") % tuple(row))
    Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
    return out_path
Пример #8
0
def _GenDB(fn0, fn1):
  """Build a fresh SQLite DB of SSTable creation records and return it.

  Any existing sst-creation-info.db in the output directory is removed first.
  Both input files are parsed line by line and every SSTable creation record
  is inserted into the sst_creation_info table.

  Args:
    fn0: data file whose rows are stored with db_type "RocksDB".
    fn1: data file whose rows are stored with db_type "Mutant".

  Returns:
    The open sqlite3 connection with all rows committed. The caller owns it.

  Raises:
    OSError: when the old DB file exists but can not be removed.
  """
  with Cons.MT("Building a stat DB ..."):
    # Put the SSTable creation info in a DB and generate statistics
    fn_db = "%s/sst-creation-info.db" % Conf.GetOutDir()
    try:
      os.remove(fn_db)
    except OSError as e:
      # A missing file is fine. Anything else is re-raised with a bare raise
      # so the original traceback is kept.
      if e.errno != errno.ENOENT:
        raise

    table_schema = """ CREATE TABLE IF NOT EXISTS sst_creation_info (
                        fn text NOT NULL
                        , db_type text NOT NULL
                        , hour integer NOT NULL
                        , sst_id integer NOT NULL
                        , sst_size integer NOT NULL
                        , job_id integer NOT NULL
                        , creation_reason text NOT NULL
                        , temp_triggered_single_sst_migr BOOLEAN
                        , migr_dirc text NOT NULL
                      ); """

    q = """INSERT INTO sst_creation_info (fn, db_type, hour, sst_id, sst_size, job_id, creation_reason, temp_triggered_single_sst_migr, migr_dirc)
             VALUES (?,?,?,?,?,?,?,?,?)"""

    # sqlite3.connect raises on failure; it never returns None.
    conn = sqlite3.connect(fn_db)
    try:
      cur = conn.cursor()
      cur.execute(table_schema)

      for db_type, fn in (("RocksDB", fn0), ("Mutant", fn1)):
        with open(fn) as fo:
          for line in fo:
            if line.startswith("#"):
              continue
            t = re.split(r" +", line.strip())
            # The hour is the part of column 1 before the first ":".
            hour = int(t[1].split(":")[0])

            sst_id = t[6]
            # Ignore when end sst_id is -, which means an sstable was deleted.
            if sst_id == "-":
              continue
            sst_id = int(sst_id)

            sst_size = int(t[5])
            job_id = int(t[7])

            # Creation reason: R, F, C, -
            cr = t[8]
            temp_triggered_single_sst_migr = (t[9] == "T")
            migr_dirc = t[10]

            cur.execute(q, (fn, db_type, hour, sst_id, sst_size, job_id, cr,
                            temp_triggered_single_sst_migr, migr_dirc))
      conn.commit()
      cur.close()
    except Exception:
      # Do not leak the connection when parsing or inserting fails.
      conn.close()
      raise
    return conn
Пример #9
0
def _PlotTimeVsAllMetrics(fn_ycsb_log):
    """Plot all metrics over time for the experiment of one YCSB log.

    Does nothing but print a message when the output PDF already exists.
    Otherwise the YCSB, dstat, RocksDB, CPU and memory data files are
    prepared and time-vs-all-metrics.gnuplot (looked up next to this file)
    is run with its inputs passed through environment variables.

    Args:
      fn_ycsb_log: path of the form
        <dn_log>/<job_id>/ycsb/<exp_dt>..., for example
        171121-194901/ycsb/171122-010708.903-d below some log directory.
    """
    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")
    #Cons.P(dn_log)
    #Cons.P(job_id)
    #Cons.P(exp_dt)

    fn_out = "%s/time-vs-all-metrics-%s.pdf" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        Cons.P("%s %d already exists." % (fn_out, os.path.getsize(fn_out)))
        return

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)
    #Cons.P("%s\n%s\n%s" % (fn_ycsb, time_max, params1))
    # The time range reported by YcsbLog is overridden with a fixed hour.
    time_max = "01:00:00"

    params_formatted = fn_ycsb_log + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # No idea how to put spaces for the indentations. It used to work.
    #   Neither replace(" ", "\ ") or replace(" ", "\\ ") worked when a line starts with spaces followed by digits or [.
    #     work when it is followed by u. I guess regular characters.
    #
    # The replacements run in this order on purpose. Every escape is spelled
    # with an explicit "\\" so no literal relies on an invalid escape
    # sequence such as "\{".
    for plain, escaped in (("_", "\\\\_"), ("\n", "\\n"), ("{", "\\{"),
                           ("}", "\\}")):
        params_formatted = params_formatted.replace(plain, escaped)
    #Cons.P(params_formatted)

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn1(dn_log_job, exp_dt)
    (fn_rocksdb,
     target_cost_changes) = RocksdbLog.GetFnTimeVsMetrics(fn_ycsb_log)
    #Cons.P(target_cost_changes)

    fn_cpu_avg = CpuAvg.GetFnForPlot(fn_ycsb_log)
    fn_mem_usage = ProcMemLog.GetFnForPlot(dn_log, job_id, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads all of its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STGDEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["IN_FN_CPU_AVG"] = fn_cpu_avg
        env["IN_FN_MEM"] = fn_mem_usage
        env["TARGET_COST_CHANGES_TIME"] = target_cost_changes[0]
        env["TARGET_COST_CHANGES_COST"] = target_cost_changes[1]
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/time-vs-all-metrics.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Пример #10
0
def GetFnForPlot(fn_ycsb_log):
    """Return the path of the per-minute average CPU usage file.

    The file is named after the experiment datetime found in fn_ycsb_log and
    is reused when it already exists. Otherwise it is computed from the
    dstat plot file: CPU usage of a sample is 100 minus the idle column, and
    samples are averaged per "HH:MM".

    Args:
      fn_ycsb_log: path like .../171121-194901/ycsb/171122-010708.903-d
    """
    # 171121-194901/ycsb/171122-010708.903-d
    path_match = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    exp_dt = path_match.group("exp_dt")

    out_path = "%s/cpu-avg-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(out_path):
        return out_path

    with Cons.MT("Creating avg cpu usage file for plotting ..."):
        # The number of storage devices is returned too but not needed here.
        (dstat_path, _) = DstatLog.GetPlotFn(fn_ycsb_log)

        # 1-based column numbers in the dstat plot file.
        col_time = 17
        col_cpu_idle = 19

        time_re = re.compile(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)")

        # {hour_minute: [cpu_usage]}
        usage_by_minute = {}
        with open(dstat_path) as dstat:
            for raw in dstat:
                if raw.startswith("#"):
                    continue
                cols = re.split(r" +", raw.strip())

                # Parse these cause some hours and mins don't have left padding 0s.
                time_match = time_re.match(cols[col_time - 1])
                minute_key = "%02d:%02d" % (int(time_match.group("h")),
                                            int(time_match.group("m")))

                usage = 100.0 - float(cols[col_cpu_idle - 1])
                usage_by_minute.setdefault(minute_key, []).append(usage)

        row_fmt = "%5s %6.2f"
        header = Util.BuildHeader(row_fmt, "hour_min cpu_avg")

        with open(out_path, "w") as out:
            # The header is repeated every 40 rows.
            for row_no, (minute_key, samples) in enumerate(
                    sorted(usage_by_minute.iteritems())):
                if row_no % 40 == 0:
                    out.write(header + "\n")
                count = len(samples)
                if count == 0:
                    mean = 0
                else:
                    mean = float(sum(samples)) / count
                out.write((row_fmt + "\n") % (minute_key, mean))
        Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
        return out_path
Пример #11
0
def GetFnForPlot(dn_log, job_id, exp_dt):
    """Return the path of the memory (RSS) usage file for one experiment.

    The file is reused when it already exists. Otherwise the procmon log
    <dn_log>/<job_id>/procmon/<exp_dt> is read (after decompressing its .bz2
    sibling when only that exists) and one "HH:MM:SS rss" row is written per
    distinct time relative to the experiment start.

    Args:
      dn_log: log base directory.
      job_id: job directory name below dn_log.
      exp_dt: experiment start datetime in "%y%m%d-%H%M%S.%f" format.

    Raises:
      RuntimeError: when neither the procmon log nor its .bz2 exists, or the
        log is still missing after decompression.
    """
    fn_out = "%s/mem-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating memory usage file for plotting ..."):
        fn = "%s/%s/procmon/%s" % (dn_log, job_id, exp_dt)
        if not os.path.exists(fn):
            fn_zipped = "%s.bz2" % fn
            if not os.path.exists(fn_zipped):
                raise RuntimeError("Unexpected: %s" % fn)
            # -k keeps the compressed file next to the extracted one.
            Util.RunSubp(
                "cd %s && bzip2 -dk %s > /dev/null" %
                (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
        if not os.path.exists(fn):
            raise RuntimeError("Unexpected")

        exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")

        # man proc. statm
        dt_rss = {}
        with open(fn) as fo:
            for line in fo:
                try:
                    t = line.strip().split()
                    if len(t) != 8:
                        Cons.P("Unexpected format [%s] Ignoring" % line)
                        continue
                    dt = datetime.datetime.strptime(t[0], "%y%m%d-%H%M%S")
                    # Column 2 times 4096, scaled down by 1024 three times
                    # (presumably resident pages of 4 KiB to GiB).
                    rss = float(t[2]) * 4096 / 1024 / 1024 / 1024
                    #Cons.P("%s %d" % (dt, rss))

                    # Convert to relative time. total_seconds() includes the
                    # days component, which timedelta.seconds drops: with
                    # .seconds a run longer than 24 hours wrapped around and
                    # a sample slightly before the start became 23:59:59.
                    # Samples before the start are clamped to 00:00:00.
                    rel_dt = dt - exp_begin_dt
                    total_seconds = max(0, int(rel_dt.total_seconds()))
                    hours, remainder = divmod(total_seconds, 3600)
                    minutes, seconds = divmod(remainder, 60)
                    rel_dt_str = "%02d:%02d:%02d" % (hours, minutes, seconds)
                    dt_rss[rel_dt_str] = rss
                except IndexError as e:
                    Cons.P("%s: %s [%s]" % (e, fn, line))
                    # Bare raise keeps the original traceback.
                    raise

        with open(fn_out, "w") as fo:
            fmt = "%8s %6.2f"
            header = Util.BuildHeader(fmt, "dt rss_in_gb")
            # The header is repeated every 40 rows.
            for i, (dt, rss) in enumerate(sorted(dt_rss.iteritems())):
                if i % 40 == 0:
                    fo.write(header + "\n")
                fo.write((fmt + "\n") % (dt, rss))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
Пример #12
0
def GetHourlyFn():
  """Return the path of the hourly memory usage comparison file.

  The file name embeds the experiment datetimes parsed from Conf.Get(0) and
  Conf.Get(1). An existing file is reused. Each row holds the hour followed
  by eight statistics (avg, min, _1, _25, _50, _75, _99, max) per experiment,
  each divided by 1024 three times.
  """
  exp_dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
  exp_dts = [
      re.match(exp_dt_pattern, Conf.Get(idx)).group("exp_dt")
      for idx in range(2)
  ]
  out_path = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(out_path):
    return out_path

  # Attributes read from each per-hour stat object, in column order.
  stat_attrs = ("avg", "min", "_1", "_25", "_50", "_75", "_99", "max")

  def scaled(stat):
    # Same arithmetic as before: three successive divisions by 1024.
    return [float(getattr(stat, a)) / 1024 / 1024 / 1024 for a in stat_attrs]

  with Cons.MT("Generating file for memory usage comparison ..."):
    base_dir = Conf.GetDir("dn_base")
    log_0 = "%s/%s" % (base_dir, Conf.Get(0))
    log_1 = "%s/%s" % (base_dir, Conf.Get(1))

    stats_by_hour_0 = _GetMemStatByHour(log_0)
    stats_by_hour_1 = _GetMemStatByHour(log_1)

    row_fmt = ("%2d"
               " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
               " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f")
    column_names = ("hour"
                    " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max"
                    " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max")

    with open(out_path, "w") as out:
      out.write("# 0: %s\n" % log_0)
      out.write("# 1: %s\n" % log_1)
      out.write("#\n")
      out.write(Util.BuildHeader(row_fmt, column_names) + "\n")
      for hour, stat_0 in sorted(stats_by_hour_0.iteritems()):
        # Every hour of experiment 0 must also exist in experiment 1.
        stat_1 = stats_by_hour_1[hour]
        row = [hour] + scaled(stat_0) + scaled(stat_1)
        out.write((row_fmt + "\n") % tuple(row))
    Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
  return out_path
Пример #13
0
def PlotThrpLat():
    """Plot throughput vs latency from the YCSB data file.

    Runs mutant-ycsb-thrp-lat.gnuplot (looked up next to this file) with the
    input and output file names passed through environment variables.
    """
    ycsb_data_fn = YcsbLog.GenDataThrpVsLat()
    out_pdf = "%s/mutant-ycsb-thrp-vs-lat-by-costslos.pdf" % Conf.GetOutDir()

    with Cons.MT("Plotting ..."):
        plot_env = os.environ.copy()
        plot_env.update({"IN_YCSB": ycsb_data_fn, "OUT_FN": out_pdf})
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/mutant-ycsb-thrp-lat.gnuplot" % script_dir,
                     env=plot_env)
        Cons.P("Created %s %d" % (out_pdf, os.path.getsize(out_pdf)))
Пример #14
0
def GenDataThrpVsLat():
  """Return the path of the throughput-vs-latency data file.

  The file is reused when it already exists. Otherwise one YcsbLogReader is
  built per target IOPS configured for each storage device (currently only
  "ebs-st1") and one row per reader is written with the measured IOPS and
  the read and write latency columns.
  """
  out_path = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(out_path):
    return out_path

  # Reader attributes written after the IOPS column, in column order.
  latency_attrs = ("r_avg", "r_90", "r_99", "r_999", "r_9999",
                   "w_avg", "w_90", "w_99", "w_999", "w_9999")

  with Cons.MT("Generating thrp vs lat data file ..."):
    base_dir = Conf.GetDir("dn_base")

    # {stg_dev: {target_iops: YcsbLogReader}}
    readers_by_dev = {}

    #for stgdev in ["local-ssd", "ebs-st1"]:
    for dev in ["ebs-st1"]:
      readers = readers_by_dev.setdefault(dev, {})

      for target_iops, exp_conf in sorted(Conf.Get(dev).iteritems()):
        log_fn = "%s/%s" % (base_dir, exp_conf["fn"])
        time_range = exp_conf["time"].split("-")
        # A missing "overloaded" key means not overloaded.
        is_overloaded = exp_conf["overloaded"] if "overloaded" in exp_conf else False
        readers[target_iops] = YcsbLogReader(
            log_fn, time_range[0], time_range[1], is_overloaded)

    with open(out_path, "w") as out:
      row_fmt = ("%9s %6.0f %1d %6.0f"
                 " %8.2f %8.2f %9.2f %10.2f %10.2f"
                 " %8.2f %8.2f %8.2f %9.2f %9.2f")
      column_names = ("stg_dev target_iops overloaded iops"
                      " r_avg r_90 r_99 r_99.9 r_99.99"
                      " w_avg w_90 w_99 w_99.9 w_99.99")
      out.write("%s\n" % Util.BuildHeader(row_fmt, column_names))
      for dev, readers in sorted(readers_by_dev.iteritems()):
        for target_iops, reader in sorted(readers.iteritems()):
          row = [dev, target_iops]
          if reader.overloaded:
            row.append(1)
          else:
            row.append(0)
          row.append(reader.db_iops_stat.avg)
          row.extend(getattr(reader, a) for a in latency_attrs)
          out.write((row_fmt + "\n") % tuple(row))
    Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
    return out_path
Пример #15
0
def GenDataFileForGnuplot(dn_log_job, exp_dt):
    """Return the path of the dstat data file for gnuplot.

    The file is reused when it already exists; in that case no module state
    is touched. Otherwise the module globals _exp_begin_dt, _header_idx and
    _body_rows are reset, the dstat CSV log is parsed by _Parse (after
    decompressing its .bz2 sibling when only that exists) and the rows with
    a valid time are written, with the header repeated every 50 rows.

    Args:
      dn_log_job: job log directory containing the dstat sub-directory.
      exp_dt: experiment start datetime in "%y%m%d-%H%M%S.%f" format.

    Raises:
      RuntimeError: when neither the CSV log nor its .bz2 exists, or the log
        is still missing after decompression.
    """
    global _exp_begin_dt
    global _header_idx
    global _body_rows

    out_path = "%s/dstat-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.isfile(out_path):
        return out_path

    _exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
    #Cons.P(_exp_begin_dt)

    with Cons.MT("Generating dstat data file for plot ..."):
        # Reset the parse results before _Parse runs.
        _header_idx = None
        _body_rows = None

        csv_path = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)
        # Unzip when the file is not there
        if not os.path.exists(csv_path):
            bz2_path = "%s.bz2" % csv_path
            if not os.path.exists(bz2_path):
                raise RuntimeError("Unexpected: %s" % csv_path)
            Util.RunSubp(
                "cd %s && bzip2 -dk %s > /dev/null" %
                (os.path.dirname(bz2_path), os.path.basename(bz2_path)))
        if not os.path.exists(csv_path):
            raise RuntimeError("Unexpected")

        _Parse(csv_path)

        # For read and write
        per_dev_cols = 2 * _num_stg_devs
        row_fmt = " ".join(["%9.0f"] * per_dev_cols + ["%6.1f"] * per_dev_cols)

        row_fmt += " %8.0f %8.0f %8.0f %8.0f" \
            " %3.0f %3.0f" \
            " %3.0f %3.0f %11s" \
            " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"

        column_names = " ".join(
            name for name, _ in sorted(_header_idx.iteritems()))
        header = Util.BuildHeader(row_fmt, column_names)
        #Cons.P(header)
        with open(out_path, "w") as out:
            # Only rows that are actually written count towards the period.
            written = 0
            for row in _body_rows:
                if row.TimeValid():
                    if written % 50 == 0:
                        out.write("%s\n" % header)
                    written += 1
                    out.write((row_fmt + "\n") % tuple(row.Prepared()))
        Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
        return out_path
Пример #16
0
def PlotByTime(params):
    """Plot all metrics over time for the experiment of one YCSB log.

    Prepares the YCSB, dstat and RocksDB data files and runs
    rocksdb-ycsb-all-metrics-by-time.gnuplot (looked up next to this file)
    with its inputs passed through environment variables.

    Args:
      params: sequence whose first element is the YCSB log path of the form
        <dn_log>/<job_id>/ycsb/<exp_dt>... A sequence is used so the
        function can be mapped over a list of jobs.
    """
    fn_ycsb_log = params[0]

    # 171121-194901/ycsb/171122-010708.903-d
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb_log)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")
    #Cons.P(dn_log)
    #Cons.P(job_id)
    #Cons.P(exp_dt)

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(fn_ycsb_log, exp_dt)
    #Cons.P("%s\n%s\n%s\n%s" % (fn_ycsb, time_max, params1[0], params1[1]))
    # For dev
    #time_max = "00:10:00"
    #time_max = "03:00:00"

    params_formatted = fn_ycsb_log + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # The last, space substitution doesn't seem to work all of a sudden. Not the highest priority.
    #
    # The replacements run in this order on purpose. Every escape is spelled
    # with an explicit "\\" so no literal relies on an invalid escape
    # sequence such as "\{".
    for plain, escaped in (("\n", "\\n"), ("_", "\\\\_"), ("{", "\\{"),
                           ("}", "\\}")):  #, (" ", "\\ ")
        params_formatted = params_formatted.replace(plain, escaped)
    #Cons.P(params_formatted)

    dn_log_job = "%s/%s" % (dn_log, job_id)

    (fn_dstat,
     num_stgdevs) = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    #Cons.P("%s %s" % (fn_dstat, num_stgdevs))
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb-all-metrics-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                             exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads all of its inputs from the environment.
        env = os.environ.copy()
        env["PARAMS"] = params_formatted
        env["NUM_STG_DEVS"] = str(num_stgdevs)
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-all-metrics-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
0
def PlotCostLatency(exp_set_id):
    """Plot cost vs performance for one experiment set.

    Runs rocksdb-ycsb-cost-perf.gnuplot (looked up next to this file) with
    the input and output file names passed through environment variables.

    Args:
      exp_set_id: experiment set id; used to build the data file and in the
        output file name.
    """
    ycsb_data_fn = YcsbLog.GenDataCostVsMetrics(exp_set_id)
    out_pdf = "%s/rocksdb-ycsb-cost-perf-%s.pdf" % (Conf.GetOutDir(),
                                                    exp_set_id)

    with Cons.MT("Plotting ..."):
        plot_env = os.environ.copy()
        plot_env.update({"IN_YCSB": ycsb_data_fn, "OUT_FN": out_pdf})
        script_dir = os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-cost-perf.gnuplot" % script_dir,
                     env=plot_env)
        Cons.P("Created %s %d" % (out_pdf, os.path.getsize(out_pdf)))
Пример #18
0
def PlotCseVsAll():
  """Plot cost SLO epsilon vs all metrics.

  Runs cost-slo-epsilon-vs-metrics.gnuplot (looked up next to this file) with
  the data file name, the linear regression parameters and the output file
  name passed through environment variables.
  """
  # Cost SLO epsilon vs all metrics
  (cse_data_fn, regression_params) = RocksdbLog.GetFnCostSloEpsilonVsMetrics()
  #Cons.P(linear_reg_params)

  out_pdf = "%s/cost-slo-epsilon-vs-metrics.pdf" % Conf.GetOutDir()

  with Cons.MT("Plotting cost SLO epsilon vs metrics ..."):
    plot_env = os.environ.copy()
    plot_env.update({
        "FN_CSE_VS_ALL": cse_data_fn,
        "LINEAR_REG_PARAMS": regression_params,
        "FN_OUT": out_pdf,
    })
    script_dir = os.path.dirname(__file__)
    Util.RunSubp("gnuplot %s/cost-slo-epsilon-vs-metrics.gnuplot" % script_dir, env=plot_env)
    Cons.P("Created %s %d" % (out_pdf, os.path.getsize(out_pdf)))
Пример #19
0
def GenDataCostVsMetrics(exp_set_id):
  """Write the cost-vs-performance data file of an experiment set.

  The file is rewritten on every call. It gets one row per storage device
  configured under exp_set_id: the device name, its cost from the "stg_cost"
  configuration and twenty statistics read from a YcsbLogReader.

  Args:
    exp_set_id: experiment set id, used as configuration key and in the
      output file name.

  Returns:
    The path of the written file.
  """
  out_path = "%s/rocksdb-ycsb-cost-vs-perf-%s" % (Conf.GetOutDir(), exp_set_id)

  # Statistics requested from the log reader, in column order.
  stat_names = (
      "db_iops.avg", "db_iops.min", "db_iops.max",
      "db_iops._25", "db_iops._50", "db_iops._75",
      "r_avg", "r_min", "r_max", "r_90", "r_99", "r_999", "r_9999",
      "w_avg", "w_min", "w_max", "w_90", "w_99", "w_999", "w_9999")

  row_fmt = ("%5s %5.3f"
             " %14.6f"
             " %14.6f"
             " %14.6f"
             " %14.6f"
             " %14.6f"
             " %14.6f"
             " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f"
             " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f")
  column_names = ("stg_dev cost_dollar_per_gb_per_month"
                  " db_iops.avg"
                  " db_iops.min"
                  " db_iops.max"
                  " db_iops._25"
                  " db_iops._50"
                  " db_iops._75"
                  " r_avg r_min r_max r_90 r_99 r_999 r_9999"
                  " w_avg w_min w_max w_90 w_99 w_999 w_9999")

  with open(out_path, "w") as out:
    out.write(Util.BuildHeader(row_fmt, column_names) + "\n")
    # Only the device names are needed; the per-device config is ignored.
    for dev, _ in Conf.Get(exp_set_id).iteritems():
      reader = YcsbLogReader(exp_set_id, dev)
      row = [dev, float(Conf.Get("stg_cost")[dev])]
      row.extend(reader.GetStat(name) for name in stat_names)
      out.write((row_fmt + "\n") % tuple(row))
  Cons.P("Created %s %d" % (out_path, os.path.getsize(out_path)))
  return out_path
Пример #20
0
def GetOverhead(exp_rocksdb, exp_computation):
    """Read the C:R ratio from the 1-minute-average memory file.

    The file name is built from the experiment datetimes found in the two
    paths. The file must already exist.

    Args:
      exp_rocksdb: path of the first experiment, like .../<exp_dt>-d...
      exp_computation: path of the second experiment, same form.

    Returns:
      The value of the first "#   C:R = <number>" line as a float, or None
      when the file has no such line.
    """
    exp_dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
    exp_dts = [
        re.match(exp_dt_pattern, exp_path).group("exp_dt")
        for exp_path in (exp_rocksdb, exp_computation)
    ]

    stat_path = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    with open(stat_path) as stat_file:
        for text in stat_file:
            # The prefix test also covers the "is a comment line" check.
            if text.startswith("#   C:R = "):
                ratio_match = re.match(r"#   C:R = (?P<v>(\d|\.)+)", text)
                return float(ratio_match.group("v"))
    return None
Пример #21
0
def main(argv):
    """Create the output directory, plot throughput vs latency and exit.

    Args:
      argv: command line arguments; not used.
    """
    Util.MkDirs(Conf.GetOutDir())

    PlotThrpLat()

    # The cost-vs-latency plot that used to follow this exit was unreachable
    # and referenced an undefined exp_set_id, so it has been removed.
    sys.exit(0)
Пример #22
0
def main(argv):
    """Plot the metrics over time of the three configured experiments.

    Args:
      argv: command line arguments; not used.
    """
    Util.MkDirs(Conf.GetOutDir())

    dn_base = Conf.GetDir("dn_base")

    # PlotByTime takes a one-element tuple holding the YCSB log path.
    params = [("%s/%s" % (dn_base, Conf.Get(db_type)), )
              for db_type in ["unmodified_db", "computation_overhead",
                              "io_overhead"]]

    parallel_processing = True
    if parallel_processing:
        pool = multiprocessing.Pool()
        try:
            pool.map(PlotByTime, params)
        finally:
            # Always release the worker processes, also when a job failed.
            pool.close()
            pool.join()
    else:
        for job in params:
            PlotByTime(job)
Пример #23
0
def PlotByTime(params):
    """Plot all metrics over time for one storage device of an experiment set.

    Prepares the YCSB, dstat and RocksDB data files and runs
    rocksdb-ycsb-by-time.gnuplot (looked up next to this file) with its
    inputs passed through environment variables.

    Args:
      params: (exp_set_id, stg_dev, conf) where conf is a mapping with the
        keys "jobid_expdt" ("<job_id>/<exp_dt>") and "time_window". A
        sequence is used so the function can be mapped over a list of jobs.
    """
    exp_set_id = params[0]
    stg_dev = params[1]
    p0 = params[2]

    jobid_expdt = p0["jobid_expdt"]
    # Not used below; the lookup is kept so a missing key still fails here.
    time_window = p0["time_window"]

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(exp_set_id, stg_dev)
    #Cons.P(time_max)

    params_formatted = exp_set_id + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    # The replacements run in this order on purpose. Every escape is spelled
    # with an explicit "\\" so no literal relies on an invalid escape
    # sequence such as "\{".
    for plain, escaped in (("_", "\\\\_"), (" ", "\\ "), ("\n", "\\n"),
                           ("{", "\\{"), ("}", "\\}")):
        params_formatted = params_formatted.replace(plain, escaped)
    #Cons.P(params_formatted)

    t = jobid_expdt.split("/")
    job_id = t[0]
    exp_dt = t[1]

    dn_log = Conf.GetDir("dn")
    dn_log_job = "%s/%s" % (dn_log, job_id)

    fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                      stg_dev, exp_dt)

    with Cons.MT("Plotting ..."):
        # The gnuplot script reads all of its inputs from the environment.
        env = os.environ.copy()
        env["EXP_SET_ID"] = exp_set_id
        env["PARAMS"] = params_formatted
        env["STG_DEV"] = stg_dev
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Пример #24
0
def Get1minAvgFn(exp_rocksdb, exp_computation):
    """Return the path of the memory-usage comparison file, creating it if needed.

    The output file name is derived from the experiment datetimes embedded in
    the two experiment paths. An existing file is reused as-is.

    Args:
        exp_rocksdb: Path of the first experiment (recorded as exp_type 0).
            Must match ".+/<YYMMDD-HHMMSS.mmm>-d".
        exp_computation: Path of the second experiment (recorded as
            exp_type 1). Same naming requirement.

    Returns:
        The output file name.
    """
    exp_dts = []
    pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
    mo = re.match(pattern, exp_rocksdb)
    exp_dts.append(mo.group("exp_dt"))
    mo = re.match(pattern, exp_computation)
    exp_dts.append(mo.group("exp_dt"))

    fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
        records = []
        # {which_exp, [mem_usage]}
        which_memsum = {0: [], 1: []}
        # items() instead of iteritems() so the loop runs on Python 2 and 3.
        hm_mem = _GetHmMem(exp_rocksdb)
        for hm, mem in hm_mem.items():
            records.append(_RecordMemAvg(hm, 0, mem, 0))
            which_memsum[0].append(mem)
        hm_mem = _GetHmMem(exp_computation)
        for hm, mem in hm_mem.items():
            records.append(_RecordMemAvg(hm, 30, mem, 1))
            which_memsum[1].append(mem)
        records.sort(key=operator.attrgetter("ts"))

    # Compute the summary before opening the output file: a failure here must
    # not leave a partial file behind, because an existing file is treated as
    # a valid cached result on the next call.
    sum_rocksdb = sum(which_memsum[0])
    sum_computation = sum(which_memsum[1])
    # float() avoids integer division on Python 2; a zero baseline yields nan
    # instead of raising ZeroDivisionError.
    if sum_rocksdb:
        ratio = float(sum_computation) / sum_rocksdb
    else:
        ratio = float("nan")

    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
        fo.write("# Memory usage * time (B * sec)\n")
        fo.write("#   RocksDB: %f\n" % sum_rocksdb)
        fo.write("#   With computation: %f\n" % sum_computation)
        fo.write("#   C:R = %f\n" % ratio)
        fo.write("\n")

        # Repeat the header every 40 records. The counter used to be advanced
        # only inside the header branch, so the header was written just once.
        for i, rec in enumerate(records):
            if i % 40 == 0:
                fo.write(header + "\n")
            fo.write("%s\n" % rec.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Пример #25
0
def AddStatToFile(fn):
    """Prepend the overall SSTable creation stats to the file fn, in place.

    A temporary database is built from fn, the overall stats are written to
    "<fn>.tmp" followed by a blank line and the original content, and the
    temporary file then replaces fn. The temporary database file is removed
    at the end.

    Args:
        fn: Path of the file to update. Its name must end with
            "-<YYMMDD-HHMMSS.mmm>" (anything may follow).

    Returns:
        None.
    """
    with Cons.MT("Updating SSTable creation stats ..."):
        mo = re.match(r".*-(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", fn)
        exp_dt = mo.group("exp_dt")
        fn_db = "%s/sst-creation-info-%s.db" % (Conf.GetOutDir(), exp_dt)
        fn2 = "%s.tmp" % fn

        conn = _GenDB(fn, fn_db)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            with open(fn2, "w") as fo2:
                _OverallStat(cur, fo2)
                fo2.write("\n")
                with open(fn) as fo:
                    for line in fo:
                        fo2.write(line)
        finally:
            # Release the connection before its backing file is deleted below;
            # it used to be left open.
            conn.close()

        os.rename(fn2, fn)
        Cons.P("Updated %s %d" % (fn, os.path.getsize(fn)))
        os.remove(fn_db)
Пример #26
0
def PlotOverheadByTime():
    """Plot the overhead-by-time figure with gnuplot.

    Collects the RocksDB, CPU, and memory data file names, passes them to
    mutant-overhead-by-time.gnuplot (located next to this file) through the
    environment, and reports the size of the generated PDF.
    """
    # The third data file returned here is not consumed by this plot.
    (rocksdb_fn_0, rocksdb_fn_1, _) = RocksdbLog.GenDataFilesForGnuplot()
    cpu_stat_fn = _GetFnCpuOverhead()
    mem_stat_fn = _GetFnMemOverhead()
    plot_time_limit = "08:00:00"
    pdf_fn = "%s/mutant-overhead.pdf" % Conf.GetOutDir()

    with Cons.MT("Plotting ..."):
        plot_env = os.environ.copy()
        plot_env.update({
            "TIME_MAX": plot_time_limit,
            "CPU_STAT": cpu_stat_fn,
            "MEM_STAT": mem_stat_fn,
            "ROCKSDB0": rocksdb_fn_0,
            "ROCKSDB1": rocksdb_fn_1,
            "OUT_FN": pdf_fn,
        })
        script_fn = "%s/mutant-overhead-by-time.gnuplot" % os.path.dirname(
            __file__)
        Util.RunSubp("gnuplot %s" % script_fn, env=plot_env)
        Cons.P("Created %s %d" % (pdf_fn, os.path.getsize(pdf_fn)))
Пример #27
0
def main(argv):
    """Plot the by-time metrics of every configured baseline experiment.

    Reads the "rocksdb-baseline" configuration entry, builds one
    (input path, storage device) tuple per experiment, and calls PlotByTime
    on each of them, by default in a process pool.

    Args:
        argv: Command-line arguments. Not used.
    """
    Util.MkDirs(Conf.GetOutDir())

    # Experiment root. "rocksdb-metadata-org" is the alternative entry.
    r = Conf.Get("rocksdb-baseline")

    # expanduser() only expands a leading "~"; the previous str.replace()
    # rewrote every "~" in the path.
    dn_base = os.path.expanduser(r["dn_base"])

    params = []
    # "ebs-st1" is the other storage device that can be listed here.
    for stgdev in ["local-ssd"]:
        # Only the values are needed; values() works on Python 2 and 3.
        for jobid_expdt in r[stgdev].values():
            fn_in = "%s/%s" % (dn_base, jobid_expdt)
            params.append((fn_in, stgdev))

    parallel_processing = True
    if parallel_processing:
        pool = multiprocessing.Pool()
        try:
            pool.map(PlotByTime, params)
        finally:
            # Shut the workers down explicitly rather than relying on garbage
            # collection. try/finally because Pool is not a context manager
            # on Python 2.
            pool.close()
            pool.join()
    else:
        for p in params:
            PlotByTime(p)
Пример #28
0
def GetFnStat(fn0, fn1):
    """Return the path of the SSTable creation stat file, creating it if needed.

    The output file name is derived from the experiment datetimes embedded in
    the two input file names. An existing file is reused as-is.

    Args:
        fn0: First input file name, e.g. "rocksdb-by-time-180126-193525.769".
        fn1: Second input file name, same naming scheme.

    Returns:
        The output file name, whether it already existed or was just created.
    """
    exp_dts = []
    for fn in [fn0, fn1]:
        #Cons.P(fn)
        # rocksdb-by-time-180126-193525.769
        mo = re.match(r".+-(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", fn)
        exp_dts.append(mo.group("exp_dt"))

    fn_out = "%s/sst-creation-stat-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Generating SSTable creation stats ..."):
        conn = _GenDB2(fn0, fn1)
        try:
            # https://docs.python.org/2/library/sqlite3.html#row-objects
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            with open(fn_out, "w") as fo:
                _OverallStat2(cur, fn0, fn1, fo)
                _HourlyStat(cur, fo)
        finally:
            # The connection is only needed while the stats are written.
            conn.close()
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    # Return the name on the freshly-generated path too; previously only the
    # cached path returned it and a first call yielded None.
    return fn_out
Пример #29
0
def main(argv):
    """Plot the by-time metrics of every configured cost-vs-perf experiment.

    Reads the "cost-vs-perf" configuration entry, builds one ExpParam per
    experiment, and calls PlotByTime on each of them, by default in a process
    pool.

    Args:
        argv: Command-line arguments. Not used.
    """
    Util.MkDirs(Conf.GetOutDir())

    # Experiment root
    r = Conf.Get("cost-vs-perf")

    # expanduser() only expands a leading "~"; the previous str.replace()
    # rewrote every "~" in the path.
    dn_base = os.path.expanduser(r["dn_base"])

    params = []
    # items() instead of iteritems() so the loops run on Python 2 and 3.
    for cost_slo_str, v in r["exps"].items():
        # Keys have the form "<cost_slo>, <cost_slo_epsilon>".
        t = cost_slo_str.split(", ")
        cost_slo = float(t[0])
        cost_slo_epsilon = float(t[1])
        #Cons.P("%f %f" % (cost_slo, cost_slo_epsilon))

        for target_iops, fn in v.items():
            #Cons.P("  %s %s" % (target_iops, fn))
            # 171204-162903/ycsb/171204-214803.510-d
            mo = re.match(
                r"(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-(?P<workload>\w)",
                fn)
            job_id = mo.group("job_id")
            exp_dt = mo.group("exp_dt")
            workload = mo.group("workload")
            #Cons.P((job_id, exp_dt, workload))
            params.append(
                ExpParam(dn_base, target_iops, cost_slo, cost_slo_epsilon,
                         job_id, exp_dt, workload))
    #Cons.P(pprint.pformat(params))

    parallel_processing = True
    if parallel_processing:
        pool = multiprocessing.Pool()
        try:
            pool.map(PlotByTime, params)
        finally:
            # Shut the workers down explicitly rather than relying on garbage
            # collection. try/finally because Pool is not a context manager
            # on Python 2.
            pool.close()
            pool.join()
    else:
        for p in params:
            PlotByTime(p)
Пример #30
0
def _PlotCompareTwo(params):
    """Plot the comparison of two experiments unless the PDF already exists.

    Args:
        params: Indexable of (exp_rocksdb, exp_computation) experiment paths,
            each matching ".+/<YYMMDD-HHMMSS.mmm>-d".

    Returns:
        None.
    """
    exp_rocksdb, exp_computation = params[0], params[1]

    # The output name is built from the datetimes of the two experiments.
    exp_dt_pattern = r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d"
    exp_dts = [
        re.match(exp_dt_pattern, exp).group("exp_dt")
        for exp in (exp_rocksdb, exp_computation)
    ]
    joined_exp_dts = "-".join(exp_dts)
    fn_out = "%s/mutant-computation-overhead-%s.pdf" % (Conf.GetOutDir(),
                                                        joined_exp_dts)
    if os.path.exists(fn_out):
        #Cons.P("%s %d already exists" % (fn_out, os.path.getsize(fn_out)))
        return

    # Custom labels are enabled for this one experiment pair only.
    use_custom_labels = (
        joined_exp_dts == "180201-033312.464-180201-033259.439")

    fn_rocksdb = RocksdbLog.GetFnTimeVsMetrics(exp_rocksdb)
    fn_cpu_1min_avg = CompareCpu.Get1minAvgFn(exp_rocksdb, exp_computation)
    fn_mem_1min_avg = CompareMem.Get1minAvgFn(exp_rocksdb, exp_computation)

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env.update({
            "TIME_MAX": "07:50:00",
            "FN_ROCKSDB": fn_rocksdb,
            "FN_CPU_1MIN_AVG": fn_cpu_1min_avg,
            "FN_MEM_1MIN_AVG": fn_mem_1min_avg,
            "PLOT_CUSTOM_LABELS": "1" if use_custom_labels else "0",
            "OUT_FN": fn_out,
        })
        script_fn = "%s/compare-two-exps.gnuplot" % os.path.dirname(__file__)
        Util.RunSubp("gnuplot %s" % script_fn, env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))