def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='end_of_day',
             start_date=None, overwrite=False):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd)

    self._register_benchmarks()
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, prep_cmd, clean_cmd=None, run_option='eod',
             run_order='normal', start_date=None, overwrite=False,
             module_dependencies=None, always_clean=False,
             use_blacklist=True, verify=False):
    log.info("Initializing benchmark runner for %d benchmarks"
             % len(benchmarks))
    self._benchmarks = None
    self._checksums = None

    if verify:
        verify_benchmarks(benchmarks, raise_=True)

    self.start_date = start_date
    self.run_option = run_option
    self.run_order = run_order

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                prep_cmd, clean_cmd,
                                always_clean=always_clean,
                                dependencies=module_dependencies)

    self.benchmarks = benchmarks
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='eod', start_date=None,
             overwrite=False, module_dependencies=None, always_clean=False,
             use_blacklist=True, time=True):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist
    self.blacklist = set(self.db.get_rev_blacklist())
    self.time = time

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd, always_clean=always_clean,
                                dependencies=module_dependencies)

    self._register_benchmarks()

    self._python = os.environ.get('VBENCH_PYTHON', 'python')
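A minimal wiring sketch for the constructor above. The paths and shell commands are hypothetical placeholders, the keyword names simply mirror the signature above, and the `suite` module (providing a prepared `benchmarks` list) is assumed to exist, as it does in the driver functions below:

# Hypothetical usage sketch, not part of the suite itself.
from vbench.api import BenchmarkRunner
from suite import benchmarks  # prepared Benchmark list, as in the drivers below

runner = BenchmarkRunner(
    benchmarks,
    repo_path="/path/to/project",          # repository being benchmarked
    repo_url="/path/to/project",           # source the tmp clone is made from
    build_cmd="python setup.py build_ext --inplace",
    db_path="/path/to/benchmarks.db",
    tmp_dir="/tmp/vb",
    preparation_cmd="python setup.py clean",
    run_option='eod',                      # 'eod': one measured commit per day
    start_date=None)
# runner._run_and_write_results(sha) -- as used in the drivers below --
# then benchmarks a single revision and stores its timings in the DB.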
def main():
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import (REPO_PATH, BUILD, DB_PATH, PREPARE,
                       dependencies, benchmarks)

    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT
    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]
    if not args.log_file:
        args.log_file = os.path.abspath(
            os.path.join(REPO_PATH, 'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    # opened before the try block so the finally clause can't hit an
    # undefined name (and a double close) if open() were to fail
    logfile = open(args.log_file, 'w')
    try:
        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR,
            PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head,
                repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        # ignore timings below the duration threshold
        totals = totals.ix[totals.t_head > args.min_duration]
        # sort in ascending order
        totals = totals.dropna().sort("ratio").set_index('name')

        s = "\n\nResults:\n"
        s += totals.to_string(
            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a ratio of 1.30 means the target commit is 30% slower than the baseline.\n\n"
        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % args.log_file)
    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
def profile_comparative(benchmarks):
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = tempfile.mkdtemp()
    try:
        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR,
            PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(
            None, REPO_PATH, args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head,
                repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame({HEAD_COL: head_res['timing'],
                            BASE_COL: baseline_res['timing'],
                            'ratio': ratio,
                            'name': baseline_res.name},
                           columns=[HEAD_COL, BASE_COL, "ratio", "name"])
        # ignore timings below the duration threshold
        totals = totals.ix[totals[HEAD_COL] > args.min_duration]
        # sort in ascending order
        totals = totals.dropna().sort("ratio").set_index('name')

        h_msg = repo.messages.get(h_head, "")
        b_msg = repo.messages.get(h_baseline, "")

        print_report(totals, h_head=h_head, h_msg=h_msg,
                     h_baseline=h_baseline, b_msg=b_msg)

        if args.outdf:
            prprint("The results DataFrame was written to '%s'\n"
                    % args.outdf)
            totals.save(args.outdf)
    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
def profile_comparative(benchmarks):
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = tempfile.mkdtemp()
    try:
        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR,
            PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(
            None, REPO_PATH, args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head,
                repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        report_comparative(head_res, baseline_res)
    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
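The `GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)` line above swaps the parser out at class level, so the runner built afterwards only sees the commit range of interest. A self-contained sketch of that monkey-patching pattern, with stand-in names rather than the vbench API:

# Generic illustration of the monkey-patch used above: rebind a method on
# the class so every instance created afterwards gets the altered behaviour.
class Parser:
    def parse(self):
        return ["a", "b", "c", "d"]

def _limit_wrapper(stop_at):
    original = Parser.parse
    def parse(self):
        commits = original(self)
        # truncate at the marker, mimicking "stop at the baseline commit"
        return commits[:commits.index(stop_at) + 1]
    return parse

Parser.parse = _limit_wrapper("c")
print(Parser().parse())  # ['a', 'b', 'c']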
def main():
    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % LOG_FILE)

    # opened before the try block so the finally clause can't hit an
    # undefined name (and a double close) if open() were to fail
    logfile = open(LOG_FILE, 'w')
    try:
        prprint("Processing Repo at '%s'..." % REPO_PATH)
        repo = GitRepo(REPO_PATH)

        # get hashes of baseline and current head
        h_head = repo.shas[-1]
        h_baseline = BASELINE_COMMIT

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint('Comparing Head [%s] : %s ' % (h_head,
                repo.messages.get(h_head, "")))
        prprint('Against baseline [%s] : %s \n' % (h_baseline,
                repo.messages.get(h_baseline, "")))

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR,
            PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline commit '%s'" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for current HEAD '%s'" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        # ignore timings under 10 microseconds (timings are in ms)
        totals = totals.ix[totals.t_head > 0.010]
        # sort in ascending order
        totals = totals.dropna().sort("ratio").set_index('name')

        s = "\n\nResults:\n" + totals.to_string(
            float_format=lambda x: "%0.4f" % x) + "\n\n"
        s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n"
        s += "- a ratio of 1.30 means HEAD is 30% slower than the baseline.\n\n"
        s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % LOG_FILE)
    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
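All of the drivers above end with the same comparison arithmetic: divide the target timings by the baseline timings, drop noise below a duration threshold, and sort by the ratio. Here is a self-contained toy version of that step with made-up numbers, written against current pandas (the originals target an older API with `.ix` and `.sort`):

# Toy reproduction of the comparison arithmetic: timings are in ms,
# and ratio > 1 means the head/target commit is slower than the baseline.
from pandas import DataFrame, Series

head = Series({"groupby_sum": 12.0, "join_left": 3.0, "tiny_op": 0.001})
base = Series({"groupby_sum": 10.0, "join_left": 3.2, "tiny_op": 0.001})

totals = DataFrame({"t_head": head,
                    "t_baseline": base,
                    "ratio": head / base})
totals = totals[totals["t_head"] > 0.010]      # drop sub-10-microsecond noise
totals = totals.dropna().sort_values("ratio")  # ascending: improvements first
print(totals)
# groupby_sum has ratio 1.20, i.e. 20% slower than the baseline;
# tiny_op falls below the threshold and is excluded.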