def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='end_of_day',
             start_date=None, overwrite=False):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd)

    self._register_benchmarks()
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, prep_cmd, clean_cmd=None, run_option='eod',
             run_order='normal', start_date=None, overwrite=False,
             module_dependencies=None, always_clean=False,
             use_blacklist=True, verify=False):
    log.info("Initializing benchmark runner for %d benchmarks"
             % (len(benchmarks)))
    self._benchmarks = None
    self._checksums = None

    if verify:
        verify_benchmarks(benchmarks, raise_=True)

    self.start_date = start_date
    self.run_option = run_option
    self.run_order = run_order

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd, prep_cmd,
                                clean_cmd, always_clean=always_clean,
                                dependencies=module_dependencies)

    self.benchmarks = benchmarks
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='eod', start_date=None,
             overwrite=False, module_dependencies=None, always_clean=False,
             use_blacklist=True, time=True):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist
    self.blacklist = set(self.db.get_rev_blacklist())
    self.time = time

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd, always_clean=always_clean,
                                dependencies=module_dependencies)

    self._register_benchmarks()

    self._python = os.environ.get('VBENCH_PYTHON', 'python')
def __init__(
    self,
    benchmarks,
    repo_path,
    repo_url,
    build_cmd,
    db_path,
    tmp_dir,
    preparation_cmd,
    run_option="end_of_day",
    start_date=None,
    overwrite=False,
):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd, preparation_cmd)

    self._register_benchmarks()
def __init__(
    self,
    benchmarks,
    repo_path,
    repo_url,
    build_cmd,
    db_path,
    tmp_dir,
    prep_cmd,
    clean_cmd=None,
    run_option="eod",
    run_order="normal",
    start_date=None,
    overwrite=False,
    module_dependencies=None,
    always_clean=False,
    use_blacklist=True,
):
    log.info("Initializing benchmark runner for %d benchmarks" % (len(benchmarks)))
    self._benchmarks = None
    self._checksums = None

    self.start_date = start_date
    self.run_option = run_option
    self.run_order = run_order

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist
    self.blacklist = set(self.db.get_rev_blacklist())

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(
        repo_url,
        self.tmp_dir,
        build_cmd,
        prep_cmd,
        clean_cmd,
        always_clean=always_clean,
        dependencies=module_dependencies,
    )

    self.benchmarks = benchmarks
def __init__(
    self,
    benchmarks,
    repo_path,
    repo_url,
    build_cmd,
    db_path,
    tmp_dir,
    preparation_cmd,
    run_option="end_of_day",
    start_date=None,
    overwrite=False,
    module_dependencies=None,
    use_blacklist=True,
):
    self.benchmarks = benchmarks
    self.checksums = [b.checksum for b in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    self.repo_path = repo_path
    self.db_path = db_path
    self.repo = GitRepo(self.repo_path)
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist
    self.blacklist = set(self.db.get_rev_blacklist())

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(
        repo_url, self.tmp_dir, build_cmd, preparation_cmd, dependencies=module_dependencies
    )

    self._register_benchmarks()
def get_results(self, db_path):
    from vbench.db import BenchmarkDB
    db = BenchmarkDB.get_instance(db_path)
    return db.get_benchmark_results(self.checksum)
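# Illustrative usage of get_results() above -- a sketch only.  It assumes
# vbench's Benchmark(code, setup, ..., name=...) constructor and an already
# populated results database; the setup code and database path below are
# placeholders, not taken from the original.
from vbench.api import Benchmark

setup = """
import numpy as np
from pandas import DataFrame
df = DataFrame(np.random.randn(1000, 10))
"""
bench = Benchmark('df.sum()', setup, name='frame_sum')

# Returns whatever BenchmarkDB.get_benchmark_results() yields for this
# benchmark's checksum (typically one timing row per benchmarked revision).
results = bench.get_results('/path/to/benchmarks.db')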
def main():
    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % LOG_FILE)

    try:
        logfile = open(LOG_FILE, 'w')

        prprint("Processing Repo at '%s'..." % REPO_PATH)
        repo = GitRepo(REPO_PATH)

        # get hashes of baseline and current head
        h_head = repo.shas[-1]
        h_baseline = BASELINE_COMMIT

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint('Comparing Head [%s] : %s ' %
                (h_head, repo.messages.get(h_head, "")))
        prprint('Against baseline [%s] : %s \n' %
                (h_baseline, repo.messages.get(h_baseline, "")))

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline commit '%s'" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for current HEAD '%s'" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > 0.010]  # ignore sub 10micros
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        s = "\n\nResults:\n" + totals.to_string(
            float_format=lambda x: "%0.4f" % x) + "\n\n"
        s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means HEAD is 30% slower than the Baseline.\n\n"
        s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" % LOG_FILE)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    time : boolean
        whether to measure how much running the benchmarks takes
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, preparation_cmd, run_option='eod', start_date=None,
                 overwrite=False, module_dependencies=None,
                 always_clean=False, use_blacklist=True, time=True):
        self.benchmarks = benchmarks
        self.checksums = [b.checksum for b in benchmarks]

        self.start_date = start_date
        self.run_option = run_option

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist
        self.blacklist = set(self.db.get_rev_blacklist())
        self.time = time

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    preparation_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        self._register_benchmarks()

        self._python = os.environ.get('VBENCH_PYTHON', 'python')

    def run(self):
        revisions = self._get_revisions_to_run()

        for rev in revisions:
            if self.use_blacklist and rev in self.blacklist:
                print 'SKIPPING BLACKLISTED %s' % rev
                continue

            any_succeeded, n_active = self._run_and_write_results(rev)
            if not any_succeeded and n_active > 0:
                self.bench_repo.hard_clean()
                any_succeeded2, n_active = self._run_and_write_results(rev)

                # just guessing that this revision is broken, should stop
                # wasting our time
                if (not any_succeeded2 and n_active > 5
                        and self.use_blacklist):
                    print 'BLACKLISTING %s' % rev
                    self.db.add_rev_blacklist(rev)

    def _run_and_write_results(self, rev):
        """
        Returns True if any runs succeeded
        """
        n_active_benchmarks, results = self._run_revision(rev)

        tracebacks = []
        any_succeeded = False

        for checksum, timing in results.iteritems():
            if 'traceback' in timing:
                tracebacks.append(timing['traceback'])

            timestamp = self.repo.timestamps[rev]

            any_succeeded = any_succeeded or 'timing' in timing

            self.db.write_result(checksum, rev, timestamp,
                                 timing.get('loops'),
                                 timing.get('timing'),
                                 timing.get('memory'),
                                 timing.get('traceback'))

        return any_succeeded, n_active_benchmarks

    def _register_benchmarks(self):
        ex_benchmarks = self.db.get_benchmarks()
        db_checksums = set(ex_benchmarks.index)
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                self.db.update_name(bm)
            else:
                print 'Writing new benchmark %s, %s' % (bm.name, bm.checksum)
                self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print 'No benchmarks need running at %s' % rev
            return 0, {}

        print 'Running %d benchmarks for revision %s' % (len(need_to_run), rev)
        for bm in need_to_run:
            print bm.name

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, 'w'))

        # run the process
        cmd = '%s vb_run_benchmarks.py %s %s' % (self._python, pickle_path,
                                                 results_path)
        if self.time:
            cmd = 'time ' + cmd

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        print 'stdout: %s' % stdout

        if stderr:
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                print stderr
                print 'HARD CLEANING!'
                self.bench_repo.hard_clean()

            print stderr

        if not os.path.exists(results_path):
            print 'Failed for revision %s' % rev
            return len(need_to_run), {}

        results = pickle.load(open(results_path, 'r'))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return len(need_to_run), results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == 'eod':
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == 'all':
            revs_to_run = rev_by_timestamp.values
        elif self.run_option == 'last':
            revs_to_run = rev_by_timestamp.values[-1:]
            # TODO: if the very last revision fails, there should be a way
            # to look for the second last, etc, until the last one that was run
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise Exception('unrecognized run_option %s' % self.run_option)

        return revs_to_run
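# Illustrative driver for the BenchmarkRunner class above -- a sketch only:
# the repository paths, build/prepare commands, benchmark definition and
# start date are placeholder assumptions, not values from the original code.
import tempfile
from datetime import datetime

from vbench.api import Benchmark, BenchmarkRunner

benchmarks = [
    Benchmark('df.sum()',
              'from pandas import DataFrame; import numpy as np; '
              'df = DataFrame(np.random.randn(1000, 10))',
              name='frame_sum'),
]

runner = BenchmarkRunner(benchmarks,
                         '/path/to/project',        # repo_path: repo to benchmark
                         '/path/to/project',        # repo_url: what BenchRepo copies
                         'python setup.py build_ext --inplace',  # build_cmd
                         '/path/to/benchmarks.db',  # db_path
                         tempfile.mkdtemp(),        # tmp_dir: working copy location
                         'python setup.py clean',   # preparation_cmd
                         run_option='eod',          # or 'all', 'last', or an int stride
                         start_date=datetime(2012, 1, 1))
runner.run()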
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', integer}
        eod: use the last revision for each calendar day
        all: benchmark every revision
        some integer N: run each N revisions
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(
        self,
        benchmarks,
        repo_path,
        repo_url,
        build_cmd,
        db_path,
        tmp_dir,
        preparation_cmd,
        run_option="end_of_day",
        start_date=None,
        overwrite=False,
        module_dependencies=None,
        use_blacklist=True,
    ):
        self.benchmarks = benchmarks
        self.checksums = [b.checksum for b in benchmarks]

        self.start_date = start_date
        self.run_option = run_option

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist
        self.blacklist = set(self.db.get_rev_blacklist())

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(
            repo_url, self.tmp_dir, build_cmd, preparation_cmd, dependencies=module_dependencies
        )

        self._register_benchmarks()

    def run(self):
        revisions = self._get_revisions_to_run()

        for rev in revisions:
            if self.use_blacklist and rev in self.blacklist:
                print "SKIPPING BLACKLISTED %s" % rev
                continue

            any_succeeded, n_active = self._run_and_write_results(rev)
            if not any_succeeded and n_active > 0:
                self.bench_repo.hard_clean()
                any_succeeded2, n_active = self._run_and_write_results(rev)

                # just guessing that this revision is broken, should stop
                # wasting our time
                if not any_succeeded2 and n_active > 5 and self.use_blacklist:
                    print "BLACKLISTING %s" % rev
                    self.db.add_rev_blacklist(rev)

    def _run_and_write_results(self, rev):
        """
        Returns True if any runs succeeded
        """
        n_active_benchmarks, results = self._run_revision(rev)

        tracebacks = []
        any_succeeded = False

        for checksum, timing in results.iteritems():
            if "traceback" in timing:
                tracebacks.append(timing["traceback"])

            timestamp = self.repo.timestamps[rev]

            any_succeeded = any_succeeded or "timing" in timing

            self.db.write_result(
                checksum, rev, timestamp, timing.get("loops"), timing.get("timing"), timing.get("traceback")
            )

        return any_succeeded, n_active_benchmarks

    def _register_benchmarks(self):
        ex_benchmarks = self.db.get_benchmarks()
        db_checksums = set(ex_benchmarks.index)
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                self.db.update_name(bm)
            else:
                print "Writing new benchmark %s, %s" % (bm.name, bm.checksum)
                self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print "No benchmarks need running at %s" % rev
            return 0, {}

        print "Running %d benchmarks for revision %s" % (len(need_to_run), rev)
        for bm in need_to_run:
            print bm.name

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, "benchmarks.pickle")
        results_path = os.path.join(self.tmp_dir, "results.pickle")
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, "w"))

        # run the process
        cmd = "python vb_run_benchmarks.py %s %s" % (pickle_path, results_path)
        print cmd
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        print "stdout: %s" % stdout

        if stderr:
            if "object has no attribute" in stderr or "ImportError" in stderr:
                print stderr
                print "HARD CLEANING!"
                self.bench_repo.hard_clean()

            print stderr

        if not os.path.exists(results_path):
            print "Failed for revision %s" % rev
            return len(need_to_run), {}

        results = pickle.load(open(results_path, "r"))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return len(need_to_run), results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date :]

        if self.run_option == "eod":
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == "all":
            revs_to_run = rev_by_timestamp.values
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[:: self.run_option]
        else:
            raise Exception("unrecognized run_option %s" % self.run_option)

        return revs_to_run
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    run_order :
        normal : original order (default)
        reverse: in reverse order (latest first)
        multires: cover all revisions but in the order increasing
                  temporal detail
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, prep_cmd, clean_cmd=None,
                 run_option='eod', run_order='normal',
                 start_date=None, overwrite=False,
                 module_dependencies=None, always_clean=False,
                 use_blacklist=True):
        log.info("Initializing benchmark runner for %d benchmarks"
                 % (len(benchmarks)))
        self._benchmarks = None
        self._checksums = None

        self.start_date = start_date
        self.run_option = run_option
        self.run_order = run_order

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist
        self.blacklist = set(self.db.get_rev_blacklist())

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    prep_cmd, clean_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        self.benchmarks = benchmarks

    def _get_benchmarks(self):
        return self._benchmarks

    def _set_benchmarks(self, benchmarks):
        self._benchmarks = benchmarks
        self._checksums = [b.checksum for b in benchmarks]
        self._register_benchmarks()

    benchmarks = property(fget=_get_benchmarks, fset=_set_benchmarks)
    checksums = property(fget=lambda self: self._checksums)

    def run(self):
        log.info("Collecting revisions to run")
        revisions = self._get_revisions_to_run()
        ran_revisions = []
        log.info("Running benchmarks for %d revisions" % (len(revisions),))
        for rev in revisions:
            if self.use_blacklist and rev in self.blacklist:
                log.warn('Skipping blacklisted %s' % rev)
                continue

            any_succeeded, n_active = self._run_and_write_results(rev)
            ran_revisions.append((rev, (any_succeeded, n_active)))

            log.debug("%s succeeded among %d active benchmarks",
                      {True: "Some", False: "None"}[any_succeeded],
                      n_active)
            if not any_succeeded and n_active > 0:
                self.bench_repo.hard_clean()
                any_succeeded2, n_active = self._run_and_write_results(rev)

                # just guessing that this revision is broken, should stop
                # wasting our time
                if (not any_succeeded2 and n_active > 5
                        and self.use_blacklist):
                    log.warn('Blacklisting %s' % rev)
                    self.db.add_rev_blacklist(rev)
        return ran_revisions

    def _run_and_write_results(self, rev):
        """
        Returns True if any runs succeeded
        """
        n_active_benchmarks, results = self._run_revision(rev)

        tracebacks = []
        any_succeeded = False

        for checksum, timing in results.iteritems():
            if 'traceback' in timing:
                tracebacks.append(timing['traceback'])

            timestamp = self.repo.timestamps[rev]

            any_succeeded = any_succeeded or 'timing' in timing

            self.db.write_result(checksum, rev, timestamp,
                                 timing.get('loops'),
                                 timing.get('timing'),
                                 timing.get('traceback'))

        return any_succeeded, n_active_benchmarks

    def _register_benchmarks(self):
        log.info('Getting benchmarks')
        ex_benchmarks = self.db.get_benchmarks()
        db_checksums = set(ex_benchmarks.index)
        log.info("Registering %d benchmarks" % len(ex_benchmarks))
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                self.db.update_name(bm)
            else:
                log.info('Writing new benchmark %s, %s'
                         % (bm.name, bm.checksum))
                self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            log.info('No benchmarks need running at %s' % rev)
            return 0, {}

        log.info('Running %d benchmarks for revision %s'
                 % (len(need_to_run), rev))
        for bm in need_to_run:
            log.debug(bm.name)

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, 'w'))

        # run the process
        cmd = 'python vb_run_benchmarks.py %s %s' % (pickle_path, results_path)
        log.debug("CMD: %s" % cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        if stdout:
            log.debug('stdout: %s' % stdout)
        if proc.returncode:
            log.warn("Returned with non-0 code: %d" % proc.returncode)
        if stderr:
            log.warn("stderr: %s" % stderr)
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                log.warn('HARD CLEANING!')
                self.bench_repo.hard_clean()

        if not os.path.exists(results_path):
            log.warn('Failed for revision %s' % rev)
            return len(need_to_run), {}

        results = pickle.load(open(results_path, 'r'))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return len(need_to_run), results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == 'eod':
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == 'all':
            revs_to_run = rev_by_timestamp.values
        elif self.run_option == 'last':
            revs_to_run = rev_by_timestamp.values[-1:]
            # TODO: if the very last revision fails, there should be a way
            # to look for the second last, etc, until the last one that was run
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise ValueError('unrecognized run_option=%r' % self.run_option)

        if self.run_order not in _RUN_ORDERS:
            raise ValueError('unrecognized run_order=%r. Must be among %s'
                             % (self.run_order, _RUN_ORDERS.keys()))
        revs_to_run = _RUN_ORDERS[self.run_order](revs_to_run)

        return revs_to_run
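# _get_revisions_to_run() above dispatches through a module-level _RUN_ORDERS
# mapping that is not included in this excerpt.  A minimal sketch of the shape
# such a mapping could take; the 'multires' helper below is a stand-in guess,
# not the original implementation.
def _reverse_order(revs):
    # latest revision first
    return revs[::-1]

def _multires_order(revs):
    # coarse-to-fine: hit every 4th revision, then every 2nd, then the rest,
    # so early results already cover the whole time span
    seen, ordered = set(), []
    for step in (4, 2, 1):
        for rev in revs[::step]:
            if rev not in seen:
                seen.add(rev)
                ordered.append(rev)
    return ordered

_RUN_ORDERS = {
    'normal': lambda revs: revs,   # oldest-first, as collected
    'reverse': _reverse_order,
    'multires': _multires_order,
}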
def profile_comparative(benchmarks):

    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = args.temp_dir or tempfile.mkdtemp()

    try:

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(None, REPO_PATH,
                                                            args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)

        report_comparative(head_res, baseline_res)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    run_order :
        normal : original order (default)
        reverse: in reverse order (latest first)
        multires: cover all revisions but in the order increasing
                  temporal detail
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, prep_cmd, clean_cmd=None,
                 run_option='eod', run_order='normal',
                 start_date=None, overwrite=False,
                 module_dependencies=None, always_clean=False,
                 use_blacklist=True, verify=False):
        log.info("Initializing benchmark runner for %d benchmarks"
                 % (len(benchmarks)))
        self._benchmarks = None
        self._checksums = None

        if verify:
            verify_benchmarks(benchmarks, raise_=True)

        self.start_date = start_date
        self.run_option = run_option
        self.run_order = run_order

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    prep_cmd, clean_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        self.benchmarks = benchmarks

    def _get_benchmarks(self):
        return self._benchmarks

    def _set_benchmarks(self, benchmarks):
        self._benchmarks = benchmarks
        self._checksums = [b.checksum for b in benchmarks]
        self._register_benchmarks()

    benchmarks = property(fget=_get_benchmarks, fset=_set_benchmarks)
    checksums = property(fget=lambda self: self._checksums)

    @property
    def blacklist(self):
        return set(self.db.get_rev_blacklist())

    def _blacklist_rev(self, rev, msg=""):
        if self.use_blacklist:
            log.warn(('Blacklisting %s' % rev) +
                     (": %s" % msg if msg else "."))
            self.db.add_rev_blacklist(rev)

    def run(self):
        log.info("Collecting revisions to run")
        revisions = self._get_revisions_to_run()
        ran_revisions = []
        log.info("Running benchmarks for %d revisions" % (len(revisions),))

        # get the current black list (might be a different one on a next .run())
        blacklist = self.blacklist

        for rev in revisions:
            if self.use_blacklist and rev in blacklist:
                log.warn('Skipping blacklisted %s' % rev)
                continue

            try:
                any_succeeded, n_active = self._run_and_write_results(rev)
            except FailedToBuildError, e:
                self._blacklist_rev(rev, msg=str(e))
                continue

            # All the rerunning below somewhat obscures the destiny of
            # ran_revisions.  TODO: make it clear(er)
            ran_revisions.append((rev, (any_succeeded, n_active)))

            if n_active:
                log.debug("%s succeeded among %d active benchmarks",
                          {True: "Some", False: "None"}[any_succeeded],
                          n_active)
                if not any_succeeded:
                    # Give them a second chance
                    self.bench_repo.hard_clean()
                    try:
                        any_succeeded2, n_active2 = \
                            self._run_and_write_results(rev)
                    except FailedToBuildError, e:
                        log.warn("Failed to build upon 2nd attempt to "
                                 "benchmark, verify build infrastructure. "
                                 "Skipping for now: %s" % e)
                        continue

                    assert n_active == n_active2, \
                        ("Since not any_succeeded, number of benchmarks "
                         "should remain the same")

                    # just guessing that this revision is broken, should stop
                    # wasting our time
                    if (not any_succeeded2 and n_active > 5):
                        self._blacklist_rev(
                            rev,
                            "None benchmark among %d has succeeded" % n_active)
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    run_order :
        normal : original order (default)
        reverse: in reverse order (latest first)
        multires: cover all revisions but in the order increasing
                  temporal detail
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, prep_cmd, clean_cmd=None,
                 run_option='eod', run_order='normal',
                 start_date=None, overwrite=False,
                 module_dependencies=None, always_clean=False,
                 use_blacklist=True):
        log.info("Initializing benchmark runner for %d benchmarks"
                 % (len(benchmarks)))
        self._benchmarks = None
        self._checksums = None

        self.start_date = start_date
        self.run_option = run_option
        self.run_order = run_order

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist
        self.blacklist = set(self.db.get_rev_blacklist())

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    prep_cmd, clean_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        self.benchmarks = benchmarks

    def _get_benchmarks(self):
        return self._benchmarks

    def _set_benchmarks(self, benchmarks):
        self._benchmarks = benchmarks
        self._checksums = [b.checksum for b in benchmarks]
        self._register_benchmarks()

    benchmarks = property(fget=_get_benchmarks, fset=_set_benchmarks)
    checksums = property(fget=lambda self: self._checksums)

    def run(self):
        log.info("Collecting revisions to run")
        revisions = self._get_revisions_to_run()
        ran_revisions = []
        log.info("Running benchmarks for %d revisions" % (len(revisions),))
        for rev in revisions:
            if self.use_blacklist and rev in self.blacklist:
                log.warn('Skipping blacklisted %s' % rev)
                continue

            any_succeeded, n_active = self._run_and_write_results(rev)
            ran_revisions.append((rev, (any_succeeded, n_active)))

            log.debug("%s succeeded among %d active benchmarks",
                      {True: "Some", False: "None"}[any_succeeded],
                      n_active)
            if not any_succeeded and n_active > 0:
                self.bench_repo.hard_clean()
                any_succeeded2, n_active = self._run_and_write_results(rev)

                # just guessing that this revision is broken, should stop
                # wasting our time
                if (not any_succeeded2 and n_active > 5
                        and self.use_blacklist):
                    log.warn('Blacklisting %s' % rev)
                    self.db.add_rev_blacklist(rev)
        return ran_revisions

    def _run_and_write_results(self, rev):
        """
        Returns True if any runs succeeded
        """
        n_active_benchmarks, results = self._run_revision(rev)

        tracebacks = []
        any_succeeded = False

        for checksum, timing in results.iteritems():
            if 'traceback' in timing:
                tracebacks.append(timing['traceback'])

            timestamp = self.repo.timestamps[rev]

            any_succeeded = any_succeeded or 'timing' in timing

            self.db.write_result(checksum, rev, timestamp,
                                 timing.get('loops'),
                                 timing.get('timing'),
                                 timing.get('traceback'))

        return any_succeeded, n_active_benchmarks

    def _register_benchmarks(self):
        log.info('Getting benchmarks')
        ex_benchmarks = self.db.get_benchmarks()
        db_checksums = set(ex_benchmarks.index)
        log.info("Registering %d benchmarks" % len(ex_benchmarks))
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                self.db.update_name(bm)
            else:
                log.info('Writing new benchmark %s, %s'
                         % (bm.name, bm.checksum))
                self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            log.info('No benchmarks need running at %s' % rev)
            return 0, {}

        log.info('Running %d benchmarks for revision %s'
                 % (len(need_to_run), rev))
        for bm in need_to_run:
            log.debug(bm.name)

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, 'w'))

        # run the process
        cmd = 'python vb_run_benchmarks.py %s %s' % (pickle_path, results_path)
        log.debug("CMD: %s" % cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        if stdout:
            log.debug('stdout: %s' % stdout)
        if proc.returncode:
            log.warn("Returned with non-0 code: %d" % proc.returncode)
        if stderr:
            log.warn("stderr: %s" % stderr)
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                log.warn('HARD CLEANING!')
                self.bench_repo.hard_clean()

        if not os.path.exists(results_path):
            log.warn('Failed for revision %s' % rev)
            return len(need_to_run), {}

        results = pickle.load(open(results_path, 'r'))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return len(need_to_run), results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == 'eod':
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == 'all':
            revs_to_run = rev_by_timestamp.values
        elif self.run_option == 'last':
            revs_to_run = rev_by_timestamp.values[-1:]
            # TODO: if the very last revision fails, there should be a way
            # to look for the second last, etc, until the last one that was run
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise ValueError('unrecognized run_option=%r' % self.run_option)

        if self.run_order not in _RUN_ORDERS:
            raise ValueError('unrecognized run_order=%r. Must be among %s'
                             % (self.run_order, _RUN_ORDERS.keys()))
        revs_to_run = _RUN_ORDERS[self.run_order](revs_to_run)

        return revs_to_run
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', integer}
        eod: use the last revision for each calendar day
        all: benchmark every revision
        some integer N: run each N revisions
    overwrite : boolean
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, preparation_cmd, run_option='end_of_day',
                 start_date=None, overwrite=False):
        self.benchmarks = benchmarks
        self.checksums = [b.checksum for b in benchmarks]

        self.start_date = start_date
        self.run_option = run_option

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    preparation_cmd)

        self._register_benchmarks()

    def run(self):
        revisions = self._get_revisions_to_run()

        for rev in revisions:
            results = self._run_revision(rev)
            tracebacks = []

            for checksum, timing in results.iteritems():
                if 'traceback' in timing:
                    tracebacks.append(timing['traceback'])

                self.db.write_result(checksum, rev,
                                     timing.get('loops'),
                                     timing.get('timing'),
                                     timing.get('traceback'))

            for tb in tracebacks:
                if 'object has no attribute' in tb:
                    print 'HARD CLEANING because of %s' % tb
                    self.bench_repo.hard_clean()
                    break

    def _register_benchmarks(self):
        db_checksums = set(v.checksum for v in self.db.get_benchmarks())
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                continue
            print 'Writing new benchmark %s, %s' % (bm.name, bm.checksum)
            self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print 'No benchmarks need running at %s' % rev
            return {}

        print 'Running %d benchmarks for revision %s' % (len(need_to_run), rev)

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, 'w'))

        # run the process
        cmd = 'python vb_run_benchmarks.py %s %s' % (pickle_path, results_path)
        print cmd
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        print 'stdout: %s' % stdout

        if stderr:
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                print 'HARD CLEANING!'
                self.bench_repo.hard_clean()

            print stderr

        if not os.path.exists(results_path):
            print 'Failed for revision %s' % rev
            return {}

        results = pickle.load(open(results_path, 'r'))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == 'eod':
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == 'all':
            revs_to_run = rev_by_timestamp.values
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise Exception('unrecognized run_option %s' % self.run_option)

        return revs_to_run
def profile_comparative(benchmarks):

    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = tempfile.mkdtemp()

    try:

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(None, REPO_PATH,
                                                            args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)

        report_comparative(head_res, baseline_res)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
def main():
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT

    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(
            os.path.join(REPO_PATH, 'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    try:
        logfile = open(args.log_file, 'w')

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        s = "\n\nResults:\n"
        s += totals.to_string(
            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means the target commit is 30% slower than the baseline.\n\n"
        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % args.log_file)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, preparation_cmd, run_option='eod', start_date=None,
                 overwrite=False, module_dependencies=None,
                 always_clean=False, use_blacklist=True):
        self.benchmarks = benchmarks
        self.checksums = [b.checksum for b in benchmarks]

        self.start_date = start_date
        self.run_option = run_option

        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist
        self.blacklist = set(self.db.get_rev_blacklist())

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    preparation_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        self._register_benchmarks()

    def run(self):
        revisions = self._get_revisions_to_run()

        for rev in revisions:
            if self.use_blacklist and rev in self.blacklist:
                print 'SKIPPING BLACKLISTED %s' % rev
                continue

            any_succeeded, n_active = self._run_and_write_results(rev)
            if not any_succeeded and n_active > 0:
                self.bench_repo.hard_clean()
                any_succeeded2, n_active = self._run_and_write_results(rev)

                # just guessing that this revision is broken, should stop
                # wasting our time
                if (not any_succeeded2 and n_active > 5
                        and self.use_blacklist):
                    print 'BLACKLISTING %s' % rev
                    self.db.add_rev_blacklist(rev)

    def _run_and_write_results(self, rev):
        """
        Returns True if any runs succeeded
        """
        n_active_benchmarks, results = self._run_revision(rev)

        tracebacks = []
        any_succeeded = False

        for checksum, timing in results.iteritems():
            if 'traceback' in timing:
                tracebacks.append(timing['traceback'])

            timestamp = self.repo.timestamps[rev]

            any_succeeded = any_succeeded or 'timing' in timing

            self.db.write_result(checksum, rev, timestamp,
                                 timing.get('loops'),
                                 timing.get('timing'),
                                 timing.get('traceback'))

        return any_succeeded, n_active_benchmarks

    def _register_benchmarks(self):
        ex_benchmarks = self.db.get_benchmarks()
        db_checksums = set(ex_benchmarks.index)
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                self.db.update_name(bm)
            else:
                print 'Writing new benchmark %s, %s' % (bm.name, bm.checksum)
                self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print 'No benchmarks need running at %s' % rev
            return 0, {}

        print 'Running %d benchmarks for revision %s' % (len(need_to_run), rev)
        for bm in need_to_run:
            print bm.name

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, 'w'))

        # run the process
        cmd = 'python vb_run_benchmarks.py %s %s' % (pickle_path, results_path)
        print cmd
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        print 'stdout: %s' % stdout

        if stderr:
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                print stderr
                print 'HARD CLEANING!'
                self.bench_repo.hard_clean()

            print stderr

        if not os.path.exists(results_path):
            print 'Failed for revision %s' % rev
            return len(need_to_run), {}

        results = pickle.load(open(results_path, 'r'))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return len(need_to_run), results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == 'eod':
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == 'all':
            revs_to_run = rev_by_timestamp.values
        elif self.run_option == 'last':
            revs_to_run = rev_by_timestamp.values[-1:]
            # TODO: if the very last revision fails, there should be a way
            # to look for the second last, etc, until the last one that was run
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise Exception('unrecognized run_option %s' % self.run_option)

        return revs_to_run
def profile_comparative(benchmarks):

    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = tempfile.mkdtemp()

    try:

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(None, REPO_PATH,
                                                            args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)

        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame({HEAD_COL: head_res['timing'],
                            BASE_COL: baseline_res['timing'],
                            'ratio': ratio,
                            'name': baseline_res.name},
                           columns=[HEAD_COL, BASE_COL, "ratio", "name"])
        totals = totals.ix[totals[HEAD_COL] > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        h_msg = repo.messages.get(h_head, "")
        b_msg = repo.messages.get(h_baseline, "")

        print_report(totals, h_head=h_head, h_msg=h_msg,
                     h_baseline=h_baseline, b_msg=b_msg)

        if args.outdf:
            prprint("The results DataFrame was written to '%s'\n" % args.outdf)
            totals.save(args.outdf)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
def main():
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(
            os.path.join(REPO_PATH, 'vb_suite.log'))

    random.seed(args.seed)
    np.random.seed(args.seed)

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    benchmarks = [x for x in benchmarks if re.search(args.regex, x.name)]

    try:
        logfile = open(args.log_file, 'w')

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(None, REPO_PATH,
                                                            args.base_commit)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        hdr = ftr = """
-----------------------------------------------------------------------
Test name                      | target[ms] |  base[ms]  |   ratio    |
-----------------------------------------------------------------------
""".strip() + "\n"

        s = "\n"
        s += hdr
        for i in range(len(totals)):
            t, b, r = totals.irow(i).values
            s += "{0:30s} {1: 12.4f} {2: 12.4f} {3: 12.4f}\n".format(
                totals.index[i], t, b, r)
        s += ftr + "\n"

        s += "Ratio < 1.0 means the target commit is faster than the baseline.\n"
        s += "Seed used: %d\n\n" % args.seed

        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Base [%s] : %s\n\n' % (h_baseline,
                                     repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % args.log_file)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
def profile_comparative(benchmarks):

    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from vbench.git import GitRepo
    from suite import BUILD, DB_PATH, PREPARE, dependencies

    TMP_DIR = tempfile.mkdtemp()

    try:

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")

        # all in a good cause...
        GitRepo._parse_commit_log = _parse_wrapper(args.base_commit)

        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
            TMP_DIR, PREPARE, always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)
        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(None, REPO_PATH,
                                                            args.base_commit)

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)

        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame({HEAD_COL: head_res['timing'],
                            BASE_COL: baseline_res['timing'],
                            'ratio': ratio,
                            'name': baseline_res.name},
                           columns=[HEAD_COL, BASE_COL, "ratio", "name"])
        totals = totals.ix[totals[HEAD_COL] > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        h_msg = repo.messages.get(h_head, "")
        b_msg = repo.messages.get(h_baseline, "")

        print_report(totals, h_head=h_head, h_msg=h_msg,
                     h_baseline=h_baseline, b_msg=b_msg)

        if args.outdf:
            prprint("The results DataFrame was written to '%s'\n" % args.outdf)
            totals.save(args.outdf)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
def main():
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT

    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(
            os.path.join(REPO_PATH, 'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    try:
        logfile = open(args.log_file, 'w')

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR, PREPARE,
            always_clean=True,
            # run_option='eod',
            start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline,
                                          repo.messages.get(h_baseline, "")))

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')
        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)

        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        s = "\n\nResults:\n"
        s += totals.to_string(
            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means the target commit is 30% slower than the baseline.\n\n"

        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" % args.log_file)
    finally:
        #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
def main():
    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % LOG_FILE)

    try:
        logfile = open(LOG_FILE, 'w')

        prprint("Processing Repo at '%s'..." % REPO_PATH)
        repo = GitRepo(REPO_PATH)

        # get hashes of baseline and current head
        h_head = repo.shas[-1]
        h_baseline = BASELINE_COMMIT

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint('Comparing Head [%s] : %s ' % (h_head, repo.messages.get(h_head, "")))
        prprint('Against baseline [%s] : %s \n' % (h_baseline,
                                                   repo.messages.get(h_baseline, "")))

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
                                 TMP_DIR, PREPARE, always_clean=True,
                                 # run_option='eod',
                                 start_date=START_DATE,
                                 module_dependencies=dependencies)

        prprint("Removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline commit '%s'" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for current HEAD '%s'" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > 0.010]  # ignore sub 10micros
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        s = "\n\nResults:\n" + totals.to_string(float_format=lambda x: "%0.4f" % x) + "\n\n"
        s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means HEAD is 30% slower than the Baseline.\n\n"

        s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" % LOG_FILE)
    finally:
        #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()
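# Hedged illustration of the comparison every main() variant above performs:
# align target and baseline timings by benchmark, take the element-wise ratio,
# and drop entries that run too fast to measure reliably. The benchmark names
# and timings are invented for the example.
from pandas import DataFrame, Series

head = Series({'frame_add': 1.2, 'groupby_sum': 4.0})       # target timings [ms]
base = Series({'frame_add': 1.0, 'groupby_sum': 5.0})       # baseline timings [ms]

example = DataFrame({'t_head': head,
                     't_baseline': base,
                     'ratio': head / base})                  # ratio < 1.0 => target is faster
example = example[example['t_head'] > 0.010]                 # ignore sub-10-microsecond timings
print example.sort('ratio')                                  # worst regressions end up last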
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', 'last', integer}, default: 'eod'
        eod: use the last revision for each calendar day
        all: benchmark every revision
        last: only try to run the last revision
        some integer N: run each N revisions
    run_order :
        normal : original order (default)
        reverse: in reverse order (latest first)
        multires: cover all revisions, in order of increasing temporal detail
    overwrite : boolean
    dependencies : list or None
        should be list of modules visible in cwd
    """

    def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
                 tmp_dir, prep_cmd, clean_cmd=None,
                 run_option='eod', run_order='normal', start_date=None,
                 overwrite=False, module_dependencies=None,
                 always_clean=False, use_blacklist=True, verify=False):
        log.info("Initializing benchmark runner for %d benchmarks"
                 % (len(benchmarks)))
        self._benchmarks = None
        self._checksums = None

        if verify:
            verify_benchmarks(benchmarks, raise_=True)

        self.start_date = start_date
        self.run_option = run_option
        self.run_order = run_order
        self.repo_path = repo_path
        self.db_path = db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)
        self.use_blacklist = use_blacklist

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    prep_cmd, clean_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)
        self.benchmarks = benchmarks

    def _get_benchmarks(self):
        return self._benchmarks

    def _set_benchmarks(self, benchmarks):
        self._benchmarks = benchmarks
        self._checksums = [b.checksum for b in benchmarks]
        self._register_benchmarks()

    benchmarks = property(fget=_get_benchmarks, fset=_set_benchmarks)
    checksums = property(fget=lambda self: self._checksums)

    @property
    def blacklist(self):
        return set(self.db.get_rev_blacklist())

    def _blacklist_rev(self, rev, msg=""):
        if self.use_blacklist:
            log.warn(('Blacklisting %s' % rev) + (": %s" % msg if msg else "."))
            self.db.add_rev_blacklist(rev)

    def run(self):
        log.info("Collecting revisions to run")
        revisions = self._get_revisions_to_run()
        ran_revisions = []
        log.info("Running benchmarks for %d revisions" % (len(revisions),))
        # get the current blacklist (might be a different one on a next .run())
        blacklist = self.blacklist
        for rev in revisions:
            if self.use_blacklist and rev in blacklist:
                log.warn('Skipping blacklisted %s' % rev)
                continue

            try:
                any_succeeded, n_active = self._run_and_write_results(rev)
            except FailedToBuildError, e:
                self._blacklist_rev(rev, msg=str(e))
                continue

            # All the rerunning below somewhat obscures the destiny of
            # ran_revisions. TODO: make it clear(er)
            ran_revisions.append((rev, (any_succeeded, n_active)))

            if n_active:
                log.debug("%s succeeded among %d active benchmarks",
                          {True: "Some", False: "None"}[any_succeeded],
                          n_active)
                if not any_succeeded:
                    # Give them a second chance
                    self.bench_repo.hard_clean()
                    try:
                        any_succeeded2, n_active2 = self._run_and_write_results(rev)
                    except FailedToBuildError, e:
                        log.warn("Failed to build upon 2nd attempt to benchmark, "
                                 "verify build infrastructure. Skipping for now: %s" % e)
                        continue

                    assert n_active == n_active2, \
                        "Since not any_succeeded, number of benchmarks should remain the same"

                    # just guessing that this revision is broken, should stop
                    # wasting our time
                    if (not any_succeeded2 and n_active > 5):
                        self._blacklist_rev(rev, "No benchmark among %d has succeeded" % n_active)
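# Hedged usage sketch for the runner defined above. The suite-level constants
# (REPO_URL, TMP_DIR, START_DATE, ...) are assumed to exist in the project's
# suite module; treat them as placeholders rather than real project values.
from suite import (benchmarks, REPO_PATH, REPO_URL, BUILD, DB_PATH,
                   TMP_DIR, PREPARE, START_DATE, dependencies)

runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, BUILD, DB_PATH,
                         TMP_DIR, PREPARE,
                         run_option='eod',          # last commit of each calendar day
                         run_order='multires',      # coarse sweep first, then refine
                         start_date=START_DATE,
                         module_dependencies=dependencies)
runner.run()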
class BenchmarkRunner(object):
    """
    Parameters
    ----------
    benchmarks : list of Benchmark objects
    repo_path
    build_cmd
    db_path
    run_option : {'eod', 'all', integer}
        eod: use the last revision for each calendar day
        all: benchmark every revision
        some integer N: run each N revisions
    overwrite : boolean
    """

    def __init__(
        self,
        benchmarks,
        repo_path,
        repo_url,
        build_cmd,
        db_path,
        tmp_dir,
        preparation_cmd,
        run_option="end_of_day",
        start_date=None,
        overwrite=False,
    ):
        self.benchmarks = benchmarks
        self.checksums = [b.checksum for b in benchmarks]

        self.start_date = start_date
        self.run_option = run_option

        self.repo_path = repo_path
        self.db_path = db_path

        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd, preparation_cmd)

        self._register_benchmarks()

    def run(self):
        revisions = self._get_revisions_to_run()

        for rev in revisions:
            results = self._run_revision(rev)
            tracebacks = []

            for checksum, timing in results.iteritems():
                if "traceback" in timing:
                    tracebacks.append(timing["traceback"])

                self.db.write_result(checksum, rev,
                                     timing.get("loops"),
                                     timing.get("timing"),
                                     timing.get("traceback"))

            for tb in tracebacks:
                if "object has no attribute" in tb:
                    print "HARD CLEANING because of %s" % tb
                    self.bench_repo.hard_clean()
                    break

    def _register_benchmarks(self):
        db_checksums = set(v.checksum for v in self.db.get_benchmarks())
        for bm in self.benchmarks:
            if bm.checksum in db_checksums:
                continue
            print "Writing new benchmark %s, %s" % (bm.name, bm.checksum)
            self.db.write_benchmark(bm)

    def _run_revision(self, rev):
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print "No benchmarks need running at %s" % rev
            return {}

        print "Running %d benchmarks for revision %s" % (len(need_to_run), rev)

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, "benchmarks.pickle")
        results_path = os.path.join(self.tmp_dir, "results.pickle")
        if os.path.exists(results_path):
            os.remove(results_path)
        pickle.dump(need_to_run, open(pickle_path, "w"))

        # run the process
        cmd = "python vb_run_benchmarks.py %s %s" % (pickle_path, results_path)
        print cmd
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        print "stdout: %s" % stdout

        if stderr:
            if "object has no attribute" in stderr or "ImportError" in stderr:
                print "HARD CLEANING!"
                self.bench_repo.hard_clean()
            print stderr

        if not os.path.exists(results_path):
            print "Failed for revision %s" % rev
            return {}

        results = pickle.load(open(results_path, "r"))

        try:
            os.remove(pickle_path)
        except OSError:
            pass

        return results

    def _get_benchmarks_for_rev(self, rev):
        existing_results = self.db.get_rev_results(rev)
        need_to_run = []

        timestamp = self.repo.timestamps[rev]

        for b in self.benchmarks:
            if b.start_date is not None and b.start_date > timestamp:
                continue

            if b.checksum not in existing_results:
                need_to_run.append(b)

        return need_to_run

    def _get_revisions_to_run(self):
        # TODO generalize someday to other vcs...git only for now
        rev_by_timestamp = self.repo.shas.sort_index()

        # # assume they're in order, but check for now
        # assert(rev_by_timestamp.index.is_monotonic)

        if self.start_date is not None:
            rev_by_timestamp = rev_by_timestamp.ix[self.start_date:]

        if self.run_option == "eod":
            grouped = rev_by_timestamp.groupby(datetime.date)
            revs_to_run = grouped.apply(lambda x: x[-1]).values
        elif self.run_option == "all":
            revs_to_run = rev_by_timestamp.values
        elif isinstance(self.run_option, int):
            revs_to_run = rev_by_timestamp.values[::self.run_option]
        else:
            raise Exception("unrecognized run_option %s" % self.run_option)

        return revs_to_run
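# Hedged illustration of the 'eod' branch in _get_revisions_to_run() above:
# group a timestamp-indexed Series of shas by calendar day (datetime.date here
# is the method of the datetime class, so it maps each index entry to its date)
# and keep the last commit of each day. The shas and timestamps are invented.
from datetime import datetime
from pandas import Series

shas = Series(['a1b2c3d', 'b2c3d4e', 'c3d4e5f'],
              index=[datetime(2012, 5, 1, 9, 0),
                     datetime(2012, 5, 1, 17, 30),
                     datetime(2012, 5, 2, 11, 15)])

eod = shas.groupby(datetime.date).apply(lambda x: x[-1])
print eod.values    # ['b2c3d4e' 'c3d4e5f'] -- one revision per day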