Пример #1
0
    def __init__(self,
                 benchmarks,
                 repo_path,
                 repo_url,
                 build_cmd,
                 db_path,
                 tmp_dir,
                 preparation_cmd,
                 run_option='end_of_day',
                 start_date=None,
                 overwrite=False):
        """Set up the repository, results database and build checkout.

        Parameters
        ----------
        benchmarks : iterable
            Benchmark objects; each must expose a ``checksum`` attribute.
        repo_path : passed to ``GitRepo``.
        repo_url, build_cmd, preparation_cmd : passed, together with
            ``tmp_dir``, to ``BenchRepo``.
        db_path : passed to ``BenchmarkDB``.
        tmp_dir : where to copy the repo.
        run_option, start_date : stored on the instance unchanged.
        overwrite : accepted for the caller's convenience but not read
            anywhere in this constructor.
        """
        # What to run and how.
        self.benchmarks = benchmarks
        self.checksums = [bench.checksum for bench in benchmarks]
        self.start_date, self.run_option = start_date, run_option

        # Source history and results store.
        self.repo_path, self.db_path = repo_path, db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)

        # Scratch checkout used for building.
        self.tmp_dir = tmp_dir
        bench_repo_args = (repo_url, self.tmp_dir, build_cmd, preparation_cmd)
        self.bench_repo = BenchRepo(*bench_repo_args)

        self._register_benchmarks()
Пример #2
0
    def __init__(self,
                 benchmarks,
                 repo_path,
                 repo_url,
                 build_cmd,
                 db_path,
                 tmp_dir,
                 prep_cmd,
                 clean_cmd=None,
                 run_option='eod',
                 run_order='normal',
                 start_date=None,
                 overwrite=False,
                 module_dependencies=None,
                 always_clean=False,
                 use_blacklist=True,
                 verify=False):
        """Set up the repository, results database and build checkout.

        Parameters
        ----------
        benchmarks : sized collection of benchmarks (``len()`` is taken for
            the log message); assigned to ``self.benchmarks`` last.
        repo_path : passed to ``GitRepo``.
        repo_url, build_cmd, prep_cmd, clean_cmd : passed positionally,
            together with ``tmp_dir``, to ``BenchRepo``.
        db_path : passed to ``BenchmarkDB``.
        tmp_dir : where to copy the repo.
        run_option, run_order, start_date, use_blacklist : stored on the
            instance unchanged.
        overwrite : accepted but not read anywhere in this constructor.
        module_dependencies : forwarded to ``BenchRepo`` as ``dependencies``.
        always_clean : forwarded to ``BenchRepo`` as ``always_clean``.
        verify : when true, ``verify_benchmarks(benchmarks, raise_=True)``
            is called before anything else is set up.
        """
        log.info("Initializing benchmark runner for %d benchmarks" %
                 (len(benchmarks)))
        # Backing fields start empty.
        # NOTE(review): presumably filled in by the `benchmarks` assignment
        # at the end of this method -- confirm in the class body.
        self._benchmarks = None
        self._checksums = None

        # Optional up-front validation.
        # NOTE(review): `raise_=True` presumably makes invalid benchmarks
        # raise -- confirm against verify_benchmarks.
        if verify:
            verify_benchmarks(benchmarks, raise_=True)

        self.start_date = start_date
        self.run_option = run_option
        self.run_order = run_order

        self.repo_path = repo_path
        self.db_path = db_path

        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)

        self.use_blacklist = use_blacklist

        # where to copy the repo
        self.tmp_dir = tmp_dir
        self.bench_repo = BenchRepo(repo_url,
                                    self.tmp_dir,
                                    build_cmd,
                                    prep_cmd,
                                    clean_cmd,
                                    always_clean=always_clean,
                                    dependencies=module_dependencies)

        # Assigned last, once repo/db/bench_repo exist.
        # NOTE(review): `benchmarks` looks like a property whose setter may
        # rely on those attributes -- keep this ordering unless confirmed.
        self.benchmarks = benchmarks
Пример #3
0
    def __init__(self,
                 benchmarks,
                 repo_path,
                 repo_url,
                 build_cmd,
                 db_path,
                 tmp_dir,
                 preparation_cmd,
                 run_option='eod',
                 start_date=None,
                 overwrite=False,
                 module_dependencies=None,
                 always_clean=False,
                 use_blacklist=True,
                 time=True):
        """Set up the repository, results database and build checkout.

        Parameters
        ----------
        benchmarks : iterable
            Benchmark objects; each must expose a ``checksum`` attribute.
        repo_path : passed to ``GitRepo``.
        repo_url, build_cmd, preparation_cmd : passed positionally,
            together with ``tmp_dir``, to ``BenchRepo``.
        db_path : passed to ``BenchmarkDB``.
        tmp_dir : where to copy the repo.
        run_option, start_date, use_blacklist, time : stored on the
            instance unchanged.
        overwrite : accepted but not read anywhere in this constructor.
        module_dependencies : forwarded to ``BenchRepo`` as ``dependencies``.
        always_clean : forwarded to ``BenchRepo`` as ``always_clean``.
        """
        # What to run and how.
        self.benchmarks = benchmarks
        self.checksums = [bench.checksum for bench in benchmarks]
        self.start_date, self.run_option = start_date, run_option

        # Source history and results store.
        self.repo_path, self.db_path = repo_path, db_path
        self.repo = GitRepo(self.repo_path)
        self.db = BenchmarkDB(db_path)

        # The blacklist is loaded from the database whether or not
        # use_blacklist is set; the flag is only recorded here.
        self.use_blacklist = use_blacklist
        blacklisted_revs = self.db.get_rev_blacklist()
        self.blacklist = set(blacklisted_revs)
        self.time = time

        # Scratch checkout used for building.
        self.tmp_dir = tmp_dir
        bench_repo_options = dict(always_clean=always_clean,
                                  dependencies=module_dependencies)
        self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                    preparation_cmd, **bench_repo_options)
        self._register_benchmarks()

        # Interpreter name, overridable through the VBENCH_PYTHON
        # environment variable.
        self._python = os.environ.get('VBENCH_PYTHON', 'python')
Пример #4
0
import subprocess
import os
import re
import sys

import numpy as np

from pandas import *


if __name__ == '__main__':
    from vbench.git import GitRepo

    # Per-file churn for the local pandas checkout.
    repo = GitRepo('/Users/wesm/code/pandas')
    churn = repo.get_churn_by_file()

    # Keep only Python / Cython sources.
    source_suffixes = ('.pyx', '.py')
    file_include = [path for path in churn.major_axis
                    if path.endswith(source_suffixes)]

    # Drop commits whose message contains 'LF', plus one hand-picked commit.
    commits_include = []
    for sha in churn.minor_axis:
        if 'LF' in repo.messages[sha]:
            continue
        commits_include.append(sha)
    commits_include.remove('dcf3490')

    clean_churn = churn.reindex(major=file_include, minor=commits_include)

    # Collapse the 'major' axis, then axis 1, leaving one value per commit.
    collapsed = clean_churn.sum('major')
    by_commit = collapsed.sum(1)

    # Aggregate per commit date and discard one hand-picked date.
    per_date = by_commit.groupby(repo.commit_date)
    by_date = per_date.sum()

    by_date = by_date.drop([datetime(2011, 6, 10)])
Пример #5
0
from pandas import *
import matplotlib.pyplot as plt

import sqlite3

from vbench.git import GitRepo

# Local pandas checkout whose git history is read through GitRepo.
REPO_PATH = '/home/adam/code/pandas'
repo = GitRepo(REPO_PATH)

# Results database queried by get_results().
con = sqlite3.connect('vb_suite/benchmarks.db')

# Benchmark checksums this script has been pointed at, in the order they
# were tried.  Only the final entry is active.
_BMK_CANDIDATES = (
    '36900a889961162138c140ce4ae3c205',
    # '9d7b8c04b532df6c2d55ef497039b0ce',
    '4481aa4efa9926683002a673d2ed3dac',
    '00593cd8c03d769669d7b46585161726',
    '3725ab7cd0a0657d7ae70f171c877cea',
    '3cd376d6d6ef802cdea49ac47a67be21',
    'c22ca82e0cfba8dc42595103113c7da3',
    'e0e651a8e9fbf0270ab68137f8b9df5f',
    '96bda4b9a60e17acf92a243580f2a0c3',
)
bmk = _BMK_CANDIDATES[-1]

# Second checksum, kept separately from the candidates above.
bmk2 = '459225186023853494bc345fd180f395'


def get_results(bmk):
    """Return the stored results for one benchmark, sorted by index.

    Parameters
    ----------
    bmk : str
        Benchmark checksum to look up in the ``results`` table.

    Returns
    -------
    Series
        For every matching row, column 3 keyed by column 1; the keys are
        then mapped through ``repo.timestamps.get`` and the Series is
        sorted by that index.
        NOTE(review): columns 1 and 3 are presumably the revision and the
        timing -- confirm against the ``results`` schema.
    """
    # Bind the checksum as a query parameter rather than interpolating it
    # into the SQL text, so quote characters in the value cannot change
    # (or break) the statement.
    rows = con.execute("select * from results where checksum=?",
                       (bmk,)).fetchall()
    x = Series({row[1]: row[3] for row in rows})
    x.index = x.index.map(repo.timestamps.get)
    x = x.sort_index()
    return x
Пример #6
0
        {
            'insertions': DataFrame(insertions),
            'deletions': DataFrame(deletions)
        },
        minor_axis=shas)

    # return DataFrame({'insertions' : insertions,
    #                   'deletions' : deletions}, index=shas)


if __name__ == '__main__':
    # commits, hists = get_commit_history()
    # churn = get_code_churn(commits)

    from vbench.git import GitRepo

    # Per-file churn for the local pandas checkout.
    repo = GitRepo('/Users/wesm/code/pandas')
    churn = repo.get_churn_by_file()

    # Keep only Python / Cython sources.
    file_include = [
        path for path in churn.major_axis if path.endswith(('.pyx', '.py'))
    ]

    # Drop commits whose message contains 'LF', plus one hand-picked commit.
    commits_include = []
    for sha in churn.minor_axis:
        if 'LF' not in repo.messages[sha]:
            commits_include.append(sha)
    commits_include.remove('dcf3490')

    clean_churn = churn.reindex(major=file_include, minor=commits_include)

    # Collapse the 'major' axis, then axis 1, leaving one value per commit.
    collapsed = clean_churn.sum('major')
    by_commit = collapsed.sum(1)
Пример #7
0
def main():
    """Benchmark the current HEAD against ``BASELINE_COMMIT`` and report.

    Steps, in order:

    1. Create a scratch directory and open ``LOG_FILE`` for writing.
    2. Delete any stored results for the two commits, then run the
       benchmarks for the baseline and for HEAD (the last entry of
       ``repo.shas``), writing results to the database at ``DB_PATH``.
    3. Build a table of head timing, baseline timing and their ratio
       (head / baseline), drop rows with ``t_head <= 0.010`` and rows with
       missing values, sort ascending by ratio and index by benchmark name.
    4. Write the formatted table to the log file and print it.

    The scratch directory is always removed and the log file is always
    closed, whether or not the run succeeds.
    """
    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % LOG_FILE)

    try:
        # The log is opened before any benchmark runs, exactly as before,
        # so an unwritable LOG_FILE fails fast.  Using `with` means the
        # cleanup below never touches an unbound `logfile` when open()
        # itself raises, which previously masked the real error.
        with open(LOG_FILE, 'w') as logfile:

            prprint("Processing Repo at '%s'..." % REPO_PATH)
            repo = GitRepo(REPO_PATH)

            # get hashes of baseline and current head
            h_head = repo.shas[-1]
            h_baseline = BASELINE_COMMIT

            prprint("Opening DB at '%s'...\n" % DB_PATH)
            db = BenchmarkDB(DB_PATH)

            prprint('Comparing Head [%s] : %s ' %
                    (h_head, repo.messages.get(h_head, "")))
            prprint('Against baseline [%s] : %s \n' %
                    (h_baseline, repo.messages.get(h_baseline, "")))

            prprint("Initializing Runner...")
            # run_option / start_date are deliberately left at the
            # runner's defaults (previously: run_option='eod',
            # start_date=START_DATE).
            runner = BenchmarkRunner(
                benchmarks,
                REPO_PATH,
                REPO_PATH,
                BUILD,
                DB_PATH,
                TMP_DIR,
                PREPARE,
                always_clean=True,
                module_dependencies=dependencies)

            prprint("removing any previous measurements for the commits.")
            db.delete_rev_results(h_baseline)
            db.delete_rev_results(h_head)

            # TODO: we could skip this, but we need to make sure all
            # results are in the DB, which is a little tricky with
            # start dates and so on.
            prprint("Running benchmarks for baseline commit '%s'" %
                    h_baseline)
            runner._run_and_write_results(h_baseline)

            prprint("Running benchmarks for current HEAD '%s'" % h_head)
            runner._run_and_write_results(h_head)

            prprint('Processing results...')

            head_res = get_results_df(db, h_head)
            baseline_res = get_results_df(db, h_baseline)
            ratio = head_res['timing'] / baseline_res['timing']
            totals = DataFrame(dict(t_head=head_res['timing'],
                                    t_baseline=baseline_res['timing'],
                                    ratio=ratio,
                                    name=baseline_res.name),
                               columns=["t_head", "t_baseline", "ratio",
                                        "name"])
            totals = totals.ix[totals.t_head > 0.010]  # ignore sub 10micros
            totals = totals.dropna().sort("ratio").set_index(
                'name')  # sort in ascending order

            s = "\n\nResults:\n" + totals.to_string(
                float_format=lambda x: "%0.4f" % x) + "\n\n"
            s += ("Columns: test_name | head_time [ms] | "
                  "baseline_time [ms] | ratio\n\n")
            s += ("- a Ratio of 1.30 means HEAD is 30% slower then "
                  "the Baseline.\n\n")

            s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
            s += 'Baseline [%s] : %s\n\n' % (
                h_baseline, repo.messages.get(h_baseline, ""))

            logfile.write(s)

        # The log file is closed at this point, as it was before printing
        # in the original flow.
        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" %
                LOG_FILE)

    finally:
        #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)