def sequential(lines, model_args):
    """
    Run the sequential algorithm over each input line.

    See the description in the file docs for the algorithm itself.

    lines: iterable of encoded lines, each one handed to FileData.

    model_args: sequence whose first element is converted to a float
    and used as the knowledge churn constant.

    Yields each FileData re-encoded as a tsv line, with its dev_uniq
    and tot_knowledge fields filled in.
    """
    churn = float(model_args[0])
    for raw_line in lines:
        file_data = FileData(raw_line)
        # the estimator returns a (dev_uniq, tot_knowledge) pair, which
        # is unpacked straight onto the record
        file_data.dev_uniq, file_data.tot_knowledge = sequential_estimate_uniq(
            file_data, churn)
        yield file_data.as_line()
Пример #2
0
def sequential(lines, model_args):
    """
    Entry point of the sequential algorithm (described in the file
    docs).

    lines: iterable of encoded lines; each is parsed with FileData.

    model_args: sequence of model arguments; only the first one is
    read, as the float knowledge churn constant.

    Yields one tsv line per input line: the FileData with dev_uniq
    and tot_knowledge set from sequential_estimate_uniq.
    """
    churn_constant = float(model_args[0])
    for encoded in lines:
        record = FileData(encoded)
        estimate = sequential_estimate_uniq(record, churn_constant)
        # estimate is a two-item result: unique knowledge, then total
        uniq, total = estimate
        record.dev_uniq = uniq
        record.tot_knowledge = total
        yield record.as_line()
Пример #3
0
def gen_stats(root, project, interesting, not_interesting, options):
    """
    Generate per-file stats for a project hosted in svn.

    root: the root svn url of the project (it does not have to be the
    root of the svn repo).  Must be a url, not a checkout path.

    project: the project identifier; it is joined to each file path
    with ':' to form the FileData name.

    interesting: regular expressions that mark a path as interesting
    when they match.

    not_interesting: regular expressions that override interesting
    and mark a path as not interesting.

    options: options from gen_file_stats.py's main; not used here.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    client = pysvn.Client()

    # svn ls reports paths relative to the repo root rather than to
    # the project root, so the repo root is needed to resolve them
    repo_root = client.root_url_from_path(root)

    # collect every candidate path up front, before any per-file work
    svn_paths = []
    for entry in client.list(root, recurse=True):
        node = entry[0]
        if not is_interesting(node.repos_path, interesting, not_interesting):
            continue
        if node.kind == pysvn.node_kind.file:
            svn_paths.append(node.repos_path)

    for svn_path in svn_paths:
        history = parse_dev_experience(svn_path, client, repo_root)
        if not history:
            continue
        fd = FileData(':'.join((project, svn_path)))
        # skip revisions with 0 lines added and 0 removed, such as
        # property-only changes
        fd.dev_experience = [
            (dev, added, removed)
            for dev, added, removed in history
            if added or removed
        ]
        fd.cnt_lines = count_lines(svn_path, client, repo_root)
        encoded = fd.as_line()
        if encoded.strip():
            yield encoded
Пример #4
0
def estimate_file_risks(lines, bus_risks, def_bus_risk):
    """
    Estimate the risk in each file as:

    sum(knowledge unique to a group of 1 or more devs * the
    probability that all devs in the group will be hit by a bus)

    A simple joint probability is used: every bus hit is assumed to
    be independent of the others.

    lines: iterable of encoded lines, each parsed with FileData.

    bus_risks, def_bus_risk: passed unchanged to get_bus_risk for
    every dev (presumably the per-dev risks and a fallback value;
    confirm against get_bus_risk).

    Yields each FileData re-encoded with as_line(), with dev_risk set
    to a list of (devs, risk) pairs.
    """
    def group_risk(devs, shared):
        # scale the shared knowledge by each dev's bus risk in turn;
        # an empty group leaves the shared value untouched
        risk = shared
        for dev in devs:
            risk = float(risk) * get_bus_risk(dev, bus_risks, def_bus_risk)
        return risk

    for line in lines:
        fd = FileData(line)
        fd.dev_risk = [(devs, group_risk(devs, shared))
                       for devs, shared in fd.dev_uniq]
        yield fd.as_line()
Пример #5
0
def estimate_file_risks(lines, bus_risks, def_bus_risk):
    """
    Compute the bus risk of every file in lines.

    The risk of a file is:

    sum(knowledge unique to a group of 1 or more devs * the
    probability that all devs in the group will be hit by a bus)

    The probability is a simple joint probability: all bus killings
    are assumed to be independently likely.

    Each line is parsed with FileData; bus_risks and def_bus_risk are
    forwarded to get_bus_risk for every dev.  Yields the FileData
    re-encoded with as_line() after dev_risk has been set to a list
    of (devs, risk) pairs.
    """
    for encoded in lines:
        record = FileData(encoded)
        risks = []
        for group, knowledge in record.dev_uniq:
            # begin with the group's shared knowledge, then multiply
            # in the bus risk of each member
            group_risk = knowledge
            for member in group:
                member_risk = get_bus_risk(member, bus_risks, def_bus_risk)
                group_risk = float(group_risk) * member_risk
            risks.append((group, group_risk))
        record.dev_risk = risks
        yield record.as_line()
Пример #6
0
def gen_stats(root, project, interesting, not_interesting, options):
    """
    Produce file stats for an svn-hosted project.

    root: the root svn url of the project we are generating stats
    for.  It need not be the root of the svn repo, but it must be a
    url and not a checkout path.

    project: the project identifier, used as the prefix of every
    FileData name ("project:path").

    interesting: regular expressions; a path matching one of them is
    interesting.

    not_interesting: regular expressions that trump interesting; a
    path matching one of them is not interesting.

    options: currently unused, options from gen_file_stats.py's main.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    client = pysvn.Client()

    # the paths svn ls returns are relative to the repo root, not to
    # our project root, hence the lookup of the repo root
    repo_root = client.root_url_from_path(root)

    def wanted(node):
        # an interesting path that is also a regular file
        return (is_interesting(node.repos_path, interesting, not_interesting)
                and node.kind == pysvn.node_kind.file)

    candidates = [item[0].repos_path
                  for item in client.list(root, recurse=True)
                  if wanted(item[0])]

    for repos_path in candidates:
        experience = parse_dev_experience(repos_path, client, repo_root)
        if experience:
            fd = FileData(':'.join([project, repos_path]))
            # leave out revisions that added 0 lines and removed 0
            # lines, like property changes
            changes = []
            for dev, added, removed in experience:
                if added or removed:
                    changes.append((dev, added, removed))
            fd.dev_experience = changes
            fd.cnt_lines = count_lines(repos_path, client, repo_root)
            tsv_line = fd.as_line()
            if tsv_line.strip():
                yield tsv_line
Пример #7
0
def gen_stats(root, project, interesting, not_interesting, options):
    """
    Generate per-file stats for a project kept in git.

    root: the path to a local, git-controlled directory that is the
    root of this project.

    project: the name of the project; it is joined to each file path
    with ':' to form the FileData name.

    interesting: regular expressions that mark a path as interesting
    when they match.

    not_interesting: regular expressions that override interesting
    and mark a path as not interesting.

    options: from gen_file_stats.py's main; only git_exe is read.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    git_exe = options.git_exe

    # git only works from inside a git-controlled path, so get into
    # one before running anything else
    prepare(root, git_exe)

    for path in git_ls(root, git_exe):
        if not is_interesting(path, interesting, not_interesting):
            continue
        experience = parse_dev_experience(path, git_exe)
        if not experience:
            continue
        fd = FileData(':'.join((project, path)))
        fd.dev_experience = experience
        fd.cnt_lines = count_lines(path)
        encoded = fd.as_line()
        # lines that are blank once stripped are not emitted
        if encoded.strip():
            yield encoded
Пример #8
0
def gen_stats(root, project, interesting, not_interesting, options):
    """
    Produce file stats for a git-controlled project.

    root: path to the local, git-controlled directory that is the
    root of this project.

    project: the name of the project, used as the prefix of every
    FileData name ("project:path").

    interesting: regular expressions; a path matching one of them is
    interesting.

    not_interesting: regular expressions that trump interesting; a
    path matching one of them is not interesting.

    options: from gen_file_stats.py's main, currently only git_exe is
    used.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    git_exe = options.git_exe

    # we have to be inside a git controlled path before git will do
    # anything, so move into one first
    prepare(root, git_exe)

    tracked = git_ls(root, git_exe)

    for tracked_file in tracked:
        if not is_interesting(tracked_file, interesting, not_interesting):
            continue

        history = parse_dev_experience(tracked_file, git_exe)
        if not history:
            # no recorded experience for this file: nothing to report
            continue

        record = FileData(':'.join([project, tracked_file]))
        record.dev_experience = history
        record.cnt_lines = count_lines(tracked_file)

        tsv_line = record.as_line()
        if tsv_line.strip():
            yield tsv_line