def test_fixes():
    """Run the anti-keyword and small-bug checks over every repo's fixes."""
    for project in get_repos():
        fixes_path = path.join('data', project, 'test', 'test_fixes.json')
        fixes = load_data(fixes_path)
        test_antikeywords(project, fixes)
        test_small_bugs(project, fixes)
def combine():
    """Merge per-repo author stats from data/<repo>/authors.json into one dict.

    The result maps author -> date -> {'commits': [...],
    'changes': [buggy, good, inserted, ratio]}.  Commit lists are
    concatenated, the first three change counters are summed, and the
    ratio slot is recomputed from the merged counters.

    :return: dict of combined author statistics across all repos
    """
    start = time()
    combined = dict()
    for repo in get_repos():
        authors = load_data(os.path.join('data', repo, 'authors.json'))
        for author, dates in authors.items():
            if author not in combined:
                # First repo that mentions this author: take its data wholesale.
                combined[author] = dates
                continue
            merged_dates = combined[author]
            for date, day in dates.items():
                if date not in merged_dates:
                    merged_dates[date] = day
                    continue
                merged = merged_dates[date]
                merged['commits'].extend(day['commits'])
                changes = merged['changes']
                changes[0] += day['changes'][0]  # buggy
                changes[1] += day['changes'][1]  # good
                changes[2] += day['changes'][2]  # inserted
                # Recompute the ratio of buggy to inserted lines.
                # Bug fix: the original guarded changes[2] == 0 but divided
                # by changes[1] (good), raising ZeroDivisionError whenever
                # good == 0 with inserted > 0; the denominator now matches
                # the zero guard (NOTE(review): confirm buggy/inserted, not
                # buggy/good, is the intended ratio).
                if changes[2] == 0:
                    changes[3] = 0
                else:
                    changes[3] = changes[0] / changes[2]
    log_debug(None, 'Combined authors in {0[0]}s. There are {0[1]} authors.',
              (time() - start, len(combined)))
    return combined
def get_fixes_stats():
    """Log, per repo, how many fixed files and fix commits delete no lines.

    Counters are cumulative across repos, so each repo's log line reports
    running totals over all repos processed so far (NOTE(review): confirm
    the accumulation across repos is intentional).
    """
    no_deletes = 0          # files whose diff has no deleted lines
    total = 0               # all files seen in fix diffs
    total_commits = 0       # all fix commits seen
    no_deletes_commits = 0  # commits with no deleted lines in any file
    for repo in get_repos():
        diff_path = path.join('data', repo, 'test', 'test_buggy_fix_diffs.json')
        differences = load_data(diff_path)
        for commit in differences.values():
            total_commits += 1
            nd = True  # no delete anywhere in this commit
            for files in commit.values():
                # Loop variable renamed: the original reused `file`, shadowing
                # the path variable; the trailing `continue` was dead code.
                for changed_file in files:
                    total += 1
                    if changed_file['deleted']:
                        nd = False
                    else:
                        no_deletes += 1
            if nd:
                no_deletes_commits += 1
        log_info(
            repo,
            'Number of fixed files with no deleted lines is {0[0]} out of {0[1]}',
            (no_deletes, total))
        log_info(
            repo,
            'Number of fixes with no deleted lines is {0[0]} out of {0[1]}',
            (no_deletes_commits, total_commits))
def test_dates():
    """Verify each commit's timestamp maps to a recorded author/date entry."""
    for project in get_repos():
        commits = load_commits(path.join('data', project))
        authors = load_authors(project)
        errors = list()
        for commit in commits:
            author = authors.get(commit['author_email'])
            if author is None:
                continue
            d = datetime.utcfromtimestamp(int(commit['time']))
            date = author.get(d.strftime('%Y/%m/%d'))
            if date is None:
                # Only the 2014-2017 window is expected to have full data.
                if 2018 > d.year >= 2014:
                    log_info(
                        project,
                        'No such date: {0[0]} for {0[1]}. The commit timestamp: {0[2]}',
                        (d, commit['author_email'], commit['time']))
                    errors.append(commit['commit'])
                continue
            recorded_shas = [list(x.keys())[0] for x in date.get('commits')]
            if commit['commit'] not in recorded_shas:
                errors.append(commit['commit'])
                log_info(project, 'No commit{0[0]} for {0[1]}',
                         (commit['commit'], commit['time']))
        if not errors:
            log_info(project, 'OK -- All dates correct', None)
def overview():
    """
    Creates overview of all repos with data: repo, min line num, max line
    ratio, max daily line ratio, commit min, commit ratio max, devs num,
    devs with ratio of exactly 0.

    :return: dict mapping repo name to its seven-element stats list
    """
    summary = dict()
    for repo in get_repos():
        authors = load_data(os.path.join('data', repo, 'authors_line_ratio.json'))
        commits = load_data(os.path.join('data', repo, 'authors_commits_ratio.json'))
        # [min lines, max line ratio, max daily ratio, min commits,
        #  max commit ratio, dev count, devs with ratio exactly 0]
        # NOTE(review): 1000 and 10 are assumed upper bounds for the two
        # minima; a dev exceeding them would make the minimum wrong.
        data = [1000, 0, 0, 10, 0, 0, 0]
        for author in authors:
            data[0] = min(data[0], author['commits']['sum'])
            data[1] = max(data[1], author['commits']['ratio'])
            data[2] = max(data[2], author['commits']['daily_ratio'])
            c = [(x['commits']['buggy'], x['commits']['good'])
                 for x in commits if x['dev'] == author['dev']]
            buggy, good = c[0]
            data[3] = min(data[3], buggy + good)
            # A dev with no good commits counts as ratio 1 (avoids div by 0).
            cr = 1 if good == 0 else buggy / good
            data[4] = max(data[4], cr)
            data[5] += 1
            if author['commits']['ratio'] == 0:
                data[6] += 1
        log_debug(repo, data, None)
        summary[repo] = data
    # Bug fix: the original built this dict but never returned it.
    return summary
def test_buggy():
    """Run the full battery of buggy-line checks on every repo."""
    checks = (
        test_buggy_annotated,
        test_annotated_sums,
        test_annotated_negatives,
        test_buggy_duplicates,
        test_buggy_lines,
    )
    for project in get_repos():
        for check in checks:
            check(project)
def test_changes_sum():
    """Check that buggy + good == inserted for every author/date entry."""
    for project in get_repos():
        errors = [
            (author, date)
            for author, dates in load_authors(project).items()
            for date, day in dates.items()
            if day.get('changes')[0] + day.get('changes')[1]
            != day.get('changes')[2]
        ]
        report_results(errors, project, 'test_changes_sum')
def test_changes_inserted():
    """Check that the recorded inserted-line count (changes[2]) equals the
    sum of inserted lines over all of an author/date's commits."""
    for project in get_repos():
        authors = load_authors(project)
        errors = list()
        for author, dates in authors.items():
            for date, day in dates.items():
                inserted = 0
                for commit in day.get('commits'):
                    # Each commit maps sha -> list of changed files; the sha
                    # itself is not needed here.
                    for files in commit.values():
                        for changed_file in files:
                            inserted += len(changed_file.get('inserted'))
                if inserted != day.get('changes')[2]:
                    # Bug fix: the original appended the *last* commit's sha
                    # via the leaked loop variable `c` (NameError when a date
                    # had no commits); report (author, date) instead, matching
                    # test_changes_sum.
                    errors.append((author, date))
        report_results(errors, project, 'test_changes_inserted')
def devs_table():
    """
    Create a csv file summing up the data of each dev for each repo
    separately - dev email, total commits, total lines, ratio, daily
    ratio, commit ratio
    :return: None
    """
    # todo: should we do it one for all repos?
    for repo in get_repos():
        rows = [[
            'Author', 'Line Ratio', 'Daily Line Ratio', 'Lines',
            'Commit Ratio', 'Commits'
        ]]
        authors = load_data(os.path.join('data', repo, 'authors_line_ratio.json'))
        commits = load_data(os.path.join('data', repo, 'authors_commits_ratio.json'))
        for author in authors:
            row = [
                author['dev'],
                author['commits']['ratio'],
                author['commits']['daily_ratio'],
                author['commits']['sum'],
            ]
            c = [(x['commits']['buggy'], x['commits']['good'])
                 for x in commits if x['dev'] == author['dev']]
            buggy, good = c[0]
            # A dev with no good commits counts as ratio 1 (avoids div by 0).
            row.append(1 if good == 0 else buggy / good)
            row.append(buggy + good)
            rows.append(row)
        save_in_file([], 'authors.csv', os.path.join('data', repo))
        # Bug fix: open the file inside `with` and pass newline='' as the
        # csv module requires (prevents blank lines between rows on Windows).
        with open(os.path.join('data', repo, 'authors.csv'), 'w',
                  newline='') as out:
            csv.writer(out).writerows(rows)
        log_debug(repo, 'done', None)
def get_authors():
    """Prepare author data for every repository."""
    for repository in get_repos():
        prep_data(repository)
def run_fixes():
    """Extract fix information for every repository into its data directory."""
    for repository in get_repos():
        extract_fixes(os.path.join('data', repository), repository)
def get_data():
    """Collect commits and bug data for every repository."""
    for repository in get_repos():
        save_commits(repository,
                     os.path.join('repos', repository),
                     os.path.join('data', repository))
        save_bugs(repository)
def load_all():
    """Accumulate every project's authors into the module-level lists.

    Appends to the module-level `all_authors` and `all` defined elsewhere
    (NOTE(review): `all` shadows the builtin at module level; renaming it
    would require touching its other users).
    """
    for project in get_repos():
        for author, data in load_authors(project).items():
            all_authors.append(author)
            all.append({author: data})