def sava_bugs_for_project(repo, jira_key):
    """Export the bugs of one Apache project to ``<VERSIONS>/<jira_key>.csv``.

    The export is skipped (and ``None`` is returned) when:

    * ``repo`` does not exist on disk -- a "git clone" hint is printed;
    * the top level of ``repo`` contains no ``pom.xml``;
    * ``get_repo_versions`` reports fewer than five versions.

    :param repo: path of the local git checkout of the project.
    :param jira_key: JIRA project key; also used as the CSV base name.
    :return: None
    """
    if not os.path.exists(repo):
        # Call form of print: valid on both Python 2 and Python 3, and
        # consistent with the other functions in this file.
        print("start git clone https://github.com/apache/{0}.git".format(
            os.path.basename(repo)))
        return
    if "pom.xml" not in os.listdir(repo):
        return
    versions = get_repo_versions(repo)
    if len(versions) < 5:
        return
    save_bugs(
        os.path.join(VERSIONS, jira_key) + ".csv", repo,
        r"http://issues.apache.org/jira", jira_key, versions)
def get_repo_adata(repo_path, jira_key):
    """Export ticket statistics for one project to ``<VERSIONS>/<jira_key>_STATS.csv``.

    The output path, the repository path, the Apache JIRA URL, the JIRA key
    and the repository versions are handed to ``save_tickets_stats``.
    Nothing is exported (and ``None`` is returned) when ``repo_path`` does
    not exist -- a "git clone" hint is printed -- or when the repository has
    too few versions.

    :param repo_path: path of the local git checkout of the project.
    :param jira_key: JIRA project key; also used as the CSV base name.
    :return: None
    """
    if not os.path.exists(repo_path):
        print("start git clone https://github.com/apache/{0}.git".format(
            os.path.basename(repo_path)))
        return

    # The message is built from the threshold so the two cannot drift apart
    # (the text used to say "5" while the check was "< 4").
    # NOTE(review): sava_bugs_for_project requires 5 versions -- confirm
    # whether 4 is really the intended minimum here.
    min_versions = 4
    versions = get_repo_versions(repo_path)
    if len(versions) < min_versions:
        print("Less than {0} versions for repo, aborting".format(min_versions))
        return
    save_tickets_stats(
        os.path.join(VERSIONS, jira_key) + "_STATS.csv", repo_path,
        r"http://issues.apache.org/jira", jira_key, versions)
def get_versions_by_type(repo):
    """Group the versions of *repo* into major, minor and micro releases.

    The version number is read from the END of each version's ``_name``
    (any prefix is ignored).  Components are one or two digits long and are
    separated by ``.``, ``-`` or ``_`` (the same separator throughout);
    compact three- and two-digit endings such as ``123`` / ``12`` are
    accepted as a last resort.  The first matching layout wins.

    A matched version is added to:

    * ``micros`` when its name carries an explicit micro component;
    * ``minors`` when its micro component is 0 or absent;
    * ``majors`` when both its minor and micro components are 0.

    Versions whose name matches no layout appear only under ``"all"``.

    :param repo: repository path handed to ``get_repo_versions``.
    :return: list of ``(key, versions)`` pairs for the keys ``"all"``,
        ``"majors"``, ``"minors"`` and ``"micros"``.
    """
    import re

    all_versions = get_repo_versions(repo)
    majors = []
    minors = []
    micros = []

    separators = (r'\.', '-', '_')
    one_digit = '[0-9]'
    two_digits = '[0-9][0-9]'
    component_names = ('major', 'minor', 'micro')

    # Three-component layouts come before two-component ones: a name such as
    # "1.2.10" also ends in the two-component form "2.10", so trying the
    # shorter layout first would shadow the longer one.
    layouts = [
        (one_digit, two_digits, one_digit),   # 1.10.3
        (one_digit, one_digit, two_digits),   # 1.2.10
        (one_digit, one_digit, one_digit),    # 1.2.3
        (one_digit, two_digits),              # 1.10
        (two_digits, one_digit),              # 10.2
        (one_digit, one_digit),               # 1.2
    ]

    # Named groups keep the meaning of every captured component explicit, so
    # "1.10" is read as major=1, minor=10 rather than major/minor/micro.
    patterns = []
    for layout in layouts:
        groups = ['(?P<{0}>{1})'.format(name, digits)
                  for name, digits in zip(component_names, layout)]
        for separator in separators:
            patterns.append(re.compile(separator.join(groups) + '$'))
    # Compact endings without separators: "123" and "12".
    for count in (3, 2):
        patterns.append(re.compile(''.join(
            '(?P<{0}>{1})'.format(name, one_digit)
            for name in component_names[:count]) + '$'))

    for version in all_versions:
        for pattern in patterns:
            match = pattern.search(version._name)
            if match is None:
                continue
            components = match.groupdict()
            minor = int(components['minor'])
            micro = int(components.get('micro', 0))
            if 'micro' in components:
                micros.append(version)
            if micro == 0:
                minors.append(version)
                if minor == 0:
                    majors.append(version)
            break

    # list(...) keeps the Python 2 return type (a list of pairs) on Python 3.
    return list({
        "all": all_versions,
        "majors": majors,
        "minors": minors,
        "micros": micros
    }.items())
def visualize_repo(repo_path):
    """Build a two-panel plot of per-commit file statistics for a repository.

    For every commit yielded by ``repo.iter_commits()`` the function
    collects the commit date, the number of added and deleted files
    (deletions are stored as negative numbers), the number of files touched
    by the commit and the total number of blobs in the commit's tree.  The
    top axes show additions/deletions, the bottom axes show touched and
    total files, both against the commit date.

    Nothing is plotted (and ``None`` is returned) when ``repo_path`` does
    not exist -- a "git clone" hint is printed -- or when the repository has
    too few versions.

    NOTE(review): the figure is created but neither shown nor saved here;
    presumably the caller is expected to call ``plt.show()`` -- confirm.

    :param repo_path: path of the local git checkout to analyse.
    :return: None
    """
    if not os.path.exists(repo_path):
        print("start git clone https://github.com/apache/{0}.git".format(
            os.path.basename(repo_path)))
        return

    # The message is built from the threshold so the two cannot drift apart
    # (the text used to say "5" while the check was "< 4").
    min_versions = 4
    versions = get_repo_versions(repo_path)
    if len(versions) < min_versions:
        print("Less than {0} versions for repo, aborting".format(min_versions))
        return

    repo = git.Repo(repo_path)
    print('getting master')
    dates = []
    num_files = []
    num_additions = []
    num_deletions = []
    total_files = []
    for git_commit in repo.iter_commits():
        if git_commit.parents:
            # Only --name-status is requested: combining it with --numstat
            # makes git print every file twice (one status line, one numstat
            # line), which doubled the "commit files" count.
            name_status = repo.git.diff(
                git_commit.parents[0].hexsha, git_commit.hexsha,
                '--name-status')
        else:
            # A root commit has no parent to diff against; --root makes
            # diff-tree report all of its files as additions.  (Passing
            # '--name-status' after '--' to `git diff` turned the option into
            # a pathspec, so the root commit always came out empty.)
            name_status = repo.git.diff_tree(
                '--root', '--no-commit-id', '--name-status', '-r',
                git_commit.hexsha)
        # First tab-separated column of each line is the status code.
        all_statuses = [
            line.split("\t")[0] for line in name_status.splitlines()
        ]
        dates.append(git_commit.committed_datetime)
        num_deletions.append(-sum(1 for s in all_statuses if 'D' in s))
        num_additions.append(sum(1 for s in all_statuses if 'A' in s))
        num_files.append(len(all_statuses))
        total_files.append(
            sum(1 for item in git_commit.tree.traverse()
                if item.type == 'blob'))

    # Imported lazily so the early-return paths do not require matplotlib.
    from matplotlib import pyplot as plt
    fig, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(dates, num_additions, label='additions')
    ax[0].plot(dates, num_deletions, label='deletions')
    ax[1].plot(dates, num_files, label='commit files')
    ax[1].plot(dates, total_files, label='total files')
    ax[0].set(title=os.path.split(repo_path)[1])
    ax[0].legend()
    ax[1].set(xlabel="dates", ylabel="# files")
    ax[1].legend()
ratio_bugged_commits, datetime.fromtimestamp( tag.tag._commit._commit_date).strftime("%Y-%m-%d") ]) def main(out_file, gitPath, jira_url, jira_project_name): commits = get_data(jira_project_name, jira_url, gitPath) with open(out_file, "wb") as f: writer = csv.writer(f) writer.writerows([c.to_list() for c in commits]) if __name__ == "__main__": repo = git.Repo(r"c:\temp\tika") versions = get_repo_versions(r"c:\temp\tika") tags_commits = get_commits_between_versions( map(lambda c: Commit.init_commit_by_git_commit(c, 0), list(repo.iter_commits())[:1000]), versions) tags = [] for tag in tags_commits: tags.append(VersionInfo(tag, tags_commits[tag])) import apache_repos from caching import REPOSIROTY_DATA_DIR VERSIONS = os.path.join(REPOSIROTY_DATA_DIR, r"apache_versions") repos_and_jira = apache_repos.get_apache_repos_data() for repo, jira_key in repos_and_jira: if not os.path.exists(repo): print "start git clone https://github.com/apache/{0}.git".format( os.path.basename(repo)) continue