def computeQualitySub():
    """Compute issue-repair metrics for every repo and insert them into quality_sub.

    Two values are produced per repository in REPOS:

    * repair_ratio -- closed non-PR issues divided by all non-PR issues.
    * repair_time  -- 1 / (mean(closed_at - created_at) + 1) over the closed
      issues, afterwards passed through _nor_data across all repositories.

    Repositories listed in NONE_GH get None for both values.
    """
    repair_ratio, repair_time = [], []
    for repo in REPOS:
        if repo in NONE_GH:
            repair_ratio.append(None)
            repair_time.append(None)
            continue

        # One row per non-PR issue: (closed_at, created_at).
        rows = dbop.select_all(
            "select closed_at,created_at from issues_info where repo_id=%s and is_pr=0",
            (repo, ))
        closed = [row for row in rows if row[0] is not None]

        if closed:
            ratio = len(closed) * 1.0 / len(rows)
            elapsed_total = sum(
                _datetime2int(row[0]) - _datetime2int(row[1]) for row in closed)
            mean_elapsed = elapsed_total * 1.0 / len(closed)
        else:
            # Covers both "no issues at all" and "issues exist but none is
            # closed". The latter previously divided by len(closed) == 0 and
            # raised ZeroDivisionError; it now uses the same fallback the
            # no-issues case always had.
            # NOTE(review): a zero interval yields the best repair_time score
            # (1.0) for a repo with only open issues -- confirm that this is
            # the desired ranking, or store None instead.
            ratio = 0
            mean_elapsed = 0

        repair_ratio.append(ratio)
        # Reciprocal so that shorter intervals score higher; +1 keeps the
        # denominator non-zero.
        repair_time.append(1.0 / (mean_elapsed + 1))

    repair_time = _nor_data(repair_time)
    for repo, ratio, time_score in zip(REPOS, repair_ratio, repair_time):
        dbop.execute(
            "insert into quality_sub(repo_id,repair_ratio,repair_time) values(%s,%s,%s)",
            (repo, ratio, time_score))
def readPrjLists():
    """Load projects from prj_list into PRJS (via put) and the REPO_ID map.

    Rows whose github_url is NULL are skipped.
    """
    for row in dbop.select_all("select prj_id,github_url from prj_list"):
        github_url = row[1]
        if github_url is None:
            continue
        # Drop the first 19 characters of the URL -- presumably the
        # "https://github.com/" prefix; verify against the stored data.
        name = github_url[19:]
        PRJS.put(name)
        REPO_ID[name] = int(row[0])
def _get_last_commit_fetch(prj):
    """Return the latest recorded commit page for *prj* and the SHAs stored for it.

    Returns a ``(last_page, sha_set)`` tuple, where ``last_page`` comes from
    the newest commits_json_raw row of the repo and ``sha_set`` holds the
    ``sha`` values of commits_info rows on that page.
    """
    repo_id = REPO_ID[prj]
    # The (1,) argument is presumably the fallback row used when nothing has
    # been recorded yet -- confirm against dbop.select_one.
    page_row = dbop.select_one(
        "select page from commits_json_raw where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = page_row[0]
    sha_rows = dbop.select_all(
        "select sha from commits_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    return last_page, {row[0] for row in sha_rows}
def readPrjLists():
    """Read prj_list, fill the REPO_ID map and return the repo names as a list.

    Rows whose github_url is NULL are skipped.
    """
    names = []
    for row in dbop.select_all("select prj_id,github_url from prj_list"):
        github_url = row[1]
        if github_url is None:
            continue
        # Drop the first 19 characters of the URL -- presumably the
        # "https://github.com/" prefix; verify against the stored data.
        name = github_url[19:]
        names.append(name)
        REPO_ID[name] = int(row[0])
    return names
def _get_last_issue_fetch(prj):
    """Return the latest recorded issue page for *prj* and the issue numbers stored for it.

    Returns a ``(last_page, number_set)`` tuple, where ``last_page`` comes
    from the newest issues_json_raw row of the repo and ``number_set`` holds
    the ``number`` values of issues_info rows on that page.
    """
    repo_id = REPO_ID[prj]
    # The (1,) argument is presumably the fallback row used when nothing has
    # been recorded yet -- confirm against dbop.select_one.
    page_row = dbop.select_one(
        "select page from issues_json_raw where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = page_row[0]
    number_rows = dbop.select_all(
        "select number from issues_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    return last_page, {row[0] for row in number_rows}
def readPrjLists():
    """Populate REPOS and the NONE_GH / NONE_FB / NONE_TW sets from prj_list.

    Every project id is appended to REPOS. A project is added to NONE_GH when
    its github_url is NULL, and to NONE_FB / NONE_TW when the respective URL
    is NULL or contains only whitespace.
    """
    rows = dbop.select_all(
        "select prj_id,github_url,facebook_url,twitter_url from prj_list")
    for row in rows:
        prj_id = row[0]
        github_url, facebook_url, twitter_url = row[1], row[2], row[3]

        REPOS.append(prj_id)
        if github_url is None:
            NONE_GH.add(prj_id)
        if facebook_url is None or not facebook_url.strip():
            NONE_FB.add(prj_id)
        if twitter_url is None or not twitter_url.strip():
            NONE_TW.add(prj_id)
def computeTeamHealth():
    """Compute ccr, ngr and tbr for every repo and insert them into team_health.

    Commit authors are collected for three consecutive windows of
    EXAMINE_WINDOW days counted back from now (w1 is the most recent):

    * ccr -- |w1 & w2| / (|w2| + 1)
    * ngr -- (new1 - new2) / new2, where new_k is the number of authors in
      window k that were not in window k+1, plus one
    * tbr -- 1 / (gini(commits per author) + gini(issues per user) + 1)

    Each metric is passed through _nor_data across all repositories before
    insertion. Repositories listed in NONE_GH get None for all three.
    """

    def authors_between(repo_id, start, end):
        # Distinct non-null commit authors with start < author_date < end.
        rows = dbop.select_all(
            "select author_id from commits_info where repo_id=%s and author_id is not null and (author_date>%s and author_date<%s)",
            (repo_id, start, end))
        return {row[0] for row in rows}

    # Window boundaries, newest first.
    now = time.time()
    now_str = _strtime_before_days(now, 0)
    one_back = _strtime_before_days(now, EXAMINE_WINDOW)
    two_back = _strtime_before_days(now, 2 * EXAMINE_WINDOW)
    three_back = _strtime_before_days(now, 3 * EXAMINE_WINDOW)

    ccrs, ngrs, tbrs = [], [], []
    for repo in REPOS:
        if repo in NONE_GH:
            for column in (ccrs, ngrs, tbrs):
                column.append(None)
            continue

        authors_w1 = authors_between(repo, one_back, now_str)
        authors_w2 = authors_between(repo, two_back, one_back)
        authors_w3 = authors_between(repo, three_back, two_back)

        # ccr; the +1 keeps the denominator non-zero.
        shared_1_2 = _common_num(authors_w1, authors_w2)
        ccrs.append(shared_1_2 * 1.0 / (len(authors_w2) + 1))

        # ngr; the +1 on each count keeps the denominator non-zero.
        newcomers_w1 = len(authors_w1) - shared_1_2 + 1
        shared_3_2 = _common_num(authors_w3, authors_w2)
        newcomers_w2 = len(authors_w2) - shared_3_2 + 1
        ngrs.append((newcomers_w1 - newcomers_w2) * 1.0 / newcomers_w2)

        # tbr.
        # NOTE(review): the original comment said this is "for the last
        # window", but neither query below filters by date -- confirm intent.
        commit_counts = dbop.select_all(
            "select count(*) from commits_info where repo_id=%s and author_id is not null group by author_id",
            (repo, ))
        issue_counts = dbop.select_all(
            "select count(*) from issues_info where repo_id=%s and user_id is not null group by user_id",
            (repo, ))
        commit_gini = _gini([row[0] for row in commit_counts])
        issue_gini = _gini([row[0] for row in issue_counts])
        tbrs.append(1.0 / (commit_gini + issue_gini + 1))

    normalized = [_nor_data(ccrs), _nor_data(ngrs), _nor_data(tbrs)]
    for idx, repo in enumerate(REPOS):
        params = [repo] + [column[idx] for column in normalized]
        dbop.execute(
            "insert into team_health(repo_id, ccr,ngr,tbr) values(%s,%s,%s,%s)",
            params)
def computeMaturity():
    """Compute maturity metrics for every repo and insert them into maturity.

    Eight raw columns are gathered per repository, each is passed through
    _nor_data across all repositories, and then written as:

    * issue_done   -- closed non-PR issue count
    * commit_total -- commit count
    * age_dev      -- _continuous_dev_month over all commit author dates
    * fans_dev     -- _my_avg of the watch / star / fork columns
    * fans_social  -- _my_avg of the Facebook and Twitter columns

    GitHub-derived columns are None for repos in NONE_GH; the Facebook and
    Twitter columns are None for repos in NONE_FB and NONE_TW respectively.
    """
    issues_closed, commits, dev_age = [], [], []
    watch_counts, star_counts, fork_counts = [], [], []
    facebook_fans, twitter_fans = [], []
    github_columns = (issues_closed, commits, dev_age,
                      watch_counts, star_counts, fork_counts)

    # Collect the raw value of every metric.
    for repo_id in REPOS:
        if repo_id in NONE_GH:
            for column in github_columns:
                column.append(None)
        else:
            closed_row = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and closed_at is not NULL",
                (repo_id, ))
            issues_closed.append(closed_row[0])

            commit_row = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s",
                (repo_id, ))
            commits.append(commit_row[0])

            author_dates = dbop.select_all(
                "select author_date from commits_info where repo_id =%s",
                (repo_id, ))
            dev_age.append(_continuous_dev_month(author_dates))

            # Column order in the row is watch, star, fork. The (0, 0, 0)
            # argument is presumably the fallback row -- confirm against
            # dbop.select_one.
            html_row = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s order by id desc limit 1",
                (repo_id, ), (0, 0, 0))
            watch_counts.append(html_row[0])
            star_counts.append(html_row[1])
            fork_counts.append(html_row[2])

        if repo_id in NONE_FB:
            facebook_fans.append(None)
        else:
            fb_row = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            facebook_fans.append(fb_row[0])

        if repo_id in NONE_TW:
            twitter_fans.append(None)
        else:
            tw_row = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            twitter_fans.append(tw_row[0])

    # Normalize each column.
    raw_columns = list(github_columns) + [facebook_fans, twitter_fans]
    normalized = [_nor_data(column) for column in raw_columns]

    for idx, repo_id in enumerate(REPOS):
        row = [column[idx] for column in normalized]
        dbop.execute(
            "insert into maturity(repo_id, issue_done, commit_total, age_dev, fans_dev, fans_social) values(%s,%s,%s,%s,%s,%s)",
            (repo_id, row[0], row[1], row[2],
             _my_avg(row[3:6]), _my_avg(row[6:])))