Пример #1
0
def computeQualitySub():
    """Compute issue-repair metrics per repository and insert them into ``quality_sub``.

    For every id in ``REPOS`` two values are derived from the ``issues_info``
    rows with ``is_pr=0``:

    * repair ratio -- number of rows with a non-null ``closed_at`` divided by
      the number of all rows (0 when there are no rows);
    * repair time -- ``1 / (mean(closed_at - created_at) + 1)``, so a shorter
      mean gives a larger score. The whole list is then passed through
      ``_nor_data`` (presumably a normalisation step -- defined elsewhere).

    Repositories listed in ``NONE_GH`` get ``None`` for both values.
    """
    repair_ratio, repair_time = [], []
    for repo in REPOS:
        if repo in NONE_GH:
            repair_ratio.append(None)
            repair_time.append(None)
            continue

        # Each row is (closed_at, created_at) for one non-PR issue.
        rows = dbop.select_all(
            "select closed_at,created_at from issues_info where repo_id=%s and is_pr=0",
            (repo, ))
        total_num = len(rows)
        closed = [row for row in rows if row[0] is not None]

        if total_num == 0:
            ratio = 0
        else:
            ratio = len(closed) * 1.0 / total_num

        if closed:
            # Unit of the difference depends on _datetime2int (defined elsewhere).
            mean_time = sum(
                _datetime2int(row[0]) - _datetime2int(row[1])
                for row in closed) * 1.0 / len(closed)
        else:
            # No closed issue (including "no issues at all"): fall back to 0,
            # the same value the zero-issue case has always used. Previously
            # "issues exist but none closed" divided by zero here.
            # NOTE(review): 0 maps to the best possible time score (1.0);
            # confirm that is the intended ranking for such repositories.
            mean_time = 0

        repair_ratio.append(ratio)
        repair_time.append(1.0 / (mean_time + 1))

    repair_time = _nor_data(repair_time)
    for i, repo in enumerate(REPOS):
        dbop.execute(
            "insert into quality_sub(repo_id,repair_ratio,repair_time) values(%s,%s,%s)",
            (repo, repair_ratio[i], repair_time[i]))
Пример #2
0
def readPrjLists():
    """Load projects from ``prj_list`` into ``PRJS`` and ``REPO_ID``.

    For each row with a non-null ``github_url`` the repository name (the URL
    minus its first 19 characters) is handed to ``PRJS.put`` and mapped to the
    integer ``prj_id`` in ``REPO_ID``.
    """
    rows = dbop.select_all("select prj_id,github_url from prj_list")
    for prj_id, github_url in rows:
        if github_url is None:
            continue
        # presumably strips the 19-char prefix "https://github.com/" -- verify
        # that every stored URL uses exactly that prefix.
        name = github_url[19:]
        PRJS.put(name)
        REPO_ID[name] = int(prj_id)
Пример #3
0
def _get_last_commit_fetch(prj):
    """Return the last fetched commit page of *prj* and the SHAs stored for it.

    Returns:
        ``(last_page, shas)`` where ``last_page`` is the ``page`` of the newest
        ``commits_json_raw`` row for the repository and ``shas`` is the set of
        ``sha`` values in ``commits_info`` for that repository and page.
    """
    repo_id = REPO_ID[prj]
    # The third argument is presumably the fallback row used when nothing
    # matches, i.e. page 1 -- confirm against dbop.select_one.
    newest = dbop.select_one(
        "select page from commits_json_raw where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = newest[0]

    sha_rows = dbop.select_all(
        "select sha from commits_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    known_shas = {row[0] for row in sha_rows}

    return last_page, known_shas
Пример #4
0
def readPrjLists():
    """Return the repository names from ``prj_list`` and fill ``REPO_ID``.

    Rows with a null ``github_url`` are skipped. For the rest, the repository
    name is the URL minus its first 19 characters; it is appended to the
    returned list and mapped to the integer ``prj_id`` in ``REPO_ID``.
    """
    names = []
    for prj_id, github_url in dbop.select_all(
            "select prj_id,github_url from prj_list"):
        if github_url is None:
            continue
        # presumably strips the 19-char prefix "https://github.com/" -- verify
        # that every stored URL uses exactly that prefix.
        name = github_url[19:]
        names.append(name)
        REPO_ID[name] = int(prj_id)
    return names
Пример #5
0
def _get_last_issue_fetch(prj):
    """Return the last fetched issue page of *prj* and the issue numbers stored for it.

    Returns:
        ``(last_page, numbers)`` where ``last_page`` is the ``page`` of the
        newest ``issues_json_raw`` row for the repository and ``numbers`` is
        the set of ``number`` values in ``issues_info`` for that repository
        and page.
    """
    repo_id = REPO_ID[prj]
    # Look up where the previous run stopped. The third argument is presumably
    # the fallback row used when nothing matches, i.e. page 1 -- confirm
    # against dbop.select_one.
    newest = dbop.select_one(
        "select page from issues_json_raw  where repo_id=%s order by id desc limit 1",
        (repo_id,), (1,))
    last_page = newest[0]

    # Issue numbers already recorded for that page.
    number_rows = dbop.select_all(
        "select number from issues_info where repo_id=%s and page =%s",
        (repo_id, last_page))
    known_numbers = {row[0] for row in number_rows}

    return last_page, known_numbers
Пример #6
0
def readPrjLists():
    """Fill ``REPOS`` and the ``NONE_GH`` / ``NONE_FB`` / ``NONE_TW`` sets from ``prj_list``.

    Every ``prj_id`` is appended to ``REPOS``. A project is added to
    ``NONE_GH`` when its ``github_url`` is null, and to ``NONE_FB`` /
    ``NONE_TW`` when the respective URL is null or only whitespace.
    """
    rows = dbop.select_all(
        "select prj_id,github_url,facebook_url,twitter_url from prj_list")
    for prj_id, github_url, facebook_url, twitter_url in rows:
        REPOS.append(prj_id)
        # Only NULL counts as "missing" for GitHub; blank strings are not checked.
        if github_url is None:
            NONE_GH.add(prj_id)
        if facebook_url is None or not facebook_url.strip():
            NONE_FB.add(prj_id)
        if twitter_url is None or not twitter_url.strip():
            NONE_TW.add(prj_id)
Пример #7
0
def computeTeamHealth():
    """Compute the ccr / ngr / tbr metrics per repository and insert them into ``team_health``.

    Three consecutive time windows of ``EXAMINE_WINDOW`` days, counted back
    from now, are compared using the sets of commit authors active in each:

    * ccr -- authors common to the newest and the middle window, divided by
      the middle window's author count plus one;
    * ngr -- relative change in "new author" counts between the newest and
      the middle window;
    * tbr -- ``1 / (gini(commits per author) + gini(issues per user) + 1)``,
      computed over all rows of the repository (no time filter in the query).

    Repositories in ``NONE_GH`` get ``None`` for all three. Each metric list
    is passed through ``_nor_data`` before being written.
    """
    author_sql = "select author_id from commits_info where repo_id=%s and author_id is not null and (author_date>%s and author_date<%s)"

    def authors_between(repo_id, start, end):
        # Distinct commit authors of repo_id with start < author_date < end.
        rows = dbop.select_all(author_sql, (repo_id, start, end))
        return {row[0] for row in rows}

    # Window boundaries, newest first.
    now = time.time()
    bound_0 = _strtime_before_days(now, 0)
    bound_1 = _strtime_before_days(now, EXAMINE_WINDOW)
    bound_2 = _strtime_before_days(now, 2 * EXAMINE_WINDOW)
    bound_3 = _strtime_before_days(now, 3 * EXAMINE_WINDOW)

    ccrs, ngrs, tbrs = [], [], []
    for repo in REPOS:
        if repo in NONE_GH:
            for scores in (ccrs, ngrs, tbrs):
                scores.append(None)
            continue

        newest = authors_between(repo, bound_1, bound_0)
        middle = authors_between(repo, bound_2, bound_1)
        oldest = authors_between(repo, bound_3, bound_2)

        # ccr (+1 keeps the denominator non-zero)
        stayed = _common_num(newest, middle)
        ccrs.append(stayed * 1.0 / (len(middle) + 1))

        # ngr (+1 keeps the denominator non-zero)
        fresh_newest = len(newest) - stayed + 1
        stayed_before = _common_num(oldest, middle)
        fresh_middle = len(middle) - stayed_before + 1
        ngrs.append((fresh_newest - fresh_middle) * 1.0 / fresh_middle)

        # tbr -- the original comment says "for the previous window", but the
        # queries below carry no date filter.
        per_author_commits = dbop.select_all(
            "select count(*) from commits_info where repo_id=%s and author_id is not null group by author_id",
            (repo, ))
        per_user_issues = dbop.select_all(
            "select count(*) from issues_info where repo_id=%s and user_id is not null group by user_id",
            (repo, ))
        commit_gini = _gini([row[0] for row in per_author_commits])
        issue_gini = _gini([row[0] for row in per_user_issues])
        tbrs.append(1.0 / (commit_gini + issue_gini + 1))

    normalized = [_nor_data(ccrs), _nor_data(ngrs), _nor_data(tbrs)]
    for idx, repo in enumerate(REPOS):
        row = [repo] + [column[idx] for column in normalized]
        dbop.execute(
            "insert into team_health(repo_id, ccr,ngr,tbr) values(%s,%s,%s,%s)",
            row)
Пример #8
0
def computeMaturity():
    """Compute maturity metrics per repository and insert them into ``maturity``.

    Collected per repository id in ``REPOS``:

    * issue_done -- count of closed non-PR issues;
    * commit_total -- count of commits;
    * age_dev -- ``_continuous_dev_month`` over all commit author dates;
    * watch / star / fork -- newest ``html_info`` row;
    * Facebook and Twitter follower counts -- newest row of the respective table.

    GitHub-derived values are ``None`` for repositories in ``NONE_GH``; the
    social values are ``None`` for those in ``NONE_FB`` / ``NONE_TW``. Every
    metric list goes through ``_nor_data``; watch/star/fork are then combined
    with ``_my_avg`` into ``fans_dev`` and the two social metrics into
    ``fans_social``.
    """
    issue_done, commit_total, age_dev = [], [], []
    # Named after the html_info columns, in the order the query selects them.
    html_watch, html_star, html_fork = [], [], []
    fans_fb, fans_tw = [], []
    github_columns = (issue_done, commit_total, age_dev,
                      html_watch, html_star, html_fork)
    # Order matters: the insert below slices this list by position.
    metrics = list(github_columns) + [fans_fb, fans_tw]

    # Collect the raw value of every metric.
    for repo_id in REPOS:
        if repo_id in NONE_GH:
            for column in github_columns:
                column.append(None)
        else:
            closed_issues = dbop.select_one(
                "select count(*) from issues_info where repo_id=%s and is_pr=0 and closed_at is not NULL",
                (repo_id, ))
            issue_done.append(closed_issues[0])

            commit_count = dbop.select_one(
                "select count(*) from commits_info where repo_id=%s",
                (repo_id, ))
            commit_total.append(commit_count[0])

            commit_dates = dbop.select_all(
                "select author_date from commits_info where repo_id =%s",
                (repo_id, ))
            age_dev.append(_continuous_dev_month(commit_dates))

            # The third argument is presumably the fallback row when nothing
            # matches -- confirm against dbop.select_one.
            html_row = dbop.select_one(
                "select watch,star,fork from html_info where repo_id=%s order by id desc limit 1",
                (repo_id, ), (0, 0, 0))
            html_watch.append(html_row[0])
            html_star.append(html_row[1])
            html_fork.append(html_row[2])

        if repo_id in NONE_FB:
            fans_fb.append(None)
        else:
            fb_row = dbop.select_one(
                "select watches_num from facebook_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            fans_fb.append(fb_row[0])

        if repo_id in NONE_TW:
            fans_tw.append(None)
        else:
            tw_row = dbop.select_one(
                "select followers_num from twitters_data where coin_id=%s order by id desc limit 1",
                (repo_id, ), (0, ))
            fans_tw.append(tw_row[0])

    # Normalise every metric list.
    normalized = [_nor_data(metric) for metric in metrics]

    for idx, repo_id in enumerate(REPOS):
        row = [column[idx] for column in normalized]
        fans_dev = _my_avg(row[3:6])      # watch, star, fork
        fans_social = _my_avg(row[6:])    # facebook, twitter
        dbop.execute(
            "insert into maturity(repo_id, issue_done, commit_total, age_dev, fans_dev, fans_social) values(%s,%s,%s,%s,%s,%s)",
            (repo_id, row[0], row[1], row[2], fans_dev, fans_social))