def blasphemy_ccp_cochange_by_dev_num_group(repo_file_quality_per_year,
                                            repo_file_blasphemy_per_year):
    """
    Co-change of corrective commits ratio and blasphemy hit rates, analysed
    per 'dev_num_group' value ('small', 'medium', 'large').

    Both inputs are loaded with build_repo_per_year_df, joined on repository
    name, year and 'dev_num_group', and handed to cochange_analysis_by_value.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    For change analysis it doesn't matter.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_blasphemy_per_year: per-year blasphemy input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'
    control_variables = ['dev_num_group']

    quality_df = build_repo_per_year_df(
        repo_file_quality_per_year,
        key=key,
        control_variables=control_variables)
    blasphemy_df = build_repo_per_year_df(
        repo_file_blasphemy_per_year,
        key=key,
        control_variables=control_variables)

    # Join on the repository, the year and the controlled column.
    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, blasphemy_df, on=join_columns)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='blasphemy_hit_rates',
        first_the_higher_the_better=False,
        second_the_higher_the_better=False,
        first_sig_threshold=0.1,
        second_sig_threshold=0.01,
        fixed_variable='dev_num_group',
        fixed_values=['small', 'medium', 'large'],
        key=key,
        control_variables=control_variables)
def onboarding_ccp_cochange(repo_file_quality_per_year,
                            repo_file_onboarding_per_year):
    """
    Co-change of ccp and the ratio of coming involved developers.

    Both inputs are loaded with build_repo_per_year_df. Onboarding rows are
    kept only when 'comming_developers' is greater than 9, then joined with
    the quality rows on repository name and year.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    Here 'corrective_commits_ratio' is mapped through
    ccp_estimator.estimate_positives into a 'ccp' column, which is the
    first metric analysed.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_onboarding_per_year: per-year onboarding input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'

    quality_df = build_repo_per_year_df(repo_file_quality_per_year, key=key)
    onboarding_df = build_repo_per_year_df(repo_file_onboarding_per_year,
                                           key=key)

    # Drop repository-years with 9 or fewer coming developers.
    enough_newcomers = onboarding_df.comming_developers > 9
    onboarding_df = onboarding_df[enough_newcomers]

    per_year_df = pd.merge(quality_df, onboarding_df, on=[key, 'year'])
    per_year_df['ccp'] = per_year_df.corrective_commits_ratio.map(
        ccp_estimator.estimate_positives)

    analysis_settings = {
        'first_metric': 'ccp',
        'second_metric': 'comming_involved_developers_ratio',
        'first_the_higher_the_better': False,
        'second_the_higher_the_better': True,
        'first_sig_threshold': 0.1,
        'second_sig_threshold': 0.1,
        'key': key,
    }
    cochange_analysis(per_year_df, **analysis_settings)
def blasphemy_ccp_cochange(repo_file_quality_per_year,
                           repo_file_blasphemy_per_year):
    """
    Co-change of corrective commits ratio and blasphemy hit rates.

    Both inputs are loaded with build_repo_per_year_df, joined on repository
    name and year, and handed to cochange_analysis.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    For change analysis it doesn't matter.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_blasphemy_per_year: per-year blasphemy input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'

    quality_df = build_repo_per_year_df(repo_file_quality_per_year, key=key)
    blasphemy_df = build_repo_per_year_df(repo_file_blasphemy_per_year,
                                          key=key)
    per_year_df = pd.merge(quality_df, blasphemy_df, on=[key, 'year'])

    cochange_analysis(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='blasphemy_hit_rates',
        first_the_higher_the_better=False,
        second_the_higher_the_better=False,
        first_sig_threshold=0.1,
        second_sig_threshold=0.01,
        key=key)
def churn_ccp_cochange_by_age(repo_file_quality_per_year,
                              repo_file_churn_per_year):
    """
    Co-change of ccp and the ratio of continuing developers, analysed per
    'age_group' value ('old', 'medium', 'young').

    Both inputs are loaded with build_repo_per_year_df. Churn rows are kept
    only when 'base_year_developers' is greater than 9, then joined with the
    quality rows on repository name, year and 'age_group'.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    Here 'corrective_commits_ratio' is mapped through
    ccp_estimator.estimate_positives into a 'ccp' column, which is the
    first metric analysed.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_churn_per_year: per-year churn input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'
    control_variables = ['age_group']

    quality_df = build_repo_per_year_df(
        repo_file_quality_per_year,
        key=key,
        control_variables=control_variables)
    churn_df = build_repo_per_year_df(
        repo_file_churn_per_year,
        key=key,
        control_variables=control_variables)

    # Drop repository-years with 9 or fewer base-year developers.
    enough_developers = churn_df.base_year_developers > 9
    churn_df = churn_df[enough_developers]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, churn_df, on=join_columns)
    per_year_df['ccp'] = per_year_df.corrective_commits_ratio.map(
        ccp_estimator.estimate_positives)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='ccp',
        second_metric='continuing_developers_ratio',
        first_the_higher_the_better=False,
        second_the_higher_the_better=True,
        first_sig_threshold=0.1,
        second_sig_threshold=0.1,
        fixed_variable='age_group',
        fixed_values=['old', 'medium', 'young'],
        key=key,
        control_variables=control_variables)
def onboarding_ccp_cochange_by_lang(repo_file_quality_per_year,
                                    repo_file_onboarding_per_year):
    """
    Co-change of ccp and the ratio of coming involved developers, analysed
    per programming language (the values in lang_name).

    Both inputs are loaded with build_repo_per_year_df. Onboarding rows are
    kept only when 'comming_developers' is greater than 9 and 'language' is
    one of lang_name, then joined with the quality rows on repository name,
    year and 'language'.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    Here 'corrective_commits_ratio' is mapped through
    ccp_estimator.estimate_positives into a 'ccp' column, which is the
    first metric analysed.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_onboarding_per_year: per-year onboarding input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'
    fixed_variable = 'language'
    control_variables = [fixed_variable]

    quality_df = build_repo_per_year_df(
        repo_file_quality_per_year,
        key=key,
        control_variables=control_variables)
    onboarding_df = build_repo_per_year_df(
        repo_file_onboarding_per_year,
        key=key,
        control_variables=control_variables)

    # Keep rows with more than 9 coming developers in an analysed language.
    enough_newcomers = onboarding_df.comming_developers > 9
    analysed_language = onboarding_df.language.isin(lang_name)
    onboarding_df = onboarding_df[enough_newcomers & analysed_language]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, onboarding_df, on=join_columns)
    per_year_df['ccp'] = per_year_df.corrective_commits_ratio.map(
        ccp_estimator.estimate_positives)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='ccp',
        second_metric='comming_involved_developers_ratio',
        first_the_higher_the_better=False,
        second_the_higher_the_better=True,
        first_sig_threshold=0.1,
        second_sig_threshold=0.1,
        fixed_variable=fixed_variable,
        fixed_values=lang_name,
        key=key,
        control_variables=control_variables)
def coupling_ccp_cochange(repo_file_quality_per_year,
                          repo_file_coupling_per_year):
    """
    Co-change of corrective commits ratio and average capped files.

    Both inputs are loaded with build_repo_per_year_df and joined on
    repository name and year. The result is then joined on repository name
    with the output of get_valid_repos() and analysed twice with the same
    settings: first by cochange_analysis, then by cochange_with_control.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    For change analysis it doesn't matter.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_coupling_per_year: per-year coupling input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'

    quality_df = build_repo_per_year_df(repo_file_quality_per_year, key=key)
    coupling_df = build_repo_per_year_df(repo_file_coupling_per_year, key=key)
    per_year_df = pd.merge(quality_df, coupling_df, on=[key, 'year'])

    # Attach the columns of get_valid_repos(); the default merge keeps only
    # repositories present on both sides.
    valid_repos = get_valid_repos()
    per_year_df = pd.merge(per_year_df, valid_repos, on=[key])

    # Both analyses receive exactly the same settings.
    shared_settings = {
        'first_metric': 'corrective_commits_ratio',
        'second_metric': 'avg_capped_files',
        'first_the_higher_the_better': False,
        'second_the_higher_the_better': False,
        'first_sig_threshold': 0.1,
        'second_sig_threshold': 1,
        'key': key,
    }
    cochange_analysis(per_year_df, **shared_settings)
    cochange_with_control(per_year_df, **shared_settings)
def blasphemy_ccp_cochange_by_lang(repo_file_quality_per_year,
                                   repo_file_blasphemy_per_year):
    """
    Co-change of corrective commits ratio and blasphemy hit rates, analysed
    per programming language (the values in lang_name).

    Both inputs are loaded with build_repo_per_year_df. Blasphemy rows are
    kept only when 'language' is one of lang_name, then joined with the
    quality rows on repository name, year and 'language'.

    Note that repo_file_quality_per_year uses bug hit ratio and not ccp.
    For change analysis it doesn't matter.

    :param repo_file_quality_per_year: per-year quality input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :param repo_file_blasphemy_per_year: per-year blasphemy input given to
        build_repo_per_year_df (presumably a file path - confirm there).
    :return: None
    """
    key = 'repo_name'
    control_variables = ['language']

    quality_df = build_repo_per_year_df(
        repo_file_quality_per_year,
        key=key,
        control_variables=control_variables)
    blasphemy_df = build_repo_per_year_df(
        repo_file_blasphemy_per_year,
        key=key,
        control_variables=control_variables)

    # Restrict the blasphemy side to the analysed languages.
    analysed_language = blasphemy_df.language.isin(lang_name)
    blasphemy_df = blasphemy_df[analysed_language]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, blasphemy_df, on=join_columns)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='blasphemy_hit_rates',
        first_the_higher_the_better=False,
        second_the_higher_the_better=False,
        first_sig_threshold=0.1,
        second_sig_threshold=0.01,
        fixed_variable='language',
        fixed_values=lang_name,
        key=key,
        control_variables=control_variables)
def run_blasphemy_stability():
    """
    Print the stability analysis of blasphemy hit rates over the years.

    Loads 'ccp_by_quality_terms_by_repo_per_year.csv' from DATA_PATH with
    build_repo_per_year_df and prints what analyze_stability returns for
    the 'blasphemy_hit_rates' metric, using 'year' as the time column,
    EARLIEST_ANALYZED_YEAR as the minimal time and a threshold of 200 on
    the 'commits' column.

    :return: None
    """
    key = 'repo_name'

    blasphemy_file = os.path.join(
        DATA_PATH, 'ccp_by_quality_terms_by_repo_per_year.csv')
    blasphemy_df = build_repo_per_year_df(blasphemy_file, key=key)

    stability = analyze_stability(
        blasphemy_df,
        key=key,
        metric_name='blasphemy_hit_rates',
        time_column='year',
        minimal_time=EARLIEST_ANALYZED_YEAR,
        control_variables=[],
        min_cnt_column='commits',
        min_cnt_threshold=200)
    print(stability)