def main(args):
    """
    Run univariate regression for each of the available metrics.

    Every category in ``categories`` is combined with every project in
    ``projects``. Combinations for which ``get_metric_results`` returns
    ``None`` are skipped. When ``args.split_paradigm_score`` is set, the
    results are split with ``split_paradigm_score`` and ``univariate`` is
    called once per paradigm, with the paradigm appended to the project
    name; otherwise ``univariate`` is called once on the unsplit results.
    Afterwards the matching summarise function is called for the
    'univariate' directory.
    """
    warnings.filterwarnings("ignore",
                            category=RuntimeWarning,
                            module="sklearn")

    out_dir = (f'{args.folder}/split-regression/univariate/'
               if args.split_paradigm_score
               else f'{args.folder}/regression/univariate/')

    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                # Nothing recorded for this project/category pair.
                continue

            if not args.split_paradigm_score:
                univariate(results, out_dir, category, name, model, folds,
                           args)
                continue

            for paradigm, scores in split_paradigm_score(
                    results, args.folder, path, category):
                univariate(scores, out_dir, category, name + paradigm,
                           model, folds, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score
                 else summarise_directory)
    summarise(args, 'univariate')
def main(args):
    """
    Calculate statistics of the available fault data.

    For each category in ``categories`` an empty table is created and
    passed through ``fault_statistics`` once per project that has metric
    results (or once per project/paradigm pair when
    ``args.split_paradigm_score`` is set). A table that is not empty after
    all projects have been processed is handed to ``save_dataframe``.
    """
    out_dir = (f'{args.folder}/split-regression/fault-statistics/'
               if args.split_paradigm_score
               else f'{args.folder}/regression/fault-statistics/')

    column_names = [
        'name', 'rows', 'faulty_rows', 'non_faulty_rows', 'percentage_faulty'
    ]

    for category in categories:
        # A fresh table per category; fault_statistics returns the
        # updated table, which replaces the previous one.
        table = pd.DataFrame(columns=column_names)

        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue

            if not args.split_paradigm_score:
                table = fault_statistics(results, table, category, name)
                continue

            for paradigm, scores in split_paradigm_score(
                    results, args.folder, path, category):
                table = fault_statistics(scores, table, category,
                                         name + paradigm)

        if table.empty:
            continue
        save_dataframe(table, out_dir, category, False)
def main(args):
    """
    Run multivariate regression on all metrics together.

    For each category in ``categories`` an empty result table is created
    and passed through ``multivatiate`` once per project that has metric
    results (or once per project/paradigm pair when
    ``args.split_paradigm_score`` is set). A table that is not empty after
    all projects have been processed is handed to ``save_dataframe``.
    """
    warnings.filterwarnings("ignore",
                            category=ConvergenceWarning,
                            module="sklearn")

    out_dir = (f'{args.folder}/split-regression/multivariate/'
               if args.split_paradigm_score
               else f'{args.folder}/regression/multivariate/')

    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    column_names = [
        'name', 'tn', 'fp', 'fn', 'tp', 'r2', 'precision', 'recall', 'mcc'
    ]

    for category in categories:
        # A fresh table per category; multivatiate returns the updated
        # table, which replaces the previous one.
        table = pd.DataFrame(columns=column_names)

        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue

            if not args.split_paradigm_score:
                table = multivatiate(results, table, category, name, model,
                                     folds, args)
                continue

            for paradigm, scores in split_paradigm_score(
                    results, args.folder, path, category):
                table = multivatiate(scores, table, category,
                                     name + paradigm, model, folds, args)

        if table.empty:
            continue
        save_dataframe(table, out_dir, category, False)
def main(args):
    """
    Calculate statistics of the fault data per metric.

    Every category in ``categories`` is combined with every project in
    ``projects``; combinations without metric results are skipped. When
    ``args.split_paradigm_score`` is set, ``fault_metric_statistics`` is
    called once per paradigm returned by ``split_paradigm_score``, with the
    paradigm appended to the project name; otherwise it is called once on
    the unsplit results. Afterwards the matching summarise function is
    called for the 'fault-metric-statistics' directory.
    """
    out_dir = (f'{args.folder}/split-regression/fault-metric-statistics/'
               if args.split_paradigm_score
               else f'{args.folder}/regression/fault-metric-statistics/')

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue

            if not args.split_paradigm_score:
                fault_metric_statistics(out_dir, results, category, name,
                                        args)
                continue

            for paradigm, scores in split_paradigm_score(
                    results, args.folder, path, category):
                fault_metric_statistics(out_dir, scores, category,
                                        name + paradigm, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score
                 else summarise_directory)
    summarise(args, 'fault-metric-statistics',
              ['name', 'percentage_faulty', 'percentage_total_faults'])
def main(args):
    """
    Run univariate and multivariate baseline regression on a control metric.

    The control metric is the expected values with a small probability of
    being incorrect.

    Metric results are read from the 'baseline' folder for every
    category/project combination; combinations without results are
    skipped. Afterwards ``summarise_directory`` is called for the
    'multivariate-baseline-control' directory.
    """
    warnings.filterwarnings("ignore",
                            category=ConvergenceWarning,
                            module="sklearn")

    out_dir = f'{args.folder}/regression/multivariate-baseline-control/'
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            baseline = get_metric_results('baseline', path, category)
            if baseline is None:
                continue
            multivariate_baseline_control(baseline, out_dir, category, name,
                                          model, folds, args)

    summarise_directory(args, 'multivariate-baseline-control')
def main(args):
    """
    For each of the available metrics, run multivariate regression on the
    baseline metric set with one of the metrics added.

    For every category/project combination the baseline results are read
    from the 'baseline' folder and the metric results from ``args.folder``.
    If the metric results are missing and the category name contains
    'object', a second lookup is made under 'objectResultsBriand' (when the
    category name contains 'Briand') or 'objectResultsLandkroon'.
    ``multivariate_baseline`` is only called when both result sets are
    available. Afterwards ``summarise_directory`` is called for the
    'multivariate-baseline' directory.
    """
    warnings.filterwarnings("ignore",
                            category=ConvergenceWarning,
                            module="sklearn")

    out_dir = f'{args.folder}/regression/multivariate-baseline/'
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            # Both lookups always happen, baseline first.
            baseline = get_metric_results('baseline', path, category)
            metrics = get_metric_results(args.folder, path, category)

            if metrics is None and 'object' in category:
                # Retry under the base object category.
                if 'Briand' in category:
                    fallback_category = 'objectResultsBriand'
                else:
                    fallback_category = 'objectResultsLandkroon'
                metrics = get_metric_results(args.folder, path,
                                             fallback_category)

            if baseline is None or metrics is None:
                continue

            multivariate_baseline(baseline, metrics, out_dir, category, name,
                                  model, folds, args)

    summarise_directory(args, 'multivariate-baseline')
def main(args):
    """
    Calculate descriptive statistics of the metric values.

    Every category in ``categories`` is combined with every project in
    ``projects``; combinations without metric results are skipped. When
    ``args.split_paradigm_score`` is set, ``descriptive`` is called once
    per paradigm returned by ``split_paradigm_score``, with the paradigm
    appended to the project name; otherwise it is called once on the
    unsplit results. Afterwards the matching summarise function is called
    for the 'descriptive' directory.
    """
    out_dir = (f'{args.folder}/split-regression/descriptive/'
               if args.split_paradigm_score
               else f'{args.folder}/regression/descriptive/')

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue

            if not args.split_paradigm_score:
                descriptive(results, out_dir, category, name, args)
                continue

            for paradigm, scores in split_paradigm_score(
                    results, args.folder, path, category):
                descriptive(scores, out_dir, category, name + paradigm, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score
                 else summarise_directory)
    summarise(args, 'descriptive', ['name', 'mean', 'std'])