def changes(data=None):
    """Yield, per project, how the changes proposed by each planner are counted.

    For every project whose name is not contained in ``paths.bellw`` the
    bellwether data is split with ``CrossValidation.split(bellw, ways=2)``.
    On each split the Alves, Shatnawi, Oliveira and XTREE planners are run
    with the bellwether training fold against the last entry of
    ``paths.data``; XTREE is additionally run with the project's own earlier
    entries (``paths.data[:-1]``) as training data.  The changes returned by
    each planner are summarised with ``deltas_count``.

    Args:
        data: Mapping of project name to an object exposing ``.data`` and
            ``.bellw``.  It must provide ``iteritems()``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"xtree_local": [...], "xtree_bellw": [...],
        "alves": [...], "olive": [...], "shatw": [...]}}`` where every list
        holds one ``deltas_count`` result per cross-validation split.

    Note:
        Every planner is expected to return a ``(patched, changes)`` pair;
        only the ``changes`` half is used here.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        counts = {
            "xtree_local": [],
            "xtree_bellw": [],
            "alves": [],
            "olive": [],
            "shatw": []
        }
        res = {key: counts}

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        # Only the training half of each split is needed: nothing is
        # validated in this function, the changes are merely counted.
        for train_bellw, _ in CrossValidation.split(bellw, ways=2):
            _, changes_alves = alves(train_bellw, test_local)
            _, changes_shatw = shatnawi(train_bellw, test_local)
            _, changes_olive = oliveira(train_bellw, test_local)
            _, changes_xtree = xtree(train_bellw, test_local)
            _, changes_xtree_local = xtree(train_local, test_local)

            # How many changes does each planner recommend, per column?
            counts["alves"].append(
                deltas_count(test.columns, changes_alves))
            counts["olive"].append(
                deltas_count(test.columns, changes_olive))
            counts["shatw"].append(
                deltas_count(test.columns, changes_shatw))
            counts["xtree_bellw"].append(
                deltas_count(test.columns, changes_xtree))
            counts["xtree_local"].append(
                deltas_count(test.columns, changes_xtree_local))

        yield res
def transfer_lessons(data=None):
    """Yield, per project, prediction quality and impact of local vs. bellwether XTREE plans.

    For each project whose name is not contained in ``paths.bellw`` the
    bellwether data is split five ways.  On every split XTREE is run once
    with the project's own earlier entries (``paths.data[:-1]``) and once
    with the bellwether training fold; the validation fold and each patched
    result are passed to ``xgboost`` and the outcome is scored with
    ``impact``.  ``pred_stats`` on the unpatched last entry supplies the
    ``pd`` / ``pf`` values.

    Args:
        data: Mapping of project name to an object exposing ``.data`` and
            ``.bellw``; must provide ``iteritems()``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}`` with one value per cross-validation split.

    Note:
        A later definition in this file reuses the name
        ``transfer_lessons``; once the module has been loaded the name
        refers to that later function.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset
        if proj in paths.bellw:
            continue

        tag = proj[:6]
        scores = {"pd": [], "pf": [], "local": [], "bellw": []}
        res = {tag: scores}
        bellwether = list2dataframe(data[paths.bellw].data)

        for bellw_fold, holdout in CrossValidation.split(bellwether, ways=5):
            local_train = list2dataframe(paths.data[:-1])
            target = list2dataframe(paths.data[-1])

            local_plan = xtree(local_train, target)
            bellw_plan = xtree(bellw_fold, list2dataframe(paths.data))

            # Patches derived from local lessons.
            local_pred, _ = xgboost(holdout, local_plan)
            # Patches derived from the bellwether lessons.
            bellw_pred, _ = xgboost(holdout, bellw_plan)
            # Baseline predictions on the unpatched data.
            base_pred, base_distr = xgboost(holdout, target)

            quality = pred_stats(before=target[target.columns[-1]],
                                 after=base_pred,
                                 distr=base_distr)

            scores["pd"].append(quality[0])
            scores["pf"].append(quality[1])
            scores["local"].append(impact(target, local_pred))
            scores["bellw"].append(impact(target, bellw_pred))

        yield res
def transfer_lessons(data=None, projects=("ant", "ivy", "poi", "jedit")):
    """Yield, per selected project, the impact of each planner's patches.

    For every project named in ``projects`` the bellwether data is split
    five ways.  On each split the Alves, Shatnawi, Oliveira and XTREE
    planners are run with the bellwether training fold against the last
    entry of ``paths.data``; XTREE is additionally run with the project's
    own earlier entries (``paths.data[:-1]``).  The validation fold and each
    patched result are passed to ``xgboost`` and the predictions are scored
    with ``impact`` against the dataframe built from all of ``paths.data``.

    Args:
        data: Mapping of project name to an object exposing ``.data`` and
            ``.bellw``; must provide ``iteritems()``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.
        projects: Names of the projects to evaluate.  Defaults to the four
            names that used to be hard-coded in the body.

    Yields:
        dict: ``{proj[:6]: {"xtree_local": [...], "xtree_bellw": [...],
        "alves": [...], "olive": [...], "shatw": [...]}}`` with one
        ``impact`` value per cross-validation split.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Only the explicitly selected projects are evaluated.
        if proj not in projects:
            continue

        key = proj[:6]
        scores = {
            "xtree_local": [],
            "xtree_bellw": [],
            "alves": [],
            "olive": [],
            "shatw": []
        }
        res = {key: scores}

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            patched_alves = alves(train_bellw, test_local)
            patched_shatw = shatnawi(train_bellw, test_local)
            patched_olive = oliveira(train_bellw, test_local)
            patched_xtree = xtree(train_bellw, test_local)
            patched_xtree_local = xtree(train_local, test_local)

            # xgboost returns a (predictions, distribution) pair; only the
            # predictions are scored below.
            # How good are the patches from Alves?
            pred_alves, _ = xgboost(validation, patched_alves)
            # How good are the patches from Shatnawi?
            pred_shatw, _ = xgboost(validation, patched_shatw)
            # How good are the patches from Oliveira?
            pred_olive, _ = xgboost(validation, patched_olive)
            # How good are the patches from the bellwether XTREE?
            pred_xtree, _ = xgboost(validation, patched_xtree)
            # How good are the patches from the local XTREE?
            pred_xtree_local, _ = xgboost(validation, patched_xtree_local)

            scores["alves"].append(impact(test, pred_alves))
            scores["shatw"].append(impact(test, pred_shatw))
            scores["olive"].append(impact(test, pred_olive))
            scores["xtree_bellw"].append(impact(test, pred_xtree))
            scores["xtree_local"].append(impact(test, pred_xtree_local))

            # TODO: add a "fontana" planner and score it here (not yet
            # available).

        yield res
def __test_xtree(data):
    """Run ``xtree`` once on a project's data as a quick manual check.

    All but the last entry of ``data.data`` are passed as the training
    argument and the last entry as the test argument.  The result of
    ``xtree`` is discarded and nothing is returned.

    Args:
        data: Object exposing a sliceable ``.data`` sequence.
    """
    # NOTE(review): the entries are passed as-is, without list2dataframe;
    # confirm that xtree accepts them in this form.
    earlier_entries = data.data[:-1]
    last_entry = data.data[-1]
    xtree(earlier_entries, last_entry)