def transfer_lessons3():
    """Yield prediction and patch-impact results for each Apache project.

    Projects whose name is contained in their own ``paths.bellw`` entry are
    skipped.  For every other project the data is split with
    ``train_test_split(..., test_size=0.8)`` and the first returned part is
    used as the test frame.  That frame is patched with ``xtree.execute`` and
    both the patched and the unpatched frame are scored by ``rforest`` trained
    on the bellwether frame.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` is a list holding what
        ``pred_stats`` returned followed by what ``impact`` returned.
    """
    data = DefectData.get_all_projects()["Apache"]

    # Bellwether training frame.  It is built on the first eligible project
    # and reused for every later one (previously done by probing locals()).
    train = None

    for proj, paths in data.iteritems():
        if proj in paths.bellw:
            continue

        key = proj[:6]
        if train is None:
            train = list2dataframe(data[paths.bellw].data)

        # Only the first part of the split is used; the second is discarded.
        test, _ = train_test_split(list2dataframe(paths.data), test_size=0.8)

        patched = xtree.execute(train, test)
        patch_pred, _ = rforest(train, patched)  # How good are the patches
        base_pred, base_distr = rforest(train, test)  # How good are the predictions

        # pred_stats / impact receive single-element lists, as before.
        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=[base_pred],
                                   distr=[base_distr]))
        res[key].extend(impact(test, [patch_pred]))
        yield res
def transfer_lessons4():
    """Yield results comparing local and bellwether XTREE patches per project.

    Projects whose name is contained in their own ``paths.bellw`` entry are
    skipped.  For every other project, ``paths.data[-1]`` is the test frame
    and ``paths.data[:-1]`` the local training frame.  The test frame is
    patched twice with ``xtree.execute`` (once from the local frame, once
    from the bellwether frame) and every ``rforest`` model is trained on the
    bellwether frame.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` holds what ``pred_stats``
        returned for the unpatched test frame, then ``impact`` for the locally
        patched frame, then ``impact`` for the bellwether-patched frame.
    """
    data = DefectData.get_all_projects()["Apache"]

    # Bellwether training frame.  It is built on the first eligible project
    # and reused for every later one (previously done by probing locals()).
    train_bellw = None

    for proj, paths in data.iteritems():
        if proj in paths.bellw:
            continue

        key = proj[:6]
        if train_bellw is None:
            train_bellw = list2dataframe(data[paths.bellw].data)

        train_local = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])

        patched_local = xtree.execute(train_local, test)
        patched_bellw = xtree.execute(train_bellw, test)

        # How good are the patches (local, then bellwether)?
        pred_local, _ = rforest(train_bellw, patched_local)
        pred_bellw, _ = rforest(train_bellw, patched_bellw)
        # How good are the predictions on the unpatched data?
        pred_base, distr_base = rforest(train_bellw, test)

        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=pred_base,
                                   distr=distr_base))
        res[key].append(impact(test, pred_local))
        res[key].append(impact(test, pred_bellw))
        yield res
def changes(data=None):
    """Yield, per project, what each planner proposes to change.

    Projects whose name is contained in their own ``paths.bellw`` entry are
    skipped so that the bellwether is never used as a test set.  For each
    remaining project the bellwether frame is split two ways with
    ``CrossValidation.split``.  On every fold the Alves, Shatnawi, Oliveira
    and XTREE planners are run against the project's last data entry
    (``paths.data[-1]``), and XTREE is also run with the project's own
    earlier entries (``paths.data[:-1]``) as training data.

    Args:
        data: mapping of project name to a paths object exposing ``data`` and
            ``bellw``; defaults to ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {planner: [per-fold results]}}`` with planner keys
        ``"xtree_local"``, ``"xtree_bellw"``, ``"alves"``, ``"olive"`` and
        ``"shatw"``.  Each per-fold result is whatever
        ``deltas_count(test.columns, changes)`` returns.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        res = {
            key: {
                "xtree_local": [],
                "xtree_bellw": [],
                "alves": [],
                "olive": [],
                "shatw": [],
            }
        }
        counts = res[key]

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)  # only its column labels are used
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, _ in CrossValidation.split(bellw, ways=2):
            # Each planner returns (patched frame, changes); only the changes
            # are needed here.
            _, changes_alves = alves(train_bellw, test_local)
            _, changes_shatw = shatnawi(train_bellw, test_local)
            _, changes_olive = oliveira(train_bellw, test_local)
            _, changes_xtree = xtree(train_bellw, test_local)
            _, changes_xtree_local = xtree(train_local, test_local)

            counts["alves"].append(deltas_count(test.columns, changes_alves))
            counts["olive"].append(deltas_count(test.columns, changes_olive))
            counts["shatw"].append(deltas_count(test.columns, changes_shatw))
            counts["xtree_bellw"].append(
                deltas_count(test.columns, changes_xtree))
            counts["xtree_local"].append(
                deltas_count(test.columns, changes_xtree_local))

        yield res
def transfer_lessons(data=None):
    """Yield prediction quality and patch impact for each non-bellwether project.

    The bellwether frame is split five ways with ``CrossValidation.split``.
    On every fold, XTREE patches are produced from the project's own earlier
    data and from the bellwether training part, and ``xgboost`` trained on the
    fold's validation part scores the patched and unpatched frames.

    Args:
        data: mapping of project name to a paths object exposing ``data`` and
            ``bellw``; defaults to ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}`` with one entry per fold in every list.
    """
    # NOTE(review): another ``transfer_lessons`` appears later in this source;
    # if both live in the same module the later definition replaces this one.
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        pd_scores, pf_scores, local_gain, bellw_gain = [], [], [], []
        res = {key: {
            "pd": pd_scores,
            "pf": pf_scores,
            "local": local_gain,
            "bellw": bellw_gain}
        }

        bellw = list2dataframe(data[paths.bellw].data)

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            train_local = list2dataframe(paths.data[:-1])
            test = list2dataframe(paths.data[-1])

            patched_local = xtree(train_local, test)
            # The bellwether plan is applied to the frame built from all of
            # paths.data, not just the last entry.
            patched_bellw = xtree(train_bellw, list2dataframe(paths.data))

            # Patches from local lessons, then from bellwether lessons.
            pred_local, _ = xgboost(validation, patched_local)
            pred_bellw, _ = xgboost(validation, patched_bellw)

            # Plain predictions on the unpatched test frame.
            pred_qual, distr_qual = xgboost(validation, test)
            stats = pred_stats(before=test[test.columns[-1]],
                               after=pred_qual,
                               distr=distr_qual)

            pd_scores.append(stats[0])
            pf_scores.append(stats[1])
            local_gain.append(impact(test, pred_local))
            bellw_gain.append(impact(test, pred_bellw))

        yield res
def test_oracles(data=None):
    """Yield pd/pf scores of the random-forest and xgboost oracles per project.

    Projects whose name is contained in their own ``paths.bellw`` entry are
    skipped.  For every other project both learners are trained on
    ``paths.data[:-1]`` and evaluated on ``paths.data[-1]``.

    Args:
        data: mapping of project name to a paths object exposing ``data`` and
            ``bellw``; defaults to ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"pd_rf": [...], "pf_rf": [...], "pd_xg": [...],
        "pf_xg": [...]}}``; each list receives the first (pd) or second (pf)
        element of the matching ``pred_stats`` result.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        validate = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])
        actual = test[test.columns[-1]]

        # How good are the predictions?  Random forest first, then xgboost.
        pred_rf, distr_rf = rforest(validate, test)
        pred_xg, distr_xg = xgboost(validate, test)

        stats_rf = pred_stats(before=actual, after=pred_rf, distr=distr_rf)
        stats_xg = pred_stats(before=actual, after=pred_xg, distr=distr_xg)

        res = {
            key: {
                "pd_rf": [stats_rf[0]],
                "pf_rf": [stats_rf[1]],
                "pd_xg": [stats_xg[0]],
                "pf_xg": [stats_xg[1]],
            }
        }
        yield res
def transfer_lessons2(n_folds=1):
    """Print a per-project report line for every non-bellwether Apache project.

    A header row is printed first.  For each project whose name is not
    contained in its ``paths.bellw`` entry, the first four characters of the
    name are printed followed by a tab; ``pred_stats`` and ``impact`` are then
    called and their return values discarded.

    Args:
        n_folds: accepted but not used by this function.
    """
    apache = DefectData.get_all_projects()["Apache"]
    print("Name\tPd\tPf\tImprovement")

    for proj, paths in apache.iteritems():
        if proj in paths.bellw:
            continue

        print(proj[:4], end="\t")

        # Only the first part of the split is kept as the XTREE training
        # frame; the second part is never used.
        train, _ = train_test_split(list2dataframe(paths.data),
                                    test_size=0.8)

        # NOTE(review): raw ``paths.data`` entries (not data frames) are what
        # get passed to xtree.execute / rforest here -- confirm both accept them.
        raw_test = paths.data[-1]
        validation = paths.data[:-1]

        patched = xtree.execute(train, raw_test)
        test = list2dataframe(raw_test)

        # How good are the patches?
        pred, distr = rforest(validation, patched)
        # How good are the predictions?
        pred2, distr2 = rforest(validation, test)

        pred_stats(before=test[test.columns[-1]], after=pred2, distr=distr2)
        impact(test, pred)
def run_experiment():
    """Print, per project, the median per-metric result of each planner.

    Iterates over ``changes(data)`` for the Apache projects.  For every
    project key it prints the key and then one tab-separated row per metric:
    the row index, the metric name without its first character, and the
    integer-truncated medians (``np.median(..., axis=0)`` over the per-fold
    results) for ``xtree_local``, ``xtree_bellw``, ``olive``, ``alves`` and
    ``shatw``, in that order.

    Metric names are taken from the columns of the last data entry of the
    ``"ant"`` project.
    """
    data = DefectData.get_all_projects()["Apache"]
    metrics = list2dataframe(data["ant"].data[-1]).columns

    # Column order of the printed report.
    planners = ("xtree_local", "xtree_bellw", "olive", "alves", "shatw")

    for res in changes(data):
        for key, value in res.iteritems():
            print(key)
            medians = [np.median(value[name], axis=0) for name in planners]
            # zip stops at the shortest sequence, exactly as the explicit
            # six-way zip did.
            rows = zip(metrics, *medians)
            for n, (attr, x_local, x_bellw, olive, alves_, shatw) in \
                    enumerate(rows):
                print(n, attr[1:], int(x_local), int(x_bellw), int(olive),
                      int(alves_), int(shatw), sep="\t")
featureTot = 0 information_gain = [] for i in range(0, len(nz[0])): if (i != 0 and nz[0][i] != pre): for notappear in range(pre + 1, nz[0][i]): information_gain.append(0) ig = _calIg() information_gain.append(ig) pre = nz[0][i] classCnt = {} featureTot = 0 featureTot = featureTot + 1 yclass = y[nz[1][i]] if yclass not in classCnt: classCnt[yclass] = 1 else: classCnt[yclass] = classCnt[yclass] + 1 ig = _calIg() information_gain.append(ig) return np.asarray(information_gain) if __name__ == "__main__": data = DefectData.get_all_projects()["Apache"] test_data = list2dataframe(data["ant"].data) indep_var = test_data[test_data.columns[:-1]] depen_var = test_data[test_data.columns[-1]] information_gain(indep_var.values, depen_var.values) set_trace()
def transfer_lessons(data=None):
    """Yield, per selected project, the impact of each planner's patches.

    Only the projects ``ant``, ``ivy``, ``poi`` and ``jedit`` are processed.
    The bellwether frame is split five ways with ``CrossValidation.split``.
    On every fold each planner patches the project's last data entry, an
    ``xgboost`` model trained on the fold's validation part predicts on the
    patched frame, and ``impact`` is recorded for the prediction.

    Args:
        data: mapping of project name to a paths object exposing ``data`` and
            ``bellw``; defaults to ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {planner: [per-fold impact]}}`` with planner keys
        ``"xtree_local"``, ``"xtree_bellw"``, ``"alves"``, ``"olive"`` and
        ``"shatw"``.
    """
    # NOTE(review): another ``transfer_lessons`` appears earlier in this
    # source; if both live in the same module this definition replaces it.
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Restrict the experiment to a fixed set of projects.
        if proj not in ["ant", "ivy", "poi", "jedit"]:
            continue

        scores = {
            "xtree_local": [],
            "xtree_bellw": [],
            "alves": [],
            "olive": [],
            "shatw": [],
        }
        res = {proj[:6]: scores}

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            # (result key, planner, training frame).  The three phases below
            # each run in this order so the call sequence is unchanged.
            planners = (
                ("alves", alves, train_bellw),
                ("shatw", shatnawi, train_bellw),
                ("olive", oliveira, train_bellw),
                ("xtree_bellw", xtree, train_bellw),
                ("xtree_local", xtree, train_local),
            )

            # Phase 1: every planner patches the local test frame.
            patched = []
            for name, plan, train in planners:
                patched.append((name, plan(train, test_local)))

            # Phase 2: how good are the patches from each planner?
            predicted = []
            for name, frame in patched:
                pred, _ = xgboost(validation, frame)
                predicted.append((name, pred))

            # Phase 3: record the impact of each planner for this fold.
            for name, pred in predicted:
                scores[name].append(impact(test, pred))

        yield res
def run_all_tests():
    """Run ``__test_alves`` against the Apache ``"ant"`` project data."""
    data = DefectData.get_all_projects()["Apache"]["ant"]
    __test_alves(data)