return data.iloc[1:] ans = augment(a, ignore_index=1, times=1, std=0.1, sample_size=1000, add_initial=False) #%% #dataset = gf.get_dataset() dataset = X_train.copy() length = len(dataset) new = pd.DataFrame() for i in range(length): row = dataset.iloc[[i]] row.insert(0, 'y', y_train.iloc[i]) ans = augment(row, ignore_index=0, times=1, std=0.1, sample_size=1000, add_initial=False) new = new.append(ans) #%% dataset = dataset.append(new) [X_train, X_test, y_train, y_test] = gf.divide(dataset) print(gf.run_xgboost(X_train, y_train, X_test, y_test))
#a,b=augmenter(X_train,y_train,sd=0.1,col=1)


def get_index_combo(data, num):
    """Return ``gf.subsets`` of the top-``num`` entries of ``data["accuracy"]``.

    Parameters
    ----------
    data : mapping or DataFrame with an ``"accuracy"`` entry
        Passed as ``data["accuracy"]`` to ``gf.get_top_n``.
    num : int
        Forwarded to ``gf.get_top_n``; presumably the number of best-scoring
        entries to keep -- confirm against ``global_functions``.

    Returns
    -------
    Whatever ``gf.subsets`` returns for the selected entries; the script below
    iterates over it and over each of its elements.
    """
    top_entries = gf.get_top_n(data["accuracy"], num)
    return gf.subsets(top_entries)


index_list = get_index_combo(data, 7)

# One result row per combination.  The rows are gathered in a list and turned
# into a DataFrame once, because DataFrame.append / Series.append were removed
# in pandas 2.0.
result_rows = []
for i in index_list:
    print("Currently augmenting")
    print(i)

    # Start from the untouched training data and add one augmented copy per
    # column index in the current combination.
    x_parts = [X_train.copy()]
    y_parts = [y_train.copy()]
    for j in i:
        # creating augmented values
        Xtr, ytr = augmenter(X_train.copy(), y_train.copy(), sd=0.1, col=j)
        x_parts.append(Xtr)  # assumes augmenter returns a DataFrame -- TODO confirm
        y_parts.append(pd.Series(ytr))
    Xt = pd.concat(x_parts)
    yt = pd.concat(y_parts)

    # run_xgboost's result is used as a mapping ("accuracy" is read and
    # "combination" is added below).
    d = gf.run_xgboost(Xt, yt, X_test, y_test)
    d["combination"] = i
    result_rows.append(d)
    print(d["accuracy"])
    print("-------")

data1 = pd.DataFrame(result_rows)
data1.to_csv("data/data3.csv")
Xtr = gf.loadfile("Xtr")
Xte = gf.loadfile("Xte")
ytr = gf.loadfile("ytr")
yte = gf.loadfile("yte")

# `temp` is defined before this fragment; gf.convertstrtolist turns it into
# the iterable of column indices used below.
temp = gf.convertstrtolist(temp)

# Original training data plus one augmented copy per column index in `temp`.
# The pieces are concatenated once at the end: DataFrame.append and
# Series.append were removed in pandas 2.0.
x_parts = [Xtr.copy()]
y_parts = [ytr.copy()]
print("Currently augmenting")
for j in temp:
    # creating augmented values
    Xtr1, ytr1 = augmenter(Xtr.copy(), ytr.copy(), sd=0.1, col=j)
    x_parts.append(Xtr1)  # assumes augmenter returns a DataFrame -- TODO confirm
    y_parts.append(pd.Series(ytr1))
Xt = pd.concat(x_parts)
yt = pd.concat(y_parts)
print("Augmenting Finished")

d = gf.run_xgboost(Xt, yt, Xte, yte)
print(d)
# Output recorded from an earlier run (kept as a no-op string literal):
""" {'accuracy': 0.882377508399508, 'std': 0.012872278905955601, 'AUC': 0.7476636185875316} """

#%%
# Sweep column indices 0..149: augment the already-augmented set once more on
# that single index with sd=0.2 and record the metrics of each run.
sweep_rows = []
for i in range(150):
    print(i)
    Xt1, yt1 = augmenter(Xt.copy(), yt.copy(), 0.2, i)
    d = gf.run_xgboost(Xt1, yt1, Xte, yte)
    sweep_rows.append(d)

# One row per run; built once instead of calling the removed DataFrame.append
# on every iteration.
data = pd.DataFrame(sweep_rows)
@author: adityavyas """ import global_functions as gf import pandas as pd [X_train, X_test, y_train, y_test] = gf.get_dataset() def run150(df): for index in range(150): df = gf.add_gaussian_index(df, index) return df data = pd.DataFrame() runnable = X_train.copy() runnabletrain = y_train.copy() for i in range(1, 10): df = X_train.copy() for j in range(i): df = run150(df) runnable = pd.concat([runnable, df]) runnabletrain = pd.concat([runnabletrain, y_train.copy()]) frame = gf.run_xgboost(runnable, runnabletrain, X_test, y_test) data = data.append(frame, ignore_index=True) print(frame)