def run_reg_linear_reg_one_vs_all(dTrain, dTest):
    '''Run regularized linear regression for each "digit vs all" problem.

    For every digit 0..9 the train and test rows are obtained through
    getDataOneVsAll, a model is fitted on the features (1, d[1], d[2]) and a
    second one on the output of transform_t_set, and the errors returned by
    compute_Ein are printed.

    dTrain -- training rows; d[0] is used as the target, d[1] and d[2] as
              the two input features
    dTest  -- test rows with the same layout

    Nothing is returned; results are written to stdout.
    '''
    # Regularization strength. It used to be assigned and then ignored; it is
    # now handed to every fit, the same way run_reg_linear_reg_one_vs_one
    # passes its lambda as the third argument.
    lda = 1.0

    for digit in range(0, 10):
        # in sample
        t_set = [[[1, d[1], d[2]], d[0]]
                 for d in getDataOneVsAll(dTrain, digit)]
        # out of sample
        t_setout = [[[1, d[1], d[2]], d[0]]
                    for d in getDataOneVsAll(dTest, digit)]

        # in sample with no transform
        wlin, X0, y0 = linear_regression(len(t_set), t_set, lda)
        print('For %s vs all Ein = %s' % (digit, compute_Ein(wlin, X0, y0)))

        # out of sample with no transform: this call is only used to get the
        # X / y matrices of the test set, the error is evaluated with the
        # weights learned in sample (wlin).
        _, Xout, yout = linear_regression(len(t_setout), t_setout, lda)
        print('For %s vs all Eout = %s'
              % (digit, compute_Ein(wlin, Xout, yout)))

        # in sample with transform
        t_set_trans = transform_t_set(t_set)
        wtrans, _, _ = linear_regression(len(t_set_trans), t_set_trans, lda)

        # out of sample with transform (again only X / y are kept)
        t_setout_trans = transform_t_set(t_setout)
        _, xt, yt = linear_regression(len(t_setout_trans), t_setout_trans,
                                      lda)
        print('For %s vs all with transformation Eout = %s'
              % (digit, compute_Ein(wtrans, xt, yt)))
def run_reg_linear_reg_one_vs_all(dTrain, dTest):
    '''Run regularized linear regression for each "digit vs all" problem.

    For every digit 0..9 the train and test rows are obtained through
    getDataOneVsAll, a model is fitted on the features (1, d[1], d[2]) and a
    second one on the output of transform_t_set, and the errors returned by
    compute_Ein are printed.

    dTrain -- training rows; d[0] is used as the target, d[1] and d[2] as
              the two input features
    dTest  -- test rows with the same layout

    Nothing is returned; results are written to stdout.
    '''
    # Regularization strength. It used to be assigned and then ignored; it is
    # now handed to every fit, the same way run_reg_linear_reg_one_vs_one
    # passes its lambda as the third argument.
    lda = 1.0

    for digit in range(0, 10):
        # in sample
        t_set = [[[1, d[1], d[2]], d[0]]
                 for d in getDataOneVsAll(dTrain, digit)]
        # out of sample
        t_setout = [[[1, d[1], d[2]], d[0]]
                    for d in getDataOneVsAll(dTest, digit)]

        # in sample with no transform
        wlin, X0, y0 = linear_regression(len(t_set), t_set, lda)
        print('For %s vs all Ein = %s' % (digit, compute_Ein(wlin, X0, y0)))

        # out of sample with no transform: this call is only used to get the
        # X / y matrices of the test set, the error is evaluated with the
        # weights learned in sample (wlin).
        _, Xout, yout = linear_regression(len(t_setout), t_setout, lda)
        print('For %s vs all Eout = %s'
              % (digit, compute_Ein(wlin, Xout, yout)))

        # in sample with transform
        t_set_trans = transform_t_set(t_set)
        wtrans, _, _ = linear_regression(len(t_set_trans), t_set_trans, lda)

        # out of sample with transform (again only X / y are kept)
        t_setout_trans = transform_t_set(t_setout)
        _, xt, yt = linear_regression(len(t_setout_trans), t_setout_trans,
                                      lda)
        print('For %s vs all with transformation Eout = %s'
              % (digit, compute_Ein(wtrans, xt, yt)))
def run_linear_regression(N_samples, N_points):
    '''Repeat a linear regression experiment and print the average errors.

    N_samples -- number of independent experiments to run
    N_points  -- number of data points generated for each experiment

    Each experiment generates points with data(), draws a line with
    randomline(), labels the points with the resulting target function and
    fits linear_regression on them. The values of compute_Ein and
    compute_Eout are collected and handed to print_avg at the end.
    '''
    print('running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    in_sample_errors = []
    out_of_sample_errors = []
    for _ in range(N_samples):
        # Keep this order: points first, then the target line.
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        weights, X, y = linear_regression(N_points, training_set)
        in_sample_errors.append(compute_Ein(weights, X, y))
        out_of_sample_errors.append(compute_Eout(weights, target, N_points))

    print_avg('Ein', in_sample_errors)
    print_avg('Eout', out_of_sample_errors)
def run_linear_regression(N_samples, N_points):
    '''Repeat a linear regression experiment and print the average errors.

    N_samples -- number of independent experiments to run
    N_points  -- number of data points generated for each experiment

    Each experiment generates points with data(), draws a line with
    randomline(), labels the points with the resulting target function and
    fits linear_regression on them. The values of compute_Ein and
    compute_Eout are collected and handed to print_avg at the end.
    '''
    print('running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    in_sample_errors = []
    out_of_sample_errors = []
    for _ in range(N_samples):
        # Keep this order: points first, then the target line.
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        weights, X, y = linear_regression(N_points, training_set)
        in_sample_errors.append(compute_Ein(weights, X, y))
        out_of_sample_errors.append(compute_Eout(weights, target, N_points))

    print_avg('Ein', in_sample_errors)
    print_avg('Eout', out_of_sample_errors)
def evaluate_error(X_train, y_train, X_val, y_val, X_test, y_test):
    """Compare validation and test error for k = 3..7.

    For each k an unregularized linear regression is fitted on the first
    k + 1 columns of X_train; the same column slice of X_val and X_test is
    then predicted and scored with tools.cal_error.

    Prints the two error lists, the k with the smallest validation error,
    and the smallest test error together with its k. Returns nothing.
    """
    first_k = 3
    E_val = []
    E_test = []

    for k in range(first_k, 8):
        columns = slice(None, k + 1)

        # Fit on the training data restricted to the selected columns.
        w_lin = tools.linear_regression(X_train[:, columns], y_train)

        # Score the validation set, then the test set, with those weights.
        val_prediction = tools.predict(X_val[:, columns], w_lin)
        E_val.append(tools.cal_error(y_val, val_prediction))

        test_prediction = tools.predict(X_test[:, columns], w_lin)
        E_test.append(tools.cal_error(y_test, test_prediction))

    print(E_val)
    print(E_test)

    best_val_index = np.argmin(E_val)
    print('Smallest error on validation set is achieved when k = {}'.format(
        3 + best_val_index))

    best_test_index = np.argmin(E_test)
    print('Smallest error on test set is {}, achieved when k = {}'.format(
        E_test[best_test_index], 3 + best_test_index))
def run_reg_linear_reg_one_vs_one(dTrain, dTest):
    '''Run regularized linear regression on the 1 vs 5 problem.

    The experiment is executed twice, first with lambda = 0.01 and then with
    lambda = 1. For each lambda the in-sample and out-of-sample values of
    compute_Ein are printed, both for the features (1, d[1], d[2]) and for
    the output of transform_t_set.

    dTrain -- training rows; d[0] is used as the target
    dTest  -- test rows with the same layout
    '''
    lda1 = 0.01
    lda2 = 1

    # 1 vs 5, in sample
    t_set = []
    for d in getDataOneVsOne(dTrain, 1, 5):
        t_set.append([[1, d[1], d[2]], d[0]])

    # 1 vs 5, out of sample: one independent copy per lambda
    t_setout = []
    t_setout2 = []
    for d in getDataOneVsOne(dTest, 1, 5):
        t_setout.append([[1, d[1], d[2]], d[0]])
        t_setout2.append([[1, d[1], d[2]], d[0]])

    separator = '--------------------------------------------------'
    for lda, test_set in ((lda1, t_setout), (lda2, t_setout2)):
        print(separator)
        print('lambda is: %s' % (lda))

        # in sample with no transform
        w_raw, X_in, y_in = linear_regression(len(t_set), t_set, lda)
        print('For 1 vs 5 Ein = %s' % (compute_Ein(w_raw, X_in, y_in)))

        # out of sample with no transform (only X / y of this fit are used)
        _, X_out, y_out = linear_regression(len(test_set), test_set, lda)
        print('For 1 vs 5 Eout = %s' % (compute_Ein(w_raw, X_out, y_out)))

        # in sample with transform
        train_trans = transform_t_set(t_set)
        w_trans, X_trans, y_trans = linear_regression(
            len(train_trans), train_trans, lda)

        # out of sample with transform (only X / y of this fit are used)
        test_trans = transform_t_set(test_set)
        _, X_out_trans, y_out_trans = linear_regression(
            len(test_trans), test_trans, lda)

        print('For 1 vs 5 with transformation Ein = %s'
              % (compute_Ein(w_trans, X_trans, y_trans)))
        print('For 1 vs 5 with transformation Eout = %s'
              % (compute_Ein(w_trans, X_out_trans, y_out_trans)))
def run_reg_linear_reg_one_vs_one(dTrain, dTest):
    '''Run regularized linear regression on the 1 vs 5 problem.

    The experiment is executed twice, first with lambda = 0.01 and then with
    lambda = 1. For each lambda the in-sample and out-of-sample values of
    compute_Ein are printed, both for the features (1, d[1], d[2]) and for
    the output of transform_t_set.

    dTrain -- training rows; d[0] is used as the target
    dTest  -- test rows with the same layout
    '''
    lda1 = 0.01
    lda2 = 1

    # 1 vs 5, in sample
    t_set = []
    for d in getDataOneVsOne(dTrain, 1, 5):
        t_set.append([[1, d[1], d[2]], d[0]])

    # 1 vs 5, out of sample: one independent copy per lambda
    t_setout = []
    t_setout2 = []
    for d in getDataOneVsOne(dTest, 1, 5):
        t_setout.append([[1, d[1], d[2]], d[0]])
        t_setout2.append([[1, d[1], d[2]], d[0]])

    separator = '--------------------------------------------------'
    for lda, test_set in ((lda1, t_setout), (lda2, t_setout2)):
        print(separator)
        print('lambda is: %s' % (lda))

        # in sample with no transform
        w_raw, X_in, y_in = linear_regression(len(t_set), t_set, lda)
        print('For 1 vs 5 Ein = %s' % (compute_Ein(w_raw, X_in, y_in)))

        # out of sample with no transform (only X / y of this fit are used)
        _, X_out, y_out = linear_regression(len(test_set), test_set, lda)
        print('For 1 vs 5 Eout = %s' % (compute_Ein(w_raw, X_out, y_out)))

        # in sample with transform
        train_trans = transform_t_set(t_set)
        w_trans, X_trans, y_trans = linear_regression(
            len(train_trans), train_trans, lda)

        # out of sample with transform (only X / y of this fit are used)
        test_trans = transform_t_set(test_set)
        _, X_out_trans, y_out_trans = linear_regression(
            len(test_trans), test_trans, lda)

        print('For 1 vs 5 with transformation Ein = %s'
              % (compute_Ein(w_trans, X_trans, y_trans)))
        print('For 1 vs 5 with transformation Eout = %s'
              % (compute_Ein(w_trans, X_out_trans, y_out_trans)))
def run_nonlineartransformation(indata, outdata):
    '''Fit a linear regression on transformed data and explore weight decay.

    indata  -- training set; it is passed through transform_t_set before
               fitting
    outdata -- out-of-sample data passed to compute_Eout_nonlineartrans

    Sections -2- to -5- print the in-sample and out-of-sample errors without
    weight decay and for several decay exponents k. Section -6- scans k over
    range(-200, 200) and prints the k with the lowest out-of-sample error
    together with that error. Nothing is returned.
    '''
    N_points = len(indata)
    t_set_trans = transform_t_set(indata)
    wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)

    def decay_errors(k):
        # In-sample and out-of-sample error of the weights produced by
        # compute_weight_decay for exponent k.
        w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, k)
        ein = compute_Ein(w_decay, Xtrans, ytrans)
        eout = compute_Eout_nonlineartrans(w_decay, outdata)
        return ein, eout

    print('-2-')
    print('Linear regression on training set after non linear transformation:')
    Eintrans = compute_Ein(wtrans, Xtrans, ytrans)
    Eouttrans = compute_Eout_nonlineartrans(wtrans, outdata)
    print('in sample classification error: %s' % (Eintrans))
    print('out of sample classification error: %s' % (Eouttrans))

    print('-3-')
    print('Adding weight decay to linear regression with lambda = 10k and k = -3')
    Eintrans_decay, Eouttrans_decay = decay_errors(-3)
    print('in sample classification error:%s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-4-')
    print('Using now k = 3')
    Eintrans_decay, Eouttrans_decay = decay_errors(3)
    print('in sample classification error: %s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-5-')
    Ks = [2, 1, 0, -1, -2]
    print('searching the lowest out of sample classification error for the following k values.')
    print('k in (%s)' % (str(Ks)))
    for k in Ks:
        Eintrans_decay, Eouttrans_decay = decay_errors(k)
        print('K : %s' % (k))
        print('in sample classification error: %s' % (Eintrans_decay))
        print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-6-')
    print('searching the minimum out of sample classification error by varying k in the integer values.')
    mink = None
    minEout = float('inf')
    for k in range(-200, 200):
        _, Eout_decay = decay_errors(k)
        # Strict comparison: on ties the smallest k is kept.
        if Eout_decay < minEout:
            minEout = Eout_decay
            mink = k
    # Report the minimising k. The loop variable would always be 199 here,
    # which is what used to be printed.
    print('K: %s' % (mink))
    print('out of sample classification error: %s' % (minEout))
def run_nonlineartransformation(indata, outdata):
    '''Fit a linear regression on transformed data and explore weight decay.

    indata  -- training set; it is passed through transform_t_set before
               fitting
    outdata -- out-of-sample data passed to compute_Eout_nonlineartrans

    Sections -2- to -5- print the in-sample and out-of-sample errors without
    weight decay and for several decay exponents k. Section -6- scans k over
    range(-200, 200) and prints the k with the lowest out-of-sample error
    together with that error. Nothing is returned.
    '''
    N_points = len(indata)
    t_set_trans = transform_t_set(indata)
    wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)

    def decay_errors(k):
        # In-sample and out-of-sample error of the weights produced by
        # compute_weight_decay for exponent k.
        w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, k)
        ein = compute_Ein(w_decay, Xtrans, ytrans)
        eout = compute_Eout_nonlineartrans(w_decay, outdata)
        return ein, eout

    print('-2-')
    print('Linear regression on training set after non linear transformation:')
    Eintrans = compute_Ein(wtrans, Xtrans, ytrans)
    Eouttrans = compute_Eout_nonlineartrans(wtrans, outdata)
    print('in sample classification error: %s' % (Eintrans))
    print('out of sample classification error: %s' % (Eouttrans))

    print('-3-')
    print('Adding weight decay to linear regression with lambda = 10k and k = -3')
    Eintrans_decay, Eouttrans_decay = decay_errors(-3)
    print('in sample classification error:%s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-4-')
    print('Using now k = 3')
    Eintrans_decay, Eouttrans_decay = decay_errors(3)
    print('in sample classification error: %s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-5-')
    Ks = [2, 1, 0, -1, -2]
    print('searching the lowest out of sample classification error for the following k values.')
    print('k in (%s)' % (str(Ks)))
    for k in Ks:
        Eintrans_decay, Eouttrans_decay = decay_errors(k)
        print('K : %s' % (k))
        print('in sample classification error: %s' % (Eintrans_decay))
        print('out of sample classification error: %s' % (Eouttrans_decay))

    print('-6-')
    print('searching the minimum out of sample classification error by varying k in the integer values.')
    mink = None
    minEout = float('inf')
    for k in range(-200, 200):
        _, Eout_decay = decay_errors(k)
        # Strict comparison: on ties the smallest k is kept.
        if Eout_decay < minEout:
            minEout = Eout_decay
            mink = k
    # Report the minimising k. The loop variable would always be 199 here,
    # which is what used to be printed.
    print('K: %s' % (mink))
    print('out of sample classification error: %s' % (minEout))
def hsvt_ols(X1, X2, y1, t=0.99, rcond=1e-15, include_pre=True):
    """De-noise both donor matrices with hsvt, regress y1 on the de-noised
    X1, and return the predictions.

    X1, X2      -- donor matrices; each is de-noised at the rank reported by
                   approximate_rank(..., t=t)
    y1          -- regression target for the de-noised X1
    t           -- forwarded to approximate_rank
    rcond       -- forwarded to linear_regression
    include_pre -- when true the fitted values on X1 are concatenated in
                   front of the predictions for X2; otherwise only the X2
                   predictions are returned

    The two estimated ranks are always printed.
    """
    # find underlying ranks
    rank_pre = approximate_rank(X1, t=t)
    rank_post = approximate_rank(X2, t=t)
    print(rank_pre, rank_post)

    # de-noise donor matrices
    X1_denoised = hsvt(X1, rank=rank_pre)
    X2_denoised = hsvt(X2, rank=rank_post)

    # learn synthetic control via linear regression
    beta = linear_regression(X1_denoised, y1, rcond=rcond)

    # forecast counterfactuals
    y2h = X2_denoised.dot(beta).T
    if include_pre:
        yh = np.concatenate([X1_denoised.dot(beta).T, y2h])
    else:
        yh = y2h

    # Root mean squared gap between X1 and its de-noised version.
    # NOTE(review): computed but neither returned nor used.
    std = np.sqrt(np.mean((X1 - X1_denoised) ** 2))

    return yh
def hsvt_fit(controls, treated, T0, t=0.99, rcond=1e-15, include_pre=True,
             retbeta=True, verbose=False, combined=False):
    """Fit a regression of treated[:T0] on de-noised controls and predict.

    controls    -- 2-D array; columns [:T0] and [T0:] are split off and
                   transposed to form the two donor matrices
    treated     -- sequence whose first T0 entries are the regression target
    T0          -- split index
    t           -- forwarded to approximate_rank
    rcond       -- forwarded to linear_regression
    include_pre -- when true the fitted values for the first T0 rows are
                   concatenated in front of the later predictions
    retbeta     -- when true return (predictions, beta), else predictions
    verbose     -- print the estimated rank(s)
    combined    -- when true de-noise controls.T as a whole and split the
                   result at row T0; otherwise de-noise the two parts
                   separately, each at its own rank
    """
    y1 = treated[:T0]

    # Both modes start from the same transposed split of the controls.
    X1 = controls[:, :T0].T
    X2 = controls[:, T0:].T

    if combined:
        rank = approximate_rank(controls.T, t=t)
        X_hsvt = hsvt(controls.T, rank=rank)
        X1_hsvt, X2_hsvt = X_hsvt[:T0, :], X_hsvt[T0:, :]
        if verbose:
            print(rank)
    else:
        # find underlying ranks
        rank1 = approximate_rank(X1, t=t)
        rank2 = approximate_rank(X2, t=t)
        if verbose:
            print(rank1, rank2)

        # de-noise donor matrices
        X1_hsvt = hsvt(X1, rank=rank1)
        X2_hsvt = hsvt(X2, rank=rank2)

    # learn synthetic control via linear regression
    beta = linear_regression(X1_hsvt, y1, rcond=rcond)

    # forecast counterfactuals
    y2h = X2_hsvt.dot(beta).T
    if include_pre:
        yh = np.concatenate([X1_hsvt.dot(beta).T, y2h])
    else:
        yh = y2h

    # Root mean squared gap between X1 and its de-noised version.
    # NOTE(review): computed but neither returned nor used.
    std = np.sqrt(np.mean((X1 - X1_hsvt) ** 2))

    if retbeta:
        return yh, beta
    return yh
def run_lr_and_pla(N_samples, N_points):
    '''Seed PLA with linear regression weights and average its iterations.

    N_samples -- number of independent experiments to run
    N_points  -- number of data points generated for each experiment

    Each experiment builds a training set from fresh points and a fresh
    random line, fits linear_regression on it and passes the resulting
    weight vector to PLA. The iteration counts returned by PLA are handed
    to print_avg at the end.
    '''
    print('running Linear Regression on %s samples' % N_samples)
    print('Each samples has %s data points' % N_points)

    iteration_counts = []
    for _ in range(N_samples):
        # Keep this order: points first, then the target line.
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        start_weights, _X, _y = linear_regression(N_points, training_set)
        _w_pla, n_iterations = PLA(N_points, start_weights, target,
                                   training_set)
        iteration_counts.append(n_iterations)

    print_avg('Number of iterations', iteration_counts)
def run_lr_and_pla(N_samples, N_points):
    '''Seed PLA with linear regression weights and average its iterations.

    N_samples -- number of independent experiments to run
    N_points  -- number of data points generated for each experiment

    Each experiment builds a training set from fresh points and a fresh
    random line, fits linear_regression on it and passes the resulting
    weight vector to PLA. The iteration counts returned by PLA are handed
    to print_avg at the end.
    '''
    print('running Linear Regression on %s samples' % N_samples)
    print('Each samples has %s data points' % N_points)

    iteration_counts = []
    for _ in range(N_samples):
        # Keep this order: points first, then the target line.
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        start_weights, _X, _y = linear_regression(N_points, training_set)
        _w_pla, n_iterations = PLA(N_points, start_weights, target,
                                   training_set)
        iteration_counts.append(n_iterations)

    print_avg('Number of iterations', iteration_counts)
def run_validation(indata_train, indata_val, outdata):
    '''Print validation and out-of-sample error for k = 3..7.

    indata_train -- data the models are fitted on
    indata_val   -- data used for compute_Eval
    outdata      -- data used for compute_Eout_from_data

    For every k each data set is passed through transform_t_set(data, k).
    A linear regression is fitted on the transformed training set and its
    weights are scored on the transformed validation and out-of-sample sets.
    The work is done phase by phase (all k for one step before the next
    step starts) and the results are printed at the end.
    '''
    ks = range(3, 8)

    # t_set train with transformation
    train_sets = {}
    for k in ks:
        train_sets[k] = transform_t_set(indata_train, k)

    # linear regression (only the weights are kept)
    weights = {}
    for k in ks:
        w, _X, _y = linear_regression(len(train_sets[k]), train_sets[k])
        weights[k] = w

    # t_set validation
    val_sets = {}
    for k in ks:
        val_sets[k] = transform_t_set(indata_val, k)

    val_targets = {}
    val_inputs = {}
    for k in ks:
        val_targets[k] = target_vector(val_sets[k])
        val_inputs[k] = input_data_matrix(val_sets[k])

    # Eval
    e_val = {}
    for k in ks:
        e_val[k] = compute_Eval(weights[k], val_inputs[k], val_targets[k])

    # Eout
    out_sets = {}
    for k in ks:
        out_sets[k] = transform_t_set(outdata, k)

    e_out = {}
    for k in ks:
        e_out[k] = compute_Eout_from_data(weights[k], out_sets[k],
                                          len(out_sets[k]))

    for k in ks:
        print('Eval for k = %s is: %s' % (k, e_val[k]))
        print('Eout for k = %s is: %s' % (k, e_out[k]))
        print('')
def run_nonlinear_transformation(N_samples, N_points):
    '''Compare linear regression with and without the feature transform.

    N_samples -- number of independent experiments to average over
    N_points  -- number of points in each generated training set

    For every experiment:
      * generate a training set and its target function with generate_t_set
      * fit linear_regression on it and record compute_Ein
      * pass the set through transform_t_set, fit again and record
        compute_Ein of the transformed fit
      * build the hypothesis vector sign(dot(Xtrans, wtrans)) and, for each
        candidate g in 'a'..'e', record 1 - compute_avg_difference between
        the hypothesis and compute_g_vector
      * record compute_Eout_nonlineartrans for the transformed weights

    All recorded series are reported through print_avg.
    '''
    # (label passed to compute_g_vector, title passed to print_avg)
    candidates = (
        ('a', 'P of agreeing A'),
        ('b', 'P of agreeing B'),
        ('c', 'P of agreeing C'),
        ('d', 'P of agreeing D'),
        ('e', 'P of agreeing E'),
    )

    Ein_avg = []
    Eintrans_avg = []
    Eout_avg = []
    agreement = {}
    for label, _title in candidates:
        agreement[label] = []

    for _ in range(N_samples):
        t_set, f = generate_t_set(N_points)

        # plain linear regression on (1; x1; x2)
        wlin, X, y = linear_regression(N_points, t_set)
        Ein_avg.append(compute_Ein(wlin, X, y))

        # transform the training data into the following nonlinear feature
        # vector: (1; x1; x2; x1x2; x1^2; x2^2)
        t_set_trans = transform_t_set(t_set)
        wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)
        Eintrans_avg.append(compute_Ein(wtrans, Xtrans, ytrans))

        # agreement between the learned hypothesis and each candidate g
        h_vector = sign(dot(Xtrans, wtrans))
        for label, _title in candidates:
            g_vector = compute_g_vector(t_set_trans, label)
            difference = compute_avg_difference(h_vector, g_vector)
            agreement[label].append(1 - difference)

        Eout_avg.append(compute_Eout_nonlineartrans(wtrans, f, N_points))

    print_avg('Ein', Ein_avg)
    print_avg('Ein Transformed', Eintrans_avg)
    for label, title in candidates:
        print_avg(title, agreement[label])
    print_avg('Eout', Eout_avg)
def run_nonlinear_transformation(N_samples, N_points):
    '''Compare linear regression with and without the feature transform.

    N_samples -- number of independent experiments to average over
    N_points  -- number of points in each generated training set

    For every experiment:
      * generate a training set and its target function with generate_t_set
      * fit linear_regression on it and record compute_Ein
      * pass the set through transform_t_set, fit again and record
        compute_Ein of the transformed fit
      * build the hypothesis vector sign(dot(Xtrans, wtrans)) and, for each
        candidate g in 'a'..'e', record 1 - compute_avg_difference between
        the hypothesis and compute_g_vector
      * record compute_Eout_nonlineartrans for the transformed weights

    All recorded series are reported through print_avg.
    '''
    # (label passed to compute_g_vector, title passed to print_avg)
    candidates = (
        ('a', 'P of agreeing A'),
        ('b', 'P of agreeing B'),
        ('c', 'P of agreeing C'),
        ('d', 'P of agreeing D'),
        ('e', 'P of agreeing E'),
    )

    Ein_avg = []
    Eintrans_avg = []
    Eout_avg = []
    agreement = {}
    for label, _title in candidates:
        agreement[label] = []

    for _ in range(N_samples):
        t_set, f = generate_t_set(N_points)

        # plain linear regression on (1; x1; x2)
        wlin, X, y = linear_regression(N_points, t_set)
        Ein_avg.append(compute_Ein(wlin, X, y))

        # transform the training data into the following nonlinear feature
        # vector: (1; x1; x2; x1x2; x1^2; x2^2)
        t_set_trans = transform_t_set(t_set)
        wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)
        Eintrans_avg.append(compute_Ein(wtrans, Xtrans, ytrans))

        # agreement between the learned hypothesis and each candidate g
        h_vector = sign(dot(Xtrans, wtrans))
        for label, _title in candidates:
            g_vector = compute_g_vector(t_set_trans, label)
            difference = compute_avg_difference(h_vector, g_vector)
            agreement[label].append(1 - difference)

        Eout_avg.append(compute_Eout_nonlineartrans(wtrans, f, N_points))

    print_avg('Ein', Ein_avg)
    print_avg('Ein Transformed', Eintrans_avg)
    for label, title in candidates:
        print_avg(title, agreement[label])
    print_avg('Eout', Eout_avg)
def run_validation(indata_train, indata_val, outdata):
    '''Print validation and out-of-sample error for k = 3..7.

    indata_train -- data the models are fitted on
    indata_val   -- data used for compute_Eval
    outdata      -- data used for compute_Eout_from_data

    For every k each data set is passed through transform_t_set(data, k).
    A linear regression is fitted on the transformed training set and its
    weights are scored on the transformed validation and out-of-sample sets.
    The work is done phase by phase (all k for one step before the next
    step starts) and the results are printed at the end.
    '''
    ks = range(3, 8)

    # t_set train with transformation
    train_sets = {}
    for k in ks:
        train_sets[k] = transform_t_set(indata_train, k)

    # linear regression (only the weights are kept)
    weights = {}
    for k in ks:
        w, _X, _y = linear_regression(len(train_sets[k]), train_sets[k])
        weights[k] = w

    # t_set validation
    val_sets = {}
    for k in ks:
        val_sets[k] = transform_t_set(indata_val, k)

    val_targets = {}
    val_inputs = {}
    for k in ks:
        val_targets[k] = target_vector(val_sets[k])
        val_inputs[k] = input_data_matrix(val_sets[k])

    # Eval
    e_val = {}
    for k in ks:
        e_val[k] = compute_Eval(weights[k], val_inputs[k], val_targets[k])

    # Eout
    out_sets = {}
    for k in ks:
        out_sets[k] = transform_t_set(outdata, k)

    e_out = {}
    for k in ks:
        e_out[k] = compute_Eout_from_data(weights[k], out_sets[k],
                                          len(out_sets[k]))

    for k in ks:
        print('Eval for k = %s is: %s' % (k, e_val[k]))
        print('Eout for k = %s is: %s' % (k, e_out[k]))
        print('')