def test_ridgecv_sample_weight():
    """RidgeCV with sample weights must agree with a GridSearchCV over Ridge."""
    rng = np.random.RandomState(0)
    candidate_alphas = (0.1, 1.0, 10.0)
    problem_shapes = ((6, 5), (5, 10))

    # Different algorithms are used for n_samples > n_features and for the
    # opposite case, so both shapes are exercised.
    for n_samples, n_features in problem_shapes:
        # Draw order (y, X, weights) is kept so the random stream is unchanged.
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        weights = 1.0 + rng.rand(n_samples)

        folds = KFold(5)
        ridgecv = RidgeCV(alphas=candidate_alphas, cv=folds)
        ridgecv.fit(X, y, sample_weight=weights)

        # Reference result: run the same search through GridSearchCV directly.
        grid_search = GridSearchCV(Ridge(), {'alpha': candidate_alphas}, cv=folds)
        grid_search.fit(X, y, sample_weight=weights)
        reference = grid_search.best_estimator_

        assert_equal(ridgecv.alpha_, reference.alpha)
        assert_array_almost_equal(ridgecv.coef_, reference.coef_)
def test_ridge_shapes():
    """Check coef_ and intercept_ shapes for 1-D, single-column and 2-column targets."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 10
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)

    # (target, expected coef_ shape, expected intercept_ shape)
    cases = (
        (y, (n_features, ), ()),
        (y[:, np.newaxis], (1, n_features), (1, )),
        (np.c_[y, 1 + y], (2, n_features), (2, )),
    )

    ridge = Ridge()
    for target, coef_shape, intercept_shape in cases:
        ridge.fit(X, target)
        assert_equal(ridge.coef_.shape, coef_shape)
        assert_equal(ridge.intercept_.shape, intercept_shape)
def test_toy_ridge_object():
    """Fit an unregularised Ridge on a two-sample toy problem.

    Checks the predictions for a 1-D target, and that ``coef_`` /
    ``intercept_`` change rank and type when the target becomes 2-D.

    TODO: test also n_samples > n_features
    """
    X = np.array([[1], [2]])
    Y = np.array([1, 2])
    clf = Ridge(alpha=0.0)
    clf.fit(X, Y)
    X_test = [[1], [2], [3], [4]]
    assert_almost_equal(clf.predict(X_test), [1., 2, 3, 4])
    assert_equal(len(clf.coef_.shape), 1)
    assert_equal(type(clf.intercept_), np.float64)

    # Same data with the target duplicated into two columns.
    Y = np.vstack((Y, Y)).T
    clf.fit(X, Y)
    assert_equal(len(clf.coef_.shape), 2)
    assert_equal(type(clf.intercept_), np.ndarray)
def test_raises_value_error_if_sample_weights_greater_than_1d():
    """Sample weights must be either scalar or 1D"""
    expected_message = "Sample weights must be 1D array or scalar"
    rng = np.random.RandomState(42)

    for n_samples, n_features in ((2, 3), (3, 2)):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        weights_1d = rng.randn(n_samples) ** 2 + 1

        ridge = Ridge(alpha=1)

        # make sure the "OK" sample weights actually work
        for accepted in (weights_1d, 1., 2.):
            ridge.fit(X, y, accepted)

        # Column-vector and row-vector weights are both 2-D and must be refused.
        for rejected in (weights_1d[:, np.newaxis], weights_1d[np.newaxis, :]):
            assert_raise_message(ValueError, expected_message,
                                 ridge.fit, X, y, rejected)
rcv.fit(X, y)
#print('rcv score = ', rcv.score(X,y));
print('alpha selected from cv is ', rcv.alpha_)

# Construct an estimator using the best alpha and rank features
from sklearn.cross_validation import cross_val_score, ShuffleSplit


def rankfeatures(X, Y, rf, names):
    """Score every column of X on its own and return the ranking.

    For each column, ``rf`` is cross-validated (R^2 scoring) on that single
    feature with a fresh ``ShuffleSplit(len(X), 20, .2)``.  Returns a list of
    ``(mean_score, name)`` tuples sorted in ascending order.
    """
    ranked = [
        (np.mean(cross_val_score(rf, X[:, col:col + 1], Y, scoring="r2",
                                 cv=ShuffleSplit(len(X), 20, .2))),
         names[col])
        for col in range(X.shape[1])
    ]
    ranked.sort()
    return ranked


rf = Ridge(alpha=rcv.alpha_)
scores = rankfeatures(X, y, rf, features)
plotscores(scores, rf)


# Model Selection
def plotfit(model, Xtest, ytest, c='red', title='Fit model'):
    """Plot smoothed predictions of ``model`` on ``Xtest`` against ``ytest``.

    Predictions are interpolated onto 200 evenly spaced points with
    ``spline`` and drawn as a line in colour ``c``; the true values are
    drawn as black scatter points at sample positions 1..len(ytest).
    """
    sample_idx = np.arange(1, len(ytest) + 1)
    predicted = np.array(model.predict(Xtest))
    dense_idx = np.linspace(sample_idx.min(), sample_idx.max(), 200)
    smoothed = spline(sample_idx, predicted, dense_idx)

    plt.plot(dense_idx, smoothed, color=c, linewidth=3)
    plt.scatter(sample_idx, ytest, color='black')
    plt.xlabel('Samples')
    plt.ylabel('Returns')
    plt.title(title)
'instance': SGDRegressor(penalty='elasticnet', alpha=0.01, l1_ratio=0.25, fit_intercept=True, tol=1e-4), 'complexity_label': 'non-zero coefficients', 'complexity_computer': lambda clf: np.count_nonzero(clf.coef_)}, {'name': 'RandomForest', 'instance': RandomForestRegressor(n_estimators=100), 'complexity_label': 'estimators', 'complexity_computer': lambda clf: clf.n_estimators}, {'name': 'SVR', 'instance': SVR(kernel='rbf'), 'complexity_label': 'support vectors', 'complexity_computer': lambda clf: len(clf.support_vectors_)}, ] } benchmark(configuration) # benchmark n_features influence on prediction speed percentile = 90 percentiles = n_feature_influence({'ridge': Ridge()}, configuration['n_train'], configuration['n_test'], [100, 250, 500], percentile) plot_n_features_influence(percentiles, percentile) # benchmark throughput throughputs = benchmark_throughputs(configuration) plot_benchmark_throughput(throughputs, configuration) stop_time = time.time() print("example run in %.2fs" % (stop_time - start_time))
###########################################################################
#
# REGRESSION METHODS BENCHMARK
#
# Estimators are imported from the public scikit-learn packages; the private
# submodule paths (sklearn.ensemble.forest, sklearn.linear_model.ridge) are
# not part of the supported API.
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
#from sklearn.linear_model.stochastic_gradient import SGDRegressor

# (estimator, label) pairs evaluated below.
#(SVR(gamma='scale', C=1.0, epsilon=0.2),"SVR2"),
models = [
    (SVR(kernel='linear', degree=3), "SVR"),
    (RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100), "RFR"),
    (Ridge(alpha=1.0), "RIDGE"),
    (Lasso(alpha=0.1), "LASSO"),
    (ElasticNet(alpha=1.0), "ElasiticNet"),
]
#clf_sgd = SGDRegressor(max_iter=5)

#
# PCA DATA
X_train, X_test, y_train, y_test = train_test_split(
    features_X, y, test_size=0.2, random_state=42)

# Fit each estimator, predict on the held-out split and plot the outcome.
for estimator, label in models:
    yy, err = compute_predict(estimator, label, X_train, X_test, y_train, y_test)
    plotRegressionModelResults(label, y_test, yy, err)

############################################################################
#
# XGBOOST TESTS
#
# 10-fold cross-validation of a default Ridge regressor on the Boston data,
# reporting the mean negative MSE across folds.
from sklearn import datasets
# Public import path; sklearn.linear_model.ridge is a private submodule.
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold, cross_val_score

# NOTE(review): load_boston is not available in recent scikit-learn
# releases -- confirm the pinned version before relying on this script.
boston = datasets.load_boston()

score = cross_val_score(
    estimator=Ridge(),
    X=boston.data,
    y=boston.target,
    cv=KFold(n_splits=10),
    scoring='neg_mean_squared_error',
)
print(score.mean())
def test_ridge_sparse_svd():
    """Fitting Ridge(solver='svd') on a sparse CSC matrix must raise TypeError."""
    # Draw order (design matrix, then targets) is kept for the shared rng.
    dense_design = rng.rand(100, 10)
    targets = rng.rand(100)
    sparse_design = sp.csc_matrix(dense_design)

    assert_raises(TypeError, Ridge(solver='svd').fit, sparse_design, targets)
def _test_ridge_loo(filter_):
    """Check efficient leave-one-out ridge against brute force and RidgeCV.

    ``filter_`` is applied to ``X_diabetes`` before every estimator-level
    ``fit``/``predict`` call, so the same checks can run on dense or sparse
    input.  Returns a one-element list holding the alpha picked by
    ``_RidgeGCV``.
    """
    n_samples = X_diabetes.shape[0]
    sample_ids = np.arange(n_samples)

    gcv = _RidgeGCV(fit_intercept=False)
    ridge = Ridge(alpha=1.0, fit_intercept=False)

    # generalized cross-validation (efficient leave-one-out)
    eigen_decomp = gcv._pre_compute(X_diabetes, y_diabetes)
    errors, _ = gcv._errors(1.0, y_diabetes, *eigen_decomp)
    values, _ = gcv._values(1.0, y_diabetes, *eigen_decomp)

    # brute-force leave-one-out: remove one example at a time
    brute_errors, brute_values = [], []
    for held_out in range(n_samples):
        keep = sample_ids != held_out
        ridge.fit(X_diabetes[keep], y_diabetes[keep])
        prediction = ridge.predict([X_diabetes[held_out]])[0]
        brute_errors.append((y_diabetes[held_out] - prediction)**2)
        brute_values.append(prediction)

    # check that efficient and brute-force LOO give same results
    assert_almost_equal(errors, brute_errors)
    assert_almost_equal(values, brute_values)

    # generalized cross-validation (efficient leave-one-out,
    # SVD variation)
    svd_decomp = gcv._pre_compute_svd(X_diabetes, y_diabetes)
    svd_errors, _ = gcv._errors_svd(ridge.alpha, y_diabetes, *svd_decomp)
    svd_values, _ = gcv._values_svd(ridge.alpha, y_diabetes, *svd_decomp)

    # check that efficient and SVD efficient LOO give same results
    assert_almost_equal(errors, svd_errors)
    assert_almost_equal(values, svd_values)

    # check best alpha
    gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = gcv.alpha_

    # check that we get same best alpha with custom loss_func
    loss_scorer = make_scorer(mean_squared_error, greater_is_better=False)
    with_loss = RidgeCV(fit_intercept=False, scoring=loss_scorer)
    ignore_warnings(with_loss.fit)(filter_(X_diabetes), y_diabetes)
    assert_equal(with_loss.alpha_, alpha_)

    # check that we get same best alpha with custom score_func
    def negated_mse(x, y):
        return -mean_squared_error(x, y)

    with_score = RidgeCV(fit_intercept=False, scoring=make_scorer(negated_mse))
    ignore_warnings(with_score.fit)(filter_(X_diabetes), y_diabetes)
    assert_equal(with_score.alpha_, alpha_)

    # check that we get same best alpha with a scorer
    named_scorer = get_scorer('mean_squared_error')
    with_named = RidgeCV(fit_intercept=False, scoring=named_scorer)
    with_named.fit(filter_(X_diabetes), y_diabetes)
    assert_equal(with_named.alpha_, alpha_)

    # check that we get same best alpha with sample weights
    gcv.fit(filter_(X_diabetes), y_diabetes,
            sample_weight=np.ones(n_samples))
    assert_equal(gcv.alpha_, alpha_)

    # simulate several responses
    stacked_targets = np.vstack((y_diabetes, y_diabetes)).T

    gcv.fit(filter_(X_diabetes), stacked_targets)
    multi_pred = gcv.predict(filter_(X_diabetes))
    gcv.fit(filter_(X_diabetes), y_diabetes)
    single_pred = gcv.predict(filter_(X_diabetes))

    assert_array_almost_equal(np.vstack((single_pred, single_pred)).T,
                              multi_pred, decimal=5)

    return [alpha_]
from helper_functions import create_X, create_y_train, train_model, predict, score >>>>>>> 0e453a6a82a8c1a46c61f3419a174391e7c7affd train = pd.read_csv('data/Train.csv', parse_dates=['saledate']) test = pd.read_csv('data/Test.csv', parse_dates=['saledate']) X_train = create_X(train) X_test = create_X(test) y_train = create_y_train(train) <<<<<<< HEAD X_train_normalized, X_test_normalized = normalize_X(X_train, X_test) model_linear = train_model(X_train, y_train, LinearRegression()) model_ridge = train_model(X_train_normalized, y_train, Ridge()) model_lasso = train_model(X_train_normalized, y_train, Lasso(alpha=0.00005, max_iter=120000)) submit_linear = predict(model_linear, test, X_test, 'model_lin') submit_ridge = predict(model_ridge, test, X_test_normalized, 'model_rid') submit_lasso = predict(model_lasso, test, X_test_normalized, 'model_las') y_test = pd.read_csv('data/do_not_open/test_soln.csv') print('Linear: ', score(submit_linear, y_test), '; Ridge: ', score(submit_ridge, y_test), '; Lasso: ', score(submit_lasso, y_test)) # Linear: 0.40826129534246886 ; Ridge: 0.40822991882415727 ; Lasso: 0.40834486305959367 # Pick Ridge ======= model = train_model(X_train, y_train) submit = predict(model, test, X_test, 'model_1')
NAIVE_BAYS = GaussianNB()
K_N_N = KNeighborsClassifier()
SUPPORT_VECTOR = svm.SVC(kernel="linear")

# Ensemble classifiers
RANDOM_FOREST = RandomForestClassifier(n_estimators=100)
GRADIENT_BOOST_CL = GradientBoostingClassifier(n_estimators=100)
ADA_BOOST = AdaBoostClassifier(n_estimators=100)
EXTRA_TREE = ExtraTreesClassifier(n_estimators=100)

# Regressors
GRADIENT_BOOST_RG = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
LINEAR_RG = LinearRegression()
RIDGE_RG = Ridge()
LASSO_RG = Lasso()
SVR_RG = SVR()


def getClassifierMap():
    # Maps each classifier name to the module-level estimator instance of the
    # same name.
    # NOTE(review): the visible part of this function ends after the dict is
    # built; any remaining statements are outside the reviewed chunk.
    CLASSIFIER_MAP = {
        "DECISION_TREE": DECISION_TREE,
        "LOGISTIC_REGRESSION": LOGISTIC_REGRESSION,
        "NAIVE_BAYS": NAIVE_BAYS,
        "K_N_N": K_N_N,
        "SUPPORT_VECTOR": SUPPORT_VECTOR,
        "RANDOM_FOREST": RANDOM_FOREST,
        "GRADIENT_BOOST": GRADIENT_BOOST_CL,
        # Previously pointed at GRADIENT_BOOST_CL, so requesting AdaBoost
        # silently returned the gradient-boosting classifier.
        "ADA_BOOST": ADA_BOOST,
        "EXTRA_TREE": EXTRA_TREE
    }
krrs_trigpredictions.append(pred_y) #print(krrs_trigpredictions) for lst in krrs_trigpredictions: print(compute_MSE(np.array(lst).reshape(200, 1), test_y.reshape(200, 1))) berr_polypredictions = [] for deg in [1, 2, 4, 6]: train_x_new = [] for i in range(train_x.shape[0]): train_x_new.append([train_x[i]**j for j in range(deg + 1)]) test_x_new = [] for i in range(test_x.shape[0]): test_x_new.append([test_x[i]**j for j in range(deg + 1)]) clf = Ridge() clf.fit(train_x_new, train_y) pred_y = clf.predict(test_x_new) #print(pred_y) berr_polypredictions.append(pred_y) #print(berr_polypredictions) for lst in berr_polypredictions: print(compute_MSE(np.array(lst).reshape(200, 1), test_y.reshape(200, 1))) def trig_expansion(train_x, degree): res = [1] d = 0.5 for de in range(degree): res.append(np.sin(de * d * train_x))
return min([ drop_down_options.index(correct_option) for correct_option in correct_options ]) + 1 #check how far is that index in the dropdown list and return that value def average_lowest_correct(list_of_trues, list_of_preds): length = len(list_of_trues) # number of data points return np.mean([ lowest_correct(list(list_of_trues.iloc[i]), list(list_of_preds[i])) for i in range(length) ]) # Top four models selected formatted as a pipteline to be used for gridsearch model_1 = Pipeline([('md1', MultiOutputRegressor(Ridge()))]) model_2 = Pipeline([('md2', MultiOutputRegressor(KernelRidge()))]) model_3 = Pipeline([('md3', MultiOutputRegressor(LinearSVR()))]) model_4 = Pipeline([('md4', MultiOutputRegressor(SGDRegressor()))]) # Dictionary of all the variable hyperparameters for all four models. Except of the SGD regressor, the hyperparameter list is complete. model_params = { 'Multi_Ridge': { 'model': model_1, 'params': { 'md1__estimator__normalize': [True, False], 'md1__estimator__fit_intercept': [True, False], 'md1__estimator__solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'], 'md1__estimator__alpha': [i for i in range(10, 110, 10)], 'md1__estimator__max_iter': [1000, 2000, 3000]
}, { 'name': 'RandomForest', 'instance': RandomForestRegressor(), 'complexity_label': 'estimators', 'complexity_computer': lambda clf: clf.n_estimators }, { 'name': 'SVR', 'instance': SVR(kernel='rbf'), 'complexity_label': 'support vectors', 'complexity_computer': lambda clf: len(clf.support_vectors_) }, ] } benchmark(configuration) # benchmark n_features influence on prediction speed percentile = 90 percentiles = n_feature_influence({'ridge': Ridge()}, configuration['n_train'], configuration['n_test'], [100, 250, 500], percentile) plot_n_features_influence(percentiles, percentile) # benchmark throughput throughputs = benchmark_throughputs(configuration) plot_benchmark_throughput(throughputs, configuration) stop_time = time.time() print("example run in %.2fs" % (stop_time - start_time))
def exercise_one():
    """Fit ridge regression by hand-written gradient descent and compare to sklearn.

    Downloads the Hitters data set, standardises features and target, runs
    gradient descent on the ridge objective, fits ``Ridge(solver='saga')`` on
    the same training split, and plots objective traces and coefficient
    comparisons.  The objective gap between the two solutions is printed.
    Takes no arguments and returns nothing.
    """
    hitters = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Hitters.csv', sep=',', header=0).dropna()
    # NOTE(review): X is built from the whole frame, so it still contains the
    # Salary column that is also used as the target -- confirm this is intended.
    X = pd.get_dummies(hitters, drop_first=True)
    y = hitters.Salary

    # standardize and split the data
    x_scalar = StandardScaler().fit(X)
    y_scalar = StandardScaler().fit(y.values.reshape(-1, 1))
    X = x_scalar.transform(X)
    y = y_scalar.transform(y.values.reshape((-1, 1))).reshape((-1))
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    def objective(x, y, beta, l):
        # Ridge objective (1/n) * (||y - x.beta||^2 + l * ||beta||^2).
        # This is the function whose gradient compute_grad returns; the
        # previous form took the norm of the *squared* residuals and used a
        # 2/n factor, so the plotted values did not match what was minimised.
        return (norm(y - x @ beta) ** 2 + l * norm(beta) ** 2) / len(y)

    def compute_grad(x, y, beta, l):
        # Gradient of objective() with respect to beta.
        return 2 / len(y) * (x.T @ x @ beta + l * beta - x.T @ y)

    def grad_descent(x, y, l, eta, max_iter):
        # Fixed-step gradient descent starting from beta = 0; returns the
        # objective value after every step and the final coefficients.
        beta = np.zeros(x.shape[1])
        trace = []
        for _ in range(max_iter):
            beta = beta - eta * compute_grad(x, y, beta, l)
            trace.append(objective(x, y, beta, l))
        return trace, beta

    fx, bt = grad_descent(X_train, y_train, l=0.1, eta=0.1, max_iter=1000)

    # compare with sklearn's ridge
    clf = Ridge(alpha=0.1, max_iter=1000, solver='saga').fit(X_train, y_train)

    # plot the object function vs iteration number
    plt.plot(fx)
    plt.title("Objective function rapidly decreases")
    plt.xlabel("iterations (t)")
    plt.ylabel(r'$F(\beta)$')

    # difference in objective values between sklearn's and my own descent
    # (previously computed and discarded)
    gap = objective(X_train, y_train, bt, 0.1) - objective(X_train, y_train, clf.coef_, 0.1)
    print('objective gap (gradient descent - sklearn):', gap)

    # visualize the comparison
    def visualize(betas, sklb):
        # Left panel: both coefficient vectors side by side; right panel:
        # their element-wise difference.
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4.5))
        positions = np.arange(len(betas))

        plt.subplot(ax1)
        plt.bar(positions, betas, width=.5)
        plt.bar(positions + 0.5, sklb, width=.5)
        plt.ylabel(r'$\beta$')
        plt.xlabel(r'$\beta_i$')
        plt.axis([0, 20, -.05, max(max(betas), max(sklb)) + 0.1])
        plt.xticks(np.arange(0, 20, step=2))

        plt.subplot(ax2)
        plt.bar(positions, (betas - sklb))
        plt.ylabel(r'$\Delta \, \beta$')
        plt.xlabel(r'$\beta_i$')
        plt.xticks(np.arange(0, 20, step=2))

        st = plt.suptitle(r'Resulting $\beta$ values are quite similar', fontsize=16)
        st.set_y(.95)
        fig.subplots_adjust(wspace=.3, top=.85)

    visualize(bt, clf.coef_)

    # Repeat the descent for ten step sizes 10**-n, n evenly spaced in [1, 5].
    runs = [(1 / 10**n, grad_descent(X_train, y_train, l=0.1, eta=1 / 10**n, max_iter=1000))
            for n in np.linspace(1, 5, 10)]

    plt.clf()
    for _, (trace, _) in runs:
        plt.plot(trace)
    plt.title("Objective values per iteration")

    # Keep the coefficients of the run that reached the lowest objective.
    best_idx = np.argmin([min(r[1][0]) for r in runs])
    bt = runs[best_idx][1][1]
    visualize(bt, clf.coef_)

    # difference in objective values between sklearn's and my own descent
    gap = objective(X_train, y_train, bt, 0.1) - objective(X_train, y_train, clf.coef_, 0.1)
    print('objective gap (best step size - sklearn):', gap)
Xtest = X Utrain = U Utest = U else: raise Exception('Train size must be in (0,1]') dad = DaDControl() dad.learn(Xtrain, Utrain, learner, iters, Xtest, Utest, verbose=False) print(' DaD (iters:{:d}). Initial Err: {:.4g}, Best: {:.4g}'.format( iters, dad.initial_test_err, dad.min_test_error)) return dad if __name__ == "__main__": print('Defining the learner') learner = DynamicsControlDeltaWrapper(Ridge(alpha=1e-4, fit_intercept=True)) NUM_EPISODES = 50 T = 50 print('Generating train data') policy = RandomLinearPolicy(SYSTEM.state_dim(), SYSTEM.control_dim()) Xtrain, Utrain = run_episodes(policy, NUM_EPISODES, T) print('Generating test data') Xtest, Utest = run_episodes(policy, NUM_EPISODES, T) print('\nLearning dynamics') iters = 25 dad = optimize_learner_dad(learner, Xtrain, Utrain, iters, train_size=0.5) _, dad_err = dad.test(Xtest, Utest, dad.min_test_error_model)
'PLSSVD':PLSSVD(), 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(), 'PassiveAggressiveRegressor':PassiveAggressiveRegressor(), 'Perceptron':Perceptron(), 'ProjectedGradientNMF':ProjectedGradientNMF(), 'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(), 'RANSACRegressor':RANSACRegressor(), 'RBFSampler':RBFSampler(), 'RadiusNeighborsClassifier':RadiusNeighborsClassifier(), 'RadiusNeighborsRegressor':RadiusNeighborsRegressor(), 'RandomForestClassifier':RandomForestClassifier(), 'RandomForestRegressor':RandomForestRegressor(), 'RandomizedLasso':RandomizedLasso(), 'RandomizedLogisticRegression':RandomizedLogisticRegression(), 'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(), 'RobustScaler':RobustScaler(), 'SGDClassifier':SGDClassifier(), 'SGDRegressor':SGDRegressor(), 'SVC':SVC(), 'SVR':SVR(), 'SelectFdr':SelectFdr(), 'SelectFpr':SelectFpr(), 'SelectFwe':SelectFwe(), 'SelectKBest':SelectKBest(), 'SelectPercentile':SelectPercentile(), 'ShrunkCovariance':ShrunkCovariance(), 'SkewedChi2Sampler':SkewedChi2Sampler(),
def _test_ridge_diabetes(filter_):
    """Return the training R^2 (rounded to 5 decimals) of an intercept-free Ridge.

    ``filter_`` is applied to ``X_diabetes`` before both ``fit`` and ``score``.
    """
    model = Ridge(fit_intercept=False)
    model.fit(filter_(X_diabetes), y_diabetes)
    training_score = model.score(filter_(X_diabetes), y_diabetes)
    return np.round(training_score, 5)
weights=[1.01, 1.01]), ['predict'], create_weird_classification_problem_1()), (GradientBoostingClassifier(max_depth=10, n_estimators=10), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (LogisticRegression(), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (IsotonicRegression(out_of_bounds='clip'), ['predict'], create_isotonic_regression_problem_1()), (Earth(), ['predict', 'transform'], create_regression_problem_1()), (Earth(allow_missing=True), ['predict', 'transform'], create_regression_problem_with_missingness_1()), (ElasticNet(), ['predict'], create_regression_problem_1()), (ElasticNetCV(), ['predict'], create_regression_problem_1()), (LassoCV(), ['predict'], create_regression_problem_1()), (Ridge(), ['predict'], create_regression_problem_1()), (RidgeCV(), ['predict'], create_regression_problem_1()), (SGDRegressor(), ['predict'], create_regression_problem_1()), (Lasso(), ['predict'], create_regression_problem_1()), (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]), ['predict', 'predict_proba'], create_weird_classification_problem_1()), (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))], transformer_weights={ 'earth': 1, 'earth2': 2 }), ['transform'], create_weird_classification_problem_1()), (RandomForestRegressor(), ['predict'], create_regression_problem_1()), (CalibratedClassifierCV(LogisticRegression(), 'isotonic'), ['predict_proba'], create_weird_classification_problem_1()), (AdaBoostRegressor(), ['predict'], create_regression_problem_1()),
def test_sparse_cg_max_iter():
    """A sparse_cg Ridge capped at one iteration still yields one coef per feature."""
    n_features = X_diabetes.shape[1]
    model = Ridge(solver="sparse_cg", max_iter=1)
    model.fit(X_diabetes, y_diabetes)
    assert_equal(model.coef_.shape[0], n_features)
def main(num_pts, num_children, learning_rate=1.5, learning_scale=0.8, rand_seed=0):
    """Run online boosting over a stream of points and compare with a batch Ridge fit.

    Parameters
    ----------
    num_pts : number of points drawn from ``dataset`` for both the batch fit
        and the online stream.
    num_children : number of child ``Node`` learners under the root node.
    learning_rate : exponent of the step-size decay; the step at iteration
        ``i`` is ``learning_scale / (i + 1) ** learning_rate``.
    learning_scale : numerator of the step size.
    rand_seed : seed for the training stream; the validation set uses
        ``rand_seed + 1``.

    Returns
    -------
    (validation_set, validation_preds, batch_pred, batch_set,
    per_iter_learner_weights), where ``validation_preds`` and
    ``per_iter_learner_weights`` hold one entry per streamed point.
    """
    top_node = Node(SqLoss, parent=None, name="root", input_dim=0)
    child_nodes = [Node(SqLoss, parent=top_node, input_dim=FEATURE_DIM,
                        name='Child {:d}'.format(i))
                   for i in range(num_children)]

    validation_set = list(dataset(500, seed=rand_seed + 1))
    batch_set = list(dataset(num_pts, seed=rand_seed))

    # Batch baseline: a near-unregularised ridge fit on the whole training set.
    from sklearn.linear_model import Ridge
    batch_learner = Ridge(alpha=1e-15, fit_intercept=False)
    batch_learner.fit(np.vstack([pt.x for pt in batch_set]),
                      np.array([pt.y for pt in batch_set]))
    batch_pred = batch_learner.predict(np.vstack([pt.x for pt in validation_set]))
    Yval = np.array([pt.y for pt in validation_set])
    # THIS HAS TO BE THE SAME LOSS AS THE TOP NODE!
    mean_batch_err = np.mean([top_node.loss(pred, val)
                              for (pred, val) in zip(batch_pred, Yval)])
    print('Batch err: {:.4g}'.format(mean_batch_err))

    npprint = partial(np.array_str, precision=3)

    # Validation is fanned out over processes only for large ensembles.
    multiprocess = num_children >= 75
    if multiprocess:
        from pathos.multiprocessing import ProcessingPool as Pool
        from pathos.multiprocessing import cpu_count
        # NOTE(review): the pool is never closed inside this function.
        p = Pool(cpu_count())
    val_helper = partial(predict_layer, child_nodes=child_nodes, top_node=top_node)

    learner_weights = np.array([node.w for node in child_nodes])
    disp_num_child = 15
    if num_children < disp_num_child:
        print('Child learner weights: {}'.format(npprint(learner_weights.ravel())))

    validation_preds = []
    per_iter_learner_weights = []
    # Function-call form, consistent with every other print in this function.
    print('Starting Online Boosting...')
    for i, pt in enumerate(dataset(num_pts, seed=rand_seed)):
        # Weights recorded here are the ones in effect *before* this step.
        per_iter_learner_weights.append(learner_weights)

        # Compute loss on Validation set
        if multiprocess:
            val_results = p.map(val_helper, validation_set)
        else:
            val_results = [predict_layer(val_pt, child_nodes, top_node)
                           for val_pt in validation_set]
        val_psums, val_losses = zip(*val_results)
        # The last partial sum is the full-ensemble prediction.
        val_preds = [psum[-1] for psum in val_psums]
        validation_preds.append(val_preds)
        avg_val_loss = np.mean(val_losses)

        # Compute the partial sums, loss on current data point
        partial_sums, top_loss = predict_layer(pt, child_nodes, top_node)

        # get the gradient of the top loss at each partial sum
        true_val = pt.y
        # Shift by one so child k sees the sum of children 0..k-1 (0 for the first).
        offset_partials = partial_sums.copy()
        offset_partials[1:] = partial_sums[:-1]
        offset_partials[0] = 0
        dlosses = [node.dloss(pred_val, true_val)
                   for pred_val, node in zip(offset_partials, child_nodes)]
        step_size = learning_scale / np.power((i + 1), learning_rate)
        learner_weights = np.array([node.grad_step(pt.x, loss, step_size)
                                    for (node, loss) in zip(child_nodes, dlosses)])

        # Report on the first and last point, during warm-up for small
        # ensembles, periodically, and whenever validation loss blows up.
        if i < 1 or i == num_pts - 1 or (i < num_children and num_children < disp_num_child)\
                or i % min(int(ceil(num_pts * 0.05)), 25) == 0 or avg_val_loss > 1e3:
            print('Iteration {:d}/{:d}: (x={:.2g},y={:.2g})'.format(i + 1, num_pts, pt.x, pt.y))
            print(' Avg validation loss on pt: {:.4g} vs Batch: {:.4g}'.format(avg_val_loss, mean_batch_err))
            print(' Top layer loss on pt: {:.4g}'.format(top_loss))
            if num_children < disp_num_child:
                print(' Child learner weights: {}'.format(npprint(learner_weights.ravel())))
            print(' Partial sums: {}'.format(npprint(partial_sums)))
            print(' Took descent step of step size {:.4g}...'.format(step_size))
    # endfor
    return validation_set, validation_preds, batch_pred, batch_set, per_iter_learner_weights