def fun_bag_fs(x, *args):
    """Score or fit a ``BaggingRegressor`` on a selected subset of columns.

    Parameters
    ----------
    x : sequence of numbers
        Decision vector. ``x[0]`` (rounded) indexes the two-entry
        base-estimator table, ``x[1]`` (rounded) is ``n_estimators``, and
        every entry from ``x[2]`` onwards is a feature switch: a value above
        0.5 keeps the column at the same position. When ``x`` has at most two
        entries, all columns of ``X`` are kept. Presumably there is one
        switch per column of ``X`` -- verify against the caller.
    *args
        Exactly ``(X, y, flag, n_splits, random_seed)``. ``X`` must support
        2-D NumPy-style indexing; ``flag == 'eval'`` selects the scalar
        return, any other value selects the dict return.

    Returns
    -------
    float or dict
        With ``flag == 'eval'``: ``RMSE(y_p, y)`` of the shuffled K-fold
        cross-validated predictions, or ``1e12`` when cross-validation
        raised. Otherwise: a dict holding the estimator fitted on the
        selected columns together with the predictions, parameters, selected
        column indices (``ACTIVE_VAR``, the tuple returned by ``np.where``),
        the full ``X`` and the seed.

    NOTE(review): recent scikit-learn releases renamed the ``base_estimator``
    parameter to ``estimator``; confirm the installed version still accepts
    the key used below.
    """
    X, y, flag, n_splits, random_seed = args
    n_var = X.shape[1]

    # Both table entries are None, so None is always what gets passed on.
    _estimator = [None, None]
    base_estimator = _estimator[int(round(x[0]))]
    n_estimators = int(round(x[1]))
    p = {
        'base_estimator': base_estimator,
        'n_estimators': n_estimators,
    }
    clf = BaggingRegressor(random_state=random_seed)
    clf.set_params(**p)

    if len(x) <= 2:
        keep = np.ones(n_var, dtype=bool)
    else:
        keep = np.asarray(x[2:]) > 0.5
    # Kept as the 1-tuple produced by np.where so 'ACTIVE_VAR' keeps its shape
    # for existing callers; ft[0] is the actual array of column indices.
    ft = np.where(keep)

    try:
        cv = KFold(n_splits=n_splits,
                   shuffle=True,
                   random_state=int(random_seed))
        # Index with ft[0] rather than the tuple itself: X[:, ft] has shape
        # (n, 1, k) and squeezing it turns a single selected column into a
        # 1-D vector, which the estimator rejects.
        y_p = cross_val_predict(clf, X[:, ft[0]], y, cv=cv, n_jobs=1)
        r = RMSE(y_p, y)
    except Exception:
        # Best-effort scoring: any failure becomes a large penalty so the
        # caller can keep going. KeyboardInterrupt/SystemExit still propagate.
        y_p = [None]
        r = 1e12

    if flag == 'eval':
        return r

    clf.fit(X[:, ft[0]], y)
    return {
        'Y_TRUE': y,
        'Y_PRED': y_p,
        'EST_PARAMS': p,
        'PARAMS': x,
        'EST_NAME': 'BAG',
        'ESTIMATOR': clf,
        'ACTIVE_VAR': ft,
        'DATA': X,
        'SEED': random_seed
    }
def _stack_lagged_rows(data, first_row, n_rows, n_lags, n_features):
    """Build an ``(n_rows, n_lags * n_features)`` matrix of lagged rows.

    Output row ``i`` holds, for each lag ``j`` in ``0..n_lags-1``, the first
    ``n_features`` columns of ``data[first_row + i - j]`` in output columns
    ``j * n_features`` to ``(j + 1) * n_features - 1``.
    """
    if first_row < n_lags - 1:
        raise ValueError("first_row must leave room for n_lags - 1 earlier rows")
    lagged = np.zeros((n_rows, n_lags * n_features))
    for lag in range(n_lags):
        start = first_row - lag
        lagged[:, lag * n_features:(lag + 1) * n_features] = \
            data[start:start + n_rows, :n_features]
    return lagged


def run():
    """Train bagged decision trees on lagged rows and plot score vs. tree count.

    Reads ``train.csv`` and ``test2.csv`` (first line skipped, no header),
    drops the first training column, builds one training sample per row from
    the current row and the four rows before it (8 columns each), and uses
    the last column of the following row as the target. A
    ``BaggingRegressor`` is refitted for 10, 20, ..., 90 estimators; each
    ``score`` on the test data is printed and the scores are then plotted.
    """
    print("Decision Tree Regression started...")

    # Preparing training data
    dir_path = ""
    train_file_path = dir_path + "train.csv"
    train_file = read_csv(train_file_path, skiprows=1, header=None)

    train_file = train_file.drop(train_file.columns[0], axis=1)
    train_file = train_file.values

    first_row, n_lags, n_features = 5, 5, 8
    train_Y = train_file[first_row + 1:50001, -1]
    # A file shorter than 50001 rows has one fewer target than feature rows;
    # keep only the samples that have both.
    n_rows = min(train_file[first_row:50000].shape[0], train_Y.shape[0])
    train_Y = train_Y[:n_rows]

    # Combining previous 5 time step data into one row. Lags are taken from
    # the full table: indexing inside the already-sliced window would use
    # negative indices for the first rows and wrap around to the end.
    train_X = _stack_lagged_rows(train_file[:, :-1], first_row, n_rows,
                                 n_lags, n_features)

    # Preparing testing data
    test_file_name = dir_path + "test2.csv"
    test_file = read_csv(test_file_name, skiprows=1, header=None)
    test_file = test_file.values
    test_X = np.array(test_file[:, :-1])
    test_y = test_file[:, -1]

    # Model training and prediction for different numbers of trees
    estimators = np.arange(10, 100, 10)
    print("\nBagged Decision Tree:")
    # No fit here: the loop refits from scratch for every n_estimators value.
    bag_reg = BaggingRegressor(DecisionTreeRegressor(), n_jobs=2, random_state=0)
    scores = []
    for n in estimators:
        bag_reg.set_params(n_estimators=n)
        bag_reg.fit(train_X, train_Y)
        score = bag_reg.score(test_X, test_y)
        print(score)
        scores.append(score)

    # Plotting the effect of increasing the number of trees on the score
    plt.title("Effect of n_estimators")
    plt.xlabel("n_estimator")
    plt.ylabel("score")
    plt.plot(estimators, scores)
    plt.show()
Exemplo n.º 3
0
def test_parallel_regression():
    """Check that bagging predictions do not depend on ``n_jobs``.

    An ensemble fitted with three jobs must predict the same values with one
    job and with two jobs, and must agree with an ensemble fitted with a
    single job under the same ``random_state``.
    """
    split_rng = check_random_state(0)
    X_train, X_test, y_train, _ = train_test_split(
        boston.data, boston.target, random_state=split_rng
    )

    def fitted_bagger(n_jobs):
        # Same base learner and seed every time; only the job count varies.
        bagger = BaggingRegressor(
            DecisionTreeRegressor(), n_jobs=n_jobs, random_state=0
        )
        return bagger.fit(X_train, y_train)

    parallel_model = fitted_bagger(3)

    parallel_model.set_params(n_jobs=1)
    pred_one_job = parallel_model.predict(X_test)
    parallel_model.set_params(n_jobs=2)
    pred_two_jobs = parallel_model.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_two_jobs)

    serial_model = fitted_bagger(1)
    pred_serial_fit = serial_model.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_serial_fit)
Exemplo n.º 4
0
def test_parallel_regression():
    """Check that bagging predictions on the diabetes data ignore ``n_jobs``.

    Predictions from an ensemble fitted with three jobs are taken with one
    and with two prediction jobs and compared; the one-job predictions are
    then compared with those of an ensemble fitted with a single job.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, _ = train_test_split(
        diabetes.data, diabetes.target, random_state=rng
    )

    fitted_parallel = BaggingRegressor(
        DecisionTreeRegressor(), n_jobs=3, random_state=0
    )
    fitted_parallel = fitted_parallel.fit(X_train, y_train)

    # Predict with 1 job first, then 2 jobs, on the same fitted ensemble.
    predictions = {}
    for jobs in (1, 2):
        fitted_parallel.set_params(n_jobs=jobs)
        predictions[jobs] = fitted_parallel.predict(X_test)
    assert_array_almost_equal(predictions[1], predictions[2])

    fitted_serial = BaggingRegressor(
        DecisionTreeRegressor(), n_jobs=1, random_state=0
    )
    fitted_serial = fitted_serial.fit(X_train, y_train)

    assert_array_almost_equal(predictions[1], fitted_serial.predict(X_test))