def test_polynomialfeatures_vs_sklearn():
    # Compare msmbuilder.preprocessing.PolynomialFeatures
    # with sklearn.preprocessing.PolynomialFeatures
    polynomialfeaturesr = PolynomialFeaturesR()
    polynomialfeaturesr.fit(np.concatenate(trajs))

    polynomialfeatures = PolynomialFeatures()
    polynomialfeatures.fit(trajs)

    y_ref1 = polynomialfeaturesr.transform(trajs[0])
    y1 = polynomialfeatures.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
def get_polynomial_features(df, interaction_sign=' x ', **kwargs):
    """
    Gets polynomial features for the given data frame using the given
    sklearn.PolynomialFeatures arguments

    :param df: DataFrame to create new features from
    :param interaction_sign: separator used when labeling interaction terms
    :param kwargs: Arguments for PolynomialFeatures
    :return: DataFrame with labeled polynomial feature values
    """
    pf = PolynomialFeatures(**kwargs)
    feats = _get_polynomial_features(df.columns.tolist(), pf.fit(df),
                                     interaction_sign=interaction_sign)
    return pd.DataFrame(pf.transform(df), columns=feats)
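# The helper _get_polynomial_features is referenced above but not shown.
# A minimal sketch of what it might look like, assuming it derives one label
# per output column from the fitted transformer's powers_ matrix (the helper
# body here is an assumption, not the original implementation):
def _get_polynomial_features(columns, fitted_pf, interaction_sign=' x '):
    feats = []
    for powers in fitted_pf.powers_:
        # e.g. powers (2, 1) over columns ('a', 'b') -> 'a^2 x b'
        terms = ['%s^%d' % (col, p) if p > 1 else col
                 for col, p in zip(columns, powers) if p > 0]
        feats.append(interaction_sign.join(terms) if terms else '1')
    return feats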
def _polynomial_features(self, input_df):
    """Uses Scikit-learn's PolynomialFeatures to construct new degree-2
    polynomial features from the existing feature set

    Parameters
    ----------
    input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
        Input DataFrame to scale

    Returns
    -------
    modified_df: pandas.DataFrame {n_samples, n_constructed_features + ['guess', 'group', 'class']}
        Returns a DataFrame containing the constructed features

    """
    training_features = input_df.loc[input_df['group'] == 'training'].drop(['class', 'group', 'guess'], axis=1)

    if len(training_features.columns.values) == 0:
        return input_df.copy()
    elif len(training_features.columns.values) > 700:
        # Too many features to produce - skip this operator
        return input_df.copy()

    # The feature constructor must be fit on only the training data
    poly = PolynomialFeatures(degree=2, include_bias=False)
    poly.fit(training_features.values.astype(np.float64))
    constructed_features = poly.transform(input_df.drop(['class', 'group', 'guess'], axis=1).values.astype(np.float64))

    modified_df = pd.DataFrame(data=constructed_features)
    modified_df['class'] = input_df['class'].values
    modified_df['group'] = input_df['group'].values
    modified_df['guess'] = input_df['guess'].values

    new_col_names = {}
    for column in modified_df.columns.values:
        if type(column) != str:
            new_col_names[column] = str(column).zfill(10)
    modified_df.rename(columns=new_col_names, inplace=True)

    return modified_df.copy()
y = np.array(y)
y = y.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=5)

poly = PolynomialFeatures(degree=2)
print("Printing first row of X before Polynomial Features have been applied:", X_train[0])
X_poly = poly.fit_transform(X_train)
print("Printing first row of X after Polynomial Features (w/ deg=2) have been applied:", X_poly[0])
# The transformer must not be refit on the test set; reuse the training fit
X_test_poly = poly.transform(X_test)

clf = Perceptron()
clf.fit(X_poly, y_train)
y_test_pred = clf.predict(X_test_poly)
finalscore = clf.score(X_test_poly, y_test)
coef = clf.coef_
intercept = clf.intercept_
print("Final Parameters values:", coef)
print("Intercept:", intercept)
print("Final Score:", finalscore)
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Importing the dataset
datas = pd.read_csv('data.csv')
X = datas.iloc[:, 0:1].values
y = datas.iloc[:, 1].values

poly = PolynomialFeatures(degree=1)
X_poly = poly.fit_transform(X)

lin2 = LinearRegression()
lin2.fit(X_poly, y)

lin = LinearRegression()
lin.fit(X, y)

plt.scatter(X, y, color='blue')
plt.plot(X, lin.predict(X), color='red')
plt.title('Linear Regression')
plt.xlabel('SF')
plt.ylabel('numPersons')
plt.show()

plt.scatter(X, y, color='blue')
# Simple Linear Regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X, y)
y_pred_sim = regressor.predict(X)

# Polynomial Regression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)
y_pred_poly = lin_reg_2.predict(X_poly)

# RandomForest Regression
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=280, random_state=0)
regressor.fit(X, y)
y_pred_ran = regressor.predict(X)

# DecisionTree Regression
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X, y)
y_pred_dec = regressor.predict(X)
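# A note on a pattern that recurs in these snippets: calling
# poly_reg.fit(X_poly, y) after fit_transform refits the transformer on its
# own output and is never needed. A minimal sketch of the idiomatic
# alternative using a Pipeline (illustrative, assuming the same X and y as
# above):
from sklearn.pipeline import make_pipeline

poly_model = make_pipeline(PolynomialFeatures(degree=4), LinearRegression())
poly_model.fit(X, y)                  # fits the transformer and the regressor together
y_pred_pipe = poly_model.predict(X)   # transforms and predicts in one call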
file = ["1", "2", "3", "4", "5", "6"] for i in file: data = pd.read_csv("dim_reduced_to_" + i + ".csv") print("For dim_reduced_to_" + i + ".csv ") data.drop("CreationTime", inplace=True, axis=1) col = data["InBandwidth"] data.drop("InBandwidth", inplace=True, axis=1) X_train, X_test, Y_train, Y_test = train_test_split(data, col, test_size=0.33, random_state=42) degrees = [2, 3, 4] for i in degrees: regressor = PolynomialFeatures(degree=i) x_poly = regressor.fit_transform(X_train) regressor.fit(x_poly, Y_train) lin_reg = LinearRegression() lin_reg.fit(x_poly, Y_train) Y_pred_test = lin_reg.predict(regressor.fit_transform(X_test)) plt.scatter(Y_test, Y_pred_test) plt.xlabel("Y_test") plt.ylabel("Y_pred_test") plt.show() print("R2 score when degree =", i, "is: ", r2_score(Y_test.values, Y_pred_test)) print("rmse score when degree=", i, "is:", rms(Y_test.values, Y_pred_test)) Y_pred_train = lin_reg.predict(regressor.fit_transform(X_train)) plt.scatter(Y_test, Y_pred_test) plt.xlabel("Y_train") plt.ylabel("Y_pred_train")
# Let's proceed with seeing how we can invoke some
# machine learning functionalities in scikit-learn
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# Data preprocessing routines
x = np.asmatrix([[1, 2], [2, 4]])

# instantiate the polynomial feature transformer
poly = PolynomialFeatures(degree=2)

# build the model
poly.fit(x)
x_poly = poly.transform(x)

print("Original x variable shape", x.shape)
print(x)
print('\n##############################\n')
print("Transformed x variables", x_poly.shape)
print(x_poly)

# alternatively
x_poly = poly.fit_transform(x)
print('################## alternatively')
print(x_poly)

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

# Load data: let's use the iris dataset to see how the tree algorithm can be used
data = load_iris()
# We will load the iris dataset in the x and y variables.
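# For reference, the degree-2 expansion above maps each row [x1, x2] to
# [1, x1, x2, x1**2, x1*x2, x2**2] (the bias column is included by default),
# so x_poly is a (2, 6) array:
#   [1, 2] -> [1., 1., 2., 1., 2., 4.]
#   [2, 4] -> [1., 2., 4., 4., 8., 16.]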
np.mean((prediction - test_y) ** 2)
pd.DataFrame({
    'actual': test_y,
    'prediction': prediction,
    'diff': (test_y - prediction)
})
# end of Linear Regression model

# start of Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=4)
x_poly = poly.fit_transform(train_x)
lin2 = LinearRegression()
lin2.fit(x_poly, train_y)
prediction2 = lin2.predict(poly.transform(test_x))

# calculating error
np.mean((prediction2 - test_y) ** 2)
pd.DataFrame({
    'actual': test_y,
    'prediction': prediction2,
    'diff': (test_y - prediction2)
})
# end of Polynomial Regression

# start of DecisionTreeRegressor
ds = pd.read_csv("Position_Salaries.csv") x = ds.iloc[:,1:2].values y = ds.iloc[:,2].values from sklearn.linear_model import LinearRegression as LR# comparison purposes linreg1 = LR() linreg2 = LR() linreg1.fit(x,y) y_pred1 = linreg1.predict(x) y_pred1 = np.array(y_pred1,dtype = 'int64') from sklearn.preprocessing import PolynomialFeatures as PF# polynomial regression obj polyreg = PF(degree = 2) x_poly = polyreg.fit_transform(x)# adding new features like x0 x_poly = np.array(x_poly,dtype = 'int64') polyreg.fit(x_poly,y) linreg2.fit(x_poly,y) y_pred2 = linreg2.predict(polyreg.fit_transform(x)) #plotting the results plt.scatter(x,y,c = 'r') plt.plot(x,y_pred1,c = 'b')# plotting the linear model which is bad plt.plot(x,y_pred2,c = 'g') plt.xlabel("Position") plt.ylabel("Salary") plt.show()
def poly2_regr(x, y):
    from sklearn.preprocessing import PolynomialFeatures
    poly = PolynomialFeatures(degree=2)
    # note: PolynomialFeatures.fit ignores y; this fits only the transformer
    poly.fit(x, y)
    return poly
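# Since poly2_regr above returns only a fitted feature transformer, here is a
# minimal sketch of a variant that also fits a regressor, as the name
# suggests (illustrative, not the original code):
def poly2_regr_fitted(x, y):
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
    model.fit(x, y)
    return model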
regression_lineaire.fit(X, y)

# use the polynomial model, which lives in the preprocessing package instead
"""
this model takes the degree of the function and approximates it until the
curve matches the shape of our data
"""
from sklearn.preprocessing import PolynomialFeatures
regression_polynomial = PolynomialFeatures(degree=4)
X_optimal_data = regression_polynomial.fit_transform(X)

regression_lineaire_X_optimal = LinearRegression()
regression_lineaire_X_optimal.fit(X_optimal_data, y)

# linear regression visualization
plt.scatter(X, y, color='red')
plt.plot(X, regression_lineaire.predict(X), color='blue')
plt.title('Linear regression plot')
plt.xlabel('Personality ranking')
plt.ylabel('Average annual salary')
plt.show()

# visualization of the regression on the optimal features
"""plt.scatter(X_optimal_data, y[:, np.newaxis], color='red')
poly_target = poly_features['TARGET']
poly_features = poly_features.drop(columns=['TARGET'])

# Need to impute missing values
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)

from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree=3)

# Train the polynomial features
poly_transformer.fit(poly_features)

# Transform the features
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)
print('Polynomial Features shape: ', poly_features.shape)

# note: get_feature_names was renamed get_feature_names_out in
# scikit-learn 1.0 and removed in 1.2
poly_transformer.get_feature_names(input_features=[
    'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'
])[:15]

# Create a dataframe of the features
poly_features = pd.DataFrame(poly_features,
                             columns=poly_transformer.get_feature_names([
                                 'EXT_SOURCE_1', 'EXT_SOURCE_2',
                                 'EXT_SOURCE_3', 'DAYS_BIRTH'
Python 3.8.1 (tags/v3.8.1:1b293b6, Dec 18 2019, 23:11:46) [MSC v.1916 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.preprocessing import PolynomialFeatures
>>> df = pd.read_csv("C:\\Users\\shashikant\\Desktop\\polynomial_regression\\polynomial.csv")
>>> x = df[['level']].values
>>> y = df[['salary']].values
>>> model = LinearRegression()
>>> model.fit(x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
>>> poly = PolynomialFeatures()
>>> p_x = poly.fit_transform(x)
>>> poly.fit(p_x, y)
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False, order='C')
>>> model1 = LinearRegression()
>>> model1.fit(p_x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
>>> plt.title('Linear Model')
Text(0.5, 1.0, 'Linear Model')
>>> plt.xlabel('Position Level')
Text(0.5, 0, 'Position Level')
>>> plt.ylabel('salary')
Text(0, 0.5, 'salary')
>>> plt.scatter(x, y, color='r')
<matplotlib.collections.PathCollection object at 0x0000000018C33F70>
>>> plt.plot(x, model.predict(x), color='b')
[<matplotlib.lines.Line2D object at 0x0000000018C4E4C0>]
        'Part_2:_Regression/Section 6 - Polynomial'
        ' Regression/Polynomial_Regression/'
        'Position_Salaries.csv')
X = D.iloc[:, 1:2].values  # independent variable
y = D.iloc[:, 2].values    # dependent variable (third column)

from sklearn.linear_model import LinearRegression
modelLinear = LinearRegression().fit(X=X, y=y)
# y_artificial = modelLinear.predict(X)

from sklearn.preprocessing import PolynomialFeatures
polyRegression = PolynomialFeatures(degree=4)
X_poly = polyRegression.fit_transform(X)
modelPoly = LinearRegression().fit(X_poly, y)

# Visualization: linear model
plt.scatter(X, y, color='red')
plt.plot(X, modelLinear.predict(X), color='blue')
plt.title('Truth or Bluff')
plt.show()

# Visualization: polynomial model
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape(len(X_grid), 1)
plt.scatter(X, y, color='red')
plt.plot(X_grid,
async def deprocessing(self, event):
    x_train = event['value'][0]
    y_train = event['value'][1]
    Type = event['value'][2]
    if Type == "logistic":
        if len(x_train) == 0:
            return
        if len(set(y_train)) <= 1:
            return
        clf = LogisticRegression()
        clf.fit(np.array(x_train), np.array(y_train))
        w = clf.coef_
        b = clf.intercept_
        x = np.array([0, 1])
        y = -(x * w[0][0] + b) / w[0][1]
        await self.send(text_data=json.dumps({
            'y1': y[0],
            'y2': y[1],
            'intercept': clf.intercept_.tolist(),
            'slope': clf.coef_.tolist()
        }))
    elif Type == "linear-reg":
        if len(x_train) == 0:
            return
        clf = LinearRegression()
        clf.fit(np.array(x_train).reshape(-1, 1),
                np.array(y_train).reshape(-1, 1))
        x_test = [0, 1]
        y_pred = clf.predict(np.array(x_test).reshape(-1, 1))
        await self.send(text_data=json.dumps({'y_pred': y_pred.tolist()}))
    elif Type == "poly-reg":
        if len(x_train) == 0:
            return
        poly_reg = PolynomialFeatures(degree=4)
        X_poly = poly_reg.fit_transform(np.array(x_train).reshape(-1, 1))
        # print(X_poly)
        clf = LinearRegression()
        clf.fit(X_poly, np.array(y_train).reshape(-1, 1))
        x_test = np.arange(0.0, 1.0, 0.02)
        y_pred = clf.predict(poly_reg.transform(x_test.reshape(-1, 1)))
        await self.send(text_data=json.dumps({
            'y_pred': y_pred.tolist(),
            'x_test': x_test.tolist()
        }))
Y = data.iloc[:, -1]

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

from sklearn.linear_model import LinearRegression
linear = LinearRegression()
linear = linear.fit(X_train, Y_train)

"""from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(Y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(Y_test, y_pred)))"""

from sklearn.preprocessing import PolynomialFeatures
polyR = PolynomialFeatures(degree=4)
x_poly = polyR.fit_transform(X)
# note: the polynomial features above are built but never used by `linear`,
# which was fit on the raw training features
polymodel = linear.predict(X_test)

import pickle
pickle.dump(linear, open('model.pk1', 'wb'))
model = pickle.load(open('model.pk1', 'rb'))
x, y = make_circles()
plt.close('all')
plt.figure(1)
plt.scatter(x[:, 0], x[:, 1], c=y)

x, y = make_moons()
plt.figure(2)
plt.scatter(x[:, 0], x[:, 1], c=y)
# plt.show()

from sklearn.preprocessing import PolynomialFeatures

# Data preprocessing routines
x = np.asmatrix([[1, 2], [2, 4]])
poly = PolynomialFeatures(degree=2)
poly.fit(x)
x_poly = poly.transform(x)

print("Original x variable shape", x.shape)
print(x)
print()
print("Transformed x variables", x_poly.shape)
print(x_poly)

# alternatively
x_poly = poly.fit_transform(x)

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

data = load_iris()
x = data['data']
# Fitting Simple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(Years_train, x_train)

# Predicting the Test set results
Y_pred = regressor.predict(Years_test)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=5)
Years_poly = poly_reg.fit_transform(Years)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(Years_poly, x)

# Visualising the Training set results
plt.scatter(Years_train, x_train, color='yellow')
plt.plot(Years_train, regressor.predict(Years_train), color='red')
plt.title('Annual Temp Years (Training set)')
plt.xlabel('Years')
plt.ylabel('MEAN TEMP')
plt.show()

# Visualising the Test set results
plt.scatter(Years_test, x_test, color='brown')
class PolynomialTransformation(Transformer):
    def __init__(self, degree=2, interaction_only='True', include_bias='False', random_state=1):
        super().__init__("polynomial_regression", 34)
        self.input_type = [DISCRETE, NUMERICAL]
        self.compound_mode = 'concatenate'
        self.best_idxs = list()
        if degree == 2:
            self.bestn = 25
        elif degree == 3:
            self.bestn = 10
        elif degree == 4:
            self.bestn = 6
        self.output_type = NUMERICAL

        self.degree = degree
        self.interaction_only = check_for_bool(interaction_only)
        self.include_bias = check_for_bool(include_bias)
        self.random_state = random_state

    @ease_trans
    def operate(self, input_datanode, target_fields):
        from sklearn.preprocessing import PolynomialFeatures
        from lightgbm import LGBMRegressor

        X, y = input_datanode.data
        if not self.best_idxs:
            # rank features by LightGBM importance and keep the top `bestn`
            # that fall inside `target_fields`
            lgb = LGBMRegressor(random_state=1)
            lgb.fit(X, y)
            _importance = lgb.feature_importances_
            idx_importance = np.argsort(-_importance)
            cur_idx = 0
            while len(self.best_idxs) < self.bestn and cur_idx < len(_importance):
                if idx_importance[cur_idx] in target_fields:
                    self.best_idxs.append(idx_importance[cur_idx])
                cur_idx += 1

        X_new = X[:, self.best_idxs]

        if not self.model:
            self.degree = int(self.degree)
            self.model = PolynomialFeatures(degree=self.degree,
                                            interaction_only=self.interaction_only,
                                            include_bias=self.include_bias)
            self.model.fit(X_new)

        _X = self.model.transform(X_new)
        # keep only the newly constructed columns (drop the original features)
        _X = _X[:, X_new.shape[1]:]

        return _X

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        degree = UniformIntegerHyperparameter("degree", lower=2, upper=4, default_value=2)
        interaction_only = CategoricalHyperparameter("interaction_only", ["False", "True"], default_value="False")
        include_bias = UnParametrizedHyperparameter("include_bias", "False")
        cs = ConfigurationSpace()
        cs.add_hyperparameters([degree, interaction_only, include_bias])
        return cs
class HigherOrderSimulator(BaseSimulator):
    def __init__(self, n, p, noise_var=0.1, x_var=1., degree=3,
                 with_input_blocks=False, drop_a=0.2, drop_i=0.8,
                 discretize_beta=False, discretize_x=False, *args, **kwargs):
        """
        A vanilla simulator that simulates an arbitrary high-order polynomial,
        for benchmarking interaction effects

        Args:
            n: number of samples
            p: number of features
            noise_var: variance of the additive Gaussian noise
            x_var: variance of the simulated features
            degree: degree of the polynomial expansion
            with_input_blocks: if True, return X as a list of per-feature blocks
            drop_a: proportion of additive effects set to zero
            drop_i: proportion of interaction effects set to zero
            discretize_beta: if True, draw effect sizes from {-1, 0, 1}
            discretize_x: if True, draw features from a Poisson distribution
        """
        self.n = n
        self.p = p
        self.with_input_blocks = with_input_blocks
        self.noise_var = noise_var
        self.x_var = x_var
        self.degree = degree
        self.polynomial_fitter = PolynomialFeatures(degree=degree,
                                                    interaction_only=False,
                                                    include_bias=False)
        self.polynomial_fitter.fit(np.zeros((self.n, self.p)))
        self.beta_a = np.zeros(p)
        self.beta_i = np.zeros(self.polynomial_fitter.n_output_features_ - p)
        self.powers_i_ = self.polynomial_fitter.powers_[p:]
        self.drop_a = drop_a
        self.drop_i = drop_i
        if discretize_beta:
            self.beta_rng = lambda p: np.random.choice(range(-1, 2), p)
        else:
            self.beta_rng = lambda p: np.random.uniform(-1, 1, p)
        if discretize_x:
            self.x_rng = lambda n: np.random.poisson(x_var, n)
        else:
            self.x_rng = lambda n: np.random.normal(0, np.sqrt(x_var), n)
        self.is_beta_built = False

    def sample_effect(self):
        # additive
        a_idx = np.random.choice(self.p,
                                 int(np.ceil(self.p * (1 - self.drop_a))),
                                 replace=False)
        self.beta_a[a_idx] = self.beta_rng(len(a_idx))
        # interaction
        i_idx = np.random.choice(
            len(self.beta_i),
            int(np.ceil(len(self.beta_i) * (1 - self.drop_i))),
            replace=False)
        self.beta_i[i_idx] = self.beta_rng(len(i_idx))
        self.is_beta_built = True

    def set_effect(self, beta_a, beta_i):
        self.beta_a = beta_a
        self.beta_i = beta_i
        self.is_beta_built = True

    def sample_data(self, N=None, *args, **kwargs):
        N = self.n if N is None else N
        X = self.x_rng(N * self.p).reshape(N, self.p)
        X_s = self.polynomial_fitter.transform(X)
        if not self.is_beta_built:
            self.sample_effect()
        beta = np.concatenate([self.beta_a, self.beta_i])
        y = X_s.dot(beta) + np.random.normal(0, np.sqrt(self.noise_var), N)
        if self.with_input_blocks:
            X = [X[:, i] if len(X.shape) > 2 else X[:, i].reshape(X.shape[0], 1)
                 for i in range(X.shape[1])]
        return X, y

    def get_ground_truth(self, X):
        if self.with_input_blocks:
            X_ = np.concatenate(X, axis=1)
        else:
            X_ = X
        X_s = self.polynomial_fitter.transform(X_)
        beta = np.concatenate([self.beta_a, self.beta_i])
        return X_s.dot(beta)

    def get_nonzero_powers(self):
        if not self.is_beta_built:
            self.sample_effect()
            self.is_beta_built = True
        return self.powers_i_[np.where(self.beta_i != 0)]
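# A minimal usage sketch for the simulator above (the parameter values and
# expected shapes are illustrative assumptions; BaseSimulator must be
# importable for construction to succeed):
sim = HigherOrderSimulator(n=200, p=5, degree=3, noise_var=0.1)
X, y = sim.sample_data()
print(X.shape, y.shape)          # expected: (200, 5) (200,)
print(sim.get_nonzero_powers())  # exponent rows of the sampled interaction terms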
class ValueFunction():
    """
    The member functions of this class compute the action-value function,
    an epsilon-greedy policy, or perform a semi-gradient training step.
    """

    def __init__(self, in_len, out_len, degree=1):
        """
        Takes the number of features in the state vector, the number of
        actions, and the polynomial degree.
        """
        self.in_len = in_len
        self.out_len = out_len
        self.featureTransformer = PolynomialFeatures(degree=degree,
                                                     interaction_only=False,
                                                     include_bias=False)
        self.featureTransformer.fit(np.zeros(in_len).reshape(1, -1))
        # (get_feature_names is get_feature_names_out on scikit-learn >= 1.0)
        self.weights = np.zeros(
            (len(self.featureTransformer.get_feature_names()), out_len))
        # old_weights are used for the dutch eligibility trace
        self.old_weights = np.zeros(self.weights.shape)
        self.eligibility_trace = np.zeros(self.weights.shape)

    def _checkDims(self, state):
        if state.shape[0] != self.in_len:
            raise TypeError('Length of state must be equal to', self.in_len)

    def _transformState(self, state):
        self._checkDims(state)
        return self.featureTransformer.transform(state.reshape(1, -1))[0]

    def computeVF(self, state):
        """
        Takes a state vector and returns an array containing the value of
        each possible action.
        """
        transformed_state = self._transformState(state)
        return np.matmul(transformed_state, self.weights)

    def epsGreedyPolicy(self, state, eps):
        """
        Takes a state vector and epsilon; returns an epsilon-greedy action.
        """
        probs = np.zeros(self.out_len)
        probs[np.argmax(self.computeVF(state))] = 1 - eps
        probs = probs + (eps / len(probs))
        return np.argmax(np.random.multinomial(1, probs, 1))

    def softmaxPolicy(self, state, temperature=1.0):
        """
        Choose a soft-max action with respect to the action-value function.
        It is possible to set a temperature parameter; the default is 1.0.
        """
        expVF = np.exp(self.computeVF(state) / temperature)
        probs = expVF / np.sum(expVF)
        return np.argmax(np.random.multinomial(1, probs, 1))

    def trainSemiGrad(self, state, action, td_error, learning_rate):
        """
        Performs a semi-gradient training step:
        state: a state vector in which to train
        action: an action chosen from the state vector
        td_error: the TD error at the current state, usually denoted delta
        learning_rate: learning rate for training, usually denoted alpha
        """
        # To derive the gradient, note that the value function is a matrix
        # multiplication of the state (1,4)-matrix and the weight (4,2)-matrix,
        # which gives a (1,2)-matrix (two actions). The gradient of this matrix
        # multiplication w.r.t. the weight matrix gives two matrices of shape
        # (4,2), one for each action. For action 0, the first column of its
        # gradient matrix is just the state vector and the other column is all
        # zeros; for the other action the columns are interchanged.
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        self.weights = self.weights + learning_rate * td_error * grad

    def trainEligibTraceSemiGrad(self, state, action, td_error, discount,
                                 decay_factor, learning_rate):
        """
        Perform a semi-gradient training step with an eligibility trace:
        state: a state vector in which to train
        action: an action chosen from the state vector
        td_error: the TD error at the current state, usually denoted delta
        discount: discount of the future rewards, usually denoted gamma
        decay_factor: decay of trace elements, usually denoted lambda
        learning_rate: learning rate for training, usually denoted alpha
        """
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        self.eligibility_trace = discount * decay_factor * self.eligibility_trace + grad
        self.weights = self.weights + learning_rate * td_error * self.eligibility_trace

    def trainDutchTraceSemiGrad(self, state, action, td_error, discount,
                                decay_factor, learning_rate):
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        lr = learning_rate
        ddf = discount * decay_factor
        lrddf = lr * ddf
        self.eligibility_trace = ddf * self.eligibility_trace + grad \
            - lrddf * np.matmul(grad, np.matmul(self.eligibility_trace.T, grad))
        temp = self.weights
        self.weights = self.weights + lr * td_error * self.eligibility_trace \
            + lr * np.matmul(self.eligibility_trace - grad,
                             np.matmul(self.weights.T - self.old_weights.T, grad))
        self.old_weights = temp

    def reset(self):
        """
        Reset the weight matrix and eligibility trace to zeros.
        """
        self.weights = np.zeros(self.weights.shape)
        self.old_weights = np.zeros(self.old_weights.shape)
        self.eligibility_trace = np.zeros(self.eligibility_trace.shape)

    def resetTraces(self):
        """
        Reset the eligibility trace back to zeros.
        """
        self.old_weights = np.zeros(self.old_weights.shape)
        self.eligibility_trace = np.zeros(self.eligibility_trace.shape)
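# A minimal usage sketch for ValueFunction (the state, epsilon, reward and
# learning-rate values are illustrative assumptions, not part of the class
# above):
vf = ValueFunction(in_len=4, out_len=2, degree=1)
state = np.array([0.1, -0.2, 0.05, 0.3])
action = vf.epsGreedyPolicy(state, eps=0.1)
# one-step TD error with reward 1.0 and discount 0.99, reusing `state` as the
# next state purely for illustration
td_error = 1.0 + 0.99 * np.max(vf.computeVF(state)) - vf.computeVF(state)[action]
vf.trainSemiGrad(state, action, td_error, learning_rate=0.01)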
)

# --- Linear model ---
print("\n--- Linear model ---\n")

resp = input('Do you want to run the hyperparameter search and plot generation? Estimated time: 30 min - 1 hour. (S/N)')
if resp == 'S':
    print('\n\nStudy of the polynomial variability of the data')
    X_copy = X.copy()
    X_tst_copy = X_tst.copy()
    for i in range(1, 3):
        print('Study with dimension: ', i)
        poly = PolynomialFeatures(i)
        np.random.seed(0)
        poly.fit(X)
        # assign the transformed train and test features
        X = poly.transform(X)
        X_tst = poly.transform(X_tst)
        clf = LR(random_state=0)
        clf.fit(X, y)
        resultados(clf, X, y, X_tst, y_tst)
        X = X_copy.copy()
        X_tst = X_tst_copy.copy()

    input("\n--- Press enter to continue ---\n")

    print('\n\nStudy of the linear regularization strength (this takes a while).')
    acu = []
    fsc = []
    x_axis = [i for i in range(-5, 10)]
    for i in x_axis:
        clf = LR(penalty='l2', random_state=0, solver='liblinear', C=10**i)
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Generate the input data using random numbers
size = 20
x = np.random.randint(1, 100, size=size)
error = np.random.rand(size)
#error = np.zeros(size)
y = x * x + error
#print(error)
#print(x)
#print(y)

X = x.reshape((-1, 1))
#print(X)

transformer = PolynomialFeatures(degree=2, include_bias=False)
transformer.fit(X)
X = transformer.transform(X)
# X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
#print(X)

model = LinearRegression().fit(X, y)
r_sq = model.score(X, y)
print('coefficient of determination:', r_sq)
print('intercept:', model.intercept_)
print('coefficients:', model.coef_)
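# Since y was generated as x**2 plus uniform noise on [0, 1), the fit should
# roughly recover coefficients near [0, 1] for the columns [x, x**2] and an
# intercept near 0.5 (the mean of the noise); exact values vary with the
# random draw.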
# Get all of the y values except the last n rows
y = y[:-forecast_out]
print(y)

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create and train the Polynomial Regression model
poly_reg = PolynomialFeatures(degree=3)
X_poly = poly_reg.fit_transform(x_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)

#plt.scatter(X, y, color='red')
#plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue')
#plt.title('Truth or Bluff (Polynomial Regression)')
#plt.xlabel('Position level')
#plt.ylabel('Salary')
#plt.show()

# Testing the model
pr_confidence = lin_reg_2.score(poly_reg.transform(x_test), y_test)
print(pr_confidence)

predictionss = lin_reg_2.predict(poly_reg.transform(x_test))
lin_reg2.fit(X2, y)
y_predict2 = lin_reg2.predict(X2)

print("Plotting the multiple regression result")
plt.scatter(x, y)
plt.plot(np.sort(x), y_predict2[np.argsort(x)], color='r')
plt.show()

print("\nMultiple linear regression on the engineered features:")
print("Coefficients: {}".format(lin_reg2.coef_))
print("Intercept: {}\n".format(lin_reg2.intercept_))

# Multiple linear regression in sklearn
# Feature preparation
# `degree` specifies the highest power of the polynomial we use
poly = PolynomialFeatures(degree=2)
poly.fit(X)
X2 = poly.transform(X)
X2.shape  # output: (100, 3)

reg = LinearRegression()
reg.fit(X2, y)
y_predict = reg.predict(X2)

print("Plotting the sklearn multiple regression result")
plt.scatter(x, y)
plt.plot(np.sort(x), y_predict[np.argsort(x)], color='r')
plt.show()

print("\nMultiple linear regression in sklearn:")
print("Coefficients: {}".format(reg.coef_))
print("Intercept: {}\n".format(reg.intercept_))

# Multivariate polynomial regression
print("Three-variable polynomial regression")
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd") data = pd.read_csv('Position_Salaries.csv') # does this so that x is a matrix; upper bound is non-inclusive so this matrix # will only contain column 1 x = data.iloc[:, 1:2].values y = data.iloc[:, -1].values # No need to split set into training and test set because the dataset is very # small poly_reg = PolynomialFeatures(degree=4) x_poly = poly_reg.fit_transform(x) poly_reg.fit(x_poly, y) x_grid = np.arange(min(x), max(x), 0.1) x_grid = x_grid.reshape((len(x_grid), 1)) lin_reg = LinearRegression() lin_reg.fit(x_poly, y) y_pred = lin_reg.predict(poly_reg.fit_transform(x_grid)) plt.scatter(x, y, color='red') plt.plot(x_grid, y_pred, color='blue') plt.title('Salary vs Position') plt.xlabel('Position') plt.ylabel('Salary') salary = lin_reg.predict(poly_reg.fit_transform(6.5))[0] print('Projected salary for position 6.5: %0.2f' % (salary))
extrapolation_days = 4  # how many days to extrapolate into the future
previous_days = 7       # how many days of regression in the past

X = [[x] for x in day_of_march[-previous_days:]]
first_future_day = current_day + 1
trendline_dates = [[x] for x in range(first_future_day,
                                      first_future_day + extrapolation_days)]
X_ = X + trendline_dates
y = log2_hosp[-previous_days:]
print(f"Days of March: {trendline_dates}")

# linear (degree 1)
poly_1 = PolynomialFeatures(degree=1)
X_poly_1 = poly_1.fit_transform(X)
lin2_1 = LinearRegression()
lin2_1.fit(X_poly_1, y)
trend_1 = [pow(2, x) for x in lin2_1.predict(poly_1.transform(X_))]
log2_hosp_trend_1 = lin2_1.predict(poly_1.transform(trendline_dates))
print(f"Trendline LINEAR numbers: {[int(pow(2, x)) for x in log2_hosp_trend_1]}")

# quadratic (degree 2)
poly_2 = PolynomialFeatures(degree=2)
X_poly_2 = poly_2.fit_transform(X)
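# The snippet cuts off here; by symmetry with the degree-1 block above, the
# quadratic fit presumably continues along these lines (an assumption, not
# part of the original source):
lin2_2 = LinearRegression()
lin2_2.fit(X_poly_2, y)
log2_hosp_trend_2 = lin2_2.predict(poly_2.transform(trendline_dates))
print(f"Trendline QUADRATIC numbers: {[int(pow(2, x)) for x in log2_hosp_trend_2]}")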
Created on Sun Dec 15 18:42:23 2019

@author: 64191
"""
import numpy as np
import matplotlib.pyplot as plt

# generate a synthetic dataset
x = np.random.uniform(-3, 3, size=100)
X = x.reshape(-1, 1)
y = 0.5 * x**2 + x + 2 + np.random.normal(0, 1, size=100)

from sklearn.preprocessing import PolynomialFeatures

# degree: the highest power to add to the data
poly = PolynomialFeatures(degree=5)
poly.fit(X)
X2 = poly.transform(X)
# the first column is already all ones, so no separate x0 column of ones
# needs to be added

from sklearn.linear_model import LinearRegression, Ridge
lin_reg2 = LinearRegression()
lin_reg2.fit(X2, y)
y_predict2 = lin_reg2.predict(X2)

ridge = Ridge(alpha=60)
ridge.fit(X2, y)
y_pre = ridge.predict(X2)

plt.scatter(x, y)
plt.plot(np.sort(x), y_predict2[np.argsort(x)], color='r')
plt.plot(np.sort(x), y_pre[np.argsort(x)], color='g')
plt.show()

print(lin_reg2.coef_)
print(ridge.coef_)
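# With alpha=60 the Ridge coefficients printed above are shrunk toward zero
# relative to the plain LinearRegression fit, which is why the green (ridge)
# curve comes out smoother than the red one.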
x, y = get_data()

# Divide the data into train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

# Prepare some polynomial features
poly_features = PolynomialFeatures(interaction_only=True)
poly_features.fit(x_train)
x_train_poly = poly_features.transform(x_train)
x_dev_poly = poly_features.transform(x_dev)

#chosen_model, chosen_subset, low_mse = subset_selection(x_train_poly, y_train)
chosen_model = build_model(x_train_poly, y_train, 20)
#print(chosen_subset)

predicted_y = chosen_model.predict(x_train_poly)
print("\n Model Performance in Training set (Polynomial features)\n")
mse = model_worth(y_train, predicted_y)

# Apply the model on dev set
predicted_y = chosen_model.predict(x_dev_poly)
print("\n Model Performance in Dev set (Polynomial features)\n")
model_worth(y_dev, predicted_y)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('bluegills.csv')
x = df.iloc[:, 0:1].values
y = df.iloc[:, -1].values

from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=4)
X = poly.fit_transform(x)

from sklearn.linear_model import LinearRegression
linear = LinearRegression()
linear.fit(X, y)
x_pred = linear.predict(X)

plt.scatter(x, y, color='red')
plt.plot(x, x_pred, color='blue')
plt.xlabel('Position_level')
plt.ylabel('Salary')
plt.show()
plt.subplots_adjust(top=.96)
plt.ylim(-1000, 8000)
plt.xlim(-2.03, 2.03)
plt.legend(loc='lower center', borderaxespad=0, borderpad=0, ncol=2)
style_figs.light_axis()
plt.savefig('tide_polynome_%d.pdf' % d, facecolor='none', edgecolor='none')

# %%
# Plot the corresponding basis
plt.figure(figsize=[5.12, 3])

for d in (10, 100, 1000):
    transformer = PolynomialFeatures(degree=d)
    transformer.fit(t.reshape(-1, 1), y)
    basis = transformer.transform(t_test.reshape(-1, 1))
    for i in range(2, 10):
        this_signal = basis[:, -i]
        this_signal /= this_signal.max()
        plt.plot(t_test, this_signal, linewidth=2, color='.75')
    this_signal = basis[:, -3]
    this_signal /= this_signal.max()
    this_signal = basis[:, -1]
    this_signal /= this_signal.max()
    plt.plot(t_test, this_signal, label='Degree %d' % d)

#style_figs.no_axis()
plt.subplots_adjust(top=.96)
X_poly = poly.fit_transform(X)

# create training and test sets
# (sklearn.cross_validation was removed in scikit-learn 0.20)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3, random_state=0)

## POLYNOMIAL
# Create linear regression object
# (the `normalize` parameter was removed in scikit-learn 1.2; scale the
# features beforehand on newer versions)
poly = linear_model.LinearRegression(normalize=True)

# Train the model using the training sets
X_train_no_intercept = X_train
X_train = X_train.reshape(-1, X_train.shape[1])
poly.fit(X_train, y_train)

# The intercept
print('Intercept: \n', poly.intercept_)
# The coefficients
print('Coefficients: \n', poly.coef_)
# The mean squared error
print("Residual sum of squares, training data: %.2f"
      % np.mean((poly.predict(X_train) - y_train) ** 2))
print("Residual sum of squares, test data: %.2f"
      % np.mean((poly.predict(X_test) - y_test) ** 2))
var_to_graph['multReg_poly'] = np.mean((poly.predict(X_test) - y_test) ** 2)

# Explained variance score: 1 is perfect prediction
print('Variance score, training data: %.2f' % poly.score(X_train, y_train))

# vector of prediction errors
print('Distribution of prediction error on training data:')
x_train = traindata['Father'].values.reshape(-1, 1)
y_train = traindata['Son'].values.reshape(-1, 1)
x_test = testdata['Father'].values.reshape(-1, 1)
y_test = testdata['Son'].values.reshape(-1, 1)

from sklearn.metrics import mean_squared_error
from math import sqrt

polyreg = PolynomialFeatures(degree=10)
x_modified_train = polyreg.fit_transform(x_train)
# reuse the fit from the training data rather than refitting on the test set
x_modified_test = polyreg.transform(x_test)

model = linear_model.Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)
print('RMSE Train:', sqrt(mean_squared_error(y_train, y_predicted_train)))
print('RMSE Test:', sqrt(mean_squared_error(y_test, y_predicted_test)))

train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
# note: this sweep fits Lasso on the raw feature, not on the degree-10
# expansion built above
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    train_err.append(sqrt(mean_squared_error(y_train, polyreg.predict(x_train))))
    test_err.append(sqrt(mean_squared_error(y_test, polyreg.predict(x_test))))

plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(alpha_vals, train_err, 'bo-', label='Train')
plt.plot(alpha_vals, test_err, 'ro-', label='Test')
plt.legend()
plt.show()
print('Mean of MAE after 1000 tests: ', np.mean(MAE_arr, axis=0))
print('STD of MAE after 1000 tests: ', np.std(MAE_arr, axis=0))

# Loop over polynomial degrees P = 1, 2, 3, 4
for n in range(1, 5):
    RMSE_list = []
    # create an empty array for collecting the per-test errors
    error_array = np.zeros(shape=(1, 1))
    # loop over 1000 tests
    for i in range(0, 1000):
        # define the polynomial degree
        poly_reg_model = PolynomialFeatures(degree=n)
        X_train, X_test, y_train, y_test = train_test_split(X_matrix, y_matrix, test_size=20, train_size=372)
        # expand the training features and fit a linear model on them
        X_train_poly = poly_reg_model.fit_transform(X_train)
        reg_model_train = LinearRegression().fit(X_train_poly, y_train)
        # predict on the transformed test features
        y_predict = reg_model_train.predict(poly_reg_model.transform(X_test))
        # calculate the root mean squared error
        RMSE_list.append(sqrt(mean_squared_error(y_predict, y_test)))
        RMSE_arr = np.array(RMSE_list)
        # subtract test and predicted values
        error = y_test - y_predict
        # add to the array for plotting
        error_array = np.concatenate((error_array, error), axis=0)
    print('Mean of RMSE when P equals ', n, np.mean(RMSE_arr, axis=0))
    print('STD of RMSE when P equals ', n, np.std(RMSE_arr, axis=0))
""" import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures data = pd.read_csv("Income_Data.csv") reg = LinearRegression() polyf = PolynomialFeatures(degree=6) features = data.iloc[:, 0:1] labels = data.iloc[:, 1:] features_poln = polyf.fit_transform(features) polyf.fit(features_poln) reg.fit(features_poln, labels) """ we will have to change the value too in the polynomial regression format in order to predict the value """ print(reg.predict(polyf.fit_transform([[6.5]]))) """ OBSERVING THE RESULT OF POLYNOMIAL REGRESSION """ plt.scatter(features, labels, color='red') plt.plot(features, reg.predict(polyf.fit_transform(features)), color='blue') plt.title('Truth or Bluff (Polynomial Regression)') plt.xlabel('Position level') plt.ylabel('Salary') plt.show() # Visualising the Polynomial Regression results (for higher resolution and smoother curve)
if __name__ == "__main__": x,y = get_data() # Divide the data into Train, dev and test x_train,x_test_all,y_train,y_test_all = train_test_split(x,y,test_size = 0.3,random_state=9) x_dev,x_test,y_dev,y_test = train_test_split(x_test_all,y_test_all,test_size=0.3,random_state=9) #Prepare some polynomial features poly_features = PolynomialFeatures(2,interaction_only=True) poly_features.fit(x_train) x_train_poly = poly_features.transform(x_train) x_dev_poly = poly_features.transform(x_dev) # Build model with polynomial features model_poly = build_model(x_train_poly,y_train) predicted_y = model_poly.predict(x_train_poly) print "\n Model Performance in Training set (Polynomial features)\n" model_worth(y_train,predicted_y) # View model details view_model(model_poly) # Apply the model on dev set predicted_y = model_poly.predict(x_dev_poly) print "\n Model Performance in Dev set (Polynomial features)\n"
class HiddenStateSimulator(HigherOrderSimulator):
    def __init__(self, n, x_index, h_index=None, degree=2,
                 interaction_strength=None, *args, **kwargs):
        """
        Args:
            n: number of samples
            x_index: indices of the observed input features
            h_index: indices of the hidden-state features, if any
            degree: degree of the polynomial expansion
            interaction_strength: defines drop_i as well as beta_rng for
                interaction-term effect sizes
            *args:
            **kwargs:
        """
        if "noise_var" in kwargs:
            assert kwargs['noise_var'] == 0, \
                "HiddenStateSimulator must set noise_var=0; got %s" % kwargs['noise_var']
        self.x_index = x_index
        self.x_len = len(self.x_index)
        self.interaction_strength = interaction_strength
        self.h_index = h_index if h_index is not None else []
        self.h_len = len(self.h_index)
        # the order for concat is x + h
        p = self.x_len + self.h_len
        if interaction_strength is None:
            super().__init__(n=n, p=p, degree=degree, noise_var=0, drop_a=0,
                             *args, **kwargs)
        else:
            super().__init__(n=n, p=p, degree=degree, noise_var=0, drop_a=0,
                             drop_i=1 - interaction_strength, *args, **kwargs)
        # overwrite the parent's fitter with an interaction-only expansion
        self.polynomial_fitter = PolynomialFeatures(degree=degree,
                                                    interaction_only=True,
                                                    include_bias=False)
        self.polynomial_fitter.fit(np.zeros((self.n, self.p)))
        self.beta_a = np.zeros(p)
        self.beta_i = np.zeros(self.polynomial_fitter.n_output_features_ - p)
        self.powers_i_ = self.polynomial_fitter.powers_[p:]
        if self.interaction_strength is None:
            self.beta_i_rng = self.beta_rng
        else:
            # a normal distribution has 95% prob. of falling within mu +/- 2*sigma
            self.beta_i_rng = lambda n: np.sign(np.random.uniform(-1, 1, n)) * \
                np.random.uniform(self.interaction_strength, 0.1, n)

    def sample_effect(self):
        # additive
        a_idx = np.random.choice(self.p,
                                 int(np.ceil(self.p * (1 - self.drop_a))),
                                 replace=False)
        self.beta_a[a_idx] = self.beta_rng(len(a_idx))
        # interaction
        i_idx = np.random.choice(
            len(self.beta_i),
            int(np.ceil(len(self.beta_i) * (1 - self.drop_i))),
            replace=False)
        self.beta_i[i_idx] = self.beta_i_rng(len(i_idx))
        self.is_beta_built = True

    def sample_data(self, N=None, hs=None, *args, **kwargs):
        assert self.h_len == 0 or hs is not None, \
            "If h_index is not empty, must parse `hs` in argument"
        N = self.n if N is None else N
        X = self.x_rng(N * self.x_len).reshape(N, self.x_len)
        if hs is not None:
            h = hs[:, self.h_index]
            X = np.concatenate([X, h], axis=1)
        X_s = self.polynomial_fitter.transform(X)
        if not self.is_beta_built:
            self.sample_effect()
        beta = np.concatenate([self.beta_a, self.beta_i])
        y = X_s.dot(beta) + np.random.normal(0, np.sqrt(self.noise_var), N)
        if self.with_input_blocks:
            X = [X[:, i] if len(X.shape) > 2 else X[:, i].reshape(X.shape[0], 1)
                 for i in range(X.shape[1])]
        return X, y
# (sklearn.cross_validation was removed in scikit-learn 0.20)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import PolynomialFeatures

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR')
training_indices, testing_indices = train_test_split(tpot_data.index,
                                                     stratify=tpot_data['class'].values,
                                                     train_size=0.75,
                                                     test_size=0.25)

result1 = tpot_data.copy()

# Use Scikit-learn's PolynomialFeatures to construct new features from the existing feature set
training_features = result1.loc[training_indices].drop('class', axis=1)

if len(training_features.columns.values) > 0 and len(training_features.columns.values) <= 700:
    # The feature constructor must be fit on only the training data
    poly = PolynomialFeatures(degree=2, include_bias=False)
    poly.fit(training_features.values.astype(np.float64))
    constructed_features = poly.transform(result1.drop('class', axis=1).values.astype(np.float64))
    result1 = pd.DataFrame(data=constructed_features)
    # take the labels from the original frame; result1 was just reassigned
    result1['class'] = tpot_data['class'].values
else:
    result1 = result1.copy()

result2 = result1.copy()

# Perform classification with an AdaBoost classifier
adab2 = AdaBoostClassifier(learning_rate=0.15, n_estimators=500, random_state=42)
adab2.fit(result2.loc[training_indices].drop('class', axis=1).values,
          result2.loc[training_indices, 'class'].values)
result2['adab2-classification'] = adab2.predict(result2.drop('class', axis=1).values)
# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)"""

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=5)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results
plt.scatter(X, y, color='red')
plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue')
plt.title('Truth or Bluff (Polynomial Regression)')