from lightgbm import LGBMRegressor

def poly_regression_lgbmr(X_train_confirmed, y_train_confirmed, future_forcast):
    # Gradient-boosted trees stand in for the earlier linear model.
    linear_model = LGBMRegressor(n_estimators=1300)
    linear_model.fit(X_train_confirmed, y_train_confirmed)
    #linear_model = LinearRegression(normalize=True, fit_intercept=False)
    #linear_model.fit(X_train_confirmed, y_train_confirmed)
    #valid_linear_pred = linear_model.predict(X_valid_confirmed)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    #poly_pred_future_forcast = np.exp(poly_pred_future_forcast)
    #print('MAE:', mean_absolute_error(y_valid_confirmed, valid_linear_pred))
    #print('MSE:', mean_squared_error(y_valid_confirmed, valid_linear_pred))
    return poly_pred_future_forcast
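# A minimal usage sketch for poly_regression_lgbmr (not from the original
# source): the arrays below are hypothetical stand-ins for the day-index
# features and cumulative case counts the function appears to expect.
import numpy as np

days = np.arange(100).reshape(-1, 1)         # hypothetical day indices
cases = np.exp(days / 25.0).ravel()          # hypothetical cumulative cases
future_days = np.arange(110).reshape(-1, 1)  # forecast horizon

forecast = poly_regression_lgbmr(days, cases, future_days)
print(forecast[-5:])                         # last five predicted values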
def getModel_deprecated(self, x_train, y_train, x_test, y_test):
    # Use the validation process to pick the most important linear features.
    # Scale the data.
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scalar = min_max_scaler.fit(x_train)
    x_train_scaled = scalar.transform(x_train)
    # Iterate through all candidate models.
    selected_features = []
    selected_poly = False
    min_mse = float('inf')
    selected_model_name = ''
    for model_name, model in PModel.CANDIDATE_MODELS.items():
        rfe = RFE(model, n_features_to_select=5)
        fit = rfe.fit(x_train_scaled, y_train)
        # Get the names of the features RFE kept (ranking == 1).
        feature_names = list(
            filter(lambda f: fit.ranking_[self.features.index(f)] == 1,
                   self.features))
        selected_x_train = x_train[feature_names]
        selected_x_train_poly = PolynomialFeatures(
            degree=2).fit_transform(selected_x_train)
        selected_x_test_poly = PolynomialFeatures(degree=2).fit_transform(
            self.x_test[feature_names])
        # Train the 1st-order model.
        linear_model = clone(model)
        linear_model.fit(selected_x_train, y_train)
        diff, mse = self.validate(self.x_test[feature_names], linear_model)
        if mse < min_mse:
            selected_features = feature_names
            selected_poly = False
            selected_model_name = model_name
            min_mse = mse
            self.model = linear_model
        # Train the 2nd-order (polynomial) model.
        high_model = clone(model)
        high_model.fit(selected_x_train_poly, y_train)
        diff, mse = self.validate(selected_x_test_poly, high_model)
        if mse < min_mse:
            # Record the winning features and model name here too; the
            # original only flipped selected_poly, which could mislabel
            # the winner.
            selected_features = feature_names
            selected_model_name = model_name
            selected_poly = True
            min_mse = mse
            self.model = high_model
    # Set all members.
    self.polyFeature = selected_poly
    self.features = selected_features
    self.modelType = selected_model_name
    if selected_poly:
        # Caveat kept from the original: selected_x_test_poly comes from the
        # last loop iteration, which may not match the winning model.
        self.x_test_selected = selected_x_test_poly
    else:
        self.x_test_selected = self.x_test[selected_features]
def poly_regression_2(X_train_confirmed, y_train_confirmed, future_forcast,
                      country_cases):
    # Note: the degree-8 transforms below are computed but never used; the
    # pipeline further down applies its own degree-2 expansion.
    poly = PolynomialFeatures(degree=8)
    poly_X_train_confirmed = poly.fit_transform(X_train_confirmed)
    #poly_X_valid_confirmed = poly.fit_transform(X_valid_confirmed)
    poly_future_forcast = poly.fit_transform(future_forcast)
    # Degree-2 polynomial features feeding a Huber regressor, which is
    # robust to outliers in the case counts.
    linear_model = make_pipeline(PolynomialFeatures(2), HuberRegressor())
    #linear_model = LinearRegression(normalize=True, fit_intercept=False)
    linear_model.fit(X_train_confirmed, y_train_confirmed)
    #valid_linear_pred = linear_model.predict(X_valid_confirmed)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    print('MSE:', mean_squared_error(country_cases[:len(future_forcast)],
                                     poly_pred_future_forcast))
    #print('MAE:', mean_absolute_error(y_valid_confirmed, valid_linear_pred))
    #print('MSE:', mean_squared_error(y_valid_confirmed, valid_linear_pred))
    return poly_pred_future_forcast
def poly_regression_regr(X_train_confirmed, country_cases, future_forcast):
    # `regr` is assumed to be a regressor class bound elsewhere in the
    # source module; the n_jobs/n_estimators parameters suggest an
    # ensemble such as a random forest.
    linear_model = regr(n_jobs=-1)
    estimators = 100
    scores = []  # only referenced by the commented-out scoring line below
    linear_model.set_params(n_estimators=estimators)
    linear_model.fit(X_train_confirmed, country_cases)
    #scores.append(linear_model.score(X_test, y_test))
    #linear_model = LinearRegression(normalize=True, fit_intercept=False)
    #linear_model.fit(X_train_confirmed, y_train_confirmed)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    #poly_pred_future_forcast = np.exp(poly_pred_future_forcast)
    #print('MAE:', mean_absolute_error(y_valid_confirmed, valid_linear_pred))
    #print('MSE:', mean_squared_error(y_valid_confirmed, valid_linear_pred))
    return poly_pred_future_forcast
def linear_regression(train_features, train_targets, test_features, test_targets):
    # Train
    linear_model = sklearn.linear_model.LinearRegression()
    linear_model.fit(train_features, train_targets)
    predict_targets = linear_model.predict(test_features)
    n_test_sample = len(test_targets)
    X = range(n_test_sample)
    # Validation: mean absolute error via the L1 norm of the residuals.
    error = numpy.linalg.norm(predict_targets - test_targets, ord=1) / n_test_sample
    print("Linear Regression Error: %.2f" % error)  # was a Python 2 print statement
    # Draw
    plot.plot(X, predict_targets, 'r--', label='Predict Price')
    plot.plot(X, test_targets, 'g', label='True Price')
    legend = plot.legend()
    plot.title('Linear Regression')
    plot.show()
def linear_regression(self):
    linear_model = LinearRegression()
    linear_model.fit(self.x_train, self.y_train)
    y_pred_test = linear_model.predict(self.x_test)
    print('Linear Regression')
    print(self.big_line)
    print('Mean squared error (MSE): %.2f' % mean_squared_error(self.y_test, y_pred_test))
    print('Mean absolute error (MAE): %.2f' % mean_absolute_error(self.y_test, y_pred_test))
    print('Coefficient of determination (R^2): %.2f' % r2_score(self.y_test, y_pred_test))
    # Count predictions that land within one yard of the true value.
    total_correct = 0
    for i in range(len(y_pred_test)):
        if abs(y_pred_test[i] - self.y_test[i]) <= 1:
            total_correct += 1
    print(f'Percent Correct within 1 Yard: {total_correct / len(y_pred_test) * 100}%')
    print()
def SVRlinear(self, normalization, df, testSize):
    X, y = (df.drop(['Project', 'ActualDuration'], axis=1),
            df['ActualDuration'].astype('int'))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize)
    linear_model = svm.SVR(kernel="linear")
    linear_model.fit(X_train, y_train)
    train_prediction = linear_model.predict(X_train)
    test_prediction = linear_model.predict(X_test)
    print("SVR linear prediction with "
          + str(normalization.percentageOfWeeksRegression * 100)
          + "% of the data in each of the "
          + str(len(normalization.allProjectDataFrames))
          + " projects, \nand a train/test ratio of "
          + str(int((1 - testSize) * 100)) + "/" + str(int(testSize * 100))
          + " resulted in:")
    print("MAE:\t{0:.3f}".format(
        metrics.mean_absolute_error(y_test, test_prediction)))
    print("RMSE:\t{0:.3f}".format(
        metrics.mean_squared_error(y_test, test_prediction, squared=False)))
    print("R^2:\t{0:>1.3f}\n".format(
        metrics.r2_score(y_test, test_prediction)))
    return ""
def abs_reg_coeffs(self, linear_model):
    """Return the top k variables ranked by absolute coefficient size.

    Args:
        linear_model: A scikit-learn linear model to fit.

    Uses instance attributes:
        df: Pandas DataFrame containing the target and feature variables.
        target_var: Name of the target variable column.
        k_features: The number of features to return.

    Returns:
        Pandas DataFrame with the top k variables by absolute coefficient size.
    """
    df = self.df
    cat_features = df.loc[:, df.dtypes == object]
    if not cat_features.empty:
        df = self.prep_cat_vars(df)
    X = df.drop([self.target_var], axis=1)
    y = df[self.target_var]
    # Fit the model.
    lm = linear_model.fit(X, y)
    # Pair each coefficient with its feature name and rank by |coef|.
    # Note: the original rebuilt the labels from X.columns[1:], which shifts
    # every label by one; coef_ has one entry per column of X.
    feat_labels = pd.DataFrame(X.columns)
    lm_coeff = pd.DataFrame(lm.coef_).T
    lm_reg_coeff = feat_labels.merge(lm_coeff, left_index=True, right_index=True)
    lm_reg_coeff.columns = ['features', 'coeff']
    lm_reg_coeff['coeff_abs'] = lm_reg_coeff['coeff'].abs()
    lm_reg_coeff.sort_values('coeff_abs', ascending=False, inplace=True)
    lm_reg_coeff['coeff_rank'] = range(1, len(lm_reg_coeff) + 1)
    lm_reg_coeff = lm_reg_coeff[lm_reg_coeff.coeff_rank <= self.k_features]
    return lm_reg_coeff
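# A self-contained sketch of the same idea (not part of the class above):
# rank features by the absolute value of their fitted coefficients. The
# function name and k value are illustrative, not from the original.
import pandas as pd
from sklearn.linear_model import LinearRegression

def top_k_by_abs_coef(X, y, k=5):
    # X: feature DataFrame, y: target Series.
    lm = LinearRegression().fit(X, y)
    coeffs = pd.DataFrame({'features': X.columns, 'coeff': lm.coef_})
    coeffs['coeff_abs'] = coeffs['coeff'].abs()
    return coeffs.sort_values('coeff_abs', ascending=False).head(k)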
### list the features you want to look at--first item in the
### list will be the "target" feature
features_list = ["bonus", "salary"]
data = featureFormat(dictionary, features_list, remove_any_zeroes=True)
target, features = targetFeatureSplit(data)

### training-testing split needed in regression, just like classification
from sklearn.model_selection import train_test_split  # was sklearn.cross_validation
feature_train, feature_test, target_train, target_test = train_test_split(
    features, target, test_size=0.5, random_state=42)
train_color = "b"
test_color = "r"

### instantiate the regressor before fitting; the original called fit()
### on the module alias itself and on an undefined features_train
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(feature_train, target_train)

### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_test[0], target_test[0], color=train_color, label="train")
linear_model.predict(train_features[:10])

linear_model.layers[1].kernel

"""### Training model configuration

Mean absolute error will be optimized using the Adam algorithm. Adam
optimization is a stochastic gradient descent method based on adaptive
estimation of first-order and second-order moments.
"""

linear_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

history = linear_model.fit(
    train_features,
    train_labels,
    epochs=200,
    verbose=0,
    # Calculate validation results on 20% of the training data
    validation_split=0.2)

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim([0, 900000])
    plt.xlabel('Epoch')
    plt.ylabel('Error [medianCompexValue]')
    plt.legend()
    plt.grid(True)
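# A usage sketch (not in the original excerpt): render the training curve
# with the helper just defined; plt.show() is assumed for script use.
plot_loss(history)
plt.show()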
sns.regplot(x="T2", y="T6", data=slr_df)

# In[47]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# In[48]:
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()

# In[49]:
model = linear_model.fit(df[['T2']], df.T6)

# In[51]:
from sklearn.model_selection import train_test_split
# model

# In[55]:
predicted_values = linear_model.predict(df[['T2']])

# In[56]:
from sklearn.metrics import mean_absolute_error
# Compare predictions against the target T6, not the input feature T2,
# which the original mistakenly passed as the first argument.
mae = mean_absolute_error(df['T6'], predicted_values)
# (commented out in the source) optional standardization of inputs/outputs:
# inputs = StandardScaler().fit_transform(inputs)
# output = StandardScaler().fit_transform(output)

# create models
nn_model = sklearn.neural_network.MLPRegressor(
    solver='lbfgs', alpha=1e-5, hidden_layer_sizes=hl_size, activation=act)
linear_model = sklearn.linear_model.LinearRegression()
rtree = sklearn.tree.DecisionTreeRegressor(min_samples_leaf=10, max_features=0.50)

# fit models to data
nn_model.fit(inputs[train_indx, :], output[train_indx])
linear_model.fit(inputs[train_indx, :], output[train_indx])
rtree.fit(inputs[train_indx, :], output[train_indx])

# predict with model
nn_predicted = nn_model.predict(inputs[test_indx])
linear_predicted = linear_model.predict(inputs[test_indx])
rtree_predicted = rtree.predict(inputs[test_indx])

nn_known_predicted = nn_model.predict(inputs[train_indx])
linear_known_predicted = linear_model.predict(inputs[train_indx])
rtree_known_predicted = rtree.predict(inputs[train_indx])

# target vs predicted
plt.figure()
plt.plot(output[test_indx], nn_predicted, '.', label='Neural net', color='r')
# The excerpt cuts off mid-call here; it plausibly continues by plotting
# the linear model's predictions the same way:
plt.plot(output[test_indx], linear_predicted, '.', label='Linear', color='b')
# In[10]:
from sklearn import linear_model

# Choosing the linear model
linear_model = linear_model.Lasso(alpha=0.01)

# Preparing the model by setting up target and features
features = [
    "Cement", "Blast_Furnace_Slag", "Fly_Ash", "Water", "Superplasticizer",
    "Coarse_Aggregate", "Fine_Aggregate", "Age"
]
target = "Concrete_Compressive_Strength"

# Fitting the linear model
linear_model.fit(df[features], df[target])

# Coefficients of the linear model
pd.DataFrame([dict(zip(features, linear_model.coef_))])

# ## 7. Generate predictions for all the observations and a scatterplot
# comparing the predicted compressive strengths to the actual values.

# In[11]:
preds = linear_model.predict(df[features])
predictions_df = df.assign(predictions=preds)
predictions_df[["Concrete_Compressive_Strength", "predictions"]]

# In[12]:
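# The heading above asks for a scatterplot of predicted vs. actual values,
# but the cell body is missing from the excerpt. A minimal sketch, assuming
# matplotlib is available as plt:
import matplotlib.pyplot as plt

plt.scatter(predictions_df["Concrete_Compressive_Strength"],
            predictions_df["predictions"], alpha=0.5)
plt.xlabel("Actual compressive strength")
plt.ylabel("Predicted compressive strength")
plt.show()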
linear_Y = policy_claim_train_after_preprocessing["Next_Premium"]
# Column-name glosses: 竊盜 = theft, 車損 = vehicle damage, 女 = female,
# 法人 = corporate entity, 國產車 = domestically made car.
# Note: "plia_acc" appears twice in this list, so the design matrix carries
# a duplicated column; kept as in the original, since the recorded summary
# below (Df Model: 16) was produced with it.
linear_X = policy_claim_train_after_preprocessing[[
    "Last_Renewal", "竊盜", "車損", "女", "法人", "國產車", "lia_class",
    "plia_acc", "plia_acc", "claim",
    "Engine_Displacement_(Cubic_Centimeter)",
    "Manafactured_Year_and_Month", "age",
    "Replacement_cost_of_insured_vehicle", "Coverage_Deductible_if_applied",
    "Insured_Amount2", "Insured_Amount3"
]]
linear_model = sm.OLS(linear_Y, linear_X)
result = linear_model.fit()
print(result.summary())

# OLS Regression Results
# ==============================================================================
# Dep. Variable:           Next_Premium   R-squared:                       0.541
# Model:                            OLS   Adj. R-squared:                  0.541
# Method:                 Least Squares   F-statistic:                 7.807e+04
# Date:                Sun, 09 Sep 2018   Prob (F-statistic):               0.00
# Time:                        17:16:27   Log-Likelihood:            -1.1010e+07
# No. Observations:             1061148   AIC:                         2.202e+07
# Df Residuals:                 1061132   BIC:                         2.202e+07
# Df Model:                          16
# Covariance Type:            nonrobust
# ==========================================================================================================
#                 coef    std err          t      P>|t|      [0.025      0.975]
# (The opening of this call is truncated in the excerpt; reconstructed from
# the identical test snippet further below.)
X, y, coef = datasets.make_regression(n_samples=100, n_features=2,
                                      n_informative=2, n_targets=1,
                                      coef=True, random_state=1)
print('Actual coefficients: {}\n'.format(coef))

# Prepend a column of ones for the intercept term.
ones = np.ones((len(X), 1))
X = np.append(ones, X, axis=1)

# sklearn.cross_validation was renamed sklearn.model_selection in newer releases.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=.2)
print('\nTraining data (X/y): \n{}\n{}\n'.format(X_train[:5], y_train[:5]))

linear_model = linear_model.LinearRegression()
linear_model.fit(X_train, y_train)
print('Coefficients (scikit-learn): {}\n'.format(linear_model.coef_))
print('Intercept (scikit-learn): {}\n'.format(linear_model.intercept_))
print('Accuracy (scikit-learn train): {}\n'.format(
    metrics.r2_score(y_train, linear_model.predict(X_train))))
print('Accuracy (scikit-learn test): {}\n'.format(
    metrics.r2_score(y_test, linear_model.predict(X_test))))

initial_weights = np.zeros(X_train.shape[1])
step_size = 1e-7
tolerance = 1e-3
weights = regression_gradient_descent(X_train, y_train, initial_weights,
                                      step_size, tolerance)
print('Coefficients (from scratch): {}\n'.format(weights))
plt.xlabel('F (N)')
plt.ylabel('a (m/s2)')
plt.legend()
plt.title('Data = Signal + Noise')

# **The aim of machine learning is to find the signal**

# In[36]:
# We know the signal, let's see if ML can find it
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression()
linear_model.fit(F, a)

# In[37]:
# Signal vs. Fit
plt.figure(dpi=200)
plt.scatter(F, a, label='data')
plt.plot(F, F/m, label='signal', color='orange')
plt.plot(F, linear_model.predict(F), label='fit')
for i in range(len(F)):
    # Dashed verticals show the noise: the gap between signal and data.
    plt.plot([F[i], F[i]], [F[i]/m, a[i]], 'k--',
             label='noise' if i == 0 else None)
plt.xlabel('F (N)')
X = data.X.values
X = X.reshape(-1, 1)
X
y = data.Y.values
y = y.reshape(-1, 1)
y

#%%%
from sklearn import linear_model

lm = linear_model.LinearRegression()
model1 = lm.fit(X, y)
print(model1)

model1.score(X, y)   # R2
# Coefficients
model1.coef_         # b1 coef
model1.intercept_    # b0 coef

y_pred1 = model1.predict(X)
y_pred1
scoring = 'accuracy'

# Spot-check algorithms
lr = LogisticRegression(C=6)  # observed accuracies: 42,44,44,45,45,45
#lr = SVC(C=6.0, kernel='linear')
#models.append(('SVM', SVC(C=6.0, kernel='linear')))  # 43,43,43,43,43,43

# evaluate the model with 10-fold cross-validation
# (newer scikit-learn requires shuffle=True when random_state is set)
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
cv_results = model_selection.cross_val_score(lr, X_train, Y_train, cv=kfold,
                                             scoring=scoring)
msg = "logistic R: mean = %f, standard deviation = (%f)\n" % (
    cv_results.mean(), cv_results.std())
print(msg)

lr.fit(X_train, Y_train)
predictions = lr.predict(X_validation)
print("accuracy: ", accuracy_score(Y_validation, predictions))
print("confusion matrix:\n ", confusion_matrix(Y_validation, predictions))
print("classification report:\n", classification_report(Y_validation, predictions))

prediction_video = lr.predict([ext])
print("predicted video label:", prediction_video)
# Test
# -------------------------------------------------------------------------------
X, y, coef = datasets.make_regression(n_samples=100, n_features=2,
                                      n_informative=2, n_targets=1,
                                      coef=True, random_state=1)
print('Actual coefficients: {}\n'.format(coef))

# Prepend a column of ones for the intercept term.
ones = np.ones((len(X), 1))
X = np.append(ones, X, axis=1)

X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=.2)
print('\nTraining data (X/y): \n{}\n{}\n'.format(X_train[:5], y_train[:5]))

# Rebinds the module name to an estimator instance; harmless here because
# the linear_model module is not referenced again.
linear_model = linear_model.LinearRegression()
linear_model.fit(X_train, y_train)
print('Coefficients (scikit-learn): {}\n'.format(linear_model.coef_))
print('Intercept (scikit-learn): {}\n'.format(linear_model.intercept_))
print('Accuracy (scikit-learn train): {}\n'.format(
    metrics.r2_score(y_train, linear_model.predict(X_train))))
print('Accuracy (scikit-learn test): {}\n'.format(
    metrics.r2_score(y_test, linear_model.predict(X_test))))

initial_weights = np.zeros(X_train.shape[1])
step_size = 1e-7
tolerance = 1e-3
weights = regression_gradient_descent(X_train, y_train, initial_weights,
                                      step_size, tolerance)
print('Coefficients (from scratch): {}\n'.format(weights))
print('Accuracy (from scratch train): {}\n'.format(
    metrics.r2_score(y_train, predict_output(X_train, weights))))
print('Accuracy (from scratch test): {}\n'.format(
    metrics.r2_score(y_test, predict_output(X_test, weights))))
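# Neither regression_gradient_descent nor predict_output appears in this
# excerpt. A minimal sketch of what they plausibly look like, given how
# they are called above (gradient descent on the squared-error cost):
import numpy as np

def predict_output(feature_matrix, weights):
    # Dot product of features and weights gives the predictions.
    return np.dot(feature_matrix, weights)

def regression_gradient_descent(feature_matrix, output, initial_weights,
                                step_size, tolerance):
    weights = np.array(initial_weights, dtype=float)
    while True:
        errors = predict_output(feature_matrix, weights) - output
        # Gradient of the squared-error cost: 2 * X^T (Xw - y).
        gradient = 2 * np.dot(feature_matrix.T, errors)
        weights -= step_size * gradient
        # Stop once the gradient magnitude falls below the tolerance.
        if np.sqrt(np.sum(gradient ** 2)) < tolerance:
            return weights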
    # joining the data of two tables
    consolidated_data = pd.merge(country_satisfacton_value, coutry_gdp_values,
                                 on='Country')
    consolidated_data.columns = [
        'Country', 'Life Satisfaction', 'GDP per Capita'
    ]
    print(consolidated_data)
    return consolidated_data

consolidated_data = prepare_country_stats(oecd_bli, gdp_per_capita)
X = np.c_[consolidated_data["GDP per Capita"]]
Y = np.c_[consolidated_data["Life Satisfaction"]]

# Visualize the data
consolidated_data.plot(kind='scatter', x='GDP per Capita', y='Life Satisfaction')
plt.show()

# Linear model
linear_model = sklearn.linear_model.LinearRegression()

# Training the model
linear_model.fit(X, Y)

# Make a prediction for Cyprus
X_new = [[22587]]  # Cyprus's GDP per capita
print(linear_model.predict(X_new))