예제 #1
0
def residual_plot(model_properties=None, output_path=None):
    '''
    Save the residual plot of the trained model as 'residual_plot.png'.

    Parameters
    ----------
    model_properties : dict
        Must provide the keys 'estimator', 'X_train', 'y_train',
        'X_validation', 'y_validation', 'config_map', 'X_scaler' and
        'y_scaler'. 'config_map' must provide 'scale_columns' and 'label'.
    output_path : str
        Directory in which 'residual_plot.png' is written.

    Raises
    ------
    ValueError
        If either argument is None.
    '''
    if model_properties is None or output_path is None:
        raise ValueError('Need Model properties and Output path as arguments !')
    estimator = model_properties['estimator']
    config_map = model_properties['config_map']
    X_scaler = model_properties['X_scaler']
    y_scaler = model_properties['y_scaler']

    # Work on copies: the scaled values are assigned back into the frames,
    # and doing that on the caller's objects would scale the same data a
    # second time if this function (or any other consumer) ran again.
    X_train = model_properties['X_train'].copy()
    y_train = model_properties['y_train'].copy()
    X_validation = model_properties['X_validation'].copy()
    y_validation = model_properties['y_validation'].copy()

    scale_columns = config_map['scale_columns']
    label = config_map['label']
    X_train[scale_columns] = X_scaler.transform(X_train[scale_columns])
    y_train[label] = y_scaler.transform(y_train[label])
    X_validation[scale_columns] = X_scaler.transform(
        X_validation[scale_columns])
    y_validation[label] = y_scaler.transform(y_validation[label])

    visualizer = ResidualsPlot(estimator)
    visualizer.fit(X_train.values, y_train.values)
    visualizer.score(X_validation.values, y_validation.values)
    visualizer.poof(outpath=os.path.join(output_path, 'residual_plot.png'))
    return None
예제 #2
0
def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    """Draw a residuals plot for ``model`` and write it to ``outpath``.

    The data is split with ``train_test_split`` (``test_size=0.2``); the
    visualizer is fitted on the first split and scored on the second.
    Extra keyword arguments are forwarded to ``ResidualsPlot``.
    """
    split = train_test_split(X, y, test_size=0.2)
    features_fit, features_eval, target_fit, target_eval = split

    axes = plt.subplots()[1]

    residuals = ResidualsPlot(model, ax=axes, **kwargs)
    residuals.fit(features_fit, target_fit)
    residuals.score(features_eval, target_eval)
    residuals.poof(outpath=outpath)
예제 #3
0
def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    """Save the residuals plot of ``model`` to ``outpath``.

    ``X`` and ``y`` are split by ``train_test_split`` with
    ``test_size=0.2``. The plot is fitted on the training part and scored
    on the test part. ``**kwargs`` go straight to ``ResidualsPlot``.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.2)

    _figure, axis = plt.subplots()

    plot = ResidualsPlot(model, ax=axis, **kwargs)
    plot.fit(train_X, train_y)
    plot.score(test_X, test_y)
    plot.poof(outpath=outpath)
예제 #4
0
def residuals_plot(model, X_test, y_test, road):
    """
    Score a residuals plot on test data and hand ``road`` to ``poof``.

    param
    model : the already-trained model
    X_test : test-set data
    y_test : test-set labels
    road : passed as the first positional argument of ``poof``
    """
    plot = ResidualsPlot(model)
    plot.score(X_test, y_test)
    plot.poof(road)
예제 #5
0
	def visualize_residuals_plot(self, model_info):
		"""Draw the residuals plot described by ``model_info``.

		``model_info`` must provide the keys 'model', 'X_train', 'X_test',
		'Y_train' and 'Y_test'.
		"""
		# Read every key up front so a missing one fails before any fitting.
		wanted = ('model', 'X_train', 'X_test', 'Y_train', 'Y_test')
		model, X_train, X_test, Y_train, Y_test = [
			model_info[key] for key in wanted
		]

		residuals = ResidualsPlot(model)
		residuals.fit(X_train, Y_train)    # fit on the training data
		residuals.score(X_test, Y_test)    # evaluate on the test data
		residuals.poof()                   # draw the figure
예제 #6
0
def testFunc7(savepath='Results/bikeshare_LinearRegression_ResidualsPlot.png'):
    '''
    Fit a linear regression on the bike-share data and save its residuals plot.

    Reads 'fixtures/bikeshare/bikeshare.csv', predicts the "riders" column
    from the listed feature columns, and writes the figure to ``savepath``.
    '''
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
    visualizer = ResidualsPlot(LinearRegression())
    # Fit on the training split and score on the held-out split. Fitting on
    # the test split would leave the 70% training data unused and show no
    # out-of-sample residuals.
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=savepath)
예제 #7
0
def test_for_homoscedasticity(X_train, y_train, X_test, y_test):
    """ Draw a linear-regression residuals plot to check for homoscedasticity.
    Arguments:
    X_train (dataframe): examples in the training set
    y_train (dataframe): target in the training set
    X_test (dataframe): examples in the test set
    y_test (dataframe): target in the test set
    """
    residuals = ResidualsPlot(LinearRegression())
    residuals.fit(X_train, y_train)
    residuals.score(X_test, y_test)
    # Homoscedastic residuals should show no clear pattern.
    residuals.poof()
예제 #8
0
def visualize_pred_residuals(X_train, X_test, y_train, y_test):
    """Fit a Ridge model (alpha=0.05), plot its residuals and print scores.

    Prints, in order: element 2 of ``stats.linregress(predictions, y_test)``,
    then the train and test scores of a 10-fold ``cross_validate`` run on
    the test split.
    """
    ridge = linear_model.Ridge(alpha=0.05)
    trained = ridge.fit(X_train, y_train)
    residuals = ResidualsPlot(trained, size=(1080, 720))

    predictions = trained.predict(X_test)
    print(stats.linregress(predictions, y_test)[2])

    residuals.fit(X_train, y_train)
    residuals.score(X_test, y_test)
    residuals.poof()

    cv_scores = model_selection.cross_validate(
        ridge, X_test, y_test, cv=10, return_train_score=True)
    print('Training scores:', cv_scores['train_score'], '\n')
    print('Testing scores:', cv_scores['test_score'])
예제 #9
0
def vis_residuals(model, features, target):
    '''
    Fit a residuals plot of size (1080, 720) for ``model`` on
    ``features``/``target`` and return whatever ``poof`` returns.
    '''
    plot = ResidualsPlot(model, size=(1080, 720))
    plot.fit(features, target)
    return plot.poof()
예제 #10
0
    def slr(self, iv, dv, plot_relationship=False, plot_residuals=True):
        """Fit a simple linear regression of ``dv`` on ``iv`` and report it.

        The fitted model is stored in ``self.slr_model``. The equation is
        printed, and the RMSE on the fitting data is written to
        ``self.df_rmse.loc["Linear"]`` and printed.

        Parameters
        ----------
        iv : column label of the independent variable in ``self.data``
        dv : column label of the dependent variable in ``self.data``
        plot_relationship : bool
            When true, draw the seaborn ``lmplot`` with the fitted line.
        plot_residuals : bool
            When true, draw a Yellowbrick residuals plot.
        """

        # Create simple linear regression model
        self.slr_model = LinearRegression(fit_intercept=True)
        y = self.data[dv]
        x = self.data[iv]

        # scikit-learn needs a 2-D feature matrix. Reshape the underlying
        # array once instead of indexing the Series with ``[:, np.newaxis]``,
        # which current pandas releases reject.
        x_column = x.values.reshape(-1, 1)

        self.slr_model.fit(x_column, y)

        xfit = np.linspace(-4, 4, 1000)
        yfit = self.slr_model.predict(xfit[:, np.newaxis])

        if plot_relationship:
            sns.lmplot(x=iv, y=dv, data=self.data, height=7, aspect=1.25)
            plt.plot(xfit, yfit)
            plt.ylabel(dv)
            plt.xlabel(iv)
            plt.title("{} = {} • {} + {}".format(dv, round(self.slr_model.coef_[0], 5), iv,
                                               round(self.slr_model.intercept_, 5)))
            plt.subplots_adjust(left=.095, right=.95, top=.9, bottom=.15)
            plt.xlim(-100, max(self.data["Counts"])*1.1)

        if plot_residuals:
            from yellowbrick.regressor import ResidualsPlot

            # Instantiate the visualizer around the fitted linear model
            visualizer = ResidualsPlot(model=self.slr_model)

            visualizer.fit(x_column, y)  # Fit the training data to the model
            visualizer.poof()

        print("Simple Linear Regression\n{} = {} • {} + {}".format(dv, round(self.slr_model.coef_[0], 5), iv,
                                                                   round(self.slr_model.intercept_, 5)))

        # RMSE of the model on the data it was fitted on
        y_predict = self.slr_model.predict(x_column)
        rmse = sqrt(((y - y_predict) ** 2).values.mean())

        self.df_rmse.loc["Linear"] = round(rmse, 5)
        print("\n", self.df_rmse)
예제 #11
0
class PrincipalComponentRegressor(Regressor):
    """Linear regression fitted on a PCA projection of the inputs."""

    def __init__(self, n_components):
        super().__init__()
        self.n_components = n_components
        self.regressor = LinearRegression()
        self.pca = None

    def fit(self, x_train, y_train, standardize=False):
        """Project ``x_train`` with PCA, fit the regression, run inference.

        ``standardize`` is accepted but not read by this method.

        Returns a tuple ``(intercept, coefficients, p-values, score)`` where
        the score is computed on the projected training data.
        """
        self.pca = PCA(self.n_components)
        projected = self.pca.fit_transform(x_train)
        self.x_train = projected
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train)
        self._inference()
        summary = (
            self.regressor.intercept_,
            self.regressor.coef_,
            self.p,
            self.regressor.score(self.x_train, y_train),
        )
        return summary

    def predict(self, x_test):
        """Predict for ``x_test``.

        The input is projected with the fitted PCA first; if the projection
        raises ``ValueError`` the input is passed to the regressor as given.
        """
        try:
            components = self.pca.transform(x_test)
        except ValueError:
            components = x_test
        return self.regressor.predict(components)

    def residual_plot(self, x_test=None, y_test=None):
        """Draw a residuals plot, scoring on the test data when both given."""
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)

        try:
            plot = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            plot = ResidualsPlot(self.regressor.regressor)
        self.residual_visualizer = plot

        self.residual_visualizer.fit(self.x_train, self.y_train)
        have_test_data = x_test is not None and y_test is not None
        if have_test_data:
            try:
                self.residual_visualizer.score(x_test, y_test)
            except ValueError:
                # Retry with the PCA-projected test inputs.
                x_test = self.pca.transform(x_test)
                self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()
def main():
    """Fit a simple linear regression on 'plano-saude.csv' and plot it.

    Column 0 of the CSV is the single feature and column 1 the target.
    A scatter plot with the fitted line and a Yellowbrick residuals plot
    are drawn.
    """
    frame = pd.read_csv('plano-saude.csv')
    # .values converts each column to a numpy array
    feature = frame.iloc[:, 0].values
    target = frame.iloc[:, 1].values
    corr_coef = np.corrcoef(feature, target)
    # scikit-learn estimators need the features as a matrix
    feature = feature.reshape(-1, 1)

    # train the model
    regression = LinearRegression()
    regression.fit(feature, target)
    b0 = regression.intercept_
    b1 = regression.coef_

    plt.scatter(feature, target)
    plt.plot(feature, regression.predict(feature), color='red')
    plt.title('Regressão linear simples')
    plt.xlabel('Idade')
    plt.ylabel('Custo')

    value = np.asarray([40]).reshape(-1, 1)
    prevision1 = regression.predict(value)
    # y = b0 + b1 * x1
    prevision2 = b0 + b1 * value
    # score of the regression on the data it was fitted on
    score = regression.score(feature, target)
    # residuals plot for a better view of the data
    visualizer = ResidualsPlot(regression)
    visualizer.fit(feature, target)
    # the "Train R²" shown in the plot is the same thing as regression.score
    visualizer.poof()
예제 #13
0
class RandForestRegressor(Regressor):
    """``Regressor`` wrapper around ``RandomForestRegressor``."""

    def __init__(self):
        super().__init__()
        self.regressor = RandomForestRegressor()

    def fit(self, x_train, y_train, standardize=False):
        """Fit the forest and return ``self.rsquared``.

        When ``standardize`` is true the scaler is fitted on ``x_train`` and
        the scaled inputs are what gets stored and fitted.
        """
        self.standardize = standardize
        if self.standardize:
            self.standardizescaler.fit(x_train)
            x_train = self.standardizescaler.transform(x_train)

        self.x_train = x_train
        self.y_train = y_train
        # The estimator is given the target flattened to 1-D.
        self.regressor.fit(self.x_train, self.y_train.ravel())
        self._inference()
        return self.rsquared

    def residual_plot(self, x_test=None, y_test=None):
        """Draw a residuals plot, scoring on the test data when both given."""
        # Both test arguments default to None, so only scale inputs that were
        # actually supplied; the scaler cannot transform None.
        if self.standardize and x_test is not None:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)

        y_train = self.y_train.ravel()
        self.residual_visualizer.fit(self.x_train, y_train)
        if x_test is not None and y_test is not None:
            y_test = y_test.ravel()
            self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()

    def predict(self, x_test):
        """Return predictions for ``x_test`` as a column (shape ``(-1, 1)``)."""
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        return self.regressor.predict(x_test).reshape(-1, 1)
def generate_ordinal_diagnostics(x, y, current_best_model, label_type,
                                 diagnostic_image_path):
    """Write diagnostic plots and return cross-validated error metrics.

    ``current_best_model[0]`` is the estimator. Predictions are collected
    over a shuffled 10-fold split and summarised in the returned dict
    (keys 'mse', 'r2', 'mae', 'evs', 'rmse'). Residuals and prediction-error
    plots are written only when the estimator's class name does not contain
    "VotingClassifier"; the two PCA plots are always written.
    ``label_type`` is not read by this function.
    """
    x = np.array(x)
    y = np.array(y)
    estimator = current_best_model[0]

    # (actual, predicted) pairs gathered fold by fold
    guesses = []
    folds = KFold(n_splits=10, shuffle=True)
    for train_index, test_index in folds.split(x):
        fold_model = estimator.fit(x[train_index], y[train_index])
        actual = y[test_index].tolist()
        predicted = fold_model.predict(x[test_index]).tolist()
        guesses.extend(zip(actual, predicted))

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    if "VotingClassifier" not in str(estimator.__class__):
        visualizer = ResidualsPlot(estimator)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/residuals_plot.png")
        plt.clf()
        visualizer = PredictionError(estimator)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path +
                        "/prediction_error.png")
        plt.clf()

    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=2)
    visualizer.fit_transform(x, y)
    print(diagnostic_image_path + "/pca_2.png")
    visualizer.poof(outpath=diagnostic_image_path + "/pca_2.png")
    plt.clf()
    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(x, y)
    visualizer.poof(outpath=diagnostic_image_path + "/pca_3.png")
    plt.clf()

    # Rows of ``paired`` are passed as the positional arguments of each metric.
    paired = np.array(guesses).transpose()
    mse = mean_squared_error(*paired)
    metrics = {
        "mse": mse,
        "r2": r2_score(*paired),
        "mae": median_absolute_error(*paired),
        "evs": explained_variance_score(*paired),
        "rmse": np.sqrt(mse),
    }
    return metrics
예제 #15
0
def showResiduals():
    """Draw the residuals plot of a Ridge model on the 'concrete' data set."""
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame. ``.values`` is used because
    # ``as_matrix()`` no longer exists in current pandas releases.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the linear model and visualizer
    ridge = Ridge()
    visualizer = ResidualsPlot(ridge)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data
예제 #16
0
import pandas as pd

from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import ResidualsPlot


if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = ['cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age']
    target_name = 'strength'

    # Get the X and y data from the DataFrame. ``.values`` is used because
    # ``as_matrix()`` no longer exists in current pandas releases.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    ridge = Ridge()
    visualizer = ResidualsPlot(ridge)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    # Write the figure to disk instead of showing it
    g = visualizer.poof(outpath="images/residuals.png")
# Report R^2 of the log-transform model on its test split.
print(f"Test R2 is {lr_log.score(X=X_test_log, y=y_test_log)}")

# There is a slight improvement (~2%) in the train R2 and test R2 utilizing log transform

# + [markdown] pycharm={"name": "#%% md\n"}
# ## Model Evaluation - Linear Regression
# ### The following section evaluates the random error, constant variance and normal distribution with mean 0 assumption of linear model in the context of the four initial models utilizing a residual plot from Yellowbrick.
#
# -

# Residual Plot for Huber LR with no log-transform
from yellowbrick.regressor import ResidualsPlot
rpv_hr = ResidualsPlot(hr)
rpv_hr.fit(X=X_train, y=y_train)    # fit on the training split
rpv_hr.score(X=X_test, y=y_test)    # score on the test split
rpv_hr.poof()

# Residual plot for `lr` on the same untransformed splits
# (presumably the plain linear regression -- confirm where `lr` is defined)
rpv_lr = ResidualsPlot(lr)
rpv_lr.fit(X=X_train, y=y_train)
rpv_lr.score(X=X_test, y=y_test)
rpv_lr.poof()

# Residual Plot for LR with log transform
rpv_lr_log = ResidualsPlot(lr_log)
rpv_lr_log.fit(X=X_train_log, y=y_train_log)
rpv_lr_log.score(X=X_test_log, y=y_test_log)
rpv_lr_log.poof()

# + [markdown] pycharm={"name": "#%% md\n"}
# ## Model Evaluation of Ordinary Least Squares -Log Transform
# - Evaluation of log-transformed OLS model as the residuals plot appeared to satisfy most of the principal assumptions of linear regression.
예제 #18
0
class Regressor:
    """Base wrapper that stores training data, fits ``self.regressor`` and
    derives simple inference statistics (R², SSE/SST, standard errors,
    t-values and p-values).

    ``self.regressor`` is ``None`` after construction; it must be assigned
    before ``fit`` is called.
    """

    def __init__(self):
        self.parameters = dict()
        self.regressor = None
        self.sse = None
        self.sst = None
        self.adjrsquared = None
        self.rsquared = None
        self._x_train = None
        self._y_train = None
        self.n = None
        # x_k refers to the number of the predictors of x_train
        self.x_k = None
        # y_k refers to the number of the responses of y_train
        self.y_k = None
        self.p = None
        self.standardize = None
        self.standardizescaler = StandardScaler()
        self.residual_visualizer = None

    @property
    def x_train(self):
        return self._x_train

    @x_train.setter
    def x_train(self, x_train):
        """Store the training inputs; 1-D input is reshaped to one column."""
        self._x_train = x_train
        try:
            self.x_k = x_train.shape[1]
            self.n = x_train.shape[0]
        except IndexError:
            self.x_k = 1
            self.n = x_train.shape[0]
            self._x_train = self._x_train.reshape(-1, 1)

    @property
    def y_train(self):
        return self._y_train

    @y_train.setter
    def y_train(self, y_train):
        """Store the training targets; 1-D input is reshaped to one column."""
        self._y_train = y_train
        try:
            self.y_k = y_train.shape[1]
        except IndexError:
            # A 1-D target is a single response stored as one column, so the
            # response count is 1 (not the number of samples).
            self.y_k = 1
            self._y_train = self._y_train.reshape(-1, 1)

    def _inference(self):
        """Compute R², adjusted R², SST, SSE and, when possible, standard
        errors, t-values and p-values of the coefficients.

        Returns early (leaving the later statistics unset) when the normal
        matrix cannot be inverted or no coefficients are available.
        """
        try:
            self.rsquared = self.regressor.score(self.x_train, self.y_train)
        except AttributeError:
            self.rsquared = self.regressor.regressor.score(
                self.x_train, self.y_train)

        self.adjrsquared = ME.ModelEvaluation.AdjRsquared(self)

        # Store some info of the model.
        self.sst = np.sum((self.y_train - np.mean(self.y_train, axis=0))**2,
                          axis=0)
        self.sse = np.sum((self.predict(self.x_train) - self.y_train)**2,
                          axis=0)
        self.sse_scaled = self.sse / float(self.x_train.shape[0] -
                                           self.x_train.shape[1])

        # Wrap a scalar in a list so the later arithmetic and iteration
        # treat it like the per-response case.
        if isinstance(self.sse_scaled, np.float64):
            self.sse_scaled = [self.sse_scaled]
        try:
            if not self.standardize:
                # Centre the inputs when they were not standardized.
                x_train = self.x_train - np.mean(self.x_train, axis=0)
            else:
                x_train = self.x_train
            var_beta = self.sse_scaled * (np.linalg.inv(
                np.dot(x_train.T, x_train)).diagonal())
            self.se = np.sqrt(var_beta)
        except (np.linalg.LinAlgError, TypeError):
            # ``np.linalg.LinAlgError`` is the public name of the exception.
            return

        try:
            self.t = self.regressor.coef_ / np.array(self.se)
        except AttributeError:
            try:
                self.t = self.parameters['beta'] / self.se
            except KeyError:
                return
        self.p = [
            2 * (1 - stats.t.cdf(np.abs(i), (len(x_train) - 1)))
            for i in self.t
        ]

    def fit(self, x_train, y_train, standardize=False):
        """Fit ``self.regressor`` and return
        ``(intercept, coefficients, p-values, score)``.

        When ``standardize`` is true the scaler is fitted on ``x_train`` and
        the scaled inputs are used for fitting and scoring.
        """
        self.standardize = standardize
        if self.standardize:
            self.standardizescaler.fit(x_train)
            x_train = self.standardizescaler.transform(x_train)
        self.x_train = x_train
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train)
        self._inference()
        return self.regressor.intercept_, self.regressor.coef_, self.p, self.regressor.score(
            x_train, y_train)

    def predict(self, x_test):
        """Predict for ``x_test``, scaling it first when standardizing."""
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            return self.regressor.predict(x_test)
        except AttributeError:
            return self.regressor.predict(x_test=x_test)

    def regression_plot(self, x_test, y_test):
        """Scatter the test data and draw the regressor's prediction line."""
        scatter = plt.scatter(x_test, y_test, color='b')
        line = plt.plot(x_test, self.regressor.predict(x_test), color='r')
        plt.ylabel('response')
        plt.xlabel('explanatory')
        plt.legend(handles=[
            scatter,
            line[0],
        ],
                   labels=[
                       'Scatter Plot',
                       'Intercept:{}, Slope:{},\n R-square:{}'.format(
                           self.regressor.intercept_, self.regressor.coef_,
                           self.regressor.score(x_test, y_test))
                   ],
                   loc='best')
        plt.title('Scatter Plot and Regression')

    def residual_plot(self, x_test=None, y_test=None):
        """Draw a residuals plot, scoring on the test data when both given."""
        # Both test arguments default to None, so only scale inputs that were
        # actually supplied; the scaler cannot transform None.
        if self.standardize and x_test is not None:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)

        self.residual_visualizer.fit(self.x_train, self.y_train)
        if x_test is not None and y_test is not None:
            self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()

    def get_score(self, x_test, y_test):
        """Return the regressor's score on the (optionally scaled) test data."""
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            return self.regressor.score(x_test, y_test)
        except AttributeError:
            return self.regressor.Get_Score(x_test, y_test)
예제 #19
0
# Score the already-fitted `lr` on the test split
lr.score(X_test, y_test)


### Yellowbrick

from yellowbrick.regressor import PredictionError, ResidualsPlot

## RVF plot

# Run the following together

# Residuals plot with the histogram panel enabled (hist=True)
lr_yb = ResidualsPlot(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

## Prediction Error plot

# Rebinds `lr_yb`; the residuals visualizer above is no longer referenced.
# NOTE(review): `hist=True` is passed to PredictionError as well -- confirm
# that visualizer accepts it.
lr_yb = PredictionError(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()



################ Polynomial/Interactions ################


from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures # adds polynomials and interactions
예제 #20
0
# Split the data: column 0 is the feature, column 1 the target
X = df.values[:,0]
y = df.values[:,1]

# Reshape the single feature into a column matrix for the estimator
X = X.reshape(-1,1)

# Build and fit the model
model = LinearRegression()
model.fit(X,y)
y_pred = model.predict(X)


# Visualisation
plt.plot(X,y_pred,color='red')        # regression line
plt.scatter(x=X,y=y)                  # data points
plt.title("Regressão Linear Simples") # title
plt.xlabel("Idade")                   # X axis
plt.ylabel("Custo");                  # Y axis

# Residuals plot of the fitted model on the same data
visual = ResidualsPlot(model)
visual.fit(X,y)
visual.poof()

# Score of the model on the data it was fitted on
model.score(X,y)




예제 #21
0
# Single-variable regression of y on `xrm`
reg = LinearRegression()
reg.fit(xrm, y)
print(reg.score(xrm, y))

# Points spanning the range of `xrm`, as a column, to draw the fitted line
xx = np.linspace(min(xrm), max(xrm)).reshape(-1, 1)
plt.scatter(xrm, y, color="blue")
plt.plot(xx, reg.predict(xx), color="red", linewidth=3)
plt.ylabel("y: Value of house / 1000 USD")
plt.xlabel("x: Number of rooms")
plt.show()

# Residuals plot without the histogram panel; fitted and scored on the
# same data, since this part has no train/test split
from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(reg, hist=False)
visualizer.fit(xrm, y)
visualizer.score(xrm, y)
visualizer.poof()

# use data multi var
# split data: 70%-training 30%-testing
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)
# Rebinds `reg` to a new model fitted on the multi-variable training split
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
# Note: this prints the score on the training split
print("R^2 = ", reg.score(x_train, y_train))

from yellowbrick.regressor import ResidualsPlot
viz = ResidualsPlot(reg, hist=False)
예제 #22
0
from yellowbrick.regressor import ResidualsPlot

# Load the cars data and drop the 'Unnamed: 0' column
dataset_cars = pd.read_csv('cars.csv')
dataset_cars = dataset_cars.drop(['Unnamed: 0'], axis=1)

# After the drop, column 1 is the feature and column 0 the target
X = dataset_cars.iloc[:, 1].values
y = dataset_cars.iloc[:, 0].values
correlation = np.corrcoef(X, y)

# Reshape the single feature into a column matrix for the estimator
X = X.reshape(-1, 1)

model_linear_regression = LinearRegression()
model_linear_regression.fit(X, y)

print("interception of the trained model: ",
      model_linear_regression.intercept_)
print("Inclination of trained model: ", model_linear_regression.coef_)

# Scatter of the data with the fitted line
plt.scatter(X, y)
plt.plot(X, model_linear_regression.predict(X), color='red')

# Single query point, shaped (1, 1)
distance_stop = np.array([[22]])

# The prediction is computed but not stored or printed
model_linear_regression.predict(distance_stop)

# NOTE(review): `_residues` is an underscore-prefixed (private) attribute --
# confirm it exists in the installed scikit-learn version.
print("Residues of the trained model: ", model_linear_regression._residues)

# Residuals plot of the fitted model on the same data
visualization = ResidualsPlot(model_linear_regression)
visualization.fit(X, y)
visualization.poof()
예제 #23
0
# Save the figure of `rank` (created outside this fragment)
rank.poof(outpath="lasso_rank2d.png")

# Feature Importances (naive, 18 variable case)
fig = plt.figure()
ax = fig.add_subplot()
featimp = FeatureImportances(lasso, ax=ax)
featimp.fit(Xt, ytrain)
featimp.poof(outpath="lasso_featureimportances18.png")

# Residuals Plot (fresh figure/axes so it does not draw over the one above)
fig = plt.figure()
ax = fig.add_subplot()
resplot = ResidualsPlot(lasso, ax=ax)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
resplot.poof(outpath="lasso_resplot.png")

# Actual vs Predicted
lasso.fit(Xtrain, ytrain)
yhat = lasso.predict(Xtest)
error = ytest - yhat
# One row per test observation, keyed by the 'date' column of `test`
data = pd.DataFrame({
    't': test['date'],
    'ytest': ytest,
    'yhat': yhat,
    'error': error,
    'neg_error': np.negative(error),
    'dless': dless
})
fig, ax = plt.subplots()
# Plot the actual values over time, selecting columns of `data` by name
plt.plot('t', 'ytest', data=data, color='blue', linewidth=1, label='actual')
# y = B0 + B1*X
# Coefficients
# B0
regressor.intercept_

# B1
regressor.coef_

# Plot the data and the fitted line
plt.scatter(X, y)
plt.plot(X, regressor.predict(X), color="green")
plt.title("Regressão Linear Simples")
plt.xlabel("idade")
plt.ylabel("Custo")
plt.show()

# Using the model by hand
predict1 = regressor.intercept_ + regressor.coef_*40   # 40 = chosen input value [age]
print(predict1)

# Show the R²
# R² says how much of the outcome the model explains. It is a value between
# 0 and 1; the closer to 1 the better. (This reasoning is not always valid
# and needs analysis.)
score = regressor.score(X, y)
print(score)

# Distance from the real data to the fitted line; this plot also shows R²
vizualizador = ResidualsPlot(regressor)
vizualizador.fit(X, y)
vizualizador.poof()
예제 #25
0
# Features: every column of `df_new` except the target and the listed ones
x_train = df_new.drop(columns=[
    'Total', 'Precipitation', 'High Temp (°F)', 'Low Temp (°F)', 'Date', 'Day'
])
y_train = df_new['Total']

#%%
from sklearn import preprocessing
from sklearn.linear_model import Ridge
# Ridge regression with regularisation strength alpha=100
reg = Ridge(alpha=100)
reg.fit(x_train, y_train)

#%%
reg.coef_

#%%
from sklearn.metrics import r2_score, mean_squared_error
# Predictions on the training data (this fragment has no held-out split)
y_pred = reg.predict(x_train)

print(r2_score(y_train, y_pred))
print(mean_squared_error(y_train, y_pred))

#%%
import yellowbrick
# Residuals computed by hand
res = y_train - y_pred

#%%
from yellowbrick.regressor import ResidualsPlot
# `reg` is already fitted above, so the visualizer is only scored here
visualizer = ResidualsPlot(reg)
visualizer.score(x_train, y_train)  # Score on the training data (no test split here)
visualizer.poof()  # Draw/show/poof the data
# Fitted line of the beer model over X
plt.plot(X, modelo_cerveja.predict(X), color='red')

# Predict y for 400, first by hand and then with the model
modelo_cerveja.intercept_ + modelo_cerveja.coef_ * 400
modelo_cerveja.predict([[400]])
# The bare string below is a note (in Portuguese) on how to read the value 400.
'''OBS: Como no estudo não informou uma porção em litros como referência, podemos fazer suposições a partir desse modelo
por exemplo, se uma pessoa bebe 400 copos de cerveja por ano, e adotando um copo de cerveja com 300 ml (0,3L), uma
pessoa que bebe 400 copos por ano (dependendo de cada país, obviamente), bebe 120 litros de cerveja, e só de alcool puro,
uma pessoa bebe, aproximadamente, 13.88 litros de álcool (cerca de 11.56% aproximadamente) '''

# Residuals and their plot (distance between the points and the regression line)
# NOTE(review): `_residues` is an underscore-prefixed (private) attribute.
modelo_cerveja._residues

visualizador_cerveja = ResidualsPlot(modelo_cerveja)
visualizador_cerveja.fit(X, y)
visualizador_cerveja.poof()
# The bare string below is a section title: spirits vs. total alcohol.
'''2)Regressão linear de destilados VS total álcool ingerido'''

# Column 2 is the feature and column 4 the target of `bebida_mundo`
A = bebida_mundo.iloc[:, 2].values
b = bebida_mundo.iloc[:, 4].values
correlacao_destilados = np.corrcoef(A, b)

# Reshape the single feature into a column matrix for the estimator
A = A.reshape(-1, 1)
modelo_destilados = LinearRegression()
modelo_destilados.fit(A, b)

score_destilados = modelo_destilados.score(A, b)

modelo_destilados.intercept_
modelo_destilados.coef_
# Prediction computed by the model
modelo1.predict([[400]])

# The bare string below is a note (in Portuguese) on how to read the value 400.
''' Como no estudo não informou uma porção em litros como referência, podemos fazer suposições a partir desse modelo
por exemplo, se uma pessoa bebe 400 copos de cerveja por ano, e adotando um copo de cerveja com 300 ml (0,3L), uma
pessoa que bebe 400 copos por ano (dependendo de cada país, obviamente), bebe 120 litros de cerveja, e só de alcool puro,
uma pessoa bebe, aproximadamente, 13.88 litros de álcool (cerca de 11.56% aproximadamente) '''

# Residuals (distance between the points and the reference line)
# NOTE(review): `_residues` is an underscore-prefixed (private) attribute.
modelo1._residues

# Residuals shown as a plot
visualizador1 = ResidualsPlot(modelo1)
visualizador1.fit(X, y)
visualizador1.poof()

# The closer the residuals are to zero, the better the model

# The bare string below is a section title: total alcohol (litres) vs. spirit servings.
'''2) Relação linear entre total de álcool ingerido (em Litros) com o total de destilados ingerido (em porções)
   OBS: Bebidas destiladas são todas que tiveram seu processo de destilação (vodca, uísque, tequila, rum, dentre outros)  '''

A = bebida_mundo.iloc[:, 2].values  # spirit_servings
b = bebida_mundo.iloc[:, 4].values  # total_litres_of_alcohol
correlacao2 = np.corrcoef(A, b)

# Reshape the single feature into a column matrix for the estimator
A = A.reshape(-1, 1)
modelo2 = LinearRegression()
modelo2.fit(A, b)

modelo2.intercept_
예제 #28
0
# Intercept (where the regression line starts)
print(modelo.intercept_)

# Coefficient
print(modelo.coef_)

#%%
# Build the chart
# scatter - draws the data points
plt.scatter(X, Y)
# plot - draws the best-fit line through the points
plt.plot(X, modelo.predict(X), color='red')

# Note - run the two commands above together to get the scatter plot
# with the best-fit line

# Stopping distance of 22 feet (predict what the speed was)
distancia = 22
modelo.intercept_ + modelo.coef_ * distancia
# or
modelo.predict(np.array(distancia).reshape(-1, 1))

# Residuals - distance between the points and the regression line
# NOTE(review): `_residues` is an underscore-prefixed (private) attribute.
print(modelo._residues)
#%%
# Draw a second chart from the model for a clearer view of the residuals
visualizador = ResidualsPlot(modelo)
visualizador.fit(X, Y)
visualizador.poof()
예제 #29
0
                                                    y,
                                                    test_size=0.25,
                                                    random_state=32)

# First model (M1), fitted on the training split defined above this fragment
m1 = LinearRegression().fit(X_train, y_train)
print('M1 (price): ', m1.score(X_test, y_test))
m1_y = m1.predict(X_test)
plt.scatter(X_test, y_test, edgecolors='blue')
plt.plot(X_test, m1_y, linewidth=3)
plt.title('M1')
plt.xlabel('Price')
plt.ylabel('Sales')
plt.show()
# `m1` is already fitted, so the visualizer is only scored on the test split
visualiser = ResidualsPlot(m1)
visualiser.score(X_test, y_test)
visualiser.poof()

#second model (M2) using price, store
# Drop the target and the columns M2 does not use
X = finalDF.drop(['billboard', 'printout', 'sat', 'comp', 'sales'], axis=1)
y = finalDF['sales']

#splitting data into train, test
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=32)

m2 = LinearRegression().fit(X_train, y_train)
print('M2 (price, store): ', m2.score(X_test, y_test))
m2_y = m2.predict(X_test)
visualiser = ResidualsPlot(m2)
예제 #30
0
# Mean squared error of `pred` against the test targets
mse = np.mean((pred - y_test)**2)

mse
## calculating score
ridgeReg.score(X_test,y_test)


from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
# (a new, unfitted Ridge -- not the `ridgeReg` scored above)
ridge = Ridge()
visualizer = ResidualsPlot(ridge)

visualizer.fit(X_train, y_train)  # Fit the training data to the model
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof()


##Apply different algos as on X_train,X_test,y_train,y_test

# Fitting K-NN to the Training set
# NOTE(review): a classifier is fitted on the same split the regressors
# above use -- confirm `y_train` holds class labels at this point.
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

# Predicting the Test set results
pred_y = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Sort by magnitude: add an absolute-value column and sort on it, descending
results['sorted'] = results[0].abs()
results.sort_values(by='sorted', inplace=True, ascending=False)

print("Lasso chooses {} variables".format(len(results)))
print(results)

# How does our model perform on the test data?
score_model(lasso)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(lasso)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
# NOTE(review): this is scored on the training split, while the residuals
# plot above is scored on the test split -- confirm that is intended.
from yellowbrick.regressor import PredictionError
prederr = PredictionError(lasso)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtrain, ytrain)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

lasso.fit(Xtrain, ytrain)

yhat = lasso.predict(Xtest)