Exemplo n.º 1
0
def working_random_forest():
    """Fit a random forest regressor on Position_Salaries.csv, predict the
    salary for position level 6.5, and plot the fit on a fine grid."""

    #Prepare data
    MLobj = EasyReg()
    MLobj.read("Position_Salaries.csv")

    MLobj.explore()
    MLobj.split_X_y()
    # Keep only the numeric level column as the single feature.
    MLobj.X = MLobj.X[:, 1:2]

    #Regression with random forest
    from sklearn.ensemble import RandomForestRegressor
    # "mse" was renamed to "squared_error" in scikit-learn 1.0 and the old
    # alias was removed in 1.2.
    regressor = RandomForestRegressor(n_estimators=300,
                                      criterion="squared_error",
                                      random_state=0)
    regressor.fit(MLobj.X, MLobj.y)

    #Predict: predict() requires a 2-D array of samples, not a bare scalar.
    y_pred = regressor.predict([[6.5]])
    print(y_pred)

    #Visualize on a 0.01-step grid so the forest's step function is visible.

    import matplotlib.pyplot as plt
    X_grid = np.arange(min(MLobj.X), max(MLobj.X), 0.01)
    X_grid = X_grid.reshape((len(X_grid), 1))
    plt.scatter(MLobj.X, MLobj.y, color="red")
    plt.plot(X_grid, regressor.predict(X_grid), color="blue")
    # Title fixed: this plot shows the random forest, not an SVR.
    plt.title("Truth or Bluff (Random Forest)")
    plt.xlabel("Position Salary")
    plt.ylabel("Salary")
    plt.show()
Exemplo n.º 2
0
def working_dec_reg_tree():
    """Fit a decision-tree regressor on Position_Salaries.csv, predict the
    salary for position level 6.5, and plot the fit on the raw points and
    on a fine-grained grid."""

    #Prepare data
    MLobj = EasyReg()
    MLobj.read("Position_Salaries.csv")

    MLobj.explore()
    MLobj.split_X_y()
    # Keep only the numeric level column as the single feature.
    MLobj.X = MLobj.X[:, 1:2]

    #Regression with decision regression tree
    from sklearn.tree import DecisionTreeRegressor
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(MLobj.X, MLobj.y)

    #Predict: predict() requires a 2-D array of samples, not a bare scalar.
    y_pred = regressor.predict([[6.5]])
    print(y_pred)

    #Visualize at the raw data resolution.

    import matplotlib.pyplot as plt
    plt.scatter(MLobj.X, MLobj.y, color="red")
    plt.plot(MLobj.X, regressor.predict(MLobj.X), color="blue")
    # Title fixed: this plot shows the decision tree, not an SVR.
    plt.title("Truth or Bluff (Decision Tree)")
    plt.xlabel("Position Salary")
    plt.ylabel("Salary")
    plt.show()

    # Re-plot on a 0.01-step grid so the tree's step function is visible.
    X_grid = np.arange(min(MLobj.X), max(MLobj.X), 0.01)
    X_grid = X_grid.reshape((len(X_grid), 1))
    plt.scatter(MLobj.X, MLobj.y, color="red")
    plt.plot(X_grid, regressor.predict(X_grid), color="blue")
    plt.title("Truth or Bluff (Decision Tree)")
    plt.xlabel("Position Salary")
    plt.ylabel("Salary")
    plt.show()
Exemplo n.º 3
0
def preco2():
    """Load the PRECO2 inputs file and select the feature columns for X."""

    # Read the raw file (cp1252-encoded, ';'-separated) and inspect it.
    reg = EasyReg()
    reg.read(
        "C:/Users/4256GU/Desktop/Data Analytics/PRECO2/donnees/inputs_clean_csv 2.csv",
        encod='cp1252',
        delim=";")
    reg.explore()

    # Split X and Y; Y is the gas consumption to predict.
    reg.split_X_y(1)

    # Column indices kept in X: current meteo, previous gas consumption,
    # vacations, labor day and previous temperatures.
    cols = ([1] + list(range(7, 31)) + [31, 32] +
            list(range(33, 57)) + list(range(64, 88)))
    reg.X = reg.X[:, cols]

    # Show which columns ended up in X.
    print(reg.myDS.columns[cols])
    MayML.exploreArray(reg.X, "X")
Exemplo n.º 4
0
def working_LR_Salary():
    """Simple linear regression on Salary_Data.csv with a 1/3 test split,
    then plot predictions against both test and training sets."""
    reg = EasyReg()
    reg.read("Salary_Data.csv")
    reg.explore()
    reg.split_X_y()

    # Hold out one third of the data for testing, then fit.
    reg.split_ds(ts=1 / 3)
    reg.fitLR()
    predictions = reg.predict()

    # Visual comparison of predictions vs. both splits.
    reg.visualize_testingDS_vs_pred()
    reg.visualize_trainingDS_vs_pred()
Exemplo n.º 5
0
def working_MR_Startups():
    """Multiple linear regression on 50_Startups.csv with the categorical
    column dummy-encoded and a 20% test split."""
    reg = EasyReg()
    reg.read("50_Startups.csv")
    reg.explore()
    reg.split_X_y()

    # One-hot encode the categorical column at index 3.
    reg.encode_categorial_dummy_X([3])
    reg.split_ds(test_set=0.2)

    # Fit and predict.
    reg.fitLR()
    predictions = reg.predict()
Exemplo n.º 6
0
def working_model_bwrd_elimination():
    """Manual backward elimination with statsmodels OLS on 50_Startups.csv.

    Each round drops the feature that showed the highest p-value in the
    previous round's OLS summary. The summaries are printed so they can
    actually be inspected (the original discarded them).
    """

    #Prepare data
    MLobj = EasyReg()
    MLobj.read("50_Startups.csv")
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.encode_categorial_dummy_X([3])
    MLobj.split_ds(test_set=0.2)

    #We delete a dummy variable, but we don't actually need it
    MLobj.X = MLobj.X[:, 1:]

    #Append interceptor: sm.OLS does not add an intercept, so prepend a
    #column of ones. Use the actual row count instead of a hard-coded 50.
    MLobj.X = np.append(arr=np.ones((MLobj.X.shape[0], 1)).astype(int),
                        values=MLobj.X,
                        axis=1)

    #Backwards elimination: the column sets below were chosen by reading
    #the previous round's summary and removing the highest-p-value feature.
    rounds = [
        [0, 1, 2, 3, 4, 5],  # 1st round: intercept + all features
        [0, 1, 3, 4, 5],     # 2nd round
        [0, 3, 4, 5],        # 3rd round
        [0, 3, 5],           # 4th round
        [0, 3],              # 5th round
    ]
    for cols in rounds:
        X_opt = MLobj.X[:, cols]
        regressor_OLS = sm.OLS(endog=MLobj.y, exog=X_opt).fit()
        # Print the summary so the highest-p-value feature can be spotted;
        # summary() has no side effect of its own.
        print(regressor_OLS.summary())
Exemplo n.º 7
0
def working_build_bwrd_elimination():
    """Backward elimination via the EasyReg helper, then fit and predict."""

    # Load and prepare the dataset.
    reg = EasyReg()
    reg.read("50_Startups.csv")
    reg.explore()
    reg.split_X_y()
    reg.encode_categorial_dummy_X([3])

    # Drop one dummy column (not strictly required, kept for parity).
    reg.X = reg.X[:, 1:]

    # Add the intercept column.
    reg.append_interceptor()

    # Run the helper's backward elimination at a 5% significance level.
    significance = 0.05
    candidate_cols = reg.X[:, [0, 1, 2, 3, 4, 5]]
    reg.backwardElimination(candidate_cols, reg.y, significance, inPlace=True)

    # Train/test split, fit a linear regression and predict.
    reg.split_ds(test_set=0.2)
    reg.fitLR()
    predictions = reg.predict()

    print(predictions)
Exemplo n.º 8
0
def debug_PR():
    """Debug script: hand-rolled degree-4 polynomial regression on
    Position_Salaries.csv, plotted on a 0.1-step grid."""

    # Load and prepare the data; keep only the numeric level column.
    reg = EasyReg()
    reg.read("Position_Salaries.csv")

    reg.explore()
    reg.split_X_y()
    reg.X = reg.X[:, 1:2]

    # No hold-out set for this debug run.
    reg.split_ds(test_set=0)

    # Expand X with polynomial terms up to degree 4.
    from sklearn.preprocessing import PolynomialFeatures
    poly_reg = PolynomialFeatures(degree=4)
    X_poly = poly_reg.fit_transform(reg.X)
    # Kept from the original; fit_transform above already fitted poly_reg.
    poly_reg.fit(X_poly, reg.y)

    # Linear regression on the expanded features.
    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    model.fit(X_poly, reg.y)

    # Plot the fitted curve on a fine grid.
    import matplotlib.pyplot as plt
    grid = np.arange(min(reg.X), max(reg.X), 0.1)
    grid = grid.reshape((len(grid), 1))
    plt.scatter(reg.X, reg.y, color="red")
    plt.plot(grid,
             model.predict(poly_reg.fit_transform(grid)),
             color="blue")
    plt.title("test")
    plt.xlabel("pos label")
    plt.ylabel("salary")
    plt.show()
Exemplo n.º 9
0
def working_polynomial_regression():
    """Compare a linear fit against polynomial fits of degrees 2-4 on
    Position_Salaries.csv, then re-plot degree 4 on a finer grid."""

    # Load and prepare the data; keep only the numeric level column.
    reg = EasyReg()
    reg.read("Position_Salaries.csv")

    reg.explore()
    reg.split_X_y()
    reg.X = reg.X[:, 1:2]

    reg.split_ds(test_set=0)

    # Plain linear fit first, as a baseline.
    reg.fitPR()
    reg.predict()
    reg.visualize_trainingDS_vs_pred()
    print(reg.predictVar(6.5))

    # Polynomial fits of increasing degree, each visualized.
    for degree in (2, 3, 4):
        reg.fitPR(degree)
        reg.predict(PR=True)
        reg.visualize_trainingDS_vs_pred(PR=True)

    # Refit degree 4 (as the original did) and re-plot with more granularity.
    reg.fitPR(4)
    reg.predict(PR=True)
    fine_grid = reg.sample_change_resolution(sampleX=reg.X, gran=0.1)
    reg.visualize_trainingDS_vs_pred(PR=True, xsample=fine_grid)

    print(reg.predictVar(6.5, PR=True))
Exemplo n.º 10
0
def debug_predict():
    """Debug script: fit a plain linear regression on Position_Salaries.csv
    and predict the salary for position level 6.5."""

    #Prepare data
    MLobj = EasyReg()
    MLobj.read("Position_Salaries.csv")

    MLobj.explore()
    MLobj.split_X_y()
    # Keep only the numeric level column as the single feature.
    MLobj.X = MLobj.X[:, 1:2]

    #MLobj.encode_categorial_dummy_X([0])
    MLobj.split_ds(test_set=0)

    from sklearn.linear_model import LinearRegression
    lin_reg = LinearRegression()
    lin_reg.fit(MLobj.X, MLobj.y)
    # predict() requires a 2-D array of samples; a bare scalar raises a
    # ValueError in any recent scikit-learn. Also show the result — the
    # original computed it and silently discarded it.
    print(lin_reg.predict([[6.5]]))
Exemplo n.º 11
0
def working_SVR():
    """SVR with an RBF kernel on Position_Salaries.csv, with both features
    and target standardized; predicts level 6.5 and plots the fit."""
    #Prepare data
    MLobj = EasyReg()
    MLobj.read("Position_Salaries.csv")

    MLobj.explore()
    MLobj.split_X_y()
    # Keep only the numeric level column as the single feature.
    MLobj.X = MLobj.X[:, 1:2]

    #Feature scaling: SVR is scale-sensitive, so scale X and y separately.
    from sklearn.preprocessing import StandardScaler
    sc_X = StandardScaler()
    sc_y = StandardScaler()
    MLobj.X = sc_X.fit_transform(MLobj.X)
    MLobj.y = sc_y.fit_transform(MLobj.y.reshape(-1, 1))

    from sklearn.svm import SVR
    regressor = SVR(kernel="rbf")
    # SVR expects a 1-D target; passing the (n, 1) column triggers a
    # DataConversionWarning and is rejected by newer versions.
    regressor.fit(MLobj.X, MLobj.y.ravel())

    # Predict on the scaled input, then invert the target scaling.
    # inverse_transform requires a 2-D array, so reshape the 1-D prediction.
    y_pred = regressor.predict(sc_X.transform(np.array([[6.5]])))
    pred = sc_y.inverse_transform(y_pred.reshape(-1, 1))
    # Show the un-scaled prediction (the original computed it but never did).
    print(pred)

    import matplotlib.pyplot as plt
    plt.scatter(MLobj.X, MLobj.y, color="red")
    plt.plot(MLobj.X, regressor.predict(MLobj.X), color="blue")
    plt.title("Truth or Bluff (SVR)")
    plt.xlabel("Position Salary")
    plt.ylabel("Salary")
    plt.show()

    # Re-plot on a 0.1-step grid (in scaled coordinates) for a smoother curve.
    X_grid = np.arange(min(MLobj.X), max(MLobj.X), 0.1)
    X_grid = X_grid.reshape((len(X_grid), 1))
    plt.scatter(MLobj.X, MLobj.y, color="red")
    plt.plot(X_grid, regressor.predict(X_grid), color="blue")
    plt.title("Truth or Bluff (SVR)")
    plt.xlabel("Position Salary")
    plt.ylabel("Salary")
    plt.show()
Exemplo n.º 12
0
def working_SVR_easy():
    """SVR via the EasyReg wrapper: scale, fit, predict level 6.5 and plot."""

    # Load the dataset.
    reg = EasyReg()
    reg.read("Position_Salaries.csv")

    # Inspect and prepare: keep only the numeric level column.
    reg.explore()
    reg.split_X_y()
    reg.X = reg.X[:, 1:2]

    # No hold-out set; scale both X and y before fitting SVR.
    reg.split_ds(test_set=0)
    reg.scale_features(scaleY=True)

    # Fit the SVR model.
    reg.fitSVR()

    # Predict the salary for level 6.5.
    prediction = reg.predictVar(6.5)
    print("prediction : ", prediction, "\n")

    # Plot against the training points, then again on a 0.1-step grid.
    reg.visualize_trainingDS_vs_pred()
    grid = np.arange(min(reg.X_train), max(reg.X_train), 0.1)
    grid = grid.reshape((len(grid), 1))
    reg.visualize_trainingDS_vs_pred(grid)
Exemplo n.º 13
0
def working_drt_easy():
    """Decision-tree regression via the EasyReg wrapper: fit, predict
    level 6.5 and plot the fit at two resolutions."""

    # Load the dataset.
    reg = EasyReg()
    reg.read("Position_Salaries.csv")

    # Inspect the data.
    reg.explore()

    # Keep only the numeric level column; no hold-out set.
    reg.split_X_y()
    reg.X = reg.X[:, 1:2]
    reg.split_ds(test_set=0)

    # Fit the decision regression tree.
    reg.fitDRT()

    # Predict the salary for level 6.5.
    prediction = reg.predictVar(6.5)
    print("prediction : ", prediction, "\n")

    # Plot against the training points, then again on a 0.1-step grid.
    reg.visualize_trainingDS_vs_pred()
    grid = np.arange(min(reg.X_train), max(reg.X_train), 0.1)
    grid = grid.reshape((len(grid), 1))
    reg.visualize_trainingDS_vs_pred(grid)