Example #1
def working_ANN():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Churn_Modelling.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 3:]

    #Encode
    MLobj.encode_and_dummy([1, 2], [1], encode_y=False, removeFirstColumn=True)

    #Split test and training set
    MLobj.split_ds(test_set=0.2)

    #Scale features
    MLobj.scale_features()

    #Initialising the ANN
    from keras.models import Sequential
    from keras.layers import Dense
    classifier = Sequential()

    #Defining the ANN model.
    #Rule of thumb: nodes per hidden layer = avg(#inputNodes, #outputNodes) = 6
    classifier.add(
        Dense(units=6, kernel_initializer="random_uniform", activation="relu",
              input_dim=11))
    classifier.add(
        Dense(units=6, kernel_initializer="random_uniform", activation="relu"))
    classifier.add(
        Dense(units=1, kernel_initializer="random_uniform",
              activation="sigmoid"))

    #Compile ANN
    classifier.compile(optimizer="adam",
                       loss="binary_crossentropy",
                       metrics=["accuracy"])

    #Fit
    classifier.fit(MLobj.X_train, MLobj.y_train, batch_size=10, epochs=100)

    #Predict
    y_pred = classifier.predict(MLobj.X_test)

    #Threshold the sigmoid output at 0.5 to get True/False predictions
    y_pred = (y_pred > 0.5)

    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(MLobj.y_test, y_pred)
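EasyClassi itself is not shown in this listing, so its helper methods are taken on faith. For orientation, a minimal sketch of what encode_and_dummy([1, 2], [1], removeFirstColumn=True) might wrap, assuming standard scikit-learn encoders (the free function and its signature are hypothetical):

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

def encode_and_dummy(X, label_cols, dummy_cols, removeFirstColumn=True):
    #Label-encode each requested categorical column in place
    for col in label_cols:
        X[:, col] = LabelEncoder().fit_transform(X[:, col])
    #One-hot encode the requested columns, passing the rest through
    #(use sparse=False instead on scikit-learn older than 1.2)
    ct = ColumnTransformer(
        [("onehot", OneHotEncoder(sparse_output=False), dummy_cols)],
        remainder="passthrough")
    X = ct.fit_transform(X)
    #Drop the first dummy column to avoid the dummy variable trap
    if removeFirstColumn:
        X = X[:, 1:]
    return X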
Example #2
def working_class_dec_tree_easy():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    #Classification
    MLobj.fitDecTree()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()
    print(cm)

    #Visualize data
    MLobj.visualize_lineal_2D_class(MLobj.X_train, MLobj.y_train)
    MLobj.visualize_lineal_2D_class()
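For comparison with the explicit scikit-learn code in later examples, a plausible sketch of what fitDecTree and create_confusion_matrix wrap (the function names, the entropy criterion, and the fixed seed are assumptions mirroring the classic course setup these snippets follow):

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix

def fit_dec_tree(X_train, y_train):
    #Fit a decision tree on the scaled training data
    classifier = DecisionTreeClassifier(criterion="entropy", random_state=0)
    classifier.fit(X_train, y_train)
    return classifier

def create_confusion_matrix(classifier, X_test, y_test):
    #Rows: actual classes, columns: predicted classes
    return confusion_matrix(y_test, classifier.predict(X_test))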
Example #3
def working_PCA_easy():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("wine.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.split_ds()
    MLobj.scale_features(scaleY=False)

    #Applying PCA
    MLobj.applyPCA()
    PCAratio = MLobj.getPCAVarianceRatio()

    #Classification
    MLobj.fitLog()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    MLobj.printModelPerformance()

    #Visualize data
    #MLobj.visualize_lineal_2D_class(MLobj.X_train,MLobj.y_train,x1="PC1",x2="PC2",classNum=3)
    MLobj.visualize_lineal_2D_class(x1="PC1", x2="PC2", classNum=3)
Example #4
def working_PCA():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("wine.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.split_ds()
    MLobj.scale_features(scaleY=False)

    #Applying PCA
    from sklearn.decomposition import PCA
    pca = PCA(n_components=2)
    MLobj.X_train = pca.fit_transform(MLobj.X_train)
    MLobj.X_test = pca.transform(MLobj.X_test)
    explained_variance = pca.explained_variance_ratio_

    #Classification
    MLobj.fitLog()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    MLobj.printModelPerformance()

    #Visualize data
    #MLobj.visualize_lineal_2D_class(MLobj.X_train,MLobj.y_train,x1="PC1",x2="PC2",classNum=3)
    MLobj.visualize_lineal_2D_class(x1="PC1", x2="PC2", classNum=3)
Example #5
def working_kernel_PCA_easy():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds()
    MLobj.scale_features(scaleY=False)

    #Applying Kernel PCA
    MLobj.applyKernelPCA()

    #Classification
    MLobj.fitLog()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    MLobj.printModelPerformance()

    #Visualize data
    #MLobj.visualize_lineal_2D_class(MLobj.X_train,MLobj.y_train,x1="KPC1",x2="KPC2")
    MLobj.visualize_lineal_2D_class(x1="KPC1", x2="KPC2")
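applyKernelPCA presumably wraps sklearn.decomposition.KernelPCA, in the same way Example #4 spells out plain PCA; a minimal sketch (the function name, n_components=2, and the RBF kernel are assumptions):

from sklearn.decomposition import KernelPCA

def apply_kernel_pca(X_train, X_test, n_components=2, kernel="rbf"):
    kpca = KernelPCA(n_components=n_components, kernel=kernel)
    X_train = kpca.fit_transform(X_train)  #fit on the training data only
    X_test = kpca.transform(X_test)        #project the test set the same way
    return X_train, X_test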
Example #6
def working_k_fold_cross_easy():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    #Classification
    MLobj.fitKernelSVM()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    #Applying k-fold cross-validation
    MLobj.apply_class_k_fold()
    MLobj.print_k_fold_perf()
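apply_class_k_fold and print_k_fold_perf are likewise assumed to wrap sklearn.model_selection.cross_val_score; a minimal sketch (the function name and cv=10 are assumptions):

from sklearn.model_selection import cross_val_score

def apply_class_k_fold(classifier, X_train, y_train, cv=10):
    #One accuracy score per fold of the training set
    accuracies = cross_val_score(estimator=classifier,
                                 X=X_train, y=y_train, cv=cv)
    print("Mean accuracy: %.4f" % accuracies.mean())
    print("Std deviation: %.4f" % accuracies.std())
    return accuracies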
Example #7
def working_grid_search():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    #Classification
    MLobj.fitKernelSVM()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    #Applying k-fold cross-validation
    MLobj.apply_class_k_fold()
    MLobj.print_k_fold_perf()

    #Apply grid search to find the best model and best parameters
    from sklearn.model_selection import GridSearchCV
    parameters = [{
        'C': [1, 10, 100, 1000],
        "kernel": ["linear"]
    }, {
        'C': [1, 10, 100, 1000],
        "kernel": ["rbf"],
        "gamma": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }]
    grid_search = GridSearchCV(estimator=MLobj.classifier,
                               param_grid=parameters,
                               scoring="accuracy",
                               cv=10,
                               n_jobs=-1)

    grid_search = grid_search.fit(MLobj.X_train, MLobj.y_train)
    best_accuracy = grid_search.best_score_
    best_parameters = grid_search.best_params_
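
    #With the default refit=True, best_estimator_ has already been retrained
    #on the full training set, so the tuned model can be used directly
    #(y_pred_best is an illustrative addition, not part of the original)
    print("Best accuracy: %.4f" % best_accuracy)
    print("Best parameters:", best_parameters)
    y_pred_best = grid_search.best_estimator_.predict(MLobj.X_test)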
Example #8
def working_grid_search_easy():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    #Classification
    MLobj.fitKernelSVM()

    #Predict
    y_pred = MLobj.predict()

    #Evaluate with a confusion matrix
    cm = MLobj.create_confusion_matrix()

    #Applying k-fold cross-validation
    MLobj.apply_class_k_fold()
    MLobj.print_k_fold_perf()

    #Apply grid search to find the best model and best parameters
    parameters = [{
        'C': [1, 10, 100, 1000],
        "kernel": ["linear"]
    }, {
        'C': [1, 10, 100, 1000],
        "kernel": ["rbf"],
        "gamma": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }]

    MLobj.apply_grid_search(paramsGS=parameters)
    MLobj.print_grid_search_perf()
Example #9
def working_class_logistic():

    #Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    #Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    #Classification with logistic regression
    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression(random_state=0)
    classifier.fit(MLobj.X_train, MLobj.y_train)

    #Predict
    y_pred = classifier.predict(MLobj.X_test)
    print(y_pred)

    #Making the confusion matrix
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(MLobj.y_test, y_pred)

    # Visualising the Training set results
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    X_set, y_set = MLobj.X_train, MLobj.y_train
    X1, X2 = np.meshgrid(
        np.arange(start=X_set[:, 0].min() - 1,
                  stop=X_set[:, 0].max() + 1,
                  step=0.01),
        np.arange(start=X_set[:, 1].min() - 1,
                  stop=X_set[:, 1].max() + 1,
                  step=0.01))
    plt.contourf(X1,
                 X2,
                 classifier.predict(np.array([X1.ravel(),
                                              X2.ravel()
                                              ]).T).reshape(X1.shape),
                 alpha=0.75,
                 cmap=ListedColormap(('red', 'green')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0],
                    X_set[y_set == j, 1],
                    c=ListedColormap(('red', 'green'))(i),
                    label=j)
    plt.title('Logistic Regression (Training set)')
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show()

    # Visualising the Test set results
    X_set, y_set = MLobj.X_test, MLobj.y_test
    X1, X2 = np.meshgrid(
        np.arange(start=X_set[:, 0].min() - 1,
                  stop=X_set[:, 0].max() + 1,
                  step=0.01),
        np.arange(start=X_set[:, 1].min() - 1,
                  stop=X_set[:, 1].max() + 1,
                  step=0.01))
    plt.contourf(X1,
                 X2,
                 classifier.predict(np.array([X1.ravel(),
                                              X2.ravel()
                                              ]).T).reshape(X1.shape),
                 alpha=0.75,
                 cmap=ListedColormap(('red', 'green')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0],
                    X_set[y_set == j, 1],
                    c=ListedColormap(('red', 'green'))(i),
                    label=j)
    plt.title('Logistic Regression (Test set)')
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show()
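
The two plotting blocks above are identical except for the data and the title, which is presumably what visualize_lineal_2D_class factors out in the other examples. A minimal sketch of such a helper (the name and defaults are assumptions):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_boundary(classifier, X_set, y_set, title,
                           xlabel="Age", ylabel="Estimated Salary"):
    colors = ("red", "green")
    #Dense grid over the two (scaled) features
    X1, X2 = np.meshgrid(
        np.arange(X_set[:, 0].min() - 1, X_set[:, 0].max() + 1, 0.01),
        np.arange(X_set[:, 1].min() - 1, X_set[:, 1].max() + 1, 0.01))
    #Colour each grid point by the predicted class
    Z = classifier.predict(np.array([X1.ravel(), X2.ravel()]).T)
    plt.contourf(X1, X2, Z.reshape(X1.shape), alpha=0.75,
                 cmap=ListedColormap(colors))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    #Overlay the actual observations, one colour per class
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                    color=colors[i], label=j)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

#Usage, replacing the training-set block above:
#plot_decision_boundary(classifier, MLobj.X_train, MLobj.y_train,
#                       'Logistic Regression (Training set)')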