def cost_vs_iterations_plotting(learning_rates_list):
    """Plot cost-vs-iterations training curves for several learning rates.

    For each dataset, trains a logistic-regression classifier once per
    learning rate and overlays the recorded cost histories on a single
    figure (one figure per dataset).

    Parameters
    ----------
    learning_rates_list : list of float or None
        Learning rates to compare; falls back to [0.1, 0.5, 1] when
        empty or None.
    """
    start = time.time()

    # Fall back to a default sweep when no rates were given.
    if not learning_rates_list:
        learning_rates_list = [0.1, 0.5, 1]

    # Datasets to evaluate.
    datasets = [
        Datasets.IONOSPHERE, Datasets.ADULT, Datasets.WINE_QUALITY,
        Datasets.BREAST_CANCER_DIAGNOSIS
    ]

    # One model instance reused across all runs; fit() presumably
    # reinitializes the weights on each call — TODO confirm.
    classifier = LogisticRegression()

    for dataset_name in datasets:
        print("dataset: ", dataset_name)
        # Load the dataset and scale its features.
        X, y = get_dataset(dataset_name)
        X = feature_scaling(X)

        # 80/20 shuffled train/test split.
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            0.8,
                                                            shuffle=True)

        for lr in learning_rates_list:
            classifier.lr = lr
            # Fit the model and plot one cost curve per learning rate.
            classifier.fit(X_train, y_train)
            plt.plot(range(len(classifier.cost_history)),
                     classifier.cost_history)

        # Build the legend labels with a comprehension instead of a
        # manual append loop.
        legends = ['lr = ' + str(l) for l in learning_rates_list]

        plt.legend(legends, loc='upper right')
        plt.title(dataset_name)
        plt.xlim((0, 100))
        plt.ylabel('Cost')
        plt.xlabel('Iterations')
        plt.show()

    print('\n\nDONE!')
    print('It took', time.time() - start, 'seconds.')
                    assert dW.shape == self.W.shape

                    self.W -= self.lr * dW
                    self.b -= self.lr * db

    def predict(self, X):
        """Return model outputs for X, squeezed to 1-D to match y."""
        linear_output = np.dot(X, self.W) + self.b
        # Collapse the (n, 1) column vector back to a 1-D array so the
        # result has the same shape as the original targets.
        return np.squeeze(linear_output)


def RMSE(y_true, y_pred):
    """Root-mean-square error between true and predicted values.

    Parameters
    ----------
    y_true, y_pred : array-like of float
        Same-length sequences supporting elementwise subtraction
        (e.g. numpy arrays).

    Returns
    -------
    float
        sqrt(mean((y_true - y_pred) ** 2)).
    """
    # Bug fix: the old expression was sqrt(SSE) / n, dividing by n
    # OUTSIDE the square root; RMSE is sqrt(SSE / n).
    return (sum((y_true - y_pred) ** 2) / len(y_true)) ** 0.5


if __name__ == "__main__":
    from datasets.dataset import load_boston
    from model_selection.train_test_split import train_test_split

    # Load the Boston housing data and hold out 20% for evaluation.
    boston = load_boston()
    features = boston.data
    targets = boston.target

    X_train, X_test, Y_train, Y_test = train_test_split(
        features, targets, test_size=0.2)

    # Train a linear-regression model and report its test RMSE.
    model = LinearRegression(max_iter=2000)
    model.fit(X_train, Y_train)

    predictions = model.predict(X_test)
    print(RMSE(Y_test, predictions))
        #### E-step,计算概率 ####
        return np.argmax(P_mat, axis=1)


if __name__ == '__main__':
    # Bug fix: sklearn.datasets.samples_generator was deprecated in
    # scikit-learn 0.22 and removed in 0.24; make_blobs is imported
    # from sklearn.datasets directly.
    from sklearn.datasets import make_blobs
    from model_selection.train_test_split import train_test_split

    # Three Gaussian blobs, then a fixed linear transform to produce
    # elongated (anisotropic) clusters.
    X, _ = make_blobs(cluster_std=1.5,
                      random_state=42,
                      n_samples=1000,
                      centers=3)
    X = np.dot(X, np.random.RandomState(0).randn(2, 2))

    import matplotlib.pyplot as plt

    # Visualize the raw data before clustering.
    plt.clf()
    plt.scatter(X[:, 0], X[:, 1], alpha=0.3)
    plt.show()

    X_train, X_test = train_test_split(X, test_size=0.2)
    n_samples, n_feature = X_train.shape

    # Fit a 6-component Gaussian mixture and color the held-out points
    # by their predicted component.
    gmm = GaussianMixture(n_components=6)
    gmm.fit(X_train)
    Y_pred = gmm.predict(X_test)

    plt.clf()
    plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_pred, alpha=0.3)
    plt.show()
示例#4
0
        clus_pred = np.argmin(dist_test, axis=1)

        return clus_pred


if __name__ == '__main__':
    import numpy as np

    # Three Gaussian clusters centered at (1,1), (4,4) and (7,1).
    # NOTE: the three randn(200, 2) draws happen in the same order as
    # before, so the RNG stream (and thus the data) is unchanged.
    offsets = ([1, 1], [4, 4], [7, 1])
    data = np.concatenate(
        [np.random.randn(200, 2) + off for off in offsets], axis=0)

    from model_selection.train_test_split import train_test_split

    X_train, X_test = train_test_split(data, test_size=0.2)

    # Fit k-means with one cluster per generated blob.
    kmeans = KMeans(n_clusters=3)
    kmeans.fit(X_train)

    import matplotlib.pyplot as plt

    # Training points colored by assigned cluster; centers as stars.
    plt.clf()
    plt.scatter(X_train[:, 0], X_train[:, 1], alpha=0.5, c=kmeans.labels_)
    plt.scatter(kmeans.cluster_centers_[:, 0],
                kmeans.cluster_centers_[:, 1],
                marker='*',
                c='k')
    plt.show()

    clus_pred = kmeans.predict(X_test)
示例#5
0
        # Feature scaling
        X = feature_scaling(X)

        # Create the classifiers
        lr_classifier = LogisticRegression()
        nb_classifier = GaussianNaiveBayes()

        train_sizes = np.arange(0.05, 1, 0.05)
        lr_accuracy = []
        nb_accuracy = []

        for t in train_sizes:
            # Split into train and test
            X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                t,
                                                                shuffle=True)

            # Train and evaluate the models
            lr_classifier.fit(X_train, y_train)
            y_pred = lr_classifier.predict(X_test)
            lr_accuracy.append(evaluate_acc(y_test, y_pred))

            nb_classifier.fit(X_train, y_train)
            y_pred = nb_classifier.predict(X_test)
            nb_accuracy.append(evaluate_acc(y_test, y_pred))

        printAccuracyComparison(ds, lr_accuracy, nb_accuracy, train_sizes)

    print('\n\nDONE!')