def test_gaussian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(1000, 5)
    gm = GaussianMixture(n_components=5, n_init=5, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
def test_gaussian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(1000, 5)
    gm = GaussianMixture(n_components=5, n_init=5, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
Пример #3
0
def cluster(clusterType, vectors, y):
    if (clusterType == "KMeans"):
        kclusterer = KMeansClusterer(
            NUM_CLUSTERS,
            distance=nltk.cluster.util.cosine_distance,
            repeats=25)
        assigned_clusters = kclusterer.cluster(vectors, assign_clusters=True)

    elif (clusterType == "GMM"):
        GMM = GaussianMixture(n_components=NUM_CLUSTERS)
        assigned_clusters = GMM.fit_predict(vectors)

    elif (clusterType == "SVM"):
        classifier = SVC(kernel='rbf', gamma='auto', random_state=0)
        #cross-validation
        assigned_clusters = cross_validation(classifier, vectors, y)

    elif (clusterType == "T2VH"):
        ret = hierarchical.ward_tree(vectors, n_clusters=NUM_CLUSTERS)
        children = ret[0]
        n_leaves = ret[2]
        assigned_clusters = hierarchical._hc_cut(NUM_CLUSTERS, children,
                                                 n_leaves)

    elif (clusterType == "RandomForest"):
        classifier = RandomForestClassifier()
        #cross-validation
        assigned_clusters = cross_validation(classifier, vectors, y)
        # classifier.fit(vectors, y)
        # assigned_clusters=classifier.predict(vectors)

    elif (clusterType == "DecisionTree"):
        classifier = DecisionTreeClassifier()
        #cross-validation
        assigned_clusters = cross_validation(classifier, vectors, y)
        # classifier.fit(vectors, y)
        # assigned_clusters=classifier.predict(vectors)

    elif (clusterType == "LogisticRegression"):
        classifier = sklearn.linear_model.LogisticRegression()
        #cross-validation
        assigned_clusters = cross_validation(classifier, vectors, y)
        # classifier.fit(vectors, y)
        # assigned_clusters=classifier.predict(vectors)

    else:
        print(clusterType, " is not a predefined cluster type.")
        return
    return assigned_clusters
def test_gaussian_mixture_fit_predict():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng, weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type)

        # check if fit_predict(X) is equivalent to fit(X).predict(X)
        f = copy.deepcopy(g)
        Y_pred1 = f.fit(X).predict(X)
        Y_pred2 = g.fit_predict(X)
        assert_array_equal(Y_pred1, Y_pred2)
        assert_greater(adjusted_rand_score(Y, Y_pred2), .95)
def test_gaussian_mixture_fit_predict():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng, weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type)

        # check if fit_predict(X) is equivalent to fit(X).predict(X)
        f = copy.deepcopy(g)
        Y_pred1 = f.fit(X).predict(X)
        Y_pred2 = g.fit_predict(X)
        assert_array_equal(Y_pred1, Y_pred2)
        assert_greater(adjusted_rand_score(Y, Y_pred2), .95)