Python MSTClustering.fit示例

编程语言: Python

命名空间/包名称: mst_clustering

类/类型: MSTClustering

方法/功能: fit

hotexamples.com的示例: 5

Python MSTClustering.fit - 已找到 5 个示例。以下是从开源项目中提取的、最受好评的 mst_clustering.MSTClustering.fit 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

MSTClustering(20)

fit_predict(7)

fit(4)

get_graph_segments(3)

示例#1

显示文件

文件： test_mst_clustering.py 项目： SerialDev/mst_clustering

def test_bad_arguments():
    """Check that MSTClustering raises ValueError for invalid usage.

    Four cases are exercised: fitting without any cutoff, fitting with a
    negative cutoff, calling ``get_graph_segments`` before ``fit``, and
    calling ``get_graph_segments`` after fitting with
    ``metric='precomputed'``.
    """
    X, y = make_blobs(100, random_state=42)

    # Default constructor: fit is expected to complain about the missing cutoff.
    mst = MSTClustering()
    assert_raises_regex(ValueError,
                        "Must specify either cutoff or cutoff_frac",
                        mst.fit, X, y)

    mst = MSTClustering(cutoff=-1)
    assert_raises_regex(ValueError, "cutoff must be positive", mst.fit, X)

    # The expected message is used as a regex, so it is written as a raw
    # string: "\(" inside a normal string literal is an invalid escape
    # sequence. The resulting pattern text is unchanged.
    mst = MSTClustering()
    msg = r"Must call fit\(\) before get_graph_segments()"
    assert_raises_regex(ValueError, msg, mst.get_graph_segments)

    # With a precomputed distance matrix, get_graph_segments must refuse.
    mst = MSTClustering(cutoff=0, metric='precomputed')
    mst.fit(pairwise_distances(X))
    msg = "Cannot use ``get_graph_segments`` with precomputed metric."
    assert_raises_regex(ValueError, msg, mst.get_graph_segments)

示例#2

显示文件

def test_bad_arguments():
    """Verify the ValueError messages MSTClustering emits on bad input.

    Exercised in order: ``fit`` with no cutoff given, ``fit`` with
    ``cutoff=-1``, ``get_graph_segments`` on an unfitted estimator, and
    ``get_graph_segments`` on an estimator fitted with a precomputed metric.
    """
    X, y = make_blobs(100, random_state=42)

    # No cutoff of any kind was passed to the constructor.
    mst = MSTClustering()
    assert_raises_regex(ValueError,
                        "Must specify either cutoff or cutoff_frac", mst.fit,
                        X, y)

    mst = MSTClustering(cutoff=-1)
    assert_raises_regex(ValueError, "cutoff must be positive", mst.fit, X)

    # Raw string literal: the message doubles as a regex pattern, and "\("
    # is not a valid escape sequence in an ordinary string literal. The
    # pattern itself is identical to the previous one.
    mst = MSTClustering()
    msg = r"Must call fit\(\) before get_graph_segments()"
    assert_raises_regex(ValueError, msg, mst.get_graph_segments)

    # Fitting on a precomputed distance matrix succeeds, but asking for
    # graph segments afterwards is expected to fail.
    mst = MSTClustering(cutoff=0, metric='precomputed')
    mst.fit(pairwise_distances(X))
    msg = "Cannot use ``get_graph_segments`` with precomputed metric."
    assert_raises_regex(ValueError, msg, mst.get_graph_segments)

示例#3

显示文件

def MST_clustering(filename):
    """Cluster the lines of *filename* with MSTClustering and plot the result.

    The file is read line by line; lines longer than four characters are
    kept (after stripping trailing whitespace). A matrix of pairwise
    ``jaccard`` values is built for the first 500 kept entries, an
    ``MSTClustering`` model is fitted on it, and a t-SNE embedding of the
    fitted tree is shown as a scatter plot coloured by cluster label.

    Parameters
    ----------
    filename : path of the text file to read.

    Returns
    -------
    None. The function only displays a matplotlib figure.
    """
    with open(filename, 'r') as f:
        words = f.readlines()
    # The length filter runs on the raw line, so a trailing newline counts
    # towards the four-character threshold.
    words = [word.rstrip() for word in words if len(word) > 4]
    words = np.asarray(words)

    # Slice once instead of re-slicing for every row of the matrix.
    sample = words[:500]
    jac_similarity = np.array([[jaccard(w1, w2) for w1 in sample]
                               for w2 in sample])

    # NOTE(review): the matrix is fitted with the default metric; if it is
    # meant to be consumed as pairwise distances, metric='precomputed' may
    # be what was intended -- confirm. The original author also flagged the
    # cutoff_scale value as uncertain.
    mst = MSTClustering(min_cluster_size=10,
                        cutoff_scale=1)
    mst.fit(jac_similarity)
    mst_matrix = mst.full_tree_

    X_tsne = TSNE(learning_rate=100).fit_transform(mst_matrix.todense())
    labels = mst.labels_
    # A leftover pdb.set_trace() used to sit here and stopped every run in
    # the debugger before anything was plotted; it has been removed.
    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=labels)
    plt.show()

示例#4

显示文件

文件： mst.py 项目： noycohen100/MARCO-GE

def get_mst(dataframe):
    """Fit MSTClustering (``cutoff_scale=2``) on *dataframe*; return its labels.

    The return value is the ``labels_`` attribute of the fitted estimator.
    """
    clusterer = MSTClustering(cutoff_scale=2)
    clusterer.fit(dataframe)

    cluster_labels = clusterer.labels_
    return cluster_labels

示例#5

显示文件

文件： mstModel.py 项目： xaca/Miscellaneous

# Cluster X with approximate=False and colour each point by its label.
model = MSTClustering(cutoff_scale=2, approximate=False)
labels = model.fit_predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='rainbow')
plt.show()

plot_minimum_spanning_tree(model)
plt.show()

# Build a noisy copy of the data: 200 extra points drawn uniformly from
# [-14, 14) in both coordinates, labelled -1 in y_noisy.
rng = np.random.RandomState(int(100 * y[-1]))
noise = -14 + 28 * rng.rand(200, 2)

X_noisy = np.vstack([X, noise])
y_noisy = np.concatenate([y, np.full(200, -1, dtype=int)])

# All points are drawn in one fixed colour, so no colormap is passed:
# matplotlib ignores `cmap` (and warns) when `c` is not data to be mapped.
plt.scatter(X_noisy[:, 0], X_noisy[:, 1], c='lightblue')
plt.xlim(-15, 15)
plt.ylim(-15, 15)

plt.show()

# Same noisy data, first without a minimum cluster size ...
noisy_model = MSTClustering(cutoff_scale=1)
noisy_model.fit(X_noisy)
plot_minimum_spanning_tree(noisy_model)
plt.show()

# ... then with min_cluster_size=10 for comparison.
noisy_model = MSTClustering(cutoff_scale=1, min_cluster_size=10)
noisy_model.fit(X_noisy)
plot_minimum_spanning_tree(noisy_model)
plt.show()