# NOTE: load_dataset, EvolvingClustering and Benchmarks are assumed to be
# project-local modules; adjust the import paths to your repository layout.
import load_dataset
import Benchmarks
import EvolvingClustering
from sklearn.metrics import adjusted_rand_score


def main():
    X, y = load_dataset.load_dataset("stream1")

    # Experiment parameters
    nclusters = 4
    nsamples = 2000 * nclusters
    train_size = 800 * nclusters
    window_size = 100

    evol_model = EvolvingClustering.EvolvingClustering(macro_cluster_update=1,
                                                       variance_limit=0.001,
                                                       debug=True)
    Benchmarks.prequential_evaluation(evol_model, X, y, adjusted_rand_score,
                                      train_size, window_size)
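
Benchmarks.prequential_evaluation is project-specific, but its name points at the standard test-then-train protocol: score each incoming window with the current model, then update the model on that window. A minimal sketch of that loop, assuming the model exposes fit/predict and that the signature below matches the real helper, which it may not:

def prequential_evaluation_sketch(model, X, y, metric, train_size, window_size):
    # Hypothetical re-implementation, not the project's actual Benchmarks code.
    model.fit(X[:train_size])                   # warm-up on the initial batch
    scores = []
    for start in range(train_size, len(X), window_size):
        end = start + window_size
        y_pred = model.predict(X[start:end])    # test on the unseen window...
        scores.append(metric(y[start:end], y_pred))
        model.fit(X[start:end])                 # ...then train on it
    return scores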
Example #2
# NOTE: load_dataset and EvolvingClustering are assumed project-local modules.
import load_dataset
import EvolvingClustering
from sklearn import preprocessing


def main():
    X, y = load_dataset.load_dataset("gaussian")
    #    X, y = load_dataset.load_dataset("s2")

    X = X[:1000, :8]
    y = y[:1000]

    standardized_X = preprocessing.scale(X)
    minmaxscaler = preprocessing.MinMaxScaler()
    minmaxscaler.fit(standardized_X)
    X = minmaxscaler.transform(standardized_X)

    evol_model = EvolvingClustering.EvolvingClustering(variance_limit=0.01,
                                                       debug=True)
    #    evol_model = EvolvingClustering2.EvolvingClustering2(rad=0.04, debug=True)
    evol_model.fit(X[:100])
    evol_model.fit(X[100:200])
    # X was sliced to 1000 rows above, so predict on the full array.
    y_pred = evol_model.predict(X)
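
The back-to-back fit calls above suggest the model updates incrementally instead of retraining from scratch. Under that assumption (fit mutates the model in place), a longer stream can be consumed in fixed-size chunks; the chunk size here is arbitrary:

def fit_in_chunks(model, X, chunk_size=100):
    # Assumes each fit() call folds the new chunk into the existing clusters.
    for start in range(0, len(X), chunk_size):
        model.fit(X[start:start + chunk_size])
    return model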
Example #3
import matplotlib.pyplot as plt
from time import time

from sklearn import datasets
from sklearn import preprocessing

# NOTE: EvolvingClustering is an assumed project-local module.
import EvolvingClustering
# import pickle  # only needed for the commented save/load section below

cmap = plt.get_cmap('rainbow')

nsamples = 1000

X, y = datasets.fetch_covtype(return_X_y=True)
X = X[:nsamples]
y = y[:nsamples]
X = preprocessing.scale(X)
minmaxscaler = preprocessing.MinMaxScaler()
minmaxscaler.fit(X)
X = minmaxscaler.transform(X)

## Running training and prediction
evol_model = EvolvingClustering.EvolvingClustering(variance_limit=0.00001, debug=True)

tic = time()
evol_model.fit(X)
tac = time()
print('Operation took {} ms'.format((tac - tic) * 1e3))

y_pred = evol_model.predict(X)

# pickle.dump(evol_model, open("evol_model.pkl", "wb"))
## END Running training and prediction

## Load pickle
# evol_model = pickle.load(open("evol_model.pkl", "rb"))
# y_pred = evol_model.labels_
## END Load pickle
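
Examples 2 and 3 both standardize the data and then rescale it to [0, 1]. The same two-step preprocessing can be written as a scikit-learn Pipeline, which keeps both fitted scalers together so new batches get identical treatment (a sketch, not part of the original code):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# StandardScaler reproduces preprocessing.scale(); MinMaxScaler then maps to [0, 1].
scaler = make_pipeline(StandardScaler(), MinMaxScaler())
X_scaled = scaler.fit_transform(X)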
Example #4

    # The snippet is truncated at the top; the 'ward' assignment below is
    # reconstructed from the commented algorithm list further down.
    ward = cluster.AgglomerativeClustering(
        n_clusters=params['n_clusters'], linkage='ward',
        connectivity=connectivity)
    spectral = cluster.SpectralClustering(
        n_clusters=params['n_clusters'], eigen_solver='arpack',
        affinity="nearest_neighbors")
    dbscan = cluster.DBSCAN(eps=params['eps'])
    affinity_propagation = cluster.AffinityPropagation(
        damping=params['damping'], preference=params['preference'])
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average", metric="cityblock",  # 'metric' was named 'affinity' in older scikit-learn
        n_clusters=params['n_clusters'], connectivity=connectivity)
    birch = cluster.Birch(n_clusters=params['n_clusters'])
    # gmm = mixture.GaussianMixture(
    #     n_components=params['n_clusters'], covariance_type='full')

    evol = EvolvingClustering.EvolvingClustering(
        macro_cluster_update=1, variance_limit=0.01, debug=False)

    # clustering_algorithms = (
    #     ('MiniBatchKMeans', two_means),
    #     ('AffinityPropagation', affinity_propagation),
    #     ('MeanShift', ms),
    #     ('SpectralClustering', spectral),
    #     ('Ward', ward),
    #     ('AgglomerativeClustering', average_linkage),
    #     ('DBSCAN', dbscan),
    #     ('Birch', birch),
    #     ('GaussianMixture', gmm)
    # )

    clustering_algorithms = (
        ('MiniBatchKMeans', two_means),