def silhouette(self):
    """Plot a silhouette-score elbow curve for KMeans over the
    standardized continuous features held on this object."""
    estimator = KMeans(random_state=1234556)
    elbow = KElbowVisualizer(estimator, metric='silhouette', timings=False)
    elbow.fit(self.data_continuous_std)
    elbow.show()
def _elbow_cluster_plot(kk, drop_col, z_col, z_label, tag, title):
    """Elbow-select k on `kk` minus `drop_col`, fit KMeans, and render/save
    a 3-D scatter of Skew²/Kurtosis/`z_col` coloured by cluster label."""
    K = 8
    visualizer = KElbowVisualizer(KMeans(), k=(1, K))
    visualizer.fit(kk.drop(columns=drop_col))  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    n_clusters = visualizer.elbow_value_
    model = KMeans(n_clusters=n_clusters).fit(kk.drop(columns=drop_col))
    fig = plt.figure()
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(t) for t in np.linspace(0, 1, n_clusters)]
    for i in range(n_clusters):
        ind = (model.labels_ == i)
        ax.scatter(kk["Skew²"][ind],
                   kk["Kurtosis"][ind],
                   kk[z_col][ind],
                   s=30,
                   c=clr[i],
                   label='Cluster %d' % i)
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(z_label)
    ax.legend()
    plt.title(title + ": " + tag + "-K-means")
    plt.savefig(title + tag + ".png")
    plt.show()


def makespaces(s2, k, alpha, beta, legend, title):
    """Run two elbow/KMeans passes over moment-space features and plot each
    as a 3-D scatter.

    Pass 1 clusters on (Skew², Kurtosis, Alpha) -> "<title>EDF.png";
    pass 2 clusters on (Skew², Kurtosis, Beta)  -> "<title>EPSB.png".

    :param s2: squared-skew values
    :param k: kurtosis values
    :param alpha: alpha parameters
    :param beta: beta parameters
    :param legend: unused; kept for interface compatibility
    :param title: plot title prefix and output-file prefix
    """
    kk = pd.DataFrame({
        'Skew²': s2,
        'Kurtosis': k,
        'Alpha': alpha,
        'Beta': beta
    })
    # The two original passes were duplicated verbatim except for the
    # dropped column, z axis, and output names — factored into one helper.
    _elbow_cluster_plot(kk, "Beta", "Alpha", r"$\alpha$", "EDF", title)
    _elbow_cluster_plot(kk, "Alpha", "Beta", r"$\beta$", "EPSB", title)
Пример #3
0
def run_kmeans_2(trainX):
    """Elbow-search KMeans, then validate a fixed cluster count against labels.

    NOTE(review): the `trainX` parameter is never used; the body reads
    `X_train`, `X_test`, `y_test`, and `dataset`, which are not defined in
    this function — presumably module-level globals. Confirm intent before
    refactoring (the parameter was likely meant to replace `X_train`).
    """
    #find k
    # Expected cluster counts per dataset, used for the validation fit below.
    cluster_counts = {
        'wine': 3,
        'wage': 2,
    }
    model = KMeans()
    visualizer = KElbowVisualizer(model,
                                  k=(2, 100),
                                  metric='calinski_harabasz',
                                  timings=True)
    visualizer.fit(X_train)
    visualizer.show()
    plt.tight_layout()
    plt.savefig('plots/km_sl_' + dataset + '.png')

    #validation
    # Refit with the known-good cluster count, then score the test split
    # against ground-truth labels with three external cluster metrics.
    model.set_params(n_clusters=cluster_counts[dataset])
    model.fit(X_train)
    score_fns = [
        v_measure_score,
        homogeneity_score,
        completeness_score,
    ]
    cluster_validation_df = pd.DataFrame()
    for score in score_fns:
        cluster_validation_df.loc[score.__name__,
                                  'score'] = score(y_test[y_test.columns[0]],
                                                   model.predict(X_test))
    print(cluster_validation_df)
Пример #4
0
def plot_elbow(estimator, k_values, dataset, version, metric='distortion'):
    """Fit an elbow visualizer on the requested dataset/version training split
    and save the figure under the plots tree.

    Relies on module globals `data` (dataset registry) and `PLOTS_FOLDER`.
    """
    elbow = KElbowVisualizer(estimator, k=k_values, metric=metric)
    elbow.fit(data.DATA[dataset][version]['x_train'])
    outpath = (
        f'{PLOTS_FOLDER}/{dataset}/{version}/{dataset}_{version}_{estimator.__class__.__name__}_elbow_{metric}.png'
    )
    elbow.show(outpath)
    plt.clf()
Пример #5
0
def elbowCheck():
    """Draw a KMeans elbow plot (k in [1, 12)) for the module-level `data`
    frame and save it for the web front end."""
    matrix = data.values.astype(float)
    elbow = KElbowVisualizer(KMeans(), k=(1, 12))
    elbow.fit(matrix)
    elbow.show(outpath="static/images/kmeans.png")
Пример #6
0
def find_optimal_clusters(X):
    """Display a KMeans elbow plot for cluster counts in [2, 21)."""
    elbow = KElbowVisualizer(KMeans(), k=(2, 21))
    elbow.fit(X)  # fit the data to the visualizer
    elbow.show()  # finalize and render the figure
Пример #7
0
def makeK(d, ilist, title):
    """Elbow-select k on (variance, skewness, kurtosis), fit KMeans, draw a
    3-D scatter of the clusters, and return a labelled DataFrame.

    :param d: array-like, columns [variance, skewness, kurtosis, alpha, beta]
    :param ilist: index for the returned frame
    :param title: plot-title prefix and output-file prefix
    :return: DataFrame of all five features plus a "Cluster" column
    """
    arr = np.array(d)
    feats = pd.DataFrame({'Variance': arr[:, 0],
                          'Skewness': arr[:, 1],
                          'Kurtosis': arr[:, 2]})
    visualizer = KElbowVisualizer(KMeans(), k=(1, 20))
    visualizer.fit(feats)  # fit the data to the visualizer
    visualizer.show()  # finalize and render the figure
    best_k = visualizer.elbow_value_
    model = KMeans(n_clusters=best_k).fit(feats)

    # 3-D scatter, one colour per cluster
    fig = plt.figure()
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    colors = [cmap(t) for t in np.linspace(0, 1, best_k)]
    for i in range(best_k):
        members = (model.labels_ == i)
        ax.scatter(arr[members, 2], arr[members, 1], arr[members, 0],
                   s=30, c=colors[i], label='Cluster %d' % i)

    ax.set_xlabel("Kurtosis")
    ax.set_ylabel("Skew")
    ax.set_zlabel("Variance")
    plt.title(title + ': KMeans clustering with K=%d' % best_k)
    plt.legend()
    plt.savefig(title + "clustersnoises.png")
    plt.show()

    return pd.DataFrame({'Variance': arr[:, 0], 'Skewness': arr[:, 1],
                         'Kurtosis': arr[:, 2], 'Alpha': arr[:, 3],
                         'Beta': arr[:, 4], "Cluster": model.labels_},
                        index=ilist)
def find_clusters(ClusterTeams):
    """
    Finds the optimal number of clusters using KMeans.

    Renders one elbow plot per metric (Calinski-Harabasz, then silhouette).
    :param ClusterTeams: DataFrame. The data to find the number of clusters of.
    """

    from yellowbrick.cluster import KElbowVisualizer
    from sklearn.cluster import KMeans

    # The original duplicated the whole setup per metric; loop instead.
    # A fresh estimator is built each pass so state from the previous fit
    # cannot leak into the next visualizer.
    for metric in ('calinski_harabasz', 'silhouette'):
        visualizer = KElbowVisualizer(KMeans(random_state=52594),
                                      k=(2, 10),
                                      metric=metric,
                                      timings=False)
        visualizer.fit(ClusterTeams)
        visualizer.show()
Пример #9
0
def kelbow_optimization(df):
    """Show the KMeans elbow plot for `df` (k in [1, 10)), save it as a PDF,
    and return `df` unchanged.

    Fix: the original called both `poof()` (a deprecated alias of `show()`)
    and `show()`, rendering the figure twice; only the saving `show()` is kept.
    """
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, 10))
    visualizer.fit(df)
    visualizer.show(outpath="Elbow Kmeans Cluster.pdf")
    return df
Пример #10
0
def plot_elbow(run_ids):
    """Draw and save a KMeans elbow curve for each run's latent vectors.

    Relies on module globals `latent_vectors` (mapping run_id -> array) and
    `exp_config`.

    Bug fix: the original wrote `latent_vectors = latent_vectors[run_id]`,
    which makes `latent_vectors` a local and raises UnboundLocalError on the
    first iteration (and would clobber the mapping for later runs anyway);
    the lookup result now goes into a distinct local.
    """
    for run_id in run_ids:
        vectors = latent_vectors[run_id]
        model = KMeans()
        visualizer = KElbowVisualizer(model, k=(1, 20))

        visualizer.fit(vectors)
        visualizer.show(exp_config.ANALYSIS_PATH + "elbow_curve.jpg")
Пример #11
0
    def find_optimal_kelbow(self, fitted_vectorizer):
        """Elbow-search MiniBatchKMeans over self.params["k_range"] on the
        vectorized features and save the figure as a PDF."""
        clusterer = MiniBatchKMeans(random_state=42)
        elbow = KElbowVisualizer(clusterer, k=self.params["k_range"])
        elbow.fit(fitted_vectorizer)  # fit the data to the visualizer
        elbow.show(outpath="kelbow_minibatchkmeans.pdf")  # render + save
Пример #12
0
def kmeans_groups(min, max, df, metric='calinski_harabasz'):
    """Render a KMeans elbow plot for cluster counts in [min, max).

    NOTE(review): the parameter names shadow the built-ins ``min``/``max``;
    they are kept unchanged for backward compatibility with keyword callers.
    """
    elbow = KElbowVisualizer(KMeans(),
                             k=(min, max),
                             metric=metric,
                             timings=False)
    elbow.fit(df)
    elbow.show()
Пример #13
0
def elbow_point(df):
    """Return the elbow-method optimal k for KMeans on `df`, using the
    Calinski-Harabasz metric over k in [2, 10)."""
    viz = KElbowVisualizer(KMeans(),
                           k=(2, 10),
                           metric='calinski_harabasz',
                           timings=False)
    viz.fit(df)
    viz.show()
    plt.close()  # drop the rendered figure before returning
    return viz.elbow_value_
def elbow(X):
    """Min-max scale `X` to [0, 1] and draw a KMeans elbow plot for
    k in [1, 10)."""
    scaler = MinMaxScaler()
    # fit_transform replaces the original separate fit() + transform() calls.
    data_transformed = scaler.fit_transform(X)

    visualizer = KElbowVisualizer(KMeans(), k=(1, 10), timings=False)
    visualizer.fit(data_transformed)  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    def elbow_chart(self, df, model_type):
        """Draw an elbow chart for k in [1, 12) and return the elbow value.

        `model_type` == 'AGNES' selects AgglomerativeClustering; anything
        else falls back to KMeans.
        """
        estimator = AgglomerativeClustering() if model_type == 'AGNES' else KMeans()

        elbow = KElbowVisualizer(estimator, k=(1, 12))
        elbow.fit(df)
        elbow.show()
        return elbow.elbow_value_
Пример #16
0
def elbow_algorithm(the_data):
    """Elbow-search KMeans over k in [4, 14); print and return the elbow k."""
    print("[ ELBOW ALGORITHM ... ]")

    elbow = KElbowVisualizer(KMeans(), k=(4, 14))
    elbow.fit(the_data)  # fit the data to the visualizer
    elbow.show()

    best_k = elbow.elbow_value_
    print("     Estimated number of clusters: " + str(best_k))
    return best_k
Пример #17
0
    def elbow(self, audiofeatures):
        """Stack the per-key audio feature vectors and render a KMeans elbow
        plot for k in [4, 25).

        Fixes: (1) np.vstack requires a sequence — passing a generator is
        deprecated and raises on modern NumPy, so the rows are materialized
        in a list; (2) the original read `visualizer.elbow_value_` before
        `fit()`, a no-op statement, which is removed.
        """
        X = np.vstack([np.array(audiofeatures[key]) for key in audiofeatures])

        # Instantiate the clustering model and visualizer
        model = skc.KMeans()
        visualizer = KElbowVisualizer(model, k=(4, 25), timings=False)
        visualizer.fit(X)  # Fit the data to the visualizer
        visualizer.show()  # Finalize and render the figure
Пример #18
0
def elbow(f, g, krange):
    """Elbow plot for the table stored in file `f`.

    :param f: path to a tab-separated table (first column is the index)
    :param g: 'c' to cluster columns (transpose) instead of rows
    :param krange: inclusive (min_k, max_k) pair
    The figure is saved as <basename>_elbow.pdf in the working directory.
    """
    frame = pd.read_table(f, index_col=0, header=0)
    if g == 'c':
        frame = frame.T
    lo, hi = krange[0], krange[1]
    viz = KElbowVisualizer(KMeans(random_state=1), k=(lo, hi + 1))
    viz.fit(frame)
    viz.show(outpath=os.path.basename(f).replace('.txt', '_elbow.pdf'),
             clear_figure=True)
Пример #19
0
def print_yellowbrickkelbow(model,
                            som_weights_to_nodes,
                            destination,
                            up_to=26):
    """Elbow-search KMeans over the SOM node weights, save the figure, and
    return the detected elbow value (implicitly None when none is found).

    :param model: run object providing RUN_time and month_names_joined
    :param som_weights_to_nodes: array of SOM node weight vectors
    :param destination: directory for the saved figure
    :param up_to: exclusive upper bound on k
    """
    kelbow_visualizer = KElbowVisualizer(KMeans(), k=(2, up_to))
    kelbow_visualizer.fit(som_weights_to_nodes)
    kelbow_visualizer.show(
        f'{destination}/{model.RUN_time}_{utils.time_now()}-{model.month_names_joined}_yellowbrickelbowfor-k2.png'
    )
    # PEP 8: compare to None with identity, not `!= None`.
    if kelbow_visualizer.elbow_value_ is not None:
        return kelbow_visualizer.elbow_value_
Пример #20
0
def elbowplot(x, k, metric, title, fignam, elbow=True):
    """Draw a KMeans elbow plot of `x` and save it to `fignam`.

    :param k: (min, max) tuple of cluster counts to scan
    :param metric: yellowbrick elbow metric name
    :param title: figure title
    :param elbow: whether the visualizer should annotate the elbow point
    """
    plt.clf()  # start from a clean figure
    viz = KElbowVisualizer(KMeans(),
                           k=k,
                           metric=metric,
                           timings=False,
                           title=title,
                           locate_elbow=elbow)
    viz.fit(x)
    viz.show(outpath=fignam)
Пример #21
0
def run_k_means_elbow(params, x_data):
    """Draw a distortion elbow plot for KMeans (k in [2, 40)) and save it.

    :param params: dict with 'elbow_graph' (title/filename stem) and an
        optional 'path' output-directory prefix
    :param x_data: data to fit
    """
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 40), metric='distortion')
    plt.figure()
    visualizer.fit(x_data)
    visualizer.set_title(params['elbow_graph'])

    try:
        path = params['path'] + params['elbow_graph'] + '.png'
    except KeyError:
        # 'path' is optional; fall back to a bare filename. The original
        # bare `except:` also swallowed unrelated errors (e.g. TypeError).
        path = params['elbow_graph'] + '.png'

    visualizer.show(outpath=path)
    def defineClusters(self, df):
        """Plot an "elbow" metric chart to judge an appropriate number of
        clusters for the data in `df` (k in [4, 16))."""
        from sklearn.cluster import KMeans
        from yellowbrick.cluster import KElbowVisualizer

        elbow = KElbowVisualizer(KMeans(), k=(4, 16))
        elbow.fit(df)  # fit the data to the visualizer
        elbow.show()  # finalize and render the figure
Пример #23
0
def makespaces(s2, k, alpha, beta, legend, title, ilist):
    """Two elbow/KMeans passes over moment-space features, plotting points
    coloured by entity and writing cluster-indexed CSVs.

    Pass 1 clusters on (Skew², Kurtosis, Alpha) -> clusteringalpha.csv;
    pass 2 clusters on (Skew², Kurtosis, Beta)  -> clusteringbeta.csv.

    :param legend: unused; kept for interface compatibility
    :param ilist: per-row entity labels (also the "Entity" column)

    Bug fix: in the second scatter the legend label indexed `ilist[i]` while
    the loop runs over the deduplicated `ilist2`; it now uses `ilist2[i]`,
    matching the first pass.
    """
    kk = pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha,
                       'Beta': beta, "Entity": ilist})
    K = 8
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, K))
    kIdx = visualizer.fit(kk.drop(columns=["Beta", "Entity"]))  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    kIdx = kIdx.elbow_value_
    model = KMeans(n_clusters=kIdx).fit(kk.drop(columns=["Beta", "Entity"]))
    print(len(model.labels_))
    fig = plt.figure(figsize=(20, 15))
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    # One colour per distinct entity — points are coloured by entity,
    # not by cluster membership.
    ilist2 = list(set(ilist))
    clr = [cmap(i) for i in np.linspace(0, 1, len(ilist2))]
    for i in range(0, len(ilist2)):
        ind = (kk["Entity"] == ilist2[i])
        ax.scatter(kk["Skew²"][ind], kk["Kurtosis"][ind], kk["Alpha"][ind],
                   s=30, c=clr[i], label=ilist2[i])
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\alpha$")
    ax.legend()
    plt.title(title + ": EDF-K-means")
    plt.savefig("masoq.png")
    plt.show()
    # Re-index by cluster label and dump the alpha-space assignment.
    kk = pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha,
                       'Beta': beta, "Entity": ilist}, index=model.labels_)
    kk.sort_index(inplace=True)
    kk.to_csv("clusteringalpha.csv")
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, K))
    kIdx = visualizer.fit(kk.drop(columns=["Alpha", "Entity"]))  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    kIdx = kIdx.elbow_value_
    model = KMeans(n_clusters=kIdx).fit(kk.drop(columns=["Alpha", "Entity"]))
    fig = plt.figure(figsize=(20, 15))
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(i) for i in np.linspace(0, 1, len(ilist2))]
    for i in range(0, len(ilist2)):
        ind = (kk["Entity"] == ilist2[i])
        # label fixed from ilist[i] to ilist2[i] (see docstring).
        ax.scatter(kk["Skew²"][ind], kk["Kurtosis"][ind], kk["Beta"][ind],
                   s=30, c=clr[i], label=ilist2[i])
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\beta$")
    ax.legend()
    plt.title(title + ": EPSB-K-means")
    plt.savefig("masoq2.png")
    plt.show()
    # Re-index by cluster label and dump the beta-space assignment.
    kk = pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha,
                       'Beta': beta, "Entity": ilist}, index=model.labels_)
    kk.sort_index(inplace=True)
    kk.to_csv("clusteringbeta.csv")
def elbow_test():
    """For every data file under the module-level `path`, preprocess it,
    hold out 20%, and draw a KMeans elbow plot for k in [4, 20).

    Relies on module globals `path` and `pp` (preprocessing module).
    """
    for filename in os.listdir(path):
        print("elbow test for: " + filename)
        data_file = path + filename.replace('\\', '/')
        X, y = pp.pre_process(data_file)

        X_t, X_test, y_t, y_test = train_test_split(
            X, y, test_size=0.2, random_state=0)

        elbow = KElbowVisualizer(KMeans(), k=(4, 20))
        elbow.fit(X_t)  # fit only the training split
        print(elbow.elbow_value_)
        elbow.show()
def kmeans_elbow(df, metric='distortion', save_fig=False):
    """Run the elbow method on pre-scaled data and return the optimal k.

    Metric options:
    - 'distortion': mean sum of squared distances to centers
    - 'silhouette': mean ratio of intra-cluster and nearest-cluster distance
    - 'calinski_harabasz': ratio of within to between cluster dispersion

    NOTE(review): `save_fig` is accepted but never used — confirm whether
    the figure was meant to be written to disk.
    """
    elbow = KElbowVisualizer(KMeans(), k=(2, 12), metric=metric)
    elbow.fit(df)  # fit the data to the visualizer
    elbow.show()
    return elbow.elbow_value_  # optimal number of clusters
Пример #26
0
def clustering(d: pd.DataFrame,
               n_clusters=None,
               scale='linear',
               show_img=False):
    """Cluster the rows of `d` with KMeans on min-max-scaled values.

    When `n_clusters` is None the count is auto-detected via the elbow
    method (distortion metric, k in [1, 20)).

    :param d: data to cluster; plotting expects 'comp1'/'comp2' columns
    :param n_clusters: fixed cluster count, or None to auto-detect
    :param scale: x-axis scale for the scatter plot ('linear', 'log', ...)
    :param show_img: when True, display the elbow and scatter plots
    :return: (cluster centers in original units, per-row labels)
    """
    scaler = MinMaxScaler()
    kdata = scaler.fit_transform(d.values)

    km = KMeans(max_iter=400)
    visualizer = KElbowVisualizer(km,
                                  k=(1, 20),
                                  metric='distortion',
                                  timings=False)
    visualizer.fit(kdata)

    if show_img:
        visualizer.show()

    n = n_clusters if n_clusters is not None else visualizer.elbow_value_
    kmeans = KMeans(n_clusters=n, max_iter=400)
    kmeans.fit(kdata)

    l, c = np.unique(kmeans.labels_, return_counts=True)
    print("Total: ", len(d), dict(zip(l, c)))

    # Map the centers back to the original (un-scaled) coordinate system.
    centers = scaler.inverse_transform(kmeans.cluster_centers_)

    if show_img:

        cmap = plt.cm.get_cmap('hsv', n + 1)

        df_copy = d.copy()
        df_copy["cluster"] = kmeans.labels_
        for i in l:
            plt.scatter(df_copy.loc[df_copy['cluster'] == i, 'comp1'],
                        df_copy.loc[df_copy['cluster'] == i, 'comp2'],
                        label="Cluster %s" % i,
                        s=20,
                        color=cmap(i))

        # Bug fix: title the plot with the effective count `n`; the original
        # interpolated `n_clusters`, which prints "None" when auto-detected.
        plt.title(f"K-Means ({n} clusters)")
        plt.scatter(centers[:, 0], centers[:, 1], s=200, marker='*', c='k')
        plt.tick_params(axis='both', which='major', labelsize=10)
        plt.xscale(scale)
        plt.legend()
        plt.show()

    return centers, kmeans.labels_
Пример #27
0
    def getEstimation2(self, k=(2, 11), metric='calinski_harabasz'):
        """Elbow estimation with the Calinski-Harabasz metric; saves the
        figure next to the source .dat file and returns the visualizer."""
        start = time.perf_counter()
        print('Getting estimation and metrics with Calinski-Harabasz method')

        visualizer = KElbowVisualizer(KMeans(random_state=0), k=k, metric=metric)
        visualizer.fit(self.AllShuffledStackReshaped)

        stem = os.path.basename(self.datFile).split('.')[0]
        # Save inside self.path so the front-end app can retrieve the figure.
        calinskiFigPath = os.path.join(self.path, stem + '_calinskiEstimation.jpg')
        visualizer.show(outpath=calinskiFigPath)

        stop = time.perf_counter()
        print(f'Finished estimation in {stop - start:0.4f} seconds')
        return visualizer
Пример #28
0
def perform_elbow_method(x_scaled):
    """
    Perform the elbow method to help us decide the number of clusters
    :param x_scaled: Values of our data after normalization
    :return: A plot of the elbow method
    """
    # Enlarge tick/title/label fonts for a presentation-ready figure.
    mpl.rcParams.update({
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'axes.titlesize': 18,
        'axes.labelsize': 14,
    })
    elbow = KElbowVisualizer(KMeans(), k=(1, 12))
    elbow.fit(x_scaled)  # fit the data to the visualizer
    elbow.set_title("The Elbow Method")
    elbow.show()
def Elbow(sample, feature):
    """Demo: elbow-search KMeans on synthetic blobs.

    :param sample: number of synthetic samples to generate
    :param feature: number of features per sample
    """
    # Synthetic dataset with 8 random clusters
    features, _ = make_blobs(n_samples=sample,
                             n_features=feature,
                             centers=8,
                             random_state=42)

    elbow = KElbowVisualizer(KMeans(),
                             k=(4, 40),
                             metric='calinski_harabasz',
                             timings=False,
                             locate_elbow=True)
    elbow.fit(features)
    elbow.show()
def graph_kmeans(df, message):
    """Elbow-pick k, fit KMeans on `df`, probe the clustering with a logistic
    regression on the module-level features `x`, and return the fitted model.

    Relies on module globals `kmeans_range` and `x`.
    """
    visualizer = KElbowVisualizer(
        KMeans(),
        k=(min(kmeans_range), max(kmeans_range)),
        title=message +
        " distortion score elbow method for k means clustering")
    visualizer.fit(df)
    visualizer.show()

    best_k = visualizer.elbow_value_
    kmean = KMeans(n_clusters=best_k)
    kmean.fit(df)
    labels = kmean.labels_
    # How well a linear model separates the clusters in feature space.
    probe = LogisticRegression().fit(x, labels)
    print(message + "| clusters: ", best_k, "| accuracy: ",
          probe.score(x, labels), "| clusters at: ", kmean.cluster_centers_)
    return kmean