Example #1
def goKmeans():
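    # Read the requested cluster count and the JSON-encoded dataset from the POST form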
    clusteringNum = request.form['clusteringNum']
    dataset = json.loads(request.form.get('dataset'))
    if clusteringNum == '' or int(float(clusteringNum)) < 2:
        clusteringNum = 2
    dataset = np.array(dataset)
    #dataset = np.delete(dataset, 0, 1)
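    # Keep only the cells that parse as numbers, converting each row to floats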
    new_list = list(
        list(float(a) for a in b if BN.is_number(a)) for b in dataset)
    kmeans = KMeans(n_clusters=int(float(clusteringNum)),
                    random_state=0).fit(new_list)
    new_list_as_array = np.array(new_list)
    SilhouetteVisualize = SilhouetteVisualizer(kmeans)
    SilhouetteVisualize.fit(new_list_as_array)
    k_upper_bound = min(len(new_list), 10)
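    # Search for an elbow over candidate values of k up to k_upper_bound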
    KElbowVisualize = KElbowVisualizer(KMeans(), k=k_upper_bound)
    KElbowVisualize.fit(new_list_as_array)  # Fit the data to the visualizer
    silhouette = SilhouetteVisualize.silhouette_score_
    elbow = KElbowVisualize.elbow_value_
    return jsonify({
        'inputArray': list(new_list),
        'kmeansLabels': (kmeans.labels_.tolist()),
        'elbowValue': str(elbow),
        'silhouetteValue': ('%.3f' % silhouette)
    })
Example #2
def makespaces(s2, k, alpha, beta, legend, title):
    kk = pd.DataFrame({
        'Skew²': s2,
        'Kurtosis': k,
        'Alpha': alpha,
        'Beta': beta
    })
    K = 8
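    # Choose k by the elbow method on the Skew²/Kurtosis/Alpha sub-space (Beta is dropped)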
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, K))
    visualizer.fit(
        kk.drop(columns="Beta"))  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    kIdx = visualizer.elbow_value_
    model = KMeans(n_clusters=kIdx).fit(kk.drop(columns="Beta"))
    fig = plt.figure()
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(i) for i in np.linspace(0, 1, kIdx)]
    for i in range(0, kIdx):
        ind = (model.labels_ == i)
        ax.scatter(kk["Skew²"][ind],
                   kk["Kurtosis"][ind],
                   kk["Alpha"][ind],
                   s=30,
                   c=clr[i],
                   label='Cluster %d' % i)
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\alpha$")
    ax.legend()
    plt.title(title + ": EDF-K-means")
    plt.savefig(title + "EDF.png")
    plt.show()
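    # Repeat for the Skew²/Kurtosis/Beta sub-space (Alpha is dropped)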
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, K))
    visualizer.fit(
        kk.drop(columns="Alpha"))  # Fit the data to the visualizer
    visualizer.show()  # Finalize and render the figure
    kIdx = visualizer.elbow_value_
    model = KMeans(n_clusters=kIdx).fit(kk.drop(columns="Alpha"))
    fig = plt.figure()
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(i) for i in np.linspace(0, 1, kIdx)]
    for i in range(0, kIdx):
        ind = (model.labels_ == i)
        ax.scatter(kk["Skew²"][ind],
                   kk["Kurtosis"][ind],
                   kk["Beta"][ind],
                   s=30,
                   c=clr[i],
                   label='Cluster %d' % i)
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\beta$")
    ax.legend()
    plt.title(title + ": EPSB-K-means")
    plt.savefig(title + "EPSB.png")
    plt.show()
Example #3
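# Return only the elbow value; the figure is cleared and closed instead of being shown.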
def elbow_method(bag_of_words):
	model = KMeans()
	visualizer = KElbowVisualizer(model, k=(2, 12))
	visualizer.fit(bag_of_words)
	plt.cla()
	plt.close()
	return visualizer.elbow_value_
Example #4
    def bins(self, data, labels):
        bins = []
        for feature in self.to_discretize:
            x = np.reshape(data[:, feature], (-1, 1))
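            # Pick the number of bins for this feature with the elbow method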
            model = KMeans()
            visualizer = KElbowVisualizer(model, k=(3,12))
            visualizer.fit(x)
            n_bins = visualizer.elbow_value_
            if n_bins is None:  # guard: fall back to the smallest candidate when no elbow is found
                n_bins = 3
            kmeans = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='kmeans')
            if self.filename != "":
                plt.show(block=False)
                plt.savefig(self.filename + "/" + str(self.feature_names[feature]) + "_elbow_visualisation.png")
                plt.close('all')
            kmeans.fit(x)
            qts = []
            for biner in kmeans.bin_edges_:
                qts.append(biner)
            qts = np.array(qts)

            if qts.shape[0] == 0:
                qts = np.array([np.median(data[:, feature])])
            else:
                qts = np.sort(qts)

            bins.append(qts)
        return bins
Example #5
def elbow_method(X):
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 12))
    visualizer.fit(X)
    plt.clf()
    #visualizer.show()
    return visualizer.elbow_value_
Example #6
def makeK(d,ilist, title):
    d=np.array(d)
    kk=pd.DataFrame({'Variance': d[:,0], 'Skewness': d[:,1], 'Kurtosis': d[:,2]})
    K=20
    model=KMeans()
    visualizer = KElbowVisualizer(model, k=(1,K))
    visualizer.fit(kk)        # Fit the data to the visualizer
    visualizer.show()        # Finalize and render the figure
    kIdx=visualizer.elbow_value_
    model=KMeans(n_clusters=kIdx).fit(kk)
    # scatter plot
    fig = plt.figure()
    ax = Axes3D(fig) #.add_subplot(111))
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(i) for i in np.linspace(0, 1, kIdx)]
    for i in range(0,kIdx):
        ind = (model.labels_==i)
        ax.scatter(d[ind,2],d[ind,1], d[ind,0], s=30, c=clr[i], label='Cluster %d'%i)
    
    ax.set_xlabel("Kurtosis")
    ax.set_ylabel("Skew")
    ax.set_zlabel("Variance")
    plt.title(title+': KMeans clustering with K=%d' % kIdx)
    plt.legend()
    plt.savefig(title+"clustersnoises.png")
    plt.show()
    d=pd.DataFrame({'Variance': d[:,0], 'Skewness': d[:,1], 'Kurtosis': d[:,2], 'Alpha': d[:,3], 'Beta': d[:,4], "Cluster": model.labels_}, index=ilist)
    return d
Example #7
File: kmean.py  Project: Werest/woork
def km(img, number, g, dr, opa, parametr_p, rz_x):
    # plt.cla()
    # plt.clf()

    x = g[0]
    y = g[1]
    # If we have an array of centroids
    if len(x) > 0 and len(y) > 0:
        mkm_width, caff = rz(1214.6, img, rz_x)

        # zip (..., ..., img[x, y])
        z = [list(hhh) for hhh in zip(x, y)]

        # elbow method
        model = KMeans()
        vis = KElbowVisualizer(model, k=(1, 15))
        vis.fit(np.array(z))

        contours = measure.find_contours(img, 0.5)

        # Cluster the candidate points using the elbow-selected k
        k = KMeans(n_clusters=vis.elbow_value_).fit(z)
        x_t = list(k.cluster_centers_[:, 0])
        y_t = list(k.cluster_centers_[:, 1])

        array_x_t.append(x_t)
        array_y_t.append(y_t)
        log.info('Threshold parameter - {}'.format(parametr_p))

        return img, contours, y_t, x_t, parametr_p, mkm_width, caff, k.cluster_centers_
    else:
        log.info("Cannot determine the centroids")
Example #8
def elbow_method(column):
    # Instantiate the clustering model
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, 11), timings=False)
    # Plot visualizer
    plt.figure(figsize=(10, 5))
    # Fit the data to the visualizer
    visualizer.fit(column)
    return visualizer
Example #9
def mbkmm(z):
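    # Choose k for MiniBatchKMeans via the elbow method and return the cluster centers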
    model = MiniBatchKMeans()
    vis = KElbowVisualizer(model, k=(1, 15))
    vis.fit(np.array(z))

    k = MiniBatchKMeans(n_clusters=vis.elbow_value_).fit(z)
    x_t = list(k.cluster_centers_[:, 0])
    y_t = list(k.cluster_centers_[:, 1])
    return x_t, y_t
Example #10
    def check_test_unimodal_data(self,
                                 counterfactual_in_sphere,
                                 instances_in_sphere,
                                 radius,
                                 counterfactual_libfolding=None):
        """ Test over instances in the hypersphere to discover if data are uni or multimodal """
        try:
            results = pf.FTU(
                counterfactual_libfolding, routine="python"
            ) if counterfactual_libfolding is not None else pf.FTU(
                counterfactual_in_sphere, routine="python")
            #if results.p_value < 0.05:
            self.multimodal_results = results.folding_statistics < 1
            if self.multimodal_results:
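                # If counterfactual instances are multimodal we compute the cluster centers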
                visualizer = KElbowVisualizer(KMeans(), k=(1, 8))
                x_elbow = np.array(counterfactual_in_sphere)
                visualizer.fit(x_elbow)
                n_clusters = visualizer.elbow_value_
                if n_clusters is not None:
                    if self.verbose: print("n CLUSTERS ", n_clusters)
                    kmeans = KMeans(n_clusters=n_clusters)
                    kmeans.fit(counterfactual_in_sphere)
                    self.clusters_centers = kmeans.cluster_centers_
                    if self.verbose:
                        print("Mean center of clusters from KMEANS ",
                              self.clusters_centers)
            else:
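                # If counterfactual instances are unimodal we test a linear separability problem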
                tree_closest_neighborhood = scipy.spatial.cKDTree(
                    instances_in_sphere)
                mean = 0
                #print("counterfactual in sphere", counterfactual_in_sphere)
                target_class = self.black_box_predict(
                    counterfactual_in_sphere[0].reshape(1, -1))
                for item in counterfactual_in_sphere:
                    the_result = tree_closest_neighborhood.query(item, k=2)
                    try:
                        if self.black_box_predict(
                                instances_in_sphere[the_result[1][1]].reshape(
                                    1, -1)) == target_class:
                            mean += 1
                    except:
                        print(
                            "problem in the search of the closest neighborhood",
                            the_result)
                mean /= len(counterfactual_in_sphere)
                print("mean", mean)
                self.multimodal_results = mean < self.threshold_precision

            print("The libfolding test indicates that data are ",
                  "multimodal." if self.multimodal_results else "unimodal.")
            return True
        except ValueError:
            print(
                "There is an error in the libfolding code for unimodal testing."
            )
            return False
Example #11
def get_nCluster_elbow(X, start, to):
    model = KMeans()
    # locate_elbow must remain enabled (the default), otherwise elbow_value_ is never computed
    visualizer = KElbowVisualizer(model, k=(start, to), timings=False)

    visualizer.fit(X)  # Fit the data to the visualizer
    # visualizer.show()  # Finalize and render the figure
    # print("visualizer.elbow_value is ", visualizer.elbow_value_)
    k = visualizer.elbow_value_

    return k
Example #12
def elbow_plt(X_data):
    # Imports shown here for self-containment (they may live at module level in the project)
    from sklearn.cluster import KMeans
    from yellowbrick.cluster import KElbowVisualizer

    model = KMeans(init='k-means++', max_iter=1000, n_init=10, random_state=0)
    # k is range of number of clusters.
    visualizer = KElbowVisualizer(model, k=(3, 12), timings=False)
    visualizer.fit(X_data)  # Fit the data to the visualizer
    # visualizer.show()  # Finalize and render the figure
    print("Elbow Value by Yellowbrick ", visualizer.elbow_value_)
    plt.savefig(elbow_plt_path)
Example #13
def elbow_RMSD(X):
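    # X is a precomputed distance matrix; the elbow is located on the silhouette score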
    model = AgglomerativeClustering(compute_full_tree=True, affinity='precomputed', linkage='average')
    visualizer = KElbowVisualizer(model, k=(4,50), metric='silhouette', timings=False)
    visualizer.fit(X)
    plt.clf()
    plt.close()
    dendo_clustering = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, distance_threshold=0, affinity='precomputed', linkage='average').fit(X)
    plot_dendrogram(dendo_clustering, visualizer.elbow_value_)
    plt.grid(b=None)
    #plt.show()
    plt.close()
    return AgglomerativeClustering(n_clusters=visualizer.elbow_value_, affinity='precomputed', linkage='average').fit(X)
Example #14
def clusterfind():
    datadf = noramizedf()['body_text']
    cv = CountVectorizer(ngram_range=(1, 2), min_df=10, max_df=0.8)
    cv_matrix = cv.fit_transform(datadf)
    print(cv_matrix.shape)
    tfidf_vectorizer = TfidfVectorizer(ngram_range=(3, 4), max_df=0.9, min_df=0.005, sublinear_tf=True)
    tfidf_matrix = tfidf_vectorizer.fit_transform(datadf)
    km = KMeans(max_iter=10000, n_init=50, random_state=42, n_jobs=-1)
    visualizer = KElbowVisualizer(km, k=(2, 15))
    visualizer.fit(tfidf_matrix)        
    result = [tfidf_matrix,visualizer.elbow_value_]
    return result
Example #15
def makespaces(s2, k, alpha, beta, legend, title, ilist):
    kk=pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha, 'Beta': beta, "Entity": ilist})
    K=8
    model=KMeans()
    visualizer = KElbowVisualizer(model, k=(1,K))
    visualizer.fit(kk.drop(columns=["Beta", "Entity"]))        # Fit the data to the visualizer
    visualizer.show()        # Finalize and render the figure
    kIdx=visualizer.elbow_value_
    model=KMeans(n_clusters=kIdx).fit(kk.drop(columns=["Beta", "Entity"]))
    print(len(model.labels_))
    fig = plt.figure(figsize=(20,15))
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    ilist2=list(set(ilist))
    clr = [cmap(i) for i in np.linspace(0, 1, len(ilist2))]
    for i in range(0,len(ilist2)):
        ind = (kk["Entity"]==ilist2[i])
        ax.scatter(kk["Skew²"][ind],kk["Kurtosis"][ind], kk["Alpha"][ind], s=30, c=clr[i], label=ilist2[i])
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\alpha$")
    ax.legend()
    plt.title(title+": EDF-K-means")
    plt.savefig("masoq.png")
    plt.show()
    kk=pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha, 'Beta': beta, "Entity": ilist}, index=model.labels_)
    kk.sort_index(inplace=True)
    kk.to_csv("clusteringalpha.csv")
    model=KMeans()
    visualizer = KElbowVisualizer(model, k=(1,K))
    visualizer.fit(kk.drop(columns=["Alpha", "Entity"]))        # Fit the data to the visualizer
    visualizer.show()        # Finalize and render the figure
    kIdx=visualizer.elbow_value_
    model=KMeans(n_clusters=kIdx).fit(kk.drop(columns=["Alpha", "Entity"]))
    fig = plt.figure(figsize=(20,15))
    ax = Axes3D(fig)
    cmap = plt.get_cmap('gnuplot')
    clr = [cmap(i) for i in np.linspace(0, 1, len(ilist2))]
    for i in range(0,len(ilist2)):
        ind = (kk["Entity"]==ilist2[i])
        ax.scatter(kk["Skew²"][ind],kk["Kurtosis"][ind], kk["Beta"][ind], s=30, c=clr[i], label=ilist2[i])
    ax.set_xlabel("Skew²")
    ax.set_ylabel("Kurtosis")
    ax.set_zlabel(r"$\beta$")
    ax.legend()
    plt.title(title+": EPSB-K-means")
    plt.savefig("masoq2.png")
    plt.show()
    kk=pd.DataFrame({'Skew²': s2, 'Kurtosis': k, 'Alpha': alpha, 'Beta': beta, "Entity": ilist}, index=model.labels_)
    kk.sort_index(inplace=True)
    kk.to_csv("clusteringbeta.csv")
Example #16
    def clusterting_metric(self, number_of_clusters=50):
        """Automatic execution of the elbow method"""
        #Min max scale the data
        scaler, XC = StaticML.min_max_df(self.X_train)

        model = KMeans()
        #Perform elbow method
        visualizer = KElbowVisualizer(model, k=(4, number_of_clusters))
        visualizer.fit(XC)

        #get values of elbow method to plot later
        wcss = pd.DataFrame(visualizer.k_scores_, columns=["wcss"])
        self.wcss = wcss
        #set optimal value of k in clustering.
        self.elbow = visualizer.elbow_value_
Example #17
def kelbow_optimization(df):
    # Shows optimal number of clusters for model
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(1, 10))
    visualizer.fit(df)
    visualizer.show(outpath="Elbow Kmeans Cluster.pdf")
    return df
Example #18
def elbow(X, path):
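    # Pick k by silhouette for average-linkage (cosine) agglomerative clustering, then draw the dendrogram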
    model = AgglomerativeClustering(affinity='cosine', linkage='average') # affinity='euclidean', linkage='ward'
    visualizer = KElbowVisualizer(model, k=(4,50), metric='silhouette', timings=False)
    visualizer.fit(X)
    dendo_clustering = AgglomerativeClustering(n_clusters=None, compute_full_tree=True, distance_threshold=0,
                                                 linkage='average', affinity='cosine').fit(X)
    plt.figure(figsize=(20,10), dpi=200)
    plot_dendrogram(dendo_clustering, visualizer.elbow_value_)
    plt.xlabel("Number of the snapshot")
    plt.ylabel("cosine distance")
    plt.grid(b=None)
    plt.savefig(path + "/dendogram.png")
    plt.clf()
    plt.close()
    return AgglomerativeClustering(n_clusters=visualizer.elbow_value_, affinity='cosine', linkage='average').fit(X), visualizer.elbow_value_
Example #19
    def check_test_unimodal_data(self, counterfactual_in_sphere, instances_in_sphere, radius, counterfactual_libfolding=None):
        """ 
        Test over instances in the hypersphere to discover whether the data are uni- or multimodal
        Args: counterfactual_in_sphere: Counterfactual instances found inside the hypersphere
              instances_in_sphere: All the instances generated or present in the hypersphere
              radius: Size of the hypersphere
              counterfactual_libfolding: counterfactual instances with continuous values for libfolding
        Return: Indicates whether the counterfactuals found in the hypersphere are unimodal or multimodal
                and computes the cluster centers in the multimodal case
        """
        try:
            results = pf.FTU(counterfactual_libfolding, routine="python") if counterfactual_libfolding is not None else pf.FTU(counterfactual_in_sphere, routine="python")
            self.multimodal_results = results.folding_statistics<1
            if self.multimodal_results:
                # If counterfactual instances are multimodal we compute the cluster centers
                visualizer = KElbowVisualizer(KMeans(), k=(1,8))
                x_elbow = np.array(counterfactual_in_sphere)
                visualizer.fit(x_elbow)
                n_clusters = visualizer.elbow_value_
                if n_clusters is not None:
                    if self.verbose: print("n CLUSTERS ", n_clusters)
                    kmeans = KMeans(n_clusters=n_clusters)
                    kmeans.fit(counterfactual_in_sphere)
                    self.clusters_centers = kmeans.cluster_centers_
                    if self.verbose: print("Mean center of clusters from KMEANS ", self.clusters_centers)
            else:
                # If counterfactual instances are unimodal we test a linear separability problem
                tree_closest_neighborhood = scipy.spatial.cKDTree(instances_in_sphere)
                mean = 0
                target_class = self.black_box_predict(counterfactual_in_sphere[0].reshape(1, -1)) 
                for item in counterfactual_in_sphere:
                    the_result = tree_closest_neighborhood.query(item, k=2)
                    try:
                        if self.black_box_predict(instances_in_sphere[the_result[1][1]].reshape(1, -1)) == target_class:
                            mean+=1   
                    except:
                        print("problem in the search of the closest neighborhood", the_result)     
                mean /= len(counterfactual_in_sphere)
                if self.verbose: print("Value of the linear separability test:", mean)
                # We flag the data as multimodal if the linear separability score is below the threshold precision
                # of the interpretability method
                self.multimodal_results = mean < self.threshold_precision 

            if self.verbose: print("The libfolding test indicates that data are ", "multimodal." if self.multimodal_results else "unimodal.")
            return True
        except ValueError:
            print("There is an error in the libfolding code for unimodal testing.")
            return False
Example #20
    def k_means(self): # returns k centroids
        model = KMeans()
        visualizer = KElbowVisualizer(model, metric='calinski_harabasz', k=(3, 100))

        visualizer.fit( self.reduced_new_lst )
        #visualizer.show()
        K = visualizer.elbow_value_

        if K is None:
            K = 50
        print('K= ',K)

        model = KMeans(init="k-means++", n_clusters=K, random_state=0)
        xys = model.fit_transform(self.reduced_new_lst)
        y_kmeans = model.predict(self.reduced_new_lst)
        #print(xys)

        word_vector = self.embedding_model.wv
        keys = word_vector.vocab.keys()

        xs = xys[:, 0]
        ys = xys[:, 1]
        #self.plot_2d_graph(keys, xs, ys)

        # The block below just arranges the results into a DataFrame for display
        pd_reduced_new_lst = pd.DataFrame(self.reduced_new_lst)
        keys = [k for k in keys]
        pd_keys = pd.DataFrame(keys)
        pd_keys = pd_keys.rename(columns={0: "keyword"})
        df = pd.concat([pd_reduced_new_lst, pd_keys], axis=1)
        #print(df)

        plt.figure()
        plt.scatter(xs, ys, c=y_kmeans, s=50, cmap='viridis')
        words = df['keyword']
        for i, word in enumerate(words):
            plt.annotate(word, xy=(xs[i], ys[i]))

        centers = model.cluster_centers_
        #plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
        pd_centers = pd.DataFrame(centers)
        #print(pd_centers)

        new = pd.concat([pd_centers, pd_keys], axis=1, join='inner')
        print(new)
        #plt.show()

        return new
Example #21
    def fit(self, x, output_filename_suffix='output.pdf'):
        x = np.array(x)
        num_samples, num_features = x.shape[0], x.shape[1]
        self.__pca = PCA(n_components=min(num_samples, num_features),
                         random_state=0)
        x_transformed = self.__pca.fit_transform(x)

        visualizer = KElbowVisualizer(KMedoids(random_state=0),
                                      k=(1, num_samples),
                                      timings=False,
                                      locate_elbow=True)
        visualizer.fit(x_transformed)
        best_n_clusters = visualizer.elbow_value_ if visualizer.elbow_value_ is not None else 1

        self.__clusterer = KMedoids(n_clusters=best_n_clusters, random_state=0)
        self.__clusterer.fit(x_transformed)
Example #22
def find_optimal_k(model, k_fold, X):
  '''
  Returns optimal no. of k clusters

  Parameters:
      model (object): Algorithm object (e.g. KMeans)
      k_fold (int): Check up to how many k
      X (array-like): Data to fit the visualizer on

  Returns:
     The optimal no. of k clusters
  '''
  visualizer = KElbowVisualizer(model, k=(2,k_fold))
  visualizer.fit(X)

  print(f"Using the elbow method, the optimal number of k: {visualizer.elbow_value_}")

  return visualizer.elbow_value_
Example #23
 def fit(self):
     a = time.time()
     inertia_values = []
     Model = KMeans()
     Visualizer = KElbowVisualizer(Model, k=(3, 15), metric="silhouette")
     Visualizer.fit(self.train_data)
     plt.close()
     elbow_value = Visualizer.elbow_value_
     if elbow_value is None:
         self.clusters = 5
     else:
         self.clusters = elbow_value
     self.KM = KMeans(self.clusters)
     self.KM.fit(self.train_data)
     self.centers = self.KM.cluster_centers_
     b = time.time()
     print("Fit time: {}s.".format(b - a))
     print("{} clusters are selected.".format(self.clusters))
Example #24
def elbow(ax, metric="distortion"):
    from sklearn.cluster import KMeans
    from yellowbrick.cluster import KElbowVisualizer
    from sklearn.datasets import make_blobs

    kws = {
        'centers': 8,
        'n_samples': 1000,
        'n_features': 12,
        'shuffle': True,
    }

    # Draw a sample dataset matching the settings above
    X, y = make_blobs(**kws)
    visualizer = KElbowVisualizer(KMeans(), ax=ax, k=(2, 16), metric=metric)
    # visualizer.title = "Silhouette Ranked Elbow Curve for K-Means on 8 Blob Dataset"
    visualizer.fit(X)
    return visualizer
Example #25
def analysis_feature(slide_id):
    X = np.array(read_csv('lab2.xlsx', slide_id))

    # normalize the data attributes
    normalized_X = MinMaxScaler().fit_transform(X)

    pca = PCA(n_components=2)
    pca.fit(normalized_X)
    pca_X = pca.transform(normalized_X)

    # Choose k on the 2-D PCA projection with the elbow method
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 12))
    visualizer.fit(pca_X)

    kmeans = KMeans(n_clusters=visualizer.elbow_value_)
    kmeans.fit(pca_X)
    y_kmeans = kmeans.predict(pca_X)
    plt.show()
    _extraction_feature(normalized_X, y_kmeans, 10)
Example #26
def clustering(fname="clustering.png"):
    # Create side-by-side axes grid
    _, axes = plt.subplots(ncols=2, figsize=(18,6))
    X, y = make_blobs(centers=7)

    # Add K-Elbow to the left
    oz = KElbowVisualizer(MiniBatchKMeans(), k=(3,12), ax=axes[0])
    oz.fit(X, y)
    oz.finalize()

    # Add SilhouetteVisualizer to the right
    oz = SilhouetteVisualizer(Birch(n_clusters=5), ax=axes[1])
    oz.fit(X, y)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)
Example #27
def get_elbow_plot(X):

    output_text = ""
    try:
        model = KMeans(random_state=40)
        elbow_score = KElbowVisualizer(model, k=(1, 30))
        elbow_score.fit(X)
        elbow_value = elbow_score.elbow_value_
        model = KMeans(n_clusters=elbow_value, random_state=42)
        silhouette_viz = SilhouetteVisualizer(model, colors='yellowbrick')
        silhouette_viz.fit(X)

        output_text = """The optimal number of clusters is """ + \
                      str(silhouette_viz.n_clusters_) + """ and the silhouette score is """ + \
                      str(np.round(silhouette_viz.silhouette_score_, 2))
    except ValueError as e:
        print(e)

    return output_text
Example #28
def run_k_means_elbow(params, x_data):
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 40), metric='distortion')
    plt.figure()
    visualizer.fit(x_data)
    visualizer.set_title(params['elbow_graph'])

    try:
        path = params['path'] + params['elbow_graph'] + '.png'
    except KeyError:
        path = params['elbow_graph'] + '.png'

    visualizer.show(outpath=path)
Example #29
    def OptimumCluster(self,df):

        from yellowbrick.cluster import KElbowVisualizer
        kmeans = KMeans()
        visualizer = KElbowVisualizer(kmeans, k=(1,15))
        visualizer.fit(df) 
        visualizer.show()
Example #30
def plot_elbow(estimator, k_values, dataset, version, metric='distortion'):
    visualizer = KElbowVisualizer(estimator, k=k_values, metric=metric)
    visualizer.fit(data.DATA[dataset][version]['x_train'])
    visualizer.show(
        f'{PLOTS_FOLDER}/{dataset}/{version}/{dataset}_{version}_{estimator.__class__.__name__}_elbow_{metric}.png'
    )
    plt.clf()
Example #31
def find_optimal_clusters(X):
    # Instantiate the clustering model and visualizer
    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 21))

    visualizer.fit(X)  # Fit the data to the visualizer
    visualizer.show()