示例#1
0
def plot_clusters(data, predicted_clusters, initialized_kmeans, number_of_clusters):
    """Scatter-plot clustered 2-D samples together with the fitted centroids.

    Args:
        data: Array indexed as ``data[mask, column]``; columns 0 and 1 are
            used as the x and y coordinates.
        predicted_clusters: Per-sample cluster indices; samples whose value
            equals ``i`` are drawn as cluster ``i + 1``.
        initialized_kmeans: Fitted estimator exposing ``cluster_centers_``.
        number_of_clusters: Number of clusters to draw.
    """
    for cluster in range(number_of_clusters):
        color = cm.nipy_spectral(float(cluster) / number_of_clusters)
        members = predicted_clusters == cluster
        # The RGBA tuple is wrapped in a list (a one-row 2-D colour array):
        # a bare 4-tuple passed as ``c`` is treated as four values to be
        # colormapped whenever the cluster happens to contain 4 points.
        plt.scatter(data[members, 0],
                    data[members, 1],
                    s=50, c=[color],
                    marker='o', edgecolor=color,
                    label='cluster %d' % (cluster + 1))

    # Centroids use the top end of the colormap (n / n == 1.0).
    centroid_color = cm.nipy_spectral(1.0)
    centers = initialized_kmeans.cluster_centers_
    plt.scatter(centers[:, 0],
                centers[:, 1],
                s=250, marker='*',
                c=[centroid_color], edgecolor='black',
                label='centroids')
    plt.legend(scatterpoints=1)
    plt.grid()
    plt.tight_layout()
    plt.show()


# ============================================TESTING=======================================================
# path = "TelcoCustomerChurn.csv"
# df_telco = pd.read_csv(path)
# df_preprocessed = data_preprocessing(df_telco)
# columns_to_standardize = ['tenure', 'MonthlyCharges', 'TotalCharges']
# df_preprocessed = standardize_columns(df_preprocessed, True, columns_to_standardize)
示例#2
0
def plot_silhouette_tsne(o_silhouette, X_transformed, o_stat_H, rank, prefix):
    """Save a two-panel figure: silhouette profile and 2-D embedding scatter.

    Args:
        o_silhouette: Mapping with keys ``'silhouette'`` (mean score) and
            ``'silhouette_values'`` (per-sample scores).
        X_transformed: 2-D embedding; columns 0 and 1 are plotted.
        o_stat_H: Mapping/frame whose ``'class0'`` entry holds the cluster
            label of every sample.
        rank: Number of clusters.
        prefix: Output prefix; the figure is written to
            ``<prefix>.silhouette_umap.png``.
    """
    n_clusters = rank
    mean_score = o_silhouette['silhouette']
    per_sample_scores = o_silhouette['silhouette_values']
    cluster_labels = o_stat_H['class0'].astype(float).astype(int)

    # One row, two columns: silhouette profile (left), embedding (right).
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    fig.set_dpi(300)

    # Silhouette coefficients can range over [-1, 1]; this plot only shows
    # [-0.1, 1].
    ax1.set_xlim([-0.1, 1])
    # The extra (n_clusters + 1) * 10 rows leave blank space between the
    # bands of individual clusters.
    ax1.set_ylim([0, len(X_transformed) + (n_clusters + 1) * 10])

    band_bottom = 10
    for idx in range(n_clusters):
        # Sorted silhouette scores of the samples assigned to this cluster.
        band = per_sample_scores[cluster_labels == idx]
        band.sort()
        band_size = band.shape[0]
        band_top = band_bottom + band_size
        shade = cm.nipy_spectral(float(idx) / n_clusters)
        ax1.fill_betweenx(np.arange(band_bottom, band_top),
                          0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Bands are labelled with 1-based cluster numbers at their middle.
        ax1.text(-0.05, band_bottom + 0.5 * band_size, str(idx + 1))
        # Leave a 10-row gap before the next band.
        band_bottom = band_top + 10

    ax1.set_title("The silhouette plot for the various clusters.")
    xlabel_parts = ("The silhouette coefficient values (mean:", str(mean_score), ")")
    ax1.set_xlabel(" ".join(xlabel_parts))
    ax1.set_ylabel("Cluster label")

    # Dashed vertical line at the mean silhouette score.
    ax1.axvline(x=mean_score, color="red", linestyle="--")
    ax1.set_yticks([])  # no y-axis ticks or labels
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # Right panel: the embedded samples coloured by cluster.
    point_colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
    ax2.scatter(X_transformed[:, 0], X_transformed[:, 1],
                marker='.', s=30, lw=0, alpha=0.7, c=point_colors, edgecolor='k')

    ax2.set_title("The visualization of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st tsne")
    ax2.set_ylabel("Feature space for the 2nd tsne")
    plt.suptitle(("Silhouette analysis for tSNE clustering on coefficient matrix H "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')
    fig.savefig('.'.join([prefix, "silhouette_umap", "png"]))
def compare(ests, types, name, pres):
    """Overlay the measurements of every (type, estimator) pair on one plot.

    Each data file is read with ``np.genfromtxt`` and drawn as an error-bar
    series (column 0: x, column 1: y, column 2: y error) on log-log axes.
    The figure is saved to ``comparisons/<name>.pdf`` and then closed.

    Args:
        ests: Estimator identifiers, inserted into the data file names.
        types: Sample names; ``'surhud'`` selects the RedMaPPer reference
            file and any name containing ``"SDSS"`` the Planck_SDSS files.
        name: Base name of the output PDF.
        pres: When truthy, every non-RedMaPPer series is multiplied by 9;
            otherwise the series are plotted unscaled.
    """
    colors = np.linspace(0, 1, int(len(ests) * len(types)))
    shift = 9 if pres else 1
    plt.figure(1)
    ax = plt.subplot(111)
    i = 0  # index of the next colour; advanced only for series actually drawn
    for type_ in types:
        for est in ests:
            is_redmapper = type_ == 'surhud'
            if is_redmapper:
                dat = np.genfromtxt(
                    "/home/dominik.zuercher/Documents/RSP_Pro/Mest/redmap.dat")
            elif "SDSS" in type_:
                dat = np.genfromtxt(
                    "/work/dominik.zuercher/DataStore/corr-pairs/Planck_SDSS/Planck_SDSS_plot("
                    + str(est) + ").dat")
            else:
                try:
                    dat = np.genfromtxt(
                        "/work/dominik.zuercher/DataStore/corr-pairs/" +
                        str(type_) + "/" + str(type_) + "_plot(" + str(est) +
                        ").dat")
                except (OSError, ValueError):
                    # Missing or unparsable file: skip this combination.
                    # Other exceptions (e.g. KeyboardInterrupt) propagate.
                    continue
            print("-----------------------------------------")
            print(type_, est)
            print(dat)
            print("-----------------------------------------")
            if is_redmapper:
                # The reference series is always scaled by 0.1.
                scale, alpha, label = 0.1, 1, "RedMaPPer"
            else:
                scale, alpha = shift, 0.8
                label = str(type_) + " (" + str(est) + ")"
            ax.errorbar(dat[:, 0],
                        np.multiply(dat[:, 1], scale),
                        np.multiply(dat[:, 2], scale),
                        fmt=".",
                        c=cm.nipy_spectral(colors[i]),
                        capsize=2,
                        alpha=alpha,
                        label=label)
            i += 1
    ax.set_title("Comparison")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$R$ ($h^{-1}$Mpc)")
    ax.set_ylabel(r"$\xi^{\rm 2d}$ ($h^{-1}$Mpc)")
    plt.legend()
    plt.savefig("comparisons/" + str(name) + ".pdf")
    plt.close()
def silhouette_analysis(X, n_clusters, clusterer, set_lim,subtitle):
    """Show a silhouette plot next to a scatter of the clustered samples.

    Args:
        X: Sample matrix; columns 0 and 1 are plotted on the right-hand axis.
        n_clusters: Number of clusters; labels ``0 .. n_clusters - 1`` are
            drawn as separate bands.
        clusterer: Fitted estimator providing ``predict`` and
            ``cluster_centers_``.
        set_lim: Limits passed to ``set_ylim`` of the right-hand axis.
        subtitle: Name of the clustering method, inserted into the title.

    Returns:
        Tuple ``(cluster_labels, centers)``.
    """
    # One row, two columns: silhouette bands (left), samples (right).
    fig, (sil_ax, scatter_ax) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)

    # Silhouette coefficients can range over [-1, 1]; only [-0.1, 1] is shown.
    sil_ax.set_xlim([-0.1, 1])
    # The extra (n_clusters + 1) * 10 rows leave blank space between bands.
    sil_ax.set_ylim([0, len(X) + (n_clusters + 1) * 10])
    scatter_ax.set_ylim(set_lim)

    cluster_labels = clusterer.predict(X)
    # Mean score over all samples, and the individual per-sample scores.
    silhouette_avg = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    band_start = 10
    for k in range(n_clusters):
        # Sorted scores of the samples assigned to cluster k.
        band = per_sample[cluster_labels == k]
        band.sort()
        band_size = band.shape[0]
        band_end = band_start + band_size
        shade = cm.nipy_spectral(float(k)*1.3 / n_clusters)
        sil_ax.fill_betweenx(np.arange(band_start, band_end), 0, band,
                             facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        sil_ax.text(-0.05, band_start + 0.5 * band_size, str(k))
        # 10-row gap before the next band.
        band_start = band_end + 10

    sil_ax.set_title("The silhouette plot for the various clusters.")
    sil_ax.set_xlabel("The silhouette coefficient values")
    sil_ax.set_ylabel("Cluster label")
    # Dashed vertical line at the mean silhouette score.
    sil_ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    sil_ax.set_yticks([])  # no y-axis ticks or labels
    sil_ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # Right panel: samples coloured by predicted cluster.
    point_colors = cm.nipy_spectral(cluster_labels.astype(float)*1.3 / n_clusters)
    scatter_ax.scatter(X[:, 0], X[:, 1], marker='.', s=130, lw=0, alpha=0.7,
                       c=point_colors)

    # White discs at the cluster centres, each overlaid with its index.
    centers = clusterer.cluster_centers_
    scatter_ax.scatter(centers[:, 0], centers[:, 1], marker='o', c="white",
                       alpha=1, s=200)
    for number, center in enumerate(centers):
        scatter_ax.scatter(center[0], center[1], marker='$%d$' % number,
                           alpha=1, s=100)
    scatter_ax.set_title("The visualization of the clustered data.")
    scatter_ax.set_xlabel("Feature space for the 1st feature")
    scatter_ax.set_ylabel("Feature space for the 2nd feature")

    plt.suptitle("Silhouette analysis for %s clustering on sample data with n_clusters = %d and "
                 "silhouette_score = %s" % (subtitle, n_clusters, silhouette_avg))
    plt.show()
    return cluster_labels, centers
示例#5
0
def write_result(og_img, img, prediction, index, epoch, resolution):
    """Write a segmentation prediction as image files under ``./results``.

    A label mask is assembled from ``prediction``, resized to ``og_img.size``
    through ``DataSet.resize_mask``, colour-mapped with ``nipy_spectral`` and
    saved next to the input images, a 50/50 blend and a contour overlay in
    ``./results/<epoch>/<resolution>/``.

    Args:
        og_img: PIL-style image (``.size``, ``.save``, usable with
            ``Image.blend``) — presumably the original-resolution input.
        img: Tensor with channels first (``shape[1:]`` is used as the mask
            shape and it is permuted ``(1, 2, 0)`` before saving);
            presumably valued in [0, 1] — TODO confirm.
        prediction: Either a sequence whose first element is a dict with
            ``"masks"`` and ``"labels"``, or such a dict directly.
        index: Identifier inserted into the output file names.
        epoch: First path component below ``./results``.
        resolution: Second path component below ``./results``.
    """
    try:
        # Start from an all-zero label mask with the image's spatial shape.
        mask = np.zeros(img.shape[1:])
        # breakpoint()
        try:
            masks = prediction[0]["masks"]
            labels = prediction[0]["labels"].cpu()
        except Exception:
            # Fall back to a prediction that is the dict itself.
            masks = prediction["masks"]
            labels = prediction["labels"].cpu()
        # Paint masks from the last index downwards so that lower indices
        # overwrite higher ones.
        # NOTE(review): the range stops before index 0, so masks[0] is never
        # painted — confirm this is intended.
        for x in range(masks.shape[0] - 1, 0, -1):
            if len(masks.shape) == 4:
                # 4-D masks: take channel 0 of mask x.
                tmp_mask = masks[x, 0].mul(255).byte().cpu().numpy()
            else:
                tmp_mask = masks[x].mul(255).byte().cpu().numpy()
            # Wherever this mask is non-zero, store its label value.
            mask = np.where(tmp_mask > 0, labels[x].item(), mask)

        og_mask = DataSet.resize_mask(mask, og_img.size)
    except Exception:
        # NOTE(review): any failure above is swallowed here, but og_mask is
        # only bound by the last statement of the try block, so the code
        # below would then fail with NameError — confirm desired handling.
        pass

    # Normalise labels to [0, 1], colour-map them and convert to RGB images.
    # NOTE(review): an all-zero mask makes mask.max() zero (division by zero).
    mask = Image.fromarray(np.uint8(cm.nipy_spectral(mask / mask.max()) * 255)).convert(
        "RGB"
    )
    og_mask = Image.fromarray(
        np.uint8(cm.nipy_spectral(og_mask / og_mask.max()) * 255)
    ).convert("RGB")
    filepath = Path(f"./results/{epoch}/{resolution}/")
    filepath.mkdir(parents=True, exist_ok=True)
    # Channels-first tensor -> height x width x channels uint8 image.
    img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
    img.save(filepath / f"img_{index}.png")
    og_img.save(filepath / f"og_img_{index}.png")
    mask.save(filepath / f"mask_{index}.png")
    og_mask.save(filepath / f"og_mask_{index}.png")

    blended = Image.blend(og_img, og_mask, 0.5)
    blended.save(filepath / f"blended_{index}.png")

    # Channels-first copy of the coloured mask; only channel 0 is used below.
    og_mask_np = np.asarray(og_mask).astype(np.uint8).transpose(2, 0, 1)
    og_img_np = np.asarray(og_img).astype(np.uint8)
    # breakpoint()
    # Outline, on the original image, the region of every value found in
    # channel 0 of the coloured mask.
    for x in range(1, og_mask_np.max() + 1):
        submask = (og_mask_np[0] == x).astype(np.uint8)
        if submask.sum() == 0:
            continue

        contours, hierarchy = cv2.findContours(
            submask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
        )

        # Derive a colour from the base-3 digits of x.
        r = x % 3
        g = (x // 3) % 3
        b = (x // 3 // 3) % 3

        for contour in contours:
            cv2.drawContours(og_img_np, contour, -1, (r * 100, g * 100, b * 100), 5)

    # Channel order is reversed for cv2.imwrite (presumably RGB -> BGR).
    cv2.imwrite(str(filepath / f"og_img_{index}_cnts.png"), og_img_np[:, :, ::-1])
示例#6
0
def plot_silhouette(X, k, cluster_labels, centroids):
    """Show silhouette bands beside a scatter of the clustered samples.

    Args:
        X: Sample matrix; columns 0 and 1 are plotted on the right panel.
        k: Number of clusters; labels ``0 .. k - 1`` are drawn.
        cluster_labels: Array of per-sample cluster indices.
        centroids: Array of cluster centres (columns 0 and 1 are plotted).
    """
    # Mean silhouette score and the individual per-sample scores.
    mean_score = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    fig, (left, right) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    left.set_xlim([-0.1, 1])
    left.set_ylim([0, len(X) + (k + 1) * 10])

    bottom = 10
    for cluster in range(k):
        # Sorted scores of the samples that belong to this cluster.
        band = per_sample[cluster_labels == cluster]
        band.sort()
        height = band.shape[0]
        top = bottom + height

        shade = cm.nipy_spectral(float(cluster) / k)
        left.fill_betweenx(np.arange(bottom, top), 0, band,
                           facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        left.text(-0.05, bottom + 0.5 * height, str(cluster))
        # 10-row gap before the next band.
        bottom = top + 10

    left.set_title("The silhouette plot for the various clusters.")
    left.set_xlabel("The silhouette coefficient values")
    left.set_ylabel("Cluster label")
    # Dashed vertical line at the mean silhouette score.
    left.axvline(x=mean_score, color="red", linestyle="--")
    left.set_yticks([])  # no y-axis ticks or labels
    left.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    point_colors = cm.nipy_spectral(cluster_labels.astype(float) / k)
    right.scatter(X[:, 0], X[:, 1], marker='o', s=100, alpha=0.5, c=point_colors)
    right.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='k', s=200)
    right.set_xlabel('$x_1$', fontsize=16)
    right.set_ylabel('$x_2$', fontsize=16)

    plt.show()
def sigma_compare(ests, types, name):
    """Overlay the sigma profiles of every (type, estimator) pair on one plot.

    Each data file is read with ``np.genfromtxt`` and drawn as an error-bar
    series (column 0: x, column 1: y, column 2: y error) on log-log axes.
    The figure is saved to ``sigmacomparisons/<name>.pdf`` and then closed.

    Args:
        ests: Estimator identifiers, inserted into the data file names.
        types: Sample names; ``'surhud'`` selects the RedMaPPer reference
            file and any name containing ``"SDSS"`` the Planck_SDSS files.
        name: Base name of the output PDF (also printed for every series).
    """
    colors = np.linspace(0, 1, int(len(ests) * len(types)))
    plt.figure(2)
    ax = plt.subplot(111)
    i = 0  # index of the next colour; advanced only for series actually drawn
    for type_ in types:
        for est in ests:
            is_redmapper = type_ == 'surhud'
            if is_redmapper:
                dat = np.genfromtxt(
                    "/home/dominik.zuercher/Documents/RSP_Pro/Mest/redmap.dat")
            elif "SDSS" in type_:
                dat = np.genfromtxt(
                    "/work/dominik.zuercher/DataStore/corr-pairs/Planck_SDSS/Planck_SDSS_plot("
                    + str(est) + ").dat")
            else:
                try:
                    dat = np.genfromtxt(
                        "/work/dominik.zuercher/DataStore/corr-pairs/" +
                        str(type_) + "/" + str(type_) + "_sigplot(" +
                        str(est) + ").dat")
                except (OSError, ValueError):
                    # Missing or unparsable file: skip this combination.
                    # Other exceptions (e.g. KeyboardInterrupt) propagate.
                    continue
            print("-----------------------------------------")
            print(name)
            print(dat)
            print("-----------------------------------------")
            if is_redmapper:
                alpha, label = 1, "RedMaPPer"
            else:
                alpha, label = 0.8, str(type_) + " (" + str(est) + ")"
            ax.errorbar(dat[:, 0],
                        dat[:, 1],
                        dat[:, 2],
                        fmt=".",
                        c=cm.nipy_spectral(colors[i]),
                        capsize=2,
                        alpha=alpha,
                        label=label)
            i += 1
    ax.set_title("Sigma Comparison")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$R$ ($h^{-1}$Mpc)")
    # The exponent must sit inside math mode, otherwise "^{-2}" is rendered
    # literally.
    ax.set_ylabel(r"$\Sigma_g$ ($h^{2}$Mpc$^{-2}$)")
    plt.legend()
    plt.savefig("sigmacomparisons/" + str(name) + ".pdf")
    plt.close()
示例#8
0
def plot_score(data, labels, y_true, num_clusters=10):
    """Show silhouette bands beside a t-SNE embedding of the clustered data.

    When all samples share a single label the silhouette score is undefined;
    a message is printed and nothing is embedded or plotted.

    Args:
        data: Sample matrix (``data.shape[0]`` samples).
        labels: Per-sample cluster indices (any array-like).
        y_true: Unused; kept for interface compatibility.
        num_clusters: Number of clusters; labels ``0 .. num_clusters - 1``
            are drawn as separate bands.
    """
    labels = np.asarray(labels)
    if len(np.unique(labels)) == 1:
        # Checked before the costly embedding and before a figure is created,
        # so the degenerate case leaves no unused figure behind.
        print("This time, no good.")
        return

    df_embedded = TSNE(n_components=2).fit_transform(data)
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    ax1.set_xlim([-0.1, 1])
    # The extra (num_clusters + 1) * 10 rows leave blank space between bands.
    ax1.set_ylim([0, data.shape[0] + (num_clusters + 1) * 10])

    silhouette_avg = silhouette_score(data, labels)
    sample_silhouette_values = silhouette_samples(data, labels)
    y_lower = 10
    for i in range(num_clusters):
        # Sorted scores of the samples assigned to cluster i.
        ith_cluster_silhouette_values = sample_silhouette_values[labels == i]
        ith_cluster_silhouette_values.sort()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i
        color = cm.nipy_spectral(float(i) / num_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)
        # Cluster number at the middle of its band.
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
        y_lower = y_upper + 10  # 10-row gap before the next band
    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values.")
    ax1.set_ylabel("Cluster label.")
    # Dashed vertical line at the mean silhouette score.
    ax1.axvline(x=silhouette_avg, color='red', linestyle='--')
    ax1.set_yticks([])
    ax1.set_xticks([-0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])

    colors = cm.nipy_spectral(labels.astype(float) / num_clusters)
    ax2.scatter(df_embedded[:, 0],
                df_embedded[:, 1],
                marker='.',
                s=60,
                lw=0,
                alpha=0.7,
                c=colors,
                edgecolor='k')
    ax2.set_title("The TSNE visualisation of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st feature.")
    ax2.set_ylabel("Feature space for the 2nd feature.")
    plt.suptitle(("Silhouette analysis for clustering on sampling data "
                  "with n_clusters = %d" % num_clusters),
                 fontsize=14,
                 fontweight='bold')
    plt.show()
示例#9
0
    def __init__(self,world,name,parent = "Originator",color = 0,type = "Human", movementspd = 10,t0 = 0, growthrate = 0.4,deathrate = 1,renewabledepletion = 0.01,nonrenewabledepletion = 0.001,startRain = 0.7):
        """Create a population seeded in one randomly chosen land cell.

        Args:
            world: World object providing ``Elevation``, ``oLevel``,
                ``RainFall``, ``GlobeGrid``, ``gridindices``,
                ``OceanIndicator`` and ``InitialRenew``.
            name: Stored as ``self.Name``.
            parent: Stored as ``self.Parent``.
            color: A length-3 sequence is used as the base chromosome as-is;
                anything else selects a random ``nipy_spectral`` colour.
            type: Stored as ``self.Type``.
            movementspd: Stored as ``self.Movement``.
            t0: First entry of the ``"Time"`` history.
            growthrate: Growth rate of every non-ocean cell.
            deathrate: Death rate of every non-ocean cell.
            renewabledepletion: Renewable depletion rate of non-ocean cells.
            nonrenewabledepletion: Non-renewable depletion rate of non-ocean
                cells.
            startRain: Rainfall a cell must exceed to be preferred as the
                starting point.
        """
        self.Name = name
        self.Type = type
        self.Parent = parent

        grid_shape = world.GlobeGrid[0].shape

        # Prefer land cells that are rainy enough; fall back to any land cell.
        PossibleStarts = np.where((world.Elevation > world.oLevel)*(world.RainFall > startRain))
        if not len(PossibleStarts[0]):
            PossibleStarts = np.where((world.Elevation > world.oLevel))
        pk = np.random.randint(len(PossibleStarts[0]))
        # Row-major flat index: the row stride is the number of COLUMNS
        # (grid_shape[1]).  Using the row count is only correct on square
        # grids.  Assumes Elevation has the same shape as GlobeGrid[0].
        thept = PossibleStarts[0][pk]*grid_shape[1] + PossibleStarts[1][pk]

        InitialPop = np.zeros(len(world.gridindices))
        InitialPop[thept] = 1
        self.InitialDistribution = InitialPop.reshape(grid_shape)
        self.Population = self.InitialDistribution.copy()

        # Per-cell rates; ocean cells are forced to zero.
        grs = growthrate*np.ones(len(InitialPop))
        grs[world.OceanIndicator]  = 0
        self.growthrates = grs.reshape(grid_shape)

        drs = deathrate*np.ones(len(InitialPop))
        drs[world.OceanIndicator]  = 0
        self.deathrates = drs.reshape(grid_shape)

        # Depletion rates are kept flat (not reshaped to the grid).
        rds = renewabledepletion*np.ones(len(world.InitialRenew.flatten()))
        rds[world.OceanIndicator]  = 0
        self.RenewDeplete = rds

        nrds = nonrenewabledepletion*np.ones(len(world.InitialRenew.flatten()))
        nrds[world.OceanIndicator]  = 0
        self.NonRenewDeplete = nrds

        self.Movement = movementspd

        if hasattr(color, "__len__") and len(color) == 3:
            self.BaseChromosome = color
        else:
            # Random RGB colour (alpha channel dropped).
            self.BaseChromosome = np.array(cm.nipy_spectral(np.random.rand())[:3])

        # The base chromosome in the seeded cell, zeros everywhere else.
        self.Chromosomes = self.BaseChromosome*self.InitialDistribution.astype(bool)[:,:,None]

        self.Cities = np.zeros_like(self.Population)

        self.History = {"Population":[self.InitialDistribution.copy()], "Genetics":[self.Chromosomes.copy()],"Cities":[self.Cities], "Time":[t0]}
示例#10
0
def output_plot(filename,
                models,
                numberized,
                x_min=-0.1,
                x_max=1.0,
                y_distance=10,
                x_step=0.2):
    """Save one silhouette plot per model, side by side, as ``<filename>.jpeg``.

    Args:
        filename: Output path without extension.
        models: Sequence of objects exposing ``number_of_clusters``, ``name``
            and ``instance`` (an estimator with ``fit_predict``).
        numberized: Sample matrix passed to every model.
        x_min: Lower x limit; must lie in [-1, 1] and not exceed ``x_max``.
        x_max: Upper x limit; must lie in [-1, 1].
        y_distance: Blank rows between cluster bands; must be positive.
        x_step: Spacing of the x ticks; must be positive and no larger than
            ``x_max - x_min``.

    Raises:
        ValueError: If any of the plot bounds or steps is invalid.
    """
    # Silhouette coefficients live in [-1, 1], so both limits must too.
    if (x_min < -1) or (x_max < -1) or (x_min > 1) or (x_max > 1) or (x_min >
                                                                      x_max):
        raise ValueError('Incorrect bounds for plotting silhouette score')
    if (y_distance <= 0):
        raise ValueError('Incorrect y distance value')
    # A zero step would make np.arange fail when the ticks are built.
    if (x_step <= 0) or ((x_max - x_min) < x_step):
        raise ValueError('Incorrect x step value')

    # squeeze=False keeps ``axs`` a 2-D array even for a single model, so it
    # can always be iterated.
    fig, axs = plt.subplots(1, len(models), squeeze=False)
    fig.set_size_inches(7 * len(models), 18)

    for model, ax1 in zip(models, axs.ravel()):
        number_of_clusters = model.number_of_clusters

        predicted_labels = model.instance.fit_predict(numberized)
        silhouette_avg = silhouette_score(numberized, predicted_labels)
        silhouette_sample_values = silhouette_samples(numberized,
                                                      predicted_labels)

        ax1.set_xlim([x_min, x_max])
        ax1.set_ylim(
            [0, numberized.shape[0] + (number_of_clusters + 1) * y_distance])

        y_lower = y_distance
        for j in range(number_of_clusters):
            # Sorted scores of the samples assigned to cluster j.
            ith_cluster_values = silhouette_sample_values[predicted_labels ==
                                                          j]
            ith_cluster_values.sort()

            ith_cluster_size = ith_cluster_values.shape[0]
            y_upper = y_lower + ith_cluster_size

            color = cm.nipy_spectral(float(j) / number_of_clusters)
            ax1.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)
            # Cluster number at the middle of its band.
            ax1.text(-0.05, y_lower + 0.5 * ith_cluster_size, str(j))
            # Same gap that was used to size the y axis above.
            y_lower = y_upper + y_distance

        ax1.set_title(f'The silhouette plot for {model.name} clustering')
        ax1.set_xlabel('The silhouette coefficient values')
        ax1.set_ylabel('The index of cluster')

        # Dashed vertical line at the mean silhouette score.
        ax1.axvline(x=silhouette_avg, color='red', linestyle='--')
        ax1.set_yticks([])
        ax1.set_xticks(np.arange(x_min, x_max, x_step))

    plt.savefig(f'{filename}.jpeg', bbox_inches='tight')
示例#11
0
def silhouette_plot(k, cluster_labels, sample_sil_coefficients, sil_score):
    """Save a silhouette plot for ``k`` clusters as a PNG file.

    Args:
        k: Number of clusters; labels ``0 .. k - 1`` are drawn as bands.
        cluster_labels: Array of per-sample cluster indices.
        sample_sil_coefficients: Array of per-sample silhouette coefficients.
        sil_score: Mean silhouette score, drawn as a dashed vertical line.
    """
    plt.figure()
    bottom = 10
    for cluster in range(k):
        # Sorted coefficients of the samples assigned to this cluster.
        band = sample_sil_coefficients[cluster_labels == cluster]
        band.sort()
        height = band.shape[0]
        top = bottom + height

        shade = cm.nipy_spectral(float(cluster) / k)
        plt.fill_betweenx(np.arange(bottom, top), 0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        plt.text(-0.05, bottom + 0.5 * height, str(cluster))
        # 10-row gap before the next band.
        bottom = top + 10

    plt.xlabel("The silhouette coefficient values")
    plt.ylabel("Cluster label")
    plt.axvline(x=sil_score, color="red", linestyle="--")

    plt.savefig('exported/plots/Silhouette_graph_Origin_' + str(k) + '.png')
    plt.close()
示例#12
0
def display():
    """Redraw the scene: sample points, their meshes and the connecting lines.

    Clears the colour and depth buffers, draws every point returned by
    ``get_instance(5)`` in white followed by its mesh triangles (coloured by
    distance from the origin), then the line segments from ``get_vis_net``,
    and finally swaps the buffers.
    """
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    glMatrixMode(GL_MODELVIEW)
    glLoadIdentity()

    diagram = gvd(u'め')
    #    diagram = gvd(u'を')
    #    diagram = gvd(u'あ')
    samples = diagram.get_instance(5)
    for sample in samples:
        x, y = sample

        # The sample itself, as a white point.
        glColor3f(1., 1., 1.)
        glBegin(GL_POINTS)
        glVertex3f(x, y, 0)
        glEnd()

        # Its mesh, coloured by the sample's distance from the origin.
        radius = np.sqrt(x * x + y * y)
        glColor4f(*(cm.nipy_spectral(radius / 1500.)))
        mesh = diagram.get_mesh(sample)
        glBegin(GL_TRIANGLES)
        for triangle in mesh['triangles']:
            for vertex_index in triangle:
                glVertex3f(*(mesh['vertices'][vertex_index]))
        glEnd()

    # Grey line segments between the samples.
    segments = diagram.get_vis_net(samples)
    glColor4f(0.6, 0.6, 0.6, 0.8)
    glBegin(GL_LINES)
    for start, end in segments:
        glVertex2f(*start)
        glVertex2f(*end)
    glEnd()
    glutSwapBuffers()
示例#13
0
def plot_clusters(X, y, centers=None, ax=None):
    """Scatter 2-D samples coloured by cluster label, optionally with centres.

    Args:
        X: Sample matrix; columns 0 and 1 are plotted.
        y: Array of per-sample cluster labels.
        centers: Optional array of cluster centres.
        ax: Axes to draw on.  When omitted, a simpler plot is drawn through
            ``plt`` with red star markers for the centres.
    """
    n_labels = np.unique(y).shape[0]
    colors = cm.nipy_spectral(y.astype(float) / n_labels)

    if ax is None:
        plt.scatter(X[:, 0], X[:, 1], c=colors, s=10)
        if centers is not None:
            plt.scatter(centers[:, 0], centers[:, 1], c='red', marker='*')
        return

    ax.scatter(X[:, 0], X[:, 1],
               marker='.', lw=0, s=30, alpha=0.7, c=colors, edgecolor='k')
    if centers is None:
        return

    # White discs at the cluster centres, each overlaid with its index.
    ax.scatter(centers[:, 0], centers[:, 1],
               marker='o', c="white", alpha=1, s=200, edgecolor='k')
    for number, center in enumerate(centers):
        ax.scatter(center[0], center[1],
                   marker='$%d$' % number, alpha=1, s=50, edgecolor='k')
示例#14
0
def plot_silhouette_values(n_cluster, cluster_labels, sample_silhouette_values, silhouette_avg,ax):
    """Draw the per-cluster silhouette bands on ``ax``.

    Args:
        n_cluster: Number of clusters; labels ``0 .. n_cluster - 1`` are drawn.
        cluster_labels: Array of per-sample cluster indices.
        sample_silhouette_values: Array of per-sample silhouette coefficients.
        silhouette_avg: Mean silhouette score, drawn as a dashed vertical line.
        ax: Axes to draw on.
    """
    bottom = 10
    for cluster in range(n_cluster):
        # Sorted coefficients of the samples assigned to this cluster.
        band = sample_silhouette_values[cluster_labels == cluster]
        band.sort()
        height = band.shape[0]
        top = bottom + height

        shade = cm.nipy_spectral(float(cluster) / n_cluster)
        ax.fill_betweenx(np.arange(bottom, top), 0, band,
                         facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        ax.text(-0.05, bottom + 0.5 * height, str(cluster))
        # 10-row gap before the next band.
        bottom = top + 10

    ax.set_title("The silhouette plot for the various clusters.")
    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")

    # Dashed vertical line at the mean silhouette score.
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")

    ax.set_yticks([])  # no y-axis ticks or labels
    ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
示例#15
0
 def set_colors(self, n_clusters):
     """Assign a colormap colour to this node and, recursively, its children.

     A node whose ``cluster_nb`` is set receives the ``nipy_spectral`` colour
     at ``(cluster_nb + 1) / (n_clusters + 1)``; nodes without a cluster
     number keep their current colour.
     """
     if self.cluster_nb is not None:
         fraction = float(self.cluster_nb + 1) / (n_clusters + 1)
         self.color = cm.nipy_spectral(fraction)
     if self.no_child:
         return
     # Descend into both subtrees.
     self.first_child.set_colors(n_clusters)
     self.second_child.set_colors(n_clusters)
示例#16
0
def create_silgraph(df, labels):
    """Draw one silhouette band per distinct cluster label on a new figure.

    Args:
        df: Sample matrix passed to ``silhouette_samples``.
        labels: Per-sample cluster labels (any array-like).  The labels need
            not be the consecutive integers ``0 .. n - 1``; every distinct
            value gets its own band.
    """
    labels = np.asarray(labels)
    sample_silhouette_values = silhouette_samples(df, labels)

    unique_labels = np.unique(labels)
    n_clusters = len(unique_labels)
    y_lower = 100
    fig = plt.figure()
    ax1 = fig.add_subplot(111)

    # Iterate over the labels that actually occur instead of assuming they
    # are 0 .. n_clusters - 1 (which would drop e.g. 1-based or noise labels).
    for i, label in enumerate(unique_labels):
        ith_cluster_silhouette_values = sample_silhouette_values[labels == label]
        ith_cluster_silhouette_values.sort()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i
        color = cm.nipy_spectral(float(i) / n_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)

        # Label the silhouette plots with their cluster labels at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(label))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10-row gap between bands
示例#17
0
def PCA(X, label, variablesName, outputDir):
    """Plot the samples on their two largest principal components and save it.

    The data are reduced to two dimensions so that the clustering result can
    be inspected visually; the figure is written to
    ``<outputDir>/result.png``.

    :param X: source data that was clustered (one sample per row)
    :param label: cluster label of every sample, as produced by clustering
    :param variablesName: text annotation for every sample
    :param outputDir: directory that receives ``result.png``
    :return: None
    """
    pca = decomposition.PCA(n_components=2)
    pca.fit(X)  # every row is one input sample
    result = pca.transform(X)  # projected coordinates
    plt.figure(figsize=[10, 6])  # draw on a new figure
    n_clusters = len(set(label.tolist()))
    for i in range(result[:, 0].size):
        color = cm.nipy_spectral(float(label[i]) / n_clusters)
        plt.plot(result[i, 0],
                 result[i, 1],
                 c=color,
                 marker='o',
                 markersize=10)
        plt.text(result[i, 0], result[i, 1], variablesName[i])
    x_label = 'PC1(%s%%)' % round(
        (pca.explained_variance_ratio_[0] * 100.0), 2)  # x-axis label text
    # The second component is PC2 (it was mislabelled "PC1").
    y_label = 'PC2(%s%%)' % round(
        (pca.explained_variance_ratio_[1] * 100.0), 2)  # y-axis label text
    plt.xlabel(x_label)  # apply the x-axis label
    plt.ylabel(y_label)  # apply the y-axis label
    plt.title('使用主成分分析法对高维数据进行降维,产生直观图像')
    # Print the current timestamp, then save the scatter plot
    tick = time.time()
    print("当前的时间戳为:", tick)
    pylab.savefig(outputDir + '/result.png')
示例#18
0
def plotSilhouette(df, n_clusters, kmeans_labels, silhouette_avg):
    """Show the silhouette plot of a clustering with ``n_clusters`` clusters.

    Args:
        df: Sample matrix passed to ``silhouette_samples``.
        n_clusters: Number of clusters; labels ``0 .. n_clusters - 1`` are
            drawn as separate bands.
        kmeans_labels: Array of per-sample cluster indices.
        silhouette_avg: Mean silhouette score, drawn as a dashed vertical line.
    """
    fig, axis = plt.subplots(1)
    fig.set_size_inches(8, 6)
    axis.set_xlim([-0.2, 1])
    # The extra (n_clusters + 1) * 10 rows leave blank space between bands.
    axis.set_ylim([0, len(df) + (n_clusters + 1) * 10])

    # Dashed vertical line at the mean silhouette score.
    axis.axvline(x=silhouette_avg, color="red", linestyle="--")
    axis.set_yticks([])  # no y-axis ticks or labels
    axis.set_xticks([-0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.title(("Análise de Silhouette para K = %d" % n_clusters),
              fontsize=10, fontweight='bold')

    # Per-sample silhouette scores.
    per_sample = silhouette_samples(df, kmeans_labels)
    bottom = 10
    for cluster in range(n_clusters):
        band = per_sample[kmeans_labels == cluster]
        band.sort()
        height = band.shape[0]
        top = bottom + height

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        axis.fill_betweenx(np.arange(bottom, top), 0, band,
                           facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        axis.text(-0.05, bottom + 0.5 * height, str(cluster))
        # 10-row gap before the next band.
        bottom = top + 10
    plt.show()
示例#19
0
def calculate_silhouette(X, cluster_labels, n_clusters):
    """Print the mean silhouette score and save a silhouette plot as PNG.

    The image is written to ``args.output`` with a file name built from
    ``args.prefix`` and ``n_clusters``; ``args`` is read from the enclosing
    module scope.
    """
    padding = 10  # blank rows inserted between the bands of two clusters

    silhouette_avg = silhouette_score(X, cluster_labels)
    print(f"For n_clusters = {n_clusters}, the average silhouette_score is : {silhouette_avg}")

    figure = plt.figure(figsize=(12, 8))
    axes = figure.gca()
    axes.set_xlim([-1, 1])
    axes.set_ylim([0, len(X) + (n_clusters + 1) * padding])

    scores = silhouette_samples(X, cluster_labels)
    bottom = padding
    for cluster_id in range(n_clusters):
        # Sorted per-sample scores of the current cluster.
        cluster_scores = np.sort(scores[cluster_labels == cluster_id])
        n_members = cluster_scores.shape[0]
        top = bottom + n_members

        shade = cm.nipy_spectral(float(cluster_id) / n_clusters)
        axes.fill_betweenx(np.arange(bottom, top),
                           0, cluster_scores,
                           facecolor=shade, edgecolor=shade, alpha=0.7)
        axes.text(-0.05, bottom + 0.5 * n_members, str(cluster_id))
        bottom = top + padding

    axes.set_title("The silhouette plot for the various clusters.")
    axes.set_xlabel("The silhouette coefficient values")
    axes.set_ylabel("Cluster label")
    # Dashed line at the average score over all samples.
    axes.axvline(x=silhouette_avg, color="red", linestyle="--")
    axes.set_yticks([])
    axes.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')

    file_name = args.prefix + "-kmeans-silhouette-" + str(n_clusters) + ".png"
    plt.savefig(os.path.join(args.output, file_name))
示例#20
0
def plot_silhouette(X, cluster_labels):
    """Show a silhouette plot on the current axes.

    The number of clusters is the number of distinct values in
    ``cluster_labels``; bands are drawn for the indices
    ``0 .. n_clusters - 1``.
    """
    n_clusters = len(set(cluster_labels))
    mean_score = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    axes = plt.gca()
    gap = 10  # rows left empty between two cluster bands
    bottom = gap
    for cluster_id in range(n_clusters):
        member_scores = np.sort(per_sample[cluster_labels == cluster_id])
        n_members = member_scores.shape[0]
        top = bottom + n_members

        shade = cm.nipy_spectral(float(cluster_id) / n_clusters)
        axes.fill_betweenx(np.arange(bottom, top), 0, member_scores,
                           facecolor=shade, edgecolor=shade, alpha=0.7)
        axes.text(-0.05, bottom + 0.5 * n_members, str(cluster_id))
        bottom = top + gap

    axes.set_title("The silhouette plot for the various clusters.")
    axes.set_xlabel("The silhouette coefficient values")
    axes.set_ylabel("Cluster label")

    # Dashed red line at the overall mean silhouette score.
    axes.axvline(x=mean_score, color="red", linestyle="--")
    axes.set_yticks([])
    axes.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.show()
示例#21
0
def evolplot(tracks,
             lim,
             save="",
             xlabel=r'$\hat{X}\ [1]$',
             ylabel=r'$\hat{P}\ [1]$'):
    """Scatter-plot every track in a square window, then save or show it.

    Parameters
    ----------
    tracks : array
        Indexed as ``tracks[part, :, 0]`` (x values) and
        ``tracks[part, :, 1]`` (y values); ``tracks.shape[0]`` is the number
        of tracks, each drawn in its own colour.
    lim : number
        Both axes span ``[-lim, lim]``.
    save : str
        Output file path. When non-empty and its directory exists, the
        figure is saved there and cleared; otherwise it is shown. A bare
        file name (no directory part) refers to the current directory.
    xlabel, ylabel : str
        Axis labels.
    """
    npart = tracks.shape[0]

    fig, ax = plt.subplots()
    ax.set_aspect(1)
    ax.set_xlim([-lim, lim])
    ax.set_ylim([-lim, lim])

    colors = cm.nipy_spectral(np.linspace(0, 1, npart))
    for part, color in enumerate(colors):
        ax.scatter(tracks[part, :, 0], tracks[part, :, 1], color=color)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    fig.tight_layout()

    # os.path.dirname("plot.png") is "", which never "exists"; treat an
    # empty directory part as the current directory so bare file names
    # are saved instead of silently falling through to plt.show().
    if save and os.path.exists(os.path.dirname(save) or os.curdir):
        plt.savefig(save, bbox_inches='tight')
        plt.clf()
    else:
        plt.show()
示例#22
0
def draw_pointed_cluster_map(df, attribute='checkins'):
    """Draw a cluster map in which important points are drawn larger.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'latitude', 'longitude' and 'cluster_id'
        plus the column named by ``attribute``.
    attribute : str
        Column used to highlight points. Each marker area is scaled by the
        row's value relative to the column mean, so a row at the mean keeps
        the base size of 30 and larger values produce larger markers.

    Returns
    -------
    fig : matplotlib figure
        The figure the scatter plot was drawn on.
    """
    base_size = 30
    fig = plt.figure()

    # Relative weight of each row; fall back to a uniform size when the
    # mean is zero, negative or not finite (empty column, NaN values).
    weights = np.asarray(df[attribute], dtype=float)
    mean_weight = np.mean(weights) if weights.size else 0.0
    if np.isfinite(mean_weight) and mean_weight > 0:
        sizes = base_size * np.clip(weights / mean_weight, 0, None)
    else:
        sizes = base_size

    X = df[['latitude', 'longitude']].values
    cluster_labels = np.array(df['cluster_id']).astype(float)
    n_clusters = len(Counter(cluster_labels))
    colors = cm.nipy_spectral(cluster_labels / n_clusters)

    plt.scatter(X[:, 0], X[:, 1], marker='.', s=sizes, lw=0, alpha=0.7,
                c=colors, edgecolor='k')
    plt.xlabel('Latitude')
    plt.ylabel('Longitude')
    return fig
示例#23
0
文件: utils.py 项目: nmerlene/satmap
def plot_ground_tracks(sat_groups, obs_time=None):
    """
    Generate plot of ground tracks

    Args:
        sat_groups (dict):      Dictionary containing keys for each satellite group;
                                each value is an iterable of objects exposing
                                ``sublat`` and ``sublong`` (converted here from
                                radians to degrees)
        obs_time (dt.datetime): Observer time (in UTC). Defaults to the current
                                UTC time at the moment of the call.

    Returns:
        fig: matplotlib.figure.Figure: Figure handle
    """
    # A default of dt.datetime.utcnow() in the signature would be evaluated
    # only once, when the function is defined, and then reused by every call.
    if obs_time is None:
        obs_time = dt.datetime.utcnow()

    # Initialize figure and map
    fig = plt.figure(figsize=(12, 10))
    # Create colormap based off of number of satellite groups
    colors = cm.nipy_spectral(np.linspace(0, 1, len(sat_groups)))
    m = Basemap(projection='mill')  # Use Miller projection
    # Plot coastlines and background imagery.
    m.drawcoastlines()
    m.bluemarble(scale=0.2, alpha=0.95, zorder=-1)
    m.nightshade(obs_time, alpha=0.5, zorder=0)  # Add nightshade
    # Plot satellites by group
    for color, (sat_group_key, sat_group) in zip(colors, sat_groups.items()):
        lats = [np.rad2deg(sat.sublat) for sat in sat_group]
        lons = [np.rad2deg(sat.sublong) for sat in sat_group]
        x, y = m(lons, lats)  # transform coordinates
        m.scatter(x, y, s=40, marker='+', color=color,
                  label=sat_group_key)
    fig.suptitle('Visible Satellites at {} (UTC)'.format(obs_time.strftime("%d %b %Y %H:%M:%S")))
    fig.legend()
    return fig
示例#24
0
def graficarSilhouette(k, labels, sample_silhouette_values, silhouette_avg):
    """Draw a silhouette plot from precomputed per-sample scores.

    One band is drawn for each cluster index in ``range(k)`` and a dashed
    red line marks ``silhouette_avg``.  The figure is neither shown nor
    returned here.
    """
    separation = 10  # rows left empty between two cluster bands
    fig, axes = plt.subplots(1, 1)

    start = separation
    for cluster in range(k):
        # Sorted scores of the samples labelled with this cluster.
        scores = np.sort(sample_silhouette_values[labels == cluster])
        members = scores.shape[0]
        stop = start + members

        tone = cm.nipy_spectral(float(cluster) / k)
        axes.fill_betweenx(np.arange(start, stop), 0, scores,
                           facecolor=tone, edgecolor=tone, alpha=0.7)
        axes.text(-0.05, start + 0.5 * members, str(cluster))
        start = stop + separation

    axes.set_title("Plot del silhouette de cada cluster")
    axes.set_xlabel("Coeficiente de silhouette")
    axes.set_ylabel("Etiqueta del cluster")
    axes.axvline(x=silhouette_avg, color="red", linestyle="--")
    axes.set_yticks([])
示例#25
0
def PCA(X, label, cities, method, height):
    """Plot the samples on their first two principal components.

    ``X`` is fitted with a PCA that keeps enough components to explain 95%
    of the variance (each row of ``X`` is one sample).  Every sample is
    drawn as a marker coloured by ``label[i]`` and annotated with
    ``cities[i]``.  ``method`` and ``height`` only appear in the printed
    message and in the plot title.

    Note: the plot indexes the second component, so it needs the fit to
    retain at least two components.
    """
    # Keep as many components as needed to explain 95% of the variance.
    pca = decomposition.PCA(n_components=0.95)
    pca.fit(X)  # each row is one input sample
    result = pca.transform(X)  # projected coordinates
    plt.figure(figsize=[10, 6])  # new figure for this plot
    plt.rcParams['font.size'] = 14
    n_clusters = len(set(label.tolist()))
    print("When Height = %d, n_clusters = %d." % (height, n_clusters))
    for i in range(result[:, 0].size):
        color = cm.nipy_spectral(float(label[i]) / n_clusters)
        plt.plot(result[i, 0],
                 result[i, 1],
                 c=color,
                 marker='o',
                 markersize=10)
        plt.text(result[i, 0], result[i, 1], cities[i])
    x_label = 'PC1(%s%%)' % round(
        (pca.explained_variance_ratio_[0] * 100.0), 2)  # x-axis label text
    # The y axis shows the second component, so it is labelled PC2.
    y_label = 'PC2(%s%%)' % round(
        (pca.explained_variance_ratio_[1] * 100.0), 2)  # y-axis label text
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title('Height = %d (%s)' % (height, method))
    plt.show()
示例#26
0
def plot_silhoutte_3d(X):
    """Cluster ``X`` with several cluster counts and show each result in 3D.

    For every count in (2, 3, 4, 5, 6, 10) a ``KMeans_SK`` model with
    ``random_state=10`` is fitted, the mean silhouette score is printed and
    the first three columns of ``X`` are scattered, coloured by cluster.
    Cluster centres are drawn 0.5 above their z coordinate and numbered.
    """
    for n_clusters in (2, 3, 4, 5, 6, 10):
        figure = plt.figure()
        axes = figure.add_subplot(111, projection='3d')

        model = KMeans_SK(n_clusters=n_clusters, random_state=10)
        labels = model.fit_predict(X)

        mean_score = silhouette_score(X, labels)
        print("For n_clusters =", n_clusters,
              "The average silhouette_score is :", mean_score)

        point_colors = cm.nipy_spectral(labels.astype(float) / n_clusters)
        axes.scatter(X[:, 0], X[:, 1], X[:, 2],
                     marker='.', s=30, lw=0, alpha=0.7,
                     c=point_colors, edgecolor='k')

        # Translucent discs at the cluster centres, lifted by 0.5 in z.
        centers = model.cluster_centers_
        axes.scatter(centers[:, 0], centers[:, 1], centers[:, 2]+0.5,
                     marker='o', c="black", alpha=.2, s=200, edgecolor='k')

        # The index of every centre, drawn as the marker itself.
        for index, center in enumerate(centers):
            axes.scatter(center[0], center[1], center[2]+0.5,
                         marker='$%d$' % index, alpha=.5,
                         s=50, edgecolor='k')

        axes.set_title("The visualization of the clustered data.")
        axes.set_xlabel("Feature space for the 1st feature")
        axes.set_ylabel("Feature space for the 2nd feature")

        plt.suptitle(("KMeans clustering on sample data "
                      "with n_clusters = %d" % n_clusters),
                     fontsize=14, fontweight='bold')
        plt.show()
示例#27
0
def silhouettePlot(d, cluster_labels):
    """Show a silhouette plot for ``cluster_labels`` on the data ``d``.

    The mean score uses the euclidean metric and the module-level ``seed``
    as ``random_state``.  Bands are drawn for the indices
    ``0 .. number of distinct labels - 1``.
    """
    band_gap = 10  # rows left empty between two cluster bands
    fig, axes = plt.subplots(1, 1)

    mean_score = silhouette_score(d,
                                  labels=cluster_labels,
                                  metric='euclidean',
                                  random_state=seed)
    all_scores = silhouette_samples(d, cluster_labels)
    nbr_clusters = len(set(cluster_labels))

    lower = band_gap
    for cluster in range(nbr_clusters):
        scores = np.sort(all_scores[cluster_labels == cluster])
        n_members = scores.shape[0]
        upper = lower + n_members

        tint = cm.nipy_spectral(float(cluster) / nbr_clusters)
        axes.fill_betweenx(np.arange(lower, upper), 0, scores,
                           facecolor=tint, edgecolor=tint)
        axes.text(-0.05, lower + 0.5 * n_members, str(cluster))
        lower = upper + band_gap

    axes.set_title("Silhouette Plot for {} Clusters".format(nbr_clusters))
    axes.set_xlabel("Silhouette Coefficients")
    axes.set_ylabel("Cluster Label by Sample")
    axes.axvline(x=mean_score, color="red", linestyle="--")
    axes.set_yticks([])
    plt.show()
示例#28
0
def silhouettes(name, X_train, max_clusters = 15, min_clusters = 5, save = False):
    """Show a k-means silhouette plot for every cluster count in a range.

    A copy of ``X_train`` is standardised with ``split.standardize`` and
    only its first ``ds.get_number_numerical(name)`` columns are used.  For
    each count from ``min_clusters`` to ``max_clusters`` (inclusive) the
    labels come from ``kmeans(name, count, data).predict(data)``, the mean
    silhouette score is printed and the plot is shown.

    When ``save`` is true, the figure of the last iteration is written to
    ``data/visualizations/<name>_elbow.png`` under the resolved current
    directory.
    """
    band_gap = 10  # rows left empty between two cluster bands

    features = X_train.copy()
    n_numeric = ds.get_number_numerical(name)
    numeric_scaled = split.standardize(name, features).iloc[:, 0:n_numeric]

    for n_clusters in range(min_clusters, max_clusters + 1):
        fig, ax = plt.subplots()
        fig.set_size_inches(18, 7)

        ax.set_xlim([-0.7, 1])
        # Room for every sample plus the gaps between the bands.
        ax.set_ylim([0, len(numeric_scaled) + (n_clusters + 1) * band_gap])

        model = kmeans(name, n_clusters, numeric_scaled)
        labels = model.predict(numeric_scaled)
        mean_score = silhouette_score(numeric_scaled, labels)
        print("For n_clusters =", n_clusters, "The average silhouette_score is :", mean_score)

        per_sample = silhouette_samples(numeric_scaled, labels)

        bottom = band_gap
        for cluster_id in range(n_clusters):
            scores = np.sort(per_sample[labels == cluster_id])
            n_members = scores.shape[0]
            top = bottom + n_members

            shade = cm.nipy_spectral(float(cluster_id) / n_clusters)
            ax.fill_betweenx(np.arange(bottom, top), 0, scores,
                             facecolor=shade, edgecolor=shade, alpha=0.7)

            # Cluster number at the vertical middle of its band.
            ax.text(-0.05, bottom + 0.5 * n_members, str(cluster_id))
            bottom = top + band_gap

        ax.set_title("The silhouette plot for the various clusters.")
        ax.set_xlabel("The silhouette coefficient values")
        ax.set_ylabel("Cluster label")

        # Dashed line at the average silhouette score of all samples.
        ax.axvline(x=mean_score, color="red", linestyle="--")

        ax.set_yticks([])
        ax.set_xticks(np.arange(-0.6,1.1,0.2))
        plt.show()

    if save:
        file_name = '{}_elbow.png'.format(name)
        target = Path().resolve().joinpath('data', 'visualizations', file_name)
        fig.savefig(target)
示例#29
0
def plot_silhouette(data, cluster_labels, title=""):
    """Draw a silhouette plot with one band per distinct cluster label.

    Parameters
    ----------
    data : array-like
        Samples passed to ``silhouette_score`` / ``silhouette_samples``.
    cluster_labels : array-like
        Cluster label of every sample; lists are accepted.
    title : str
        Text appended to the plot title.

    The figure is neither shown nor returned here.
    """
    # A plain list compared with ``== label`` yields a single bool instead
    # of a mask, so normalise to an array first.
    cluster_labels = np.asarray(cluster_labels)
    silhouette_avg = silhouette_score(data, cluster_labels)
    silhouette_values = silhouette_samples(data, cluster_labels)
    fig, ax = plt.subplots(figsize=(10, 8))

    unique_labels = np.unique(cluster_labels)
    n_clusters = len(unique_labels)
    y_lower = 10
    for position, label in enumerate(unique_labels):
        cluster_silhouette_values = np.sort(
            silhouette_values[cluster_labels == label])
        size_cluster = cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster
        # Spread the colours over the colormap by band position; dividing
        # the raw label by 1 saturated the map for every label >= 1.
        color = cm.nipy_spectral(float(position) / n_clusters)
        ax.fill_betweenx(np.arange(y_lower, y_upper),
                         0,
                         cluster_silhouette_values,
                         facecolor=color,
                         edgecolor=color,
                         alpha=0.7)
        ax.text(-0.05, y_lower + 0.5 * size_cluster, str(label))
        y_lower = y_upper + 10  # gap before the next band

    total_height = len(data) + (n_clusters + 1) * 10
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])
    ax.set_xlim((-1, 1))
    ax.set_ylim([0, total_height])
    # Hand-placed y label, since the y ticks are removed.
    ax.text(-0.12, total_height / 2.,
            "Cluster label",
            rotation=90)
    ax.set_title(f"The silhouette plot {title}")
    ax.set_xlabel("The silhouette coefficient values")
示例#30
0
def silhouette():
    """Compute the silhouette analysis of the Stardust output and save it.

    Reads ``Stardust_results/visualization_output/3_pass/data.csv`` (columns
    'data', 'x', 'y' and 'cluster' are used), prints the mean silhouette
    score and writes ``silhouette.pdf`` and ``silhouette.png`` into
    ``Stardust_results/analysis``.

    Exits the process with status 1 when ``Stardust_results`` is missing.
    """
    import matplotlib.cm as cm
    from sklearn.metrics import silhouette_samples, silhouette_score

    if not os.path.exists("Stardust_results"):
        print(
            "The directory structure Stardust_results doest not exist. Please run run_stardust first"
        )
        # Non-zero status so that callers and shells can detect the failure.
        sys.exit(1)
    output_path = "Stardust_results/analysis/"
    os.makedirs(output_path, exist_ok=True)

    data_df = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data.csv',
        delimiter=",",
        index_col=False)
    data_df.set_index('data', inplace=True)
    coordinates = data_df[['x', 'y']]
    clusters = data_df['cluster'].values
    silhouette_avg = silhouette_score(coordinates, clusters)
    sample_silhouette_values = silhouette_samples(coordinates, clusters)
    print("silhouette score ", silhouette_avg)

    y_lower = 10
    fig = plt.figure(figsize=(4, 7))
    # Iterate over the labels actually present (sorted) rather than assuming
    # they are exactly 0 .. n-1.
    labels = np.unique(clusters)
    n_clusters = len(labels)
    for position, label in enumerate(labels):
        # Sorted silhouette scores of the samples in this cluster.
        ith_cluster_silhouette_values = np.sort(
            sample_silhouette_values[clusters == label])

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(position) / n_clusters)
        plt.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(label))

        # Leave a gap of 10 rows before the next band.
        y_lower = y_upper + 10

    plt.title("The silhouette plot for the various clusters.")
    plt.xlabel("silhouette coefficient", fontsize=20)
    plt.ylabel("Cluster label", fontsize=20)
    plt.axvline(x=silhouette_avg, color="red", linestyle="--")

    plt.yticks([])  # Clear the yaxis labels / ticks
    plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    sns.despine(bottom=False, left=False)
    for file_name in ("silhouette.pdf", "silhouette.png"):
        fig.savefig(os.path.join(output_path, file_name),
                    bbox_inches='tight', dpi=600)
示例#31
0
def plotAstar(length, width, height, paths, gates):
    """Show the routed paths and the gates in a non-blocking 3D plot.

    Parameters
    ----------
    length, width, height : int
        Grid dimensions; the axes span ``0 .. dimension - 1``.
    paths : sequence
        Each path is indexed as ``path[0]``, ``path[1]``, ``path[2]`` for
        its x, y and z coordinate sequences.
    gates : iterable
        Objects exposing ``getX()``, ``getY()`` and ``getZ()``.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    colors = cm.nipy_spectral(np.linspace(0, 1, len(paths)))

    # Give every path its own colour in random order. A permutation of the
    # colour indices never goes out of range (the previous upper bound of
    # len(colors) could) and needs no retry loop to avoid duplicates.
    color_order = np.random.permutation(len(colors))
    for path, color_index in zip(paths, color_order):
        ax.plot(path[0], path[1], path[2],
                color=colors[color_index], zorder=-1)

    x = [gate.getX() for gate in gates]
    y = [gate.getY() for gate in gates]
    z = [gate.getZ() for gate in gates]
    ax.scatter(x, y, z, c='black', marker="s", s=60, zorder=10)

    ax.set_xlim3d([0, length-1])
    ax.set_ylim3d([0, width-1])
    ax.set_zlim3d([0, height-1])

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')

    title = str(len(paths)) + " Random Connections"
    plt.title(title)

    plt.show(block=False)
    
    
示例#32
0
	def visualizeGrid(self, length, width, height, paths, gates):
		"""Embed a 3D plot of the paths and gates in the Tk master widget.

		The canvas is stored on ``self.canvas2`` and packed on the right
		side of ``self.master``.  Each path is indexed as ``path[0]``,
		``path[1]``, ``path[2]`` for its x, y and z coordinate sequences;
		gates expose ``getX()``, ``getY()`` and ``getZ()``.  The x axis is
		drawn reversed (from ``length - 1`` down to 0).
		"""
		f = plt.figure()

		# Embed the figure in the Tk window.
		self.canvas2 = FigureCanvasTkAgg(f, self.master)
		self.canvas2.get_tk_widget().pack(side=RIGHT, fill=BOTH, expand=1)

		ax = f.add_subplot(111, projection='3d')

		# create a color palette
		colors = cm.nipy_spectral(np.linspace(0, 1, len(paths)))
		usedColors = []

		for path in paths:
			x = path[0]
			y = path[1]
			z = path[2]

			# to create a nice mix of colors: pick a random not used color
			randIndex = randint(0, len(colors)-1)
			while randIndex in usedColors:
				randIndex = randint(0, len(colors)-1)
			c = colors[randIndex]
			usedColors.append(randIndex)

			ax.plot(x, y, z, color=c, zorder=-1)

		x = [gate.getX() for gate in gates]
		y = [gate.getY() for gate in gates]
		z = [gate.getZ() for gate in gates]

		ax.scatter(x, y, z, c='black', marker="s", s=60, zorder=10)

		ax.set_xlim3d([length-1, 0])
		ax.set_ylim3d([0, width-1])
		ax.set_zlim3d([0, height-1])

		ax.set_xlabel('X')
		ax.set_ylabel('Y')
		ax.set_zlabel('Layer')

		title = str(len(paths)) + " Random Connections"
		plt.title(title)

		# FigureCanvasTkAgg.show() was deprecated and later removed from
		# Matplotlib; draw() is its replacement. Render once everything
		# has been added to the axes.
		self.canvas2.draw()
 def create_scatter(self, size=100, filename=None):
     """Create a scatter plot of the clusters found.

     Points are taken from the first two columns of ``self.pos`` and
     grouped by the values in ``self.k_fit``; groups are drawn for the
     indices ``0 .. number of distinct values - 1``.

     Parameters
     ----------
     size : number
         Marker size passed to ``plt.scatter``.
     filename : str or None
         When None the plot is shown, otherwise it is saved to this path
         at 300 dpi.  The figure is closed afterwards in both cases.
     """
     num_k = len(set(self.k_fit))  # number of kernels
     plt.figure(figsize=(15, 15))
     colors = cm.nipy_spectral(numpy.linspace(0, 1, num_k))
     for idx in range(num_k):
         # NOTE(review): assumes self.k_fit is a 1-D numpy array - confirm.
         members = self.k_fit == idx
         # ``color=`` rather than ``c=``: a single RGBA row given as ``c``
         # is ambiguous when a group happens to contain exactly 4 points.
         plt.scatter(self.pos[members, 0], self.pos[members, 1],
                     s=size, label=str(idx), color=colors[idx])
     plt.legend()
     if filename is None:
         plt.show()
     else:
         plt.savefig(filename, dpi=300)
     plt.close()
            km = factory(n_clusters=n_clusters, init=init, random_state=run_id,
                         n_init=n_init, **params).fit(X)
            inertia[i, run_id] = km.inertia_
    p = plt.errorbar(n_init_range, inertia.mean(axis=1), inertia.std(axis=1))
    plots.append(p[0])
    legends.append("%s with %s init" % (factory.__name__, init))

# Finish the quantitative figure: mean inertia (with error bars) per n_init.
plt.xlabel('n_init')
plt.ylabel('inertia')
plt.legend(plots, legends)
plt.title("Mean inertia for various k-means init across %d runs" % n_runs)

# Part 2: Qualitative visual inspection of the convergence

X, y = make_data(random_state, n_samples_per_center, grid_size, scale)
km = MiniBatchKMeans(n_clusters=n_clusters, init='random', n_init=1,
                     random_state=random_state).fit(X)

plt.figure()
for k in range(n_clusters):
    my_members = km.labels_ == k
    color = cm.nipy_spectral(float(k) / n_clusters, 1)
    # Samples of the cluster as dots. The marker is given once, in the
    # format string, instead of both as 'o' and as marker='.'.
    plt.plot(X[my_members, 0], X[my_members, 1], '.', c=color)
    # Cluster centre as a filled circle with a black edge.
    cluster_center = km.cluster_centers_[k]
    plt.plot(cluster_center[0], cluster_center[1], 'o',
             markerfacecolor=color, markeredgecolor='k', markersize=6)
# The title does not depend on k, so it is set once after the loop.
plt.title("Example cluster allocation with a single random init\n"
          "with MiniBatchKMeans")

plt.show()
    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X, cluster_labels)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels == i]

        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / n_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0, ith_cluster_silhouette_values,
                          facecolor=color, edgecolor=color, alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")

    # The vertical line for average silhouette score of all the values
示例#36
0
# Script section: plot one error-bar point per quantum-dot label on ``ax``
# and mark the mean value.  NOTE: uses Python 2 ``print`` statements.
# ``measDip_srt``, ``a``, ``a_err``, ``srt`` and ``ax`` come from earlier
# parts of the script that are not shown here.
print 'measDip_srt', measDip_srt

# Labels of the measured dots.
dots = ['QD1','QD2','QD3','QD4','QD5','QD6','QD7','QD8','QD9']
my_xticks = dots  # not used in the visible part of the script

# x positions 1 .. len(dots), one per dot.
x = na.arange(1, len(dots)+1, 1, dtype=na.int8)
print 'x', x

# Not used in the visible part of the script.
x_box_len = 5
y_box_len = 6

#y = measDip_srt[0]
#yerr = measDip_srt[1]
# Values and their errors, reordered by ``srt`` and scaled by 1e3.
# NOTE(review): presumably a unit conversion - confirm against the axis label.
y = a[srt]*1e3
yerr = a_err[srt]*1e3
# One colour per dot, spread evenly over the nipy_spectral colormap.
colors = mplcm.nipy_spectral(np.linspace(0, 1, len(dots)))
for it in range(len(y)):
    ax.errorbar(x[it], y[it], yerr=yerr[it], fmt='o', color=colors[it], markeredgecolor=colors[it])
# Dotted horizontal line at the mean of the plotted values.
ax.axhline(np.mean(y), color='black', ls=':', lw=2)
print 'mean', np.mean(y)
# Annotate the mean at a fixed position: a minus sign followed by the
# absolute value of the mean, with two decimals.
ax.text(0.9, -0.47, r'$-$%.2f' %(np.abs(np.mean(y))), fontsize=12)
#ax.set_ylim(-0.55,0)
#ax1.set_ylim(-0.55,0)
ax.set_ylim(-0.6,0)
#ax1.set_ylim(-0.6,0)
ax.set_xlim(0.8,9.2)
#ax.set_ylabel(r'$\frac{1}{e}p^{\mathrm{static}}_{z}$')
ax.set_ylabel(r'$A^{\mathrm{QD}}$ $\mathrm{(nm/GPa)}$', fontsize=15)
ax.set_xlabel('$\mathrm{Measured}$ $\mathrm{QD}$ $\mathrm{no.}$', fontsize=15)
# Tick labels at the bottom and left only.
ax.tick_params( labeltop=False, labelbottom=True, labelleft=True)
    profileDict = {} # create dictionary to store diagraph instances
    for dia,time in zip(listdia,listtime):
        if len(dia)>1:
            if dia[0]+dia[1] in profileDict: # index dictionary at diagraph, check if it exists
                profileDict[dia[0]+dia[1]].append(time) # add time value to list
            else: # if the the index is not found
                profileDict[dia[0]+dia[1]] = [] # create a list there
                profileDict[dia[0]+dia[1]].append(time) # add time value to list
    profiles.append(profileDict) # add list to profile list
commonDiagraphs = open('/home/andrew/Documents/Research/keystroke-authentication/keystroke goats/DIAGRAPHS_ETC.txt')
diagraphsToTest = commonDiagraphs.read()
allTestDia = diagraphsToTest.rstrip('\n').split(' ')
#diagraphToTest = raw_input("Enter the diagraph to visualize: ")
#numberOfUsers = raw_input("Enter the number of users you would like to test: ") # UNCOMMENT FOR LARGER SET
testUserList = []
colormap = cm.nipy_spectral(np.linspace(0,.9,len(profiles)))
for number,diagraphToTest in enumerate(allTestDia):
    plt.figure(number)
    if len(diagraphToTest) == 1:
        diagraphToTest = " " + diagraphToTest
    for numUsers,uname in enumerate(namelist):
        testUserList.append(numUsers)#raw_input("Enter user " + str(numUsers) + ": ")) #UNCOMMENT FOR LARGER SET
        timeToTest = [] # list to contain all times for a given diagraph for user
        timeToCompare = [] # list for times to compare to
        if diagraphToTest in profiles[numUsers]:
            for instance in profiles[numUsers][diagraphToTest]:
                timeToTest.append(instance)
        if len(timeToTest)>1:
            #kde = stats.kde.gaussian_kde(timeToTest)
            kde = sm.nonparametric.KDEUnivariate(timeToTest) # calculate density function for all times for given diagraph
            kde.fit(bw=nrd0(timeToTest))