def main():
    no_of_samples = 400
    data = []
    data.append(datasets.make_moons(n_samples=no_of_samples, noise=0.05)[0])
    data.append(datasets.make_circles(n_samples=no_of_samples, factor=0.5, noise=0.05)[0])

    # number of clusters we expect
    K = 2
    for X in data:
        # from dataset, create adjacency, degree, and Laplacian matrix
        adjacency = gaussianDistance(X, sigma=0.1)
        degree = degreeMatrix(adjacency)
        L = diag(degree) - adjacency

        # perform whitening on the Laplacian matrix: L_sym = D^-1/2 L D^-1/2
        deg_05 = diag(degree ** -0.5)
        L = deg_05.dot(L).dot(deg_05)

        # use eig to obtain eigenvalues and eigenvectors
        eigenvalues, eigenvectors = linalg.eig(L)

        # Sort the eigenvalues ascending; the first K near-zero eigenvalues
        # represent the connected components
        idx = eigenvalues.argsort()
        eigenvalues.sort()
        evecs = eigenvectors[:, idx]
        eigenvectors = evecs[:, 0:K]
        print(eigenvalues[0:K])

        color_array = ['b', 'r', 'g', 'y']
        fig = pyplot.figure(figsize=(15, 5))
        fig.canvas.set_window_title('Difference between K-means and Spectral Clustering')

        # First perform plain K-means on the original dataset and plot it
        centroids, labels = scipy.cluster.vq.kmeans2(X, K)
        clustered = c_[X, labels]
        ax = fig.add_subplot(131)
        ax.set_title('K-means clustering')
        for k in range(0, K):
            ax.scatter(clustered[clustered[:, 2] == k, 0],
                       clustered[clustered[:, 2] == k, 1],
                       c=color_array[k], marker='o')

        # Then perform spectral clustering, i.e. K-means on the eigenvectors
        centroids, labels = scipy.cluster.vq.kmeans2(eigenvectors, K)
        clustered = c_[X, labels]
        ax = fig.add_subplot(132)
        ax.set_title('Spectral clustering')
        for k in range(0, K):
            ax.scatter(clustered[clustered[:, 2] == k, 0],
                       clustered[clustered[:, 2] == k, 1],
                       c=color_array[k], marker='o')

        # Plot out the eigenvectors too
        clustered = c_[eigenvectors, labels]
        ax = fig.add_subplot(133)
        ax.set_title('K eigenvectors')
        for k in range(0, K):
            ax.scatter(clustered[clustered[:, 2] == k, 0],
                       clustered[clustered[:, 2] == k, 1],
                       c=color_array[k], marker='o')
        pyplot.show()
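# The function above calls gaussianDistance and degreeMatrix without defining
# them. A minimal sketch of what they might look like, assuming a Gaussian
# (RBF) affinity and a degree *vector* (the call site applies diag() to the
# result); both names come from the call sites, not from a known library:
import numpy as np
from scipy.spatial.distance import cdist

def gaussianDistance(X, sigma=0.1):
    # Pairwise Gaussian affinity: A_ij = exp(-||x_i - x_j||^2 / (2 sigma^2))
    sq_dists = cdist(X, X, 'sqeuclidean')
    return np.exp(-sq_dists / (2.0 * sigma ** 2))

def degreeMatrix(adjacency):
    # Degree of each node: row sums of the affinity matrix
    return adjacency.sum(axis=1)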
def test_sparse(self):
    np.random.seed(10)
    thresh = 1.1

    # Do dense filtration with threshold
    data = (
        datasets.make_circles(n_samples=100)[0]
        + 5 * datasets.make_circles(n_samples=100)[0]
    )
    rips0 = Rips(thresh=thresh, maxdim=1)
    dgms0 = rips0.fit_transform(data)

    # Convert to sparse matrix first based on threshold,
    # then do full filtration
    rips1 = Rips(maxdim=1)
    D = makeSparseDM(data, thresh)
    dgms1 = rips1.fit_transform(D, distance_matrix=True)

    # The same number of edges should have been added
    assert rips0.num_edges_ == rips1.num_edges_

    I10 = dgms0[1]
    I11 = dgms1[1]
    idx = np.argsort(I10[:, 0])
    I10 = I10[idx, :]
    idx = np.argsort(I11[:, 0])
    I11 = I11[idx, :]
    assert np.allclose(I10, I11)
def test_sparse(self):
    np.random.seed(10)
    thresh = 1.1

    # Do dense filtration with threshold
    data = (
        datasets.make_circles(n_samples=100)[0]
        + 5 * datasets.make_circles(n_samples=100)[0]
    )
    res0 = ripser(data, thresh=thresh)

    # Convert to sparse matrix first based on threshold,
    # then do full filtration
    D = makeSparseDM(data, thresh)
    res1 = ripser(D, distance_matrix=True)

    # The same number of edges should have been added
    assert res0["num_edges"] == res1["num_edges"]

    dgms0 = res0["dgms"]
    dgms1 = res1["dgms"]
    I10 = dgms0[1]
    I11 = dgms1[1]
    idx = np.argsort(I10[:, 0])
    I10 = I10[idx, :]
    idx = np.argsort(I11[:, 0])
    I11 = I11[idx, :]
    assert np.allclose(I10, I11)
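# Both tests above rely on a makeSparseDM helper that is not shown. A
# plausible minimal sketch, assuming it builds a sparse distance matrix that
# keeps only the pairwise distances at or below the threshold:
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances

def makeSparseDM(X, thresh):
    # Dense Euclidean distance matrix, thresholded into sparse form
    D = pairwise_distances(X, metric='euclidean')
    I, J = np.nonzero(D <= thresh)
    V = D[I, J]
    return sparse.coo_matrix((V, (I, J)), shape=D.shape).tocsr()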
def generate_data():
    def kernel(x1, x2):
        return np.array([x1, x2, 2 * x1 ** 2 + 2 * x2 ** 2])

    X, Y = make_circles(500, noise=0.12, factor=0.01)
    A = X[np.where(Y == 0)]
    B = X[np.where(Y == 1)]
    X0_orig = A[:, 0]
    Y0_orig = A[:, 1]
    X1_orig = B[:, 0]
    Y1_orig = B[:, 1]

    A = np.array([kernel(x, y) for x, y in zip(np.ravel(X0_orig), np.ravel(Y0_orig))])
    X0 = A[:, 0]
    Y0 = A[:, 1]
    Z0 = A[:, 2]

    A = np.array([kernel(x, y) for x, y in zip(np.ravel(X1_orig), np.ravel(Y1_orig))])
    X1 = A[:, 0]
    Y1 = A[:, 1]
    Z1 = A[:, 2]
    return X0, X1, Y0, Y1, Z0, Z1
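# Possible usage of generate_data: visualize the kernel-lifted classes in 3-D,
# where the third coordinate 2*x1^2 + 2*x2^2 makes the circles linearly
# separable by a plane. The plotting imports are assumptions, matching the
# mpl_toolkits pattern used elsewhere in this collection:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

X0, X1, Y0, Y1, Z0, Z1 = generate_data()
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X0, Y0, Z0, c='r', marker='^')  # outer circle, lifted
ax.scatter(X1, Y1, Z1, c='b', marker='o')  # inner circle, lifted
plt.show()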
def withCircleData():
    np.random.seed(0)
    X, Y = make_circles(n_samples=400, noise=.05, factor=.3)
    #plotData(X, Y, 'original-circle.png')
    #testPCA(X, Y, ncomp=2, dataset='circles')
    #myKPCA(X, Y, kernel_type='gauss', c=1, deg=2, ncomp=2, dataset='circles')
    myKPCA(X, Y, kernel_type='poly', c=1, deg=10, ncomp=2, dataset='circles')
def main():
    args = sys.argv[1:]
    dataset_path = None
    if args and '-save' in args:
        try:
            dataset_path = args[args.index('-save') + 1]
        except IndexError:
            dataset_path = 'dataset.p'

    # Generate the dataset
    print("...Generating Dataset...")
    X1, Y1 = make_circles(n_samples=800, noise=0.07, factor=0.4)
    frac0 = len(np.where(Y1 == 0)[0]) / float(len(Y1))
    frac1 = len(np.where(Y1 == 1)[0]) / float(len(Y1))
    print("Percentage of '0' labels:", frac0)
    print("Percentage of '1' labels:", frac1)

    # (Optionally) save the dataset to dataset_path
    if dataset_path:
        print("...Saving dataset to {0}...".format(dataset_path))
        pickle.dump((X1, Y1, frac0, frac1), open(dataset_path, 'wb'))

    # Plot the dataset
    print("...Showing dataset in new window...")
    pl.figure(figsize=(10, 8))
    pl.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)
    pl.subplot(111)
    pl.title("Our Dataset: N={0}, '0': {1} '1': {2}".format(len(Y1), frac0, frac1),
             fontsize="large")
    pl.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
    pl.show()
    print("...Done.")
def main():
    n = 500
    x, y = datasets.make_circles(n_samples=n, factor=.5, noise=.05)
    label = CCPMCV().fit(x).label
    print("ARI:", adjusted_rand_score(y, label))
    figure(1)
    scatter(x[:, 0], x[:, 1], c=label, s=50)
    show()
def generate_circles(n, y_val):
    """
    Generates a dataset where points are shaped into two circles,
    and labels them with y_val.
    """
    X, y = make_circles(n, noise=0.1)
    return (X, [y_val] * len(X))
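# A possible use of generate_circles: build a two-class set by stacking two
# labeled circle datasets (the y_val choices 0 and 1 are illustrative; numpy
# is assumed to be imported in the original module):
import numpy as np

X0, y0 = generate_circles(200, 0)
X1, y1 = generate_circles(200, 1)
X = np.vstack([X0, X1])
y = np.array(y0 + y1)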
def generate_circles():
    X1, Y1 = make_circles(n_samples=500, noise=0.07, factor=0.4)
    plt.figure(figsize=(5, 5))
    plt.scatter(X1[:, 0], X1[:, 1], c=Y1)
    plt.grid(b=True, which="major", linestyle="-", alpha=0.1, color="black")
    plt.title("Can this be solved linearly?", size=16)
    plt.show()
def generate_biclass_data(data_type, random_state):
    """
    Generate biclass data to classify

    arg : data_type (str) type of data; choose any in
          ["lin_sep", "moons", "circles", "overlap"]
          'lin_sep'  : Bi-class, linearly separable data
          'moons'    : Bi-class, non linearly separable data
          'circles'  : Bi-class, non linearly separable data
          'overlap'  : Bi-class, non linearly separable data with class overlap
          random_state (int) seed for numpy.random
    """
    # Set seed for reproducible results
    np.random.seed(random_state)

    # Case 1 : linearly separable data
    if data_type == "lin_sep":
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[0.8, 0.6], [0.6, 0.8]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        y = np.hstack((y1, y2))

    # Case 2 : non-linearly separable data
    elif data_type == "moons":
        X, y = make_moons(n_samples=200, noise=0.2)
    elif data_type == "circles":
        X, y = make_circles(n_samples=200, noise=0.2, factor=0.5)

    # Case 3 : data with overlap between classes
    elif data_type == "overlap":
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[1.5, 1.0], [1.0, 1.5]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        y = np.hstack((y1, y2))

    assert X.shape[0] == y.shape[0]

    # Format target to: -1 / +1
    targets = set(y.tolist())
    t1 = min(targets)
    t2 = max(targets)
    l1 = np.where(y < t2)
    l2 = np.where(y > t1)
    y[l1] = -1
    y[l2] = 1
    return X, y
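# Example call for the generator above (the seed value is illustrative);
# whatever labels make_circles returns are remapped to -1 / +1:
X, y = generate_biclass_data("circles", random_state=42)
assert set(y.tolist()) == {-1.0, 1.0}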
def loadDatasets(linearly_separable):
    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable,
    ]
    return datasets
def make_datasets():
    """
    :return: the moons, circles, and linearly separable toy datasets
    """
    return [make_moons(n_samples=200, noise=0.3, random_state=0),
            make_circles(n_samples=200, noise=0.2, factor=0.5, random_state=1),
            make_linearly_separable()]
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(kernel_pca__gamma=2.0 ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse_output=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray)
def build_datasets(n_samples=100):
    X, y = make_classification(n_samples=n_samples, n_features=2,
                               n_redundant=0, n_informative=2,
                               n_clusters_per_class=1)
    X += 2 * np.random.uniform(size=X.shape)
    linearly_separable = (X, y)
    names = ['moons', 'circles', 'linear', 'xor']
    datasets = [make_moons(n_samples=n_samples, noise=0.3),
                make_circles(n_samples=n_samples, noise=0.2, factor=0.5),
                linearly_separable,
                xor_scale_invariant(n_samples=n_samples)]
    return (names, datasets)
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(Perceptron__n_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.0)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
def get_dataset(dataset, n_samples):
    # Generate the new data:
    if dataset == 'Noisy Circles':
        X, y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
    elif dataset == 'Noisy Moons':
        X, y = datasets.make_moons(n_samples=n_samples, noise=.05)
    elif dataset == 'Blobs':
        X, y = datasets.make_blobs(n_samples=n_samples, random_state=8)
    else:
        X, y = np.random.rand(n_samples, 2), None
    return X, y
def get_dataset(dataset, n_samples):
    if dataset == "Noisy Circles":
        return datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
    elif dataset == "Noisy Moons":
        return datasets.make_moons(n_samples=n_samples, noise=0.05)
    elif dataset == "Blobs":
        return datasets.make_blobs(n_samples=n_samples, random_state=8)
    elif dataset == "No Structure":
        return np.random.rand(n_samples, 2), None
def gen_test_data():
    ################################################################
    #                        using sklearn                         #
    ################################################################
    N = 500
    #features, labels = ds.make_classification(n_samples=N, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, class_sep=2, shift=2.2)
    features, labels = ds.make_circles(n_samples=N)
    #features, labels = ds.make_moons(n_samples=N)
    labels[labels == 0] = -1
    features = auto_np.array(features) * 4.0
    labels = auto_np.array(labels).reshape(features.shape[0], 1)
    return features, labels
def makeSimpleDatasets(n_samples=1500):
    # from sklearn example
    np.random.seed(0)

    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    no_structure = np.random.rand(n_samples, 2), None
    return [noisy_circles, noisy_moons, blobs, no_structure]
def labelPropagationTest01():
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.semi_supervised import label_propagation
    from sklearn.datasets import make_circles

    # Generate data
    n_samples = 200
    X, y = make_circles(n_samples=n_samples, shuffle=False)
    outer, inner = 0, 1
    # Seed the labels: only two points get an initial value,
    # every other label is unknown (-1)
    labels = -np.ones(n_samples)
    labels[0] = outer
    labels[-1] = inner
    #print(X.shape)   # X is a 200 x 2 matrix
    #print(labels)    # first label is 0, last is 1, all others are -1
    #print(y)         # first 100 are 0, last 100 are 1

    # Learn with LabelSpreading
    # The inputs here are X and labels -- not y.
    # The unknown entries of labels are exactly what we want to predict.
    label_spread = label_propagation.LabelSpreading(kernel="knn", alpha=1.0)
    label_spread.fit(X, labels)

    # Plot output labels
    output_labels = label_spread.transduction_
    plt.figure(figsize=(8.5, 4))
    plt.subplot(1, 2, 1)
    plot_outer_labeled, = plt.plot(X[labels == outer, 0],
                                   X[labels == outer, 1], 'rs')
    plot_unlabeled, = plt.plot(X[labels == -1, 0], X[labels == -1, 1], 'g.')
    plot_inner_labeled, = plt.plot(X[labels == inner, 0],
                                   X[labels == inner, 1], 'bs')
    plt.legend((plot_outer_labeled, plot_inner_labeled, plot_unlabeled),
               ("Outer Labeled", "Inner Labeled", "Unlabeled"),
               loc="upper left", numpoints=1, shadow=False)
    plt.title("Raw data (2 classes = red and blue)")

    # Note the plotting technique here
    plt.subplot(1, 2, 2)
    output_label_array = np.asarray(output_labels)
    outer_numbers = np.where(output_label_array == outer)[0]
    inner_numbers = np.where(output_label_array == inner)[0]
    plot_outer, = plt.plot(X[outer_numbers, 0], X[outer_numbers, 1], 'rs')
    plot_inner, = plt.plot(X[inner_numbers, 0], X[inner_numbers, 1], 'bs')
    plt.legend((plot_outer, plot_inner), ('Outer Learned', 'Inner Learned'),
               loc='upper left', numpoints=1, shadow=False)
    plt.title("Labels Learned with Label Spreading (KNN)")

    plt.subplots_adjust(left=0.07, bottom=0.07, right=0.93, top=0.92)
    plt.show()
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)
def load_dataset(dname, num_samples):
    if 'circles' in dname.lower():
        return datasets.make_circles(n_samples=num_samples, factor=.5,
                                     noise=.05)
    elif 'moons' in dname.lower():
        return datasets.make_moons(n_samples=num_samples, noise=.05)
    elif 'blobs' in dname.lower():
        return datasets.make_blobs(n_samples=num_samples, random_state=8)
    else:
        return np.random.rand(num_samples, 2), None
def gen_test_data():
    ################################################################
    #                        using sklearn                         #
    ################################################################
    N = 500
    #features, labels = ds.make_classification(n_samples=N, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, class_sep=2, shift=2.2)
    features, labels = ds.make_circles(n_samples=N)
    #features, labels = ds.make_moons(n_samples=N)

    # One-hot encode the labels: column 0 for class 1, column 1 for class 0
    labels_tmp = np.zeros((features.shape[0], 2))
    features = features * 4.0
    labels_tmp[labels == 1, 0] = 1
    labels_tmp[labels == 0, 1] = 1
    labels = labels_tmp
    return features, labels
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
def generate_circle_sample():
    # make_circles returns (X, y): X holds the 2-D data points (one big
    # circle, one small circle), y holds the corresponding labels
    X, y = datasets.make_circles(n_samples=5000, factor=.5, noise=.05)
    circle_dict = {}
    for count, x in enumerate(X):
        circle_dict[str(count)] = (float(x[0]), float(x[1]))
    return circle_dict, X, y
def main():
    X, y = make_circles(n_samples=1000, random_state=123, noise=0.1,
                        factor=0.2)
    while True:
        print(options)
        opt = int(input('------>'))
        if opt == 1:
            show_moons(X, y)
            return
        elif opt == 2:
            scikit_std_pca(X, y)
            return
        elif opt == 3:
            kernel_pca_unfold(X, y)
            return
        else:
            print("Wrong choice\n")
            continue
def get_circles():
    """Load synthetic concentric circles data from sklearn,
    transform into dataframe & return."""
    circles_data, circles_labels = make_circles(n_samples=1000, noise=0.1)
    circles_data = pd.DataFrame(circles_data)
    circles_labels = pd.DataFrame(circles_labels)
    circles_combined = pd.concat([circles_data, circles_labels], axis=1)
    circles_combined.columns = ['x', 'y', 'label']

    # draw scatter plot of concentric circles
    g = (ggplot(circles_combined, aes('x', 'y', shape='label')) +
         geom_point(size=40))
    # print(g)
    return circles_combined
def DBSCAN_circles(n_samples=10000, factor=.4, noise=.1, eps=.1, minPts=30):
    '''
    Plots the classic DBSCAN clustering example on a dataset consisting of
    two circular clusters

    Args:
        n_samples: number of total observations
        factor: scaling between inner and outer circle, see sklearn.datasets.make_circles doc
        noise: standard deviation of noise, see sklearn.datasets.make_circles doc
        eps: DBSCAN epsilon parameter
        minPts: DBSCAN minPts parameter
    '''
    circle = make_circles(n_samples=n_samples, factor=factor, noise=noise)
    circle = circle[0]
    circlescan = DBSCAN.DBSCAN(circle, eps, minPts)

    # reassign noise points to their own integer label for plotting
    labels = circlescan.cluster_labels
    for p, i in enumerate(labels):
        if i == 'noise':
            labels[p] = circlescan._n_clusters + 1
    plt.scatter(circle[:, 0], circle[:, 1], c=circlescan.cluster_labels)
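# Possible usage of the helper above (the parameter values are illustrative;
# the custom DBSCAN module is assumed to be importable as in the original):
DBSCAN_circles(n_samples=2000, factor=.4, noise=.1, eps=.1, minPts=30)
plt.show()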
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron().fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2, fit_inverse_transform=True,
                     gamma=2.0)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
def getCircles(size):
    X, y = datasets.make_circles(size, factor=.5, noise=.05)
    return X, [[i] for i in y]
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise
from sklearn.datasets import make_blobs, make_circles
from mpl_toolkits.mplot3d import Axes3D

# draw blobs data
# X, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)

# draw circles data
X, y = make_circles(100, factor=.1, noise=.1)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='winter')

# calculate the rbf (gaussian) kernel between X and (0, 0)
K = pairwise.rbf_kernel(X, np.array([[0, 0]]))
# K = pairwise.polynomial_kernel(X, np.array([[0.5, 0.5]]))

fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(X[:, 0], X[:, 1], K[:, 0], c=y, cmap='winter')
plt.show()
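# For reference, a minimal sketch of what the rbf_kernel call above computes.
# With gamma left unset, scikit-learn defaults to gamma = 1 / n_features
# (0.5 here), so the similarity of each point to the reference point (0, 0) is:
gamma = 1.0 / X.shape[1]
K_manual = np.exp(-gamma * np.sum((X - np.array([0.0, 0.0])) ** 2, axis=1))
assert np.allclose(K[:, 0], K_manual)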
# -*- coding: utf-8 -*-
# Load libraries
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

# Generate linearly inseparable data
features, _ = make_circles(n_samples=1000, random_state=1, noise=0.1,
                           factor=0.1)

# Apply kernel PCA with an RBF (radial basis function) kernel
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1)
features_kpca = kpca.fit_transform(features)

print("Original number of features:", features.shape[1])
print("Number of features after reduction:", features_kpca.shape[1])
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np
from P146 import rbf_kernel_pca
from matplotlib.ticker import FormatStrFormatter
from sklearn.datasets import make_circles

X, y = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2)
scikit_pca = PCA(n_components=2)
X_spca = scikit_pca.fit_transform(X)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))
ax[0].scatter(X_spca[y == 0, 0], X_spca[y == 0, 1],
              color='red', marker='^', alpha=0.5)
ax[0].scatter(X_spca[y == 1, 0], X_spca[y == 1, 1],
              color='blue', marker='o', alpha=0.5)
ax[1].scatter(X_spca[y == 0, 0], np.zeros((500, 1)) + 0.02,
              color='red', marker='^', alpha=0.5)
ax[1].scatter(X_spca[y == 1, 0], np.zeros((500, 1)) - 0.02,
              color='blue', marker='o', alpha=0.5)
plt.show()
sc.output_options(parser)
from scrape import write_dict
args = sc.parse_args(parser)

#results = np.loadtxt(open("test_1_centroids.csv", "rb"), delimiter=",", skiprows=1)
#print(results)

# bcml  mode  feature_class  version
#  0     1         2            3
# mode { baseline, update }

np.random.seed(0)
X, y = make_circles(n_samples=args.n_samples, factor=args.factor,
                    noise=args.noise, shuffle=args.shuffle)
datasets.dump_svmlight_file(X, y, args.output_file,
                            zero_based=args.zero_based,
                            query_id=args.query_id,
                            multilabel=args.multilabel,
                            comment=args.comment)
write_dict({'feature_file': args.output_file})
propagate correctly around the circle.
"""
print(__doc__)

# Authors: Clay Woolam <*****@*****.**>
#          Andreas Mueller <*****@*****.**>
# Licence: BSD

import numpy as np
import matplotlib.pyplot as plt
from sklearn.semi_supervised import label_propagation
from sklearn.datasets import make_circles

# generate ring with inner box
n_samples = 200
X, y = make_circles(n_samples=n_samples, shuffle=False)
outer, inner = 0, 1
labels = -np.ones(n_samples)
labels[0] = outer
labels[-1] = inner

###############################################################################
# Learn with LabelSpreading
label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=1.0)
label_spread.fit(X, labels)

###############################################################################
# Plot output labels
output_labels = label_spread.transduction_
plt.figure(figsize=(8.5, 4))
plt.subplot(1, 2, 1)
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()
]

X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable
]

figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
!pip install scipy
!pip install matplotlib
!pip install scikit-learn

import numpy as np
import scipy as sc
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

# CREATE THE DATASET
n = 500
p = 2
X, Y = make_circles(n_samples=n, factor=0.5, noise=0.05)
Y = Y[:, np.newaxis]

plt.scatter(X[Y[:, 0] == 0, 0], X[Y[:, 0] == 0, 1], c="skyblue")
plt.scatter(X[Y[:, 0] == 1, 0], X[Y[:, 0] == 1, 1], c="salmon")
plt.axis("equal")
plt.show()

# NETWORK LAYER CLASS
class neural_layer():
    def __init__(self, n_conn, n_neur, act_f):
        self.act_f = act_f
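        # The neural_layer class is cut off above. A plausible completion,
        # assuming the layer holds randomly initialized biases and weights in
        # [-1, 1], a common pattern in from-scratch tutorials like this one
        # (the original initialization may differ):
        self.b = np.random.rand(1, n_neur) * 2 - 1       # bias vector
        self.W = np.random.rand(n_conn, n_neur) * 2 - 1  # weight matrix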
""" 1. Deep Neural Network using PyTorch 2. Using non linear boundaries to seperate the data """ import torch import matplotlib.pyplot as plt import torch.nn as nn import numpy as np from sklearn import datasets number_of_points = 500 centers = [[-0.5, 0.5], [0.5, -0.5]] x, y = datasets.make_circles(n_samples=number_of_points, random_state=123, noise=0.1, factor=0.2) x_data = torch.Tensor(x) y_data = torch.Tensor(y.reshape(500, 1)) def scatter_plot(): plt.scatter(x[y == 0, 0], x[y == 0, 1]) plt.scatter(x[y == 1, 0], x[y == 1, 1]) class Model(nn.Module): #constructing a model using Linear class def __init__(self, input_size, H1, output_size): super().__init__() self.linear = nn.Linear(input_size, H1) self.linear2 = nn.Linear(H1, output_size)
excitatory_ratio = 0.5
n_iters = 100
mu = 0.1
gamma = 0.5
config_str = "layers_{} excite_{} iters_{} mu_{} gamma_{}".format(
    str(layer_sizes), excitatory_ratio, n_iters, mu, gamma)

np.random.seed(0)

# ============
# Generate datasets. We choose the size big enough to see the scalability
# of the algorithms, but not too big to avoid too long running times
# ============
n_samples = 1500
noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                      noise=.05)
noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
no_structure = np.random.rand(n_samples, 2), None

# Anisotropically distributed data
random_state = 170
X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = np.dot(X, transformation)
aniso = (X_aniso, y)

# blobs with varied variances
varied = datasets.make_blobs(n_samples=n_samples,
                             cluster_std=[1.0, 2.5, 0.5],
                             random_state=random_state)
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
from sklearn import datasets

n_pts = 500
centers = [[-0.5, 0.5], [0.5, -0.5]]
X, Y = datasets.make_circles(n_samples=n_pts, random_state=123, noise=0.1,
                             factor=0.2)
x_data = torch.Tensor(X)
y_data = torch.Tensor(Y.reshape(n_pts, 1))


def scattering():
    plt.scatter(X[Y == 0, 0], X[Y == 0, 1])
    plt.scatter(X[Y == 1, 0], X[Y == 1, 1])
    plt.show()


class Model(nn.Module):
    def __init__(self, input, output, hidden1):
        super().__init__()
        # now we have a hidden layer, so we need to include it
        self.linear = nn.Linear(input, hidden1)
        self.linear2 = nn.Linear(hidden1, output)

    def forward(self, x):
        y_tr, n_out, epochs, num_models=10, train_kwargs={}, **kwargs):
    M = ClassifierEnsemble(num_models, X_tr.shape[1], n_out,
                           F.cross_entropy, **kwargs)
    M.train(X_tr, y_tr, epochs, **train_kwargs)
    p_tr = M.predict(X_tr, posterior=True)
    return p_tr


X, y = datasets.make_circles(1000, factor=0.5, noise=0.05)
alpha = 0.2
ypu, ind = makePU(y, alpha, balanced=True)
ind_tr, ind_te = train_test_split(ind, test_size=0.2)

# save and read to keep the labels unchanged
# y = y.reshape((-1, 1))
# ypu = ypu.reshape((-1, 1))
# ind = ind.reshape((-1, 1))
# data = pd.DataFrame(np.concatenate([X, y, ypu], axis=1),
#                     columns=['X.x', 'X.y', 'y', 'ypu'])
# data.to_csv('data.txt', float_format='%.3f')
# np.savetxt('ind_tr.txt', ind_tr, delimiter=',', fmt='% 4d')
# np.savetxt('ind_te.txt', ind_te, delimiter=',', fmt='% 4d')
# data = pd.read_csv('data.txt', index_col=0, sep=',')
# X = np.array(data.iloc[:, :2])
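# makePU above is not shown. A hypothetical sketch of one common way to build
# a positive-unlabeled (PU) problem: keep a fraction alpha of the positives
# labeled, mark everything else unlabeled (0), and return the PU labels with
# the full index range. The exact semantics of the original helper (including
# its balanced flag) may differ:
import numpy as np

def makePU(y, alpha, balanced=True):
    # balanced is accepted for signature compatibility; not handled here
    pos = np.where(y == 1)[0]
    n_labeled = int(alpha * len(pos))
    labeled = np.random.choice(pos, size=n_labeled, replace=False)
    ypu = np.zeros_like(y)
    ypu[labeled] = 1          # only these positives stay labeled
    ind = np.arange(len(y))   # indices, later split into train/test
    return ypu, ind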
# https://mp.weixin.qq.com/s/UltBigoduH76vs_pmLUOVQ
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

np.random.seed(123)

# generate data set
X, y = make_circles(n_samples=1000, factor=0.5, noise=.1)

fig = plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.xlim([-1.5, 1.5])
plt.ylim([-1.5, 1.5])
plt.title("Dataset")
plt.xlabel("First feature")
plt.ylabel("Second feature")
plt.show()

# split train and test data;
# reshape targets to get a column vector with shape (n_samples, 1)
y_true = y[:, np.newaxis]

# Split the data into a training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y_true)

print(f'Shape X_train: {X_train.shape}')
print(f'Shape y_train: {y_train.shape}')
print(f'Shape X_test: {X_test.shape}')
print(f'Shape y_test: {y_test.shape}')
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm  # from sklearn.svm import SVC works just as well
from sklearn.datasets import make_circles, make_moons, make_blobs, make_classification

# Build 4 datasets
n_samples = 100
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_blobs(n_samples=n_samples, centers=2, random_state=5),  # clustered blobs
    # n_features: number of features; n_informative: informative features;
    # n_redundant: redundant (uninformative) features
    make_classification(n_samples=n_samples, n_features=2, n_informative=2,
                        n_redundant=0, random_state=5)
]
Kernel = ["linear", "poly", "rbf", "sigmoid"]

# Set up the subplot grid
nrows = len(datasets)
ncols = len(Kernel) + 1
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 16))

# Start plotting
# Outer loop: iterate over the datasets
for ds_cnt, (X, Y) in enumerate(datasets):
    # The first column of each row shows the raw data distribution
    ax = axes[ds_cnt, 0]
    return None, kmc.labels_


if __name__ == '__main__':
    from sklearn import datasets
    from sklearn.metrics import adjusted_mutual_info_score
    from kemlglearn.datasets import make_blobs
    import matplotlib.pyplot as plt

    # data = datasets.load_iris()['data']
    # labels = datasets.load_iris()['target']
    # data, labels = make_blobs(n_samples=[100, 200], n_features=2,
    #                           centers=[[1, 1], [0, 0]], random_state=2,
    #                           cluster_std=[0.2, 0.4])

    data, labels = datasets.make_circles(n_samples=400, noise=0.1,
                                         random_state=4, factor=0.3)

    km = KMeans(n_clusters=2)
    cons = SimpleConsensusClustering(n_clusters=2, n_clusters_base=20,
                                     n_components=50, ncb_rand=False)
    lkm = km.fit_predict(data)
    cons.fit(data)
    lcons = cons.labels_
    print(adjusted_mutual_info_score(lkm, labels))
    print(adjusted_mutual_info_score(lcons, labels))

    fig = plt.figure()
    # ax = fig.gca(projection='3d')
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable
]

# Iterate over the datasets
figure = plt.figure(figsize=(27, 9))
i = 1
for ds_cnt, ds in enumerate(datasets):
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4,
                                                        random_state=42)
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
# -*- coding: utf-8 -*-
# [email protected]
"""
Dictionary learning
"""
print(__doc__)

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.decomposition import DictionaryLearning
from sklearn.datasets import make_circles

mpl.style.use('fivethirtyeight')

np.random.seed(0)
X, y = make_circles(n_samples=400, factor=.3, noise=.05)

dico = DictionaryLearning(n_components=2)
X_dico = dico.fit_transform(X)

fig = plt.figure()
ax = fig.add_subplot(211)
ax.scatter(X[:, 0], X[:, 1], c=y)
ax.axis("equal")
ax = fig.add_subplot(212)
ax.scatter(X_dico[:, 0], X_dico[:, 1], c=y)
ax.axis("equal")
plt.show()
    plt.figure(figsize=(12, 8))
    # plot the contour
    plt.contourf(aa, bb, Z, cmap='bwr', alpha=0.2)
    # plot the data points
    plot_data(plt, X, y)
    return plt


# Generate two concentric circles of data. Labels are either 0 or 1.
# X is a [number of samples, 2] sized array; X[sample] holds the x, y
# position of that sample in the plane,
#   e.g. X[1] = [1.342, -2.3], X[2] = [-4.342, 2.12]
# y is a [number of samples] sized array; y[sample] holds the class index,
#   e.g. y[1] = 0, y[2] = 1
X, y = make_circles(n_samples=1000, factor=.6, noise=0.1, random_state=42)

#pl = plot_data(plt, X, y)
#pl.show()

# Split the data into Training and Test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

# Create the keras model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import cluster

x1, y1 = datasets.make_circles(n_samples=2000, factor=0.5, noise=0.05)
x2, y2 = datasets.make_blobs(n_samples=1000, centers=[[1.2, 1.2]],
                             cluster_std=[[0.1]])
xData = np.concatenate((x1, x2))
# plt.scatter(xData[:, 0], xData[:, 1])
# plt.show()

kMeansModel = cluster.KMeans(n_clusters=3)
kMeansModel.fit(xData)
kmPredict = kMeansModel.labels_
plt.scatter(xData[:, 0], xData[:, 1], c=kmPredict)
plt.show()

dbModel = cluster.DBSCAN(eps=0.2, min_samples=50)
dbModel.fit(xData)
dbPredict = dbModel.labels_
plt.scatter(xData[:, 0], xData[:, 1], c=dbPredict)
plt.show()
# mlp for the circles problem with cross-entropy loss
from utils import disable_tensorflow_gpu
from sklearn.datasets import make_circles
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from matplotlib import pyplot

# generate 2d classification dataset
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)
# split into train and test
n_train = 500
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
# define model
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='sigmoid'))
opt = SGD(lr=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
# fit model
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=200, verbose=0)
# evaluate the model
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
# plot loss during training
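# The script ends at the "plot loss during training" comment. A minimal
# sketch of the plot it most likely refers to, using the standard Keras
# History keys (on older Keras versions the accuracy keys are 'acc' and
# 'val_acc' instead of 'accuracy' and 'val_accuracy'):
pyplot.subplot(211)
pyplot.title('Cross-Entropy Loss')
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.subplot(212)
pyplot.title('Accuracy')
pyplot.plot(history.history['accuracy'], label='train')
pyplot.plot(history.history['val_accuracy'], label='test')
pyplot.legend()
pyplot.show()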
"""
@ Filename: DBSCAN_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-20
@ Update Date: 2019-05-20
@ Description: Implement DBSCAN_TEST
"""
from Cluster import KMeans as kmeans
from Cluster import DBSCAN as dbscan
from sklearn.cluster import DBSCAN
import time
import matplotlib.pyplot as plt
from sklearn import datasets

X1, y1 = datasets.make_circles(n_samples=5000, factor=.6, noise=.05)
trainData = X1[0:1000]

time_start1 = time.time()
clf1 = kmeans(k=4, cluster_type="KMeans")
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of KMeans:", time_end1 - time_start1)

time_start2 = time.time()
clf2 = dbscan()
pred = clf2.train(trainData)
time_end2 = time.time()
print("Runtime of DBSCAN:", time_end2 - time_start2)

time_start3 = time.time()
clf3 = DBSCAN(eps=0.1, min_samples=10)
# -*- coding: utf-8 -*- """ Created on Tue Mar 3 16:13:19 2020 @author: chaos """ import sys sys.path.append('../..') import numpy as np from sklearn.datasets import make_circles import matrixslow as ms X, y = make_circles(600, noise=0.1, factor=0.2) y = y * 2 - 1 # 特征维数 dimension = 20 # 构造噪声特征 X = np.concatenate([X, np.random.normal(0.0, 0.5, (600, dimension - 2))], axis=1) # 隐藏向量维度 k = 2 # 一次项 x1 = ms.core.Variable(dim=(dimension, 1), init=False, trainable=False) # 标签
    row_idx = np.where(y_pred == cluster)
    plt.scatter(X[row_idx, 0], X[row_idx, 1])
    plt.title('Clusters')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend()
    plt.show()

"""## Get the dataset"""

n = 1000
from sklearn.datasets import make_moons, make_blobs, make_circles, make_s_curve

X_moons, y_moons = make_moons(n_samples=n, noise=0.1)
X_blobs, y_blobs = make_blobs(n_samples=n, n_features=2)
X_circles, y_circles = make_circles(n_samples=n, noise=0.1, factor=0.4)
X_scurve, y_scurve = make_s_curve(n_samples=n, noise=0.1)
X_random = np.random.random([n, 2])
y_random = np.random.randint(0, 3, size=[n])

transformation = [[0.80834549, -0.83667341], [-0.20887718, 0.85253229]]
X_aniso = np.dot(X_blobs, transformation)
y_aniso = y_blobs

plot_dataset(X_moons)
visual(2, X_moons, y_moons)
plot_dataset(X_blobs)
visual(3, X_blobs, y_blobs)
def demo(self):
    h = .02
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
             "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
             "Naive Bayes", "QDA"]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()
    ]

    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)

    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable
    ]

    figure = plt.figure(figsize=(27, 9))
    i = 1
    # iterate over datasets
    for ds_cnt, ds in enumerate(datasets):
        # preprocess dataset, split into training and test part
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # just plot the dataset first
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        if ds_cnt == 0:
            ax.set_title("Input data")
        # Plot the testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6, edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        i += 1

        # iterate over classifiers
        for name, clf in zip(names, classifiers):
            ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Plot the decision boundary. For that, we will assign a color to
            # each point in the mesh [x_min, x_max]x[y_min, y_max].
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            else:
                Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

            # Put the result into a color plot
            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            # Plot the training points
            ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train,
                       cmap=cm_bright, edgecolors='k')
            # Plot the testing points
            ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                       edgecolors='k', alpha=0.6)

            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            if ds_cnt == 0:
                ax.set_title(name)
            ax.text(xx.max() - .3, yy.min() + .3,
                    ('%.2f' % score).lstrip('0'),
                    size=15, horizontalalignment='right')
            i += 1

    plt.show()
    return self
    test_normal_size = int((1 - anom_perc) * n)
    test_anom_size = n - test_normal_size
    test_data = np.vstack(
        [
            np.random.multivariate_normal(mean=mu[0, :], cov=cov[0],
                                          size=test_normal_size),
            np.random.multivariate_normal(mean=mu[1, :], cov=cov[1],
                                          size=test_anom_size),
        ]
    )
    test_labels = np.hstack([np.repeat(0, test_normal_size),
                             np.repeat(1, test_anom_size)])
    return train_normal_data, test_data, train_normal_labels, test_labels


if __name__ == "__main__":
    circle_X, circle_y = make_circles(10000, noise=0.01)
    circle_X = circle_X[circle_y == 1, :]
    circle_y = circle_y[circle_y == 1]
    pickle.dump((circle_X, circle_y),
                open("../data/circle_train_normal.pickle", "wb"))

    test_grid = np.array(
        np.meshgrid(
            np.linspace(circle_X[:, 0].min(), circle_X[:, 0].max(), 100),
            np.linspace(circle_X[:, 1].min(), circle_X[:, 1].max(), 100),
        )
    ).T.reshape(-1, 2)
    pickle.dump((test_grid, test_grid),
                open("../data/circle_test_normal.pickle", "wb"))

    scaler = MinMaxScaler()
    circle_X = scaler.fit_transform(circle_X)
    test_grid = np.array(np.meshgrid(np.linspace(0, 1, 100),
                                     np.linspace(0, 1, 100))).T.reshape(-1, 2)
def plot_classifiers():
    """
    Plot classifiers on synthetic datasets, taken from
    http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html

    A comparison of several classifiers in scikit-learn on synthetic datasets.
    The point of this example is to illustrate the nature of decision
    boundaries of different classifiers.
    This should be taken with a grain of salt, as the intuition conveyed by
    these examples does not necessarily carry over to real datasets.
    Particularly in high-dimensional spaces, data can more easily be separated
    linearly, and the simplicity of classifiers such as naive Bayes and linear
    SVMs might lead to better generalization than is achieved by other
    classifiers.
    The plots show training points in solid colors and testing points
    semi-transparent. The lower right shows the classification accuracy on
    the test set.
    """
    h = .02  # step size in the mesh

    names = ["DummyClassifier", "Nearest Neighbors", "Decision Tree",
             "Naive Bayes", "Linear SVM", "RBF SVM", "Random Forest"]
    classifiers = [
        DummyClassifier(strategy="prior"),
        KNeighborsClassifier(3),
        DecisionTreeClassifier(max_depth=5),
        GaussianNB(),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
    ]

    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)

    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable
    ]
    ds_names = ["Dataset moons", "Dataset circles",
                "Dataset linearly_separable"]

    figure = plt.figure(figsize=(27, 9))
    i = 1
    # iterate over datasets
    for ds_name, ds in zip(ds_names, datasets):
        # preprocess dataset, split into training and test part
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=.4)
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # just plot the dataset first
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        ax.set_title(ds_name)
        # Plot the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        i += 1

        # iterate over classifiers
        for name, clf in zip(names, classifiers):
            ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Plot the decision boundary. For that, we will assign a color to
            # each point in the mesh [x_min, x_max]x[y_min, y_max].
if hasattr(clf, "decision_function"): Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) else: Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] # Put the result into a color plot Z = Z.reshape(xx.shape) ax.contourf(xx, yy, Z, cmap=cm, alpha=.8) # Plot also the training points ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright) # and testing points ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6) ax.set_xlim(xx.min(), xx.max()) ax.set_ylim(yy.min(), yy.max()) ax.set_xticks(()) ax.set_yticks(()) ax.set_title(name) ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'), size=15, horizontalalignment='right') i += 1 figure.subplots_adjust(left=.02, right=.98) plt.suptitle("Comparison of Classifiers in synthetic datasets", fontsize=18) plt.show()