def show_tsne(self, number_to_show):
    plt_data = self.training_data[:number_to_show]
    plt_labels = self.training_labels[:number_to_show]
    for digit in range(10):
        instances = [i for i in plt_labels if i == digit]
        print("Digit {} appears {} times".format(digit, len(instances)))
    transformer = TSNE(n_components=2, perplexity=40, verbose=2)
    fig, plot = plt.subplots()
    fig.set_size_inches(50, 50)
    plt.prism()
    X_transformed = transformer.fit_transform(plt_data)
    plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=plt_labels)
    plt.tight_layout()
    count = 0
    for label, x, y in zip(plt_labels, X_transformed[:, 0], X_transformed[:, 1]):
        if count % 100 == 0:
            plt.annotate(str(int(label)), xy=(x, y), color='black',
                         weight='normal', size=10,
                         bbox=dict(boxstyle='round4, pad=.5'))
        count += 1
    plt.savefig('mnist_digits.pdf')
def principalComponentAnalysis():
    # https://gist.github.com/mrgloom/6622175
    # Explanation: https://lazyprogrammer.me/tutorial-principal-components-analysis-pca/
    from sklearn.decomposition import PCA
    # `mnist` is expected to be loaded at module level (a dataset object with .data and .target)
    X, y = mnist.data / 255., mnist.target
    X_train, X_test = X[:60000], X[60000:]
    y_train, y_test = y[:60000], y[60000:]
    #X_train, y_train = shuffle(X_train, y_train)
    #X_train, y_train = X_train[:1000], y_train[:1000]  # lets subsample a bit for a first impression
    pca = PCA(n_components=2, svd_solver='randomized')
    #pca = PCA(n_components=2)
    fig, plot = plt.subplots()
    fig.set_size_inches(50, 50)
    plt.prism()
    X_transformed = pca.fit_transform(X_train)
    print(pca.explained_variance_ratio_)
    plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
    plot.set_xticks(())
    plot.set_yticks(())
    plt.tight_layout()
    plt.show()
def get_pairwise_plot(train_set_x, train_set_y):
    fig, plots = plt.subplots(10, 10)
    fig.set_size_inches(50, 50)
    plt.prism()
    pca = PCA()
    for i in xrange(10):
        for j in xrange(10):
            class_i = i
            class_j = j
            class1_indexes = [index for index, value in enumerate(train_set_y) if value == class_i]
            class2_indexes = [index for index, value in enumerate(train_set_y) if value == class_j]
            class1_data = train_set_x[class1_indexes, :]
            class2_data = train_set_x[class2_indexes, :]
            pca.train(np.vstack((class1_data, class2_data)))
            class1_proj = pca.project(class1_data)
            plots[i, j].plot(class1_proj[:, 0], class1_proj[:, 1], 'o', markersize=3,
                             color='red', alpha=0.5, label=class_i)
            if class_i != class_j:
                class2_proj = pca.project(class2_data)
                plots[i, j].plot(class2_proj[:, 0], class2_proj[:, 1], 'o', markersize=3,
                                 color='green', alpha=0.5, label=class_j)
            plots[i, j].set_xticks(())
            plots[i, j].set_yticks(())
    return plt
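# The PCA object used above (and in the scatter_plot snippet below) exposes train()/project()/apply()
# methods that are not defined in this corpus. A minimal stand-in sketch, assuming a plain numpy
# SVD-based implementation; the class name SimplePCA and its internals are hypothetical.
import numpy as np

class SimplePCA(object):
    def __init__(self, n_components=2):
        self.n_components = n_components
        self.mean_ = None
        self.components_ = None

    def train(self, X):
        # Center the data and keep the leading right singular vectors as components.
        X = np.asarray(X, dtype=float)
        self.mean_ = X.mean(axis=0)
        _, _, vt = np.linalg.svd(X - self.mean_, full_matrices=False)
        self.components_ = vt[:self.n_components]

    def project(self, X):
        # Project new data onto the learned components.
        return np.dot(np.asarray(X, dtype=float) - self.mean_, self.components_.T)

    # scatter_plot() calls pca.apply(); treat it as an alias for project() in this sketch.
    apply = project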
def plot_pca(data, points, step, max_step, pca=True):
    print(step, " ", max_step)
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()
    #x_len = x_max - x_min
    #y_len = y_max - y_min
    #plt.plot([x_min + x_len*0.1, y_min + y_len*0.1], [x_min + x_len*0.8*(step / max_step), y_min + y_len*0.1])
    plt.plot(data[:, 0], data[:, 1], 'o', markerfacecolor='grey', markersize=1,
             fillstyle='full', markeredgewidth=0.0)
    #colors = ['red', 'blue', 'green', 'purple', 'orange', 'teal', 'black', 'grey']
    # Note: relies on a module-level `colors` list (e.g. the commented-out list above).
    for i in range(len(points)):
        plt.plot(points[i][0], points[i][1], 'o', markerfacecolor=colors[i],
                 markersize=6, fillstyle='full', markeredgewidth=0.0)
    plot.set_xticks(())
    plot.set_yticks(())
    plt.title(str(int(step)))
    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)
    #fig.savefig("plots/{}.pdf".format("pca" if pca else "t-sne"), bbox_inches='tight', pad_inches=0)
    fig.savefig("plots/pca/{}_step_{}.png".format("pca" if pca else "t-sne", step),
                bbox_inches='tight', pad_inches=0)
    return fig
def scatter_plot(train_x, train_y, filename):
    pca = PCA(train_x.shape[1], n_latent=2)
    fig, plots = plot.subplots(10, 10)
    fig.set_size_inches(50, 50)
    plot.prism()
    for i, j in product(xrange(10), repeat=2):
        if i > j:
            continue
        X_ = train_x[(train_y == i) + (train_y == j)]
        y_ = train_y[(train_y == i) + (train_y == j)]
        # train on each pair of vars separately
        pca.train(X_)
        X_transformed = pca.apply(X_)
        plots[i, j].scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
        plots[i, j].set_xticks(())
        plots[i, j].set_yticks(())
        plots[j, i].scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
        plots[j, i].set_xticks(())
        plots[j, i].set_yticks(())
        if i == 0:
            plots[i, j].set_title(j)
            plots[j, i].set_ylabel(j)
    # plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
    plot.tight_layout()
    plot.savefig(filename)
def visualize_data(datafile):
    #pca = RandomizedPCA(n_components=2)
    au.log.info('Linear Discriminant analysis')
    lda = LDA(n_components=2)
    fig, plots = plt.subplots(4, 4)
    fig.set_size_inches(50, 50)
    plt.prism()
    for i, j in product(xrange(4), repeat=2):
        if i > j:
            continue
        if i == j:
            continue
        X_ = X[(y == i) + (y == j)]
        y_ = y[(y == i) + (y == j)]
        #marks
        #marks = y_.astype(str)
        #marks[y_ == 0] = 'x'
        #marks[y_ == 1] = 'o'
        #marks[y_ == 2] = 'D'
        #marks[y_ == 3] = '1'
        #colors
        colors = y_.copy()
        colors[y_ == 0] = 0
        colors[y_ == 1] = 1
        colors[y_ == 2] = 2
        colors[y_ == 3] = 3
        #transform
        #X_trans = pca.fit_transform(X_)
        X_trans = lda.fit(X_, y_).transform(X_)
        #plots
        plots[i, j].scatter(X_trans[:, 0], X_trans[:, 1], c=colors, marker='o')
        plots[i, j].set_xticks(())
        plots[i, j].set_yticks(())
        plots[j, i].scatter(X_trans[:, 0], X_trans[:, 1], c=colors, marker='o')
        plots[j, i].set_xticks(())
        plots[j, i].set_yticks(())
        if i == 0:
            plots[i, j].set_title(j)
            plots[j, i].set_ylabel(j)
    #plt.scatter(X_trans[:, 0], X_trans[:, 1], c=y_)
    plt.tight_layout()
    plt.savefig(outfile)
def theanoScatterPCA(path, dataset):
    if dataset == 'mnist':
        print('Loading Mnist Data')
        (imageData, imageLabels) = LoadData.loadMNISTUnSplit(path, shared=False)
        print(imageData.shape)
    elif dataset == 'cifar':
        print('Loading Cifar Data')
        (imageData, imageLabels) = LoadData.loadCIFAR10UnSplit(path, shared=False)
        imageData = imageData / 255.
    print('Loaded')
    print("Computing Scatter Plot")
    labelIds = dict()
    for idx in range(len(imageLabels)):
        if str(imageLabels[idx]) not in labelIds:
            labelIds[str(imageLabels[idx])] = []
        labelIds[str(imageLabels[idx])].append(idx)
    fig, plots = plt.subplots(10, 10)
    fig.set_size_inches(50, 50)
    plt.prism()
    for i, j in product(xrange(10), repeat=2):
        if i > j:
            continue
        idx = labelIds[str(i)] + labelIds[str(j)]
        print('\tCalculating PCA For Classes %d And %d' % (i, j))
        X_transformed = runPCA(data=imageData, elems=idx, components=2)
        Y_ = imageLabels[labelIds[str(i)] + labelIds[str(j)]]
        plots[i, j].scatter(X_transformed[:, 0], X_transformed[:, 1], c=Y_)
        plots[i, j].set_xticks(())
        plots[i, j].set_yticks(())
        plots[j, i].scatter(X_transformed[:, 0], X_transformed[:, 1], c=Y_)
        plots[j, i].set_xticks(())
        plots[j, i].set_yticks(())
        if i == 0:
            plots[i, j].set_title(j)
            plots[j, i].set_ylabel(j)
    plt.tight_layout()
    plt.savefig('scatter/' + dataset + ".png")
    print("Computing Scatter Plot Finished")
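# runPCA() above is an external helper that is not shown in this corpus. A minimal sketch of what
# it plausibly does, assuming sklearn's PCA: select the rows listed in `elems` and reduce them to
# `components` dimensions. The signature mirrors the call site; the body is an assumption.
from sklearn.decomposition import PCA

def runPCA(data, elems, components=2):
    # Fit PCA only on the selected examples and return their low-dimensional projection.
    subset = data[elems, :]
    return PCA(n_components=components).fit_transform(subset)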
def plot_pca_mmr(data, build_orders, mmrs):
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()
    x = data[:, 0]
    y = data[:, 1]
    min_mmr = np.min(mmrs)
    max_mmr = np.max(mmrs)
    # norm() is an external helper that rescales each MMR into [0, 1] for the colormap.
    c = cm.rainbow([norm(mmr, min_mmr, max_mmr) for mmr in mmrs])
    for i in range(len(data)):
        plt.scatter(x[i], y[i], color=c[i], s=2)
    plot.set_xticks(())
    plot.set_yticks(())
    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)
    fig.savefig("plots/mds/mmr.pdf", bbox_inches='tight', pad_inches=0)
    return fig
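# norm() is not defined in this snippet; since cm.rainbow expects values in [0, 1], it is presumably
# a min-max rescaling. A hypothetical sketch:
def norm(value, lo, hi):
    # Map value from [lo, hi] to [0, 1]; guard against a zero range.
    if hi == lo:
        return 0.0
    return (value - lo) / float(hi - lo)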
def plot_2d(data, fitnesses, max_fit=1):
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()
    # Note: relies on module-level `colors` (one color per archive) and `exp_id` globals.
    for i in range(len(data)):
        for p in range(len(data[i])):
            # Scale marker size with fitness, from 2 (worst) up to 10 (best).
            size = 2 + (fitnesses[i][p] / max_fit) * 8
            plt.plot(data[i][p][0], data[i][p][1], 'o', markerfacecolor=colors[i],
                     markersize=size, fillstyle='full', markeredgewidth=0.0)
    plot.set_xticks(())
    plot.set_yticks(())
    plt.title("Archive")
    fig.savefig(f"plots/archives/archive_{exp_id}.pdf", bbox_inches='tight', pad_inches=0)
    return fig
def scatterplot(pca, x_train, y_train, n_classes, outputfile, imgsize=50):
    """Construct scatterplot of (x_train, y_train) data

    :param pca: PCA object used for computing PCA on input data
    :param x_train: input data
    :param y_train: input labels
    :param n_classes: number of classes of input data
    :param imgsize: size of the img
    :param outputfile: output file to save the plot
    """
    # Size the subplot grid by the number of classes (one row/column per class).
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(imgsize, imgsize)
    plt.prism()
    for i in range(n_classes):
        for j in range(n_classes):
            if i > j:
                continue
            print("Computing PCA for pair {}".format((i, j)))
            x = numpy.asarray([x for (x, y) in zip(x_train, y_train) if y == i or y == j])
            y = [y for y in y_train if y == i or y == j]
            x_pca = pca.compute_pca(x)
            plots[i, j].scatter(x_pca[:, 0], x_pca[:, 1], c=y)
            plots[i, j].set_xticks(())
            plots[i, j].set_yticks(())
            plots[j, i].scatter(x_pca[:, 0], x_pca[:, 1], c=y)
            plots[j, i].set_xticks(())
            plots[j, i].set_yticks(())
            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)
    plt.tight_layout()
    print("Saving figure...")
    plt.savefig(outputfile)
def train(trainx, trainy, name):
    x = tensor.matrix('x')
    pca = PCA(x, components=2)
    train_model = theano.function(
        inputs=[x],
        outputs=pca.output,
    )
    X_train, y_train = trainx / 255., trainy
    print "pca running..."
    fig, plots = plt.subplots(10, 10)
    fig.set_size_inches(50, 50)
    plt.prism()
    for i, j in product(xrange(10), repeat=2):
        if i > j:
            continue
        X_ = X_train[(y_train == i) + (y_train == j)]
        y_ = y_train[(y_train == i) + (y_train == j)]
        X_transformed = train_model(X_)
        print "pca ", i, " and ", j
        plots[i, j].scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
        plots[i, j].set_xticks(())
        plots[i, j].set_yticks(())
        plots[j, i].scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
        plots[j, i].set_xticks(())
        plots[j, i].set_yticks(())
        if i == 0:
            plots[i, j].set_title(j)
            plots[j, i].set_ylabel(j)
    plt.tight_layout()
    plt.savefig(name)
    return (basin, (bigEnough or not willBeConsideredAgain))

# State variable for _capture() for caching.
_capture._markedSoFar = {}

if __name__ == '__main__':
    #from RandomImage import RandomImage
    #np.random.seed(32)
    #i = RandomImage(200, 50, 95, (1000, 1000))
    from scipy.ndimage import imread, gaussian_filter
    i = imread("/home/bvr/SatData/2011.07.01.12.00.png")
    i = gaussian_filter(i, 1)[300:800, 300:i.shape[1] / 2]
    print "Shape:", i.shape, " DType:", i.dtype
    globs, basins = Watershed_Transform(i, 5, 250)
    print "Glob Cnt:", len(globs)
    #import cProfile
    #cProfile.run("Watershed_Transform(i, 40, 1000)", "watershed_lg5_profile")
    basins = np.ma.masked_array(basins, mask=(basins <= 0))

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(1, 2, 1)
    ax.imshow(i, cmap=plt.gray(), interpolation='none')
    ax = fig.add_subplot(1, 2, 2)
    ax.imshow(basins, vmin=0, cmap=plt.prism(), interpolation='none')
    plt.show()
import numpy as np
import matplotlib.pyplot as pl
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

#Import dataset
red = np.loadtxt("./red.txt")
blue = np.loadtxt("./blue.txt")

#Plot data
pl.prism()
pl.xlim(-1.2, 1.2)
pl.ylim(-1.2, 1.2)
pl.scatter(red[:, 0], red[:, 1], c='red')
pl.scatter(blue[:, 0], blue[:, 1], c='blue')

#Prepare data for analysis
reds = np.hstack((red, [[1]] * len(red)))
blues = np.hstack((blue, [[0]] * len(blue)))
dots = np.concatenate((reds, blues), axis=0)
x = dots[:, :-1]
y = dots[:, 2]

#Train and test sets
x, y = shuffle(x, y, random_state=1)
size = int(dots.shape[0] * 0.8)  # slice index must be an integer
x_train = x[:size]
y_train = y[:size]
x_test = x[size:]
y_test = y[size:]
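# The fragment above imports KNeighborsClassifier and RandomForestClassifier but ends at the
# train/test split. A minimal continuation sketch (not part of the original) showing how the two
# classifiers could be fit and scored on that split:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
print("kNN accuracy:", knn.score(x_test, y_test))

forest = RandomForestClassifier(n_estimators=100, random_state=1)
forest.fit(x_train, y_train)
print("Random forest accuracy:", forest.score(x_test, y_test))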
def do_oasis_visualize_pca(args):
    subjsf = args.infile.strip()
    outfile = args.outfile.strip()
    datadir = args.datadir.strip()
    maskf = args.mask.strip()
    fsmethod = args.fsmethod.strip()
    # scale = args.scale
    # scale_min = args.scale_min
    # scale_max = args.scale_max
    verbose = args.verbosity

    # logging config
    au.setup_logger(verbose)

    # loading mask
    msk = nib.load(maskf).get_data()
    nvox = np.sum(msk > 0)
    indices = np.where(msk > 0)

    # reading subjects list
    [scores, subjs] = parse_subjects_list(subjsf, datadir)
    scores = np.array(scores)
    imgsiz = nib.load(subjs[0]).shape
    nsubjs = len(subjs)

    # checking mask and first subject dimensions match
    if imgsiz != msk.shape:
        au.log.error("Subject image and mask dimensions should coincide.")
        exit(1)

    # relabeling scores to integers, if needed
    if not np.all(scores.astype(np.int) == scores):
        unis = np.unique(scores)
        scs = np.zeros(scores.shape, dtype=int)
        for k in np.arange(len(unis)):
            scs[scores == unis[k]] = k
        y = scs.copy()
    else:
        y = scores.copy()

    # loading data
    au.log.info("Loading data...")
    X = np.zeros((nsubjs, nvox), dtype="float32")
    for f in np.arange(nsubjs):
        imf = subjs[f]
        au.log.info("Reading " + imf)
        img = nib.load(imf).get_data()
        X[f, :] = img[msk > 0]

    # demo
    """
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata("MNIST original")
    X, y = mnist.data[:60000] / 255., mnist.target[:60000]
    X, y = shuffle(X, y)
    X, y = X[:5000], y[:5000]  # lets subsample a bit for a first impression
    """

    # lets start plotting
    au.log.info("Preparing plots...")
    X, y = shuffle(X, y)
    X = X / X.max()

    # reducing training and test data
    if fsmethod != "none":
        au.log.info("Feature selection : " + fsmethod)
        selector = select_features(X, y, fsmethod)
        X = selector.transform(X)

    # au.log.info('Randomized PCA')
    # pca = RandomizedPCA(n_components=2)
    au.log.info("Linear Discriminant analysis")
    lda = LDA(n_components=2)

    fig, plots = plt.subplots(4, 4)
    fig.set_size_inches(50, 50)
    plt.prism()
    for i, j in product(xrange(4), repeat=2):
        if i > j:
            continue
        if i == j:
            continue
        X_ = X[(y == i) + (y == j)]
        y_ = y[(y == i) + (y == j)]
        # marks
        # marks = y_.astype(str)
        # marks[y_ == 0] = 'x'
        # marks[y_ == 1] = 'o'
        # marks[y_ == 2] = 'D'
        # marks[y_ == 3] = '1'
        # colors
        colors = y_.copy()
        colors[y_ == 0] = 0
        colors[y_ == 1] = 1
        colors[y_ == 2] = 2
        colors[y_ == 3] = 3
        # transform
        # X_trans = pca.fit_transform(X_)
        X_trans = lda.fit(X_, y_).transform(X_)
        # plots
        plots[i, j].scatter(X_trans[:, 0], X_trans[:, 1], c=colors, marker="o")
        plots[i, j].set_xticks(())
        plots[i, j].set_yticks(())
        plots[j, i].scatter(X_trans[:, 0], X_trans[:, 1], c=colors, marker="o")
        plots[j, i].set_xticks(())
        plots[j, i].set_yticks(())
        if i == 0:
            plots[i, j].set_title(j)
            plots[j, i].set_ylabel(j)
    # plt.scatter(X_trans[:, 0], X_trans[:, 1], c=y_)
    plt.tight_layout()
    plt.savefig(outfile)
def plot_pca(data, build_orders, pca=False, eps=0.3, min_samples=10, min_clusters=1):
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()
    #plot.scatter(data[...,0][:-5], data[...,1][:-5], c=all_colors[:-5])
    #s = [100 for n in range(5)]
    #plot.scatter(data[...,0][-5:], data[...,1][-5:], c=all_colors[-5:], marker='s', s=s)

    # Clustering
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    if n_clusters_ < min_clusters:
        return

    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]
        class_member_mask = (labels == k)
        xy = data[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markersize=3,
                 fillstyle='full', markeredgewidth=0.0)

        # Find centroids
        if len(xy) > 0:
            center = (sum(xy[:, 0]) / len(xy), sum(xy[:, 1]) / len(xy))
            centroid = closest_point(center, xy)
            centroid_idx = -1
            assert centroid in data
            for i in range(len(data)):
                if np.array_equal(data[i], centroid):
                    centroid_idx = i
                    break
            assert centroid_idx >= 0
            print("Centroid of cluster {} with color {} and position {},{}".format(
                k, col, centroid[0], centroid[1]))
            print(build_orders[centroid_idx])
            plt.plot(centroid[0], centroid[1], 'o', markerfacecolor=tuple(col),
                     markeredgecolor='k', markersize=8)
            bbox_props = dict(boxstyle="circle,pad=0.1", fc="white", ec="black", lw=1)
            t = plot.text(centroid[0], centroid[1], k, ha="center", va="center",
                          rotation=0, size=6, bbox=bbox_props)

        xy = data[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markersize=3,
                 fillstyle='full', markeredgewidth=0.0)

    plot.set_xticks(())
    plot.set_yticks(())
    '''
    custom_lines = [Line2D([0], [0], marker='o', color='w', label='Scatter',
                           markerfacecolor='red', markersize=4),
                    Line2D([0], [0], marker='o', color='w', label='Scatter',
                           markerfacecolor='white', markeredgecolor='black', markersize=4)]
    '''
    #for i in range(len(human_levels)):
    #    plot.annotate("Level {}".format(i), (data[len(gen_levels)+i][0], data[len(gen_levels)+i][1]))
    '''
    for i in range(len(human_levels)):
        bbox_props = dict(boxstyle="circle,pad=0.1", fc="white", ec="black", lw=2)
        t = plot.text(data[len(gen_levels)+i][0], data[len(gen_levels)+i][1], i,
                      ha="center", va="center", rotation=0, size=15, bbox=bbox_props)
    '''
    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)
    #plot.margins(0, 0)
    #lines = ax.plot(data)
    title = "PCA" if pca else "t-SNE"
    #plt.title(title)
    #plot.legend(custom_lines, ['PCG', 'Human'], loc=1)
    #plot.legend(['Won', 'Lost', 'Human'], loc=2)
    fig.savefig("plots/mds/{}_eps-{}_sam-{}.pdf".format("pca" if pca else "t-sne", eps, min_samples),
                bbox_inches='tight', pad_inches=0)
    return fig
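# closest_point() above is an external helper that is not shown. A plausible sketch, assuming it
# returns the row of `points` nearest (in Euclidean distance) to `center`, which is what the
# snippet's `centroid in data` assertion relies on:
import numpy as np

def closest_point(center, points):
    # Distance from every candidate point to the cluster center; return the nearest one.
    d = np.linalg.norm(points - np.asarray(center), axis=1)
    return points[np.argmin(d)]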
def visualize_dataset(X, y, ih, method='both'):
    if method == 'both':
        # Draw both in same plot
        plt.figure(figsize=(4, 9))
        plt.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)

        plt.subplot(2, 1, 1)
        plt.title("MDS of %s" % dataset_name[:-4])
        mds = MDS(n_components=2, max_iter=100, n_init=1)
        X_transformed = mds.fit_transform(X)
        y[y == 0] = -1
        # color (blue/red) = sign: will reflect class, prominence = magnitude: will reflect IH.
        # Added 0.1 so that y*IH doesn't become zero (thus making colors/classes indistinguishable),
        # effectively shifting the IH range from (0, 1) to (0.1, 1.1)
        plt.scatter(X_transformed[:, 0], X_transformed[:, 1], s=5, c=y * (ih + 0.1),
                    vmin=-1, vmax=1, cmap=plt.cm.coolwarm)

        plt.subplot(2, 1, 2)
        plt.title("t-SNE of %s" % dataset_name[:-4])
        tsne = TSNE(n_components=2)
        X_transformed = tsne.fit_transform(X)
        yy = y
        yy[yy == 0] = -1
        # Same coloring scheme as above: class sign times (IH + 0.1).
        plt.scatter(X_transformed[:, 0], X_transformed[:, 1], s=5, c=yy * (ih + 0.1),
                    vmin=-1, vmax=1, cmap=plt.cm.coolwarm)

        figname = 'visualizations//' + dataset_name[:-4] + '.png'
        plt.savefig(figname, bbox_inches='tight')
        return
    else:
        # Plot either MDS or t-SNE
        if method == 'MDS':
            transformer = MDS(n_components=2, max_iter=100, n_init=1)
        elif method == 't-SNE':
            transformer = TSNE(n_components=2)
        fig, plot = plt.subplots()
        fig.set_size_inches(8, 8)
        plt.prism()
        # Blue for low (good) mRS-90, Red for High
        colors = ['blue', 'red']
        X_transformed = transformer.fit_transform(X)
        y[y == 0] = -1
        # color (blue/red) = sign: will reflect class, prominence = magnitude: will reflect IH.
        # Added 0.2 so that y*IH doesn't become zero (thus making colors/classes indistinguishable),
        # effectively shifting the IH range from (0, 1) to (0.2, 1.2)
        plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y * (ih + 0.2),
                     vmin=-1, vmax=1, cmap=plt.cm.coolwarm)
        plot.set_xticks(())
        plot.set_yticks(())
        count = 0
        plt.tight_layout()
        if method == 'MDS':
            title = "MDS of " + dataset_name + " dataset"
            # title = "MDS"
            plt.suptitle(title, fontsize=20)
        else:
            title = "t-SNE of " + dataset_name + " dataset"
            # title = "t-SNE"
            plt.suptitle(title, fontsize=20)
        plt.show()
        return
for i, label in enumerate(yy_train):
    if label in mytargets:
        X_train.append(XX_train[i])
        y_train.append(yy_train[i])

num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:num_samples_to_plot], y_train[:num_samples_to_plot]  # lets subsample a bit for a first impression

for digit in mytargets:
    instances = [i for i in y_train if i == digit]
    print "Digit", digit, "appears ", len(instances), "times"

transformer = Isomap(n_neighbors=10, n_components=2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()
X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())
count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits")
for label, x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
    # Lets annotate every 1 out of 200 samples, otherwise graph will be cluttered with annotations
    if count % 200 == 0:
        plt.annotate(str(int(label)), xy=(x, y), color='black',
                     weight='normal', size=10,
                     bbox=dict(boxstyle="round4,pad=.5", fc="0.8"))
    count = count + 1
#plt.savefig("mnist_pca.png")