示例#1
0
    def show_tsne(self, number_to_show):
        """Embed the leading training samples with t-SNE and save a scatter plot.

        Prints how often each digit 0-9 occurs among the first
        ``number_to_show`` labels, projects the matching samples to two
        dimensions, colours the points by label, annotates every 100th point
        with its label and writes the figure to ``mnist_digits.pdf``.

        :param number_to_show: number of leading samples/labels to plot
        """
        subset = self.training_data[:number_to_show]
        subset_labels = self.training_labels[:number_to_show]

        # Report the class balance of the plotted subset.
        for digit in range(10):
            occurrences = sum(1 for lbl in subset_labels if lbl == digit)
            print("Digit {} appears {} times".format(digit, occurrences))

        embedder = TSNE(n_components=2, perplexity=40, verbose=2)
        fig, _axes = plt.subplots()
        fig.set_size_inches(50, 50)
        plt.prism()

        embedding = embedder.fit_transform(subset)
        xs = embedding[:, 0]
        ys = embedding[:, 1]
        plt.scatter(xs, ys, c=subset_labels)
        plt.tight_layout()

        # Annotate only every 100th sample so the labels stay readable.
        for position, (label, x, y) in enumerate(zip(subset_labels, xs, ys)):
            if position % 100 != 0:
                continue
            plt.annotate(str(int(label)),
                         xy=(x, y),
                         color='black',
                         weight='normal',
                         size=10,
                         bbox=dict(boxstyle='round4, pad=.5'))
        plt.savefig('mnist_digits.pdf')
示例#2
0
def princicalComponentAnalysis():
    """Show the first 60000 ``mnist`` samples projected onto two principal components.

    Reads the module-level ``mnist`` object, divides its data by 255, fits a
    two-component PCA (randomized SVD solver) on the first 60000 rows, prints
    the explained variance ratio and displays a scatter plot coloured by the
    matching targets.

    Background reading:
      * https://gist.github.com/mrgloom/6622175
      * https://lazyprogrammer.me/tutorial-principal-components-analysis-pca/
    """
    from sklearn.decomposition import PCA

    n_train = 60000
    scaled = mnist.data / 255.
    targets = mnist.target
    X_train = scaled[:n_train]
    y_train = targets[:n_train]

    reducer = PCA(n_components=2, svd_solver='randomized')

    fig, axes = plt.subplots()
    fig.set_size_inches(50, 50)
    plt.prism()

    projected = reducer.fit_transform(X_train)
    print(reducer.explained_variance_ratio_)

    axes.scatter(projected[:, 0], projected[:, 1], c=y_train)
    # Tick marks carry no meaning in the projected space, so hide them.
    axes.set_xticks(())
    axes.set_yticks(())

    plt.tight_layout()
    plt.show()
def get_pairwise_plot(train_set_x, train_set_y):
    """Draw a 10x10 grid of PCA projections, one cell per ordered class pair.

    For every pair of classes ``(i, j)`` in 0..9 the PCA object is trained on
    the stacked samples of both classes. Class ``i`` is plotted in red and,
    when ``i != j``, class ``j`` in green.

    :param train_set_x: sample matrix, indexed as ``train_set_x[rows, :]``
    :param train_set_y: iterable of class labels aligned with the rows
    :return: the ``plt`` module, with the populated figure as current figure
    """
    n_classes = 10
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()
    pca = PCA()

    # Row indices per class, computed once instead of once per grid cell.
    # ``range`` replaces the Python-2-only ``xrange`` and works on both.
    indexes_by_class = [
        [index for index, value in enumerate(train_set_y) if value == cls]
        for cls in range(n_classes)
    ]

    for class_i in range(n_classes):
        class1_data = train_set_x[indexes_by_class[class_i], :]
        for class_j in range(n_classes):
            class2_data = train_set_x[indexes_by_class[class_j], :]
            axes = plots[class_i, class_j]

            # Fit on both classes together so they share one projection.
            pca.train(np.vstack((class1_data, class2_data)))

            class1_proj = pca.project(class1_data)
            axes.plot(class1_proj[:, 0], class1_proj[:, 1], 'o',
                      markersize=3, color='red', alpha=0.5, label=class_i)

            if class_i != class_j:
                class2_proj = pca.project(class2_data)
                axes.plot(class2_proj[:, 0], class2_proj[:, 1], 'o',
                          markersize=3, color='green', alpha=0.5,
                          label=class_j)

            axes.set_xticks(())
            axes.set_yticks(())

    return plt
示例#4
0
def plot_pca(data, points, step, max_step, pca=True):
    """Plot a background point cloud plus highlighted points and save a PNG frame.

    The first two columns of ``data`` are drawn as small grey dots. Each entry
    of ``points`` is drawn as a larger dot whose colour is taken from the
    module-level ``colors`` sequence at the same index. The figure is titled
    with ``step`` and written to ``plots/pca/<pca|t-sne>_step_<step>.png``.

    :param data: array indexed as ``data[:, 0]`` / ``data[:, 1]``
    :param points: sequence of ``(x, y, ...)`` entries to highlight
    :param step: current step, used for the title and the file name
    :param max_step: total number of steps (only printed)
    :param pca: selects the ``"pca"`` or ``"t-sne"`` file-name prefix
    :return: the created figure
    """
    print(step, " ", max_step)
    fig, axes = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()

    plt.plot(data[:, 0],
             data[:, 1],
             'o',
             markerfacecolor='grey',
             markersize=1,
             fillstyle='full',
             markeredgewidth=0.0)

    for idx, point in enumerate(points):
        plt.plot(point[0],
                 point[1],
                 'o',
                 markerfacecolor=colors[idx],
                 markersize=6,
                 fillstyle='full',
                 markeredgewidth=0.0)

    axes.set_xticks(())
    axes.set_yticks(())
    plt.title(str(int(step)))
    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)

    prefix = "pca" if pca else "t-sne"
    target = "plots/pca/{}_step_{}.png".format(prefix, step)
    fig.savefig(target, bbox_inches='tight', pad_inches=0)
    return fig
示例#5
0
def scatter_plot(train_x, train_y, filename):
    """Save a 10x10 grid of pairwise-class PCA scatter plots.

    For every class pair ``i <= j`` the PCA object is trained on the samples
    of those two classes only, and the projection is drawn in both cell
    ``(i, j)`` and its mirror ``(j, i)``. The first row and column are
    labelled with the class number.

    In this snippet ``plot`` is the pyplot module (it is called as
    ``plot.subplots`` / ``plot.savefig``).

    :param train_x: sample matrix; ``train_x.shape[1]`` is passed to ``PCA``
    :param train_y: label array supporting element-wise ``==`` comparison
    :param filename: destination passed to ``plot.savefig``
    """
    n_classes = 10
    pca = PCA(train_x.shape[1], n_latent=2)

    fig, plots = plot.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plot.prism()

    # Nested ``range`` loops visit exactly the i <= j pairs the original
    # reached via product(xrange(10), repeat=2) plus a skip, and also run on
    # Python 3 where ``xrange`` does not exist.
    for i in range(n_classes):
        for j in range(i, n_classes):
            # One mask for both selections (logical OR of the two classes).
            pair_mask = (train_y == i) | (train_y == j)
            X_ = train_x[pair_mask]
            y_ = train_y[pair_mask]

            # Train on each pair of classes separately.
            pca.train(X_)
            X_transformed = pca.apply(X_)

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plot.tight_layout()
    plot.savefig(filename)
def visualize_data(datafile):
    """Save a 4x4 grid of pairwise LDA projections for classes 0..3.

    For every class pair ``i < j`` an LDA model is fitted on the samples of
    those two classes, and the transformed samples are drawn in cell
    ``(i, j)`` and its mirror ``(j, i)``. Diagonal cells stay empty.

    NOTE(review): ``datafile`` is never used, and ``X``, ``y`` and ``outfile``
    are not defined in this function -- they must exist in an enclosing or
    module scope; confirm against the caller.
    NOTE(review): the plot reads two columns of the transform while each fit
    only sees two classes -- confirm the installed LDA returns two components
    in that case.

    :param datafile: unused
    """
    au.log.info('Linear Discriminant analysis')
    lda = LDA(n_components=2)

    n_classes = 4
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` works on Python 2 and 3 (``xrange`` is Python 2 only);
    # starting j at i + 1 replaces the two ``continue`` guards.
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pair_mask = (y == i) | (y == j)
            X_ = X[pair_mask]
            y_ = y[pair_mask]

            # Colour value per sample; currently the class label itself.
            colors = y_.copy()
            for label in range(n_classes):
                colors[y_ == label] = label

            X_trans = lda.fit(X_, y_).transform(X_)

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_trans[:, 0], X_trans[:, 1],
                             c=colors, marker='o')
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig(outfile)
示例#7
0
def visualize_data(datafile):
    """Save a 4x4 grid of pairwise LDA projections for classes 0..3.

    For every class pair ``i < j`` an LDA model is fitted on the samples of
    those two classes, and the transformed samples are drawn in cell
    ``(i, j)`` and its mirror ``(j, i)``. Diagonal cells stay empty.

    NOTE(review): ``datafile`` is never used, and ``X``, ``y`` and ``outfile``
    are not defined in this function -- they must exist in an enclosing or
    module scope; confirm against the caller.
    NOTE(review): the plot reads two columns of the transform while each fit
    only sees two classes -- confirm the installed LDA returns two components
    in that case.

    :param datafile: unused
    """
    au.log.info('Linear Discriminant analysis')
    lda = LDA(n_components=2)

    n_classes = 4
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` works on Python 2 and 3 (``xrange`` is Python 2 only);
    # starting j at i + 1 replaces the two ``continue`` guards.
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pair_mask = (y == i) | (y == j)
            X_ = X[pair_mask]
            y_ = y[pair_mask]

            # Colour value per sample; currently the class label itself.
            colors = y_.copy()
            for label in range(n_classes):
                colors[y_ == label] = label

            X_trans = lda.fit(X_, y_).transform(X_)

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_trans[:, 0], X_trans[:, 1],
                             c=colors, marker='o')
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig(outfile)
示例#8
0
def theanoScatterPCA(path, dataset):
    """Save a 10x10 grid of pairwise-class PCA scatter plots for a dataset.

    Loads the unsplit MNIST or CIFAR-10 data through ``LoadData``, groups the
    sample indices by label, runs ``runPCA`` with two components on every
    class pair ``i <= j`` and draws the result in cell ``(i, j)`` and its
    mirror. The figure is written to ``scatter/<dataset>.png``.

    :param path: location handed to the ``LoadData`` loader
    :param dataset: ``'mnist'`` or ``'cifar'``
    :raises ValueError: if ``dataset`` is neither ``'mnist'`` nor ``'cifar'``
    """
    if dataset == 'mnist':
        print('Loading Mnist Data')
        imageData, imageLabels = LoadData.loadMNISTUnSplit(path, shared=False)
        print(imageData.shape)
    elif dataset == 'cifar':
        print('Loading Cifar Data')
        imageData, imageLabels = LoadData.loadCIFAR10UnSplit(path,
                                                             shared=False)
        imageData = imageData / 255.
    else:
        # Fail early instead of hitting an unbound ``imageLabels`` below.
        raise ValueError(
            "Unknown dataset %r; expected 'mnist' or 'cifar'" % (dataset,))
    print('Loaded')

    print("Computing Scatter Plot")
    # Sample indices grouped by the string form of their label.
    labelIds = dict()
    for idx, label in enumerate(imageLabels):
        labelIds.setdefault(str(label), []).append(idx)

    n_classes = 10
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` replaces the Python-2-only ``xrange``; starting j at i visits
    # the same i <= j pairs in the same order.
    for i in range(n_classes):
        for j in range(i, n_classes):
            idx = labelIds[str(i)] + labelIds[str(j)]
            print('\tCalculating PCA For Classes %d And %d' % (i, j))
            X_transformed = runPCA(data=imageData, elems=idx, components=2)
            Y_ = imageLabels[idx]

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_transformed[:, 0], X_transformed[:, 1], c=Y_)
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig('scatter/' + dataset + ".png")
    print("Computing Scatter Plot Finished")
示例#9
0
def theanoScatterPCA(path, dataset):
    """Save a 10x10 grid of pairwise-class PCA scatter plots for a dataset.

    Loads the unsplit MNIST or CIFAR-10 data through ``LoadData``, groups the
    sample indices by label, runs ``runPCA`` with two components on every
    class pair ``i <= j`` and draws the result in cell ``(i, j)`` and its
    mirror. The figure is written to ``scatter/<dataset>.png``.

    :param path: location handed to the ``LoadData`` loader
    :param dataset: ``'mnist'`` or ``'cifar'``
    :raises ValueError: if ``dataset`` is neither ``'mnist'`` nor ``'cifar'``
    """
    if dataset == 'mnist':
        print('Loading Mnist Data')
        imageData, imageLabels = LoadData.loadMNISTUnSplit(path, shared=False)
        print(imageData.shape)
    elif dataset == 'cifar':
        print('Loading Cifar Data')
        imageData, imageLabels = LoadData.loadCIFAR10UnSplit(path,
                                                             shared=False)
        imageData = imageData / 255.
    else:
        # Fail early instead of hitting an unbound ``imageLabels`` below.
        raise ValueError(
            "Unknown dataset %r; expected 'mnist' or 'cifar'" % (dataset,))
    print('Loaded')

    print("Computing Scatter Plot")
    # Sample indices grouped by the string form of their label.
    labelIds = dict()
    for idx, label in enumerate(imageLabels):
        labelIds.setdefault(str(label), []).append(idx)

    n_classes = 10
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` replaces the Python-2-only ``xrange``; starting j at i visits
    # the same i <= j pairs in the same order.
    for i in range(n_classes):
        for j in range(i, n_classes):
            idx = labelIds[str(i)] + labelIds[str(j)]
            print('\tCalculating PCA For Classes %d And %d' % (i, j))
            X_transformed = runPCA(data=imageData, elems=idx, components=2)
            Y_ = imageLabels[idx]

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_transformed[:, 0], X_transformed[:, 1], c=Y_)
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig('scatter/' + dataset + ".png")
    print("Computing Scatter Plot Finished")
示例#10
0
def plot_pca_mmr(data, build_orders, mmrs):
    """Scatter the first two columns of ``data`` coloured by MMR and save a PDF.

    Each MMR is mapped through the file's ``norm`` helper with the observed
    minimum and maximum, then through the ``rainbow`` colormap. The figure is
    written to ``plots/mds/mmr.pdf``.

    :param data: array indexed as ``data[:, 0]`` / ``data[:, 1]``
    :param build_orders: unused
    :param mmrs: one value per row of ``data``
    :return: the created figure
    """
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()

    min_mmr = np.min(mmrs)
    max_mmr = np.max(mmrs)
    point_colors = cm.rainbow([norm(mmr, min_mmr, max_mmr) for mmr in mmrs])

    # One scatter call for all points instead of one collection per point.
    # The colour array is trimmed to the number of rows because the original
    # loop only consumed that many entries.
    plt.scatter(data[:, 0],
                data[:, 1],
                color=point_colors[:len(data)],
                s=2)

    plot.set_xticks(())
    plot.set_yticks(())

    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)
    fig.savefig("plots/mds/mmr.pdf", bbox_inches='tight', pad_inches=0)
    return fig
def plot_2d(data, fitnesses, max_fit=1):
    """Plot groups of 2-D points sized by fitness and save the figure as a PDF.

    Every point of group ``i`` is drawn with the colour ``colors[i]``
    (module-level sequence) and a marker size of
    ``2 + fitness / max_fit * 8``. The figure is written to
    ``plots/archives/archive_<exp_id>.pdf`` using the module-level ``exp_id``.

    :param data: sequence of groups, each a sequence of ``(x, y, ...)`` points
    :param fitnesses: ``fitnesses[i][p]`` is the fitness of point ``p`` of group ``i``
    :param max_fit: value the fitnesses are divided by
    :return: the created figure
    """
    fig, axes = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()

    for group_idx, group in enumerate(data):
        group_color = colors[group_idx]
        for point_idx, point in enumerate(group):
            relative_fitness = fitnesses[group_idx][point_idx] / max_fit
            marker_size = 2 + relative_fitness * 8
            plt.plot(point[0],
                     point[1],
                     'o',
                     markerfacecolor=group_color,
                     markersize=marker_size,
                     fillstyle='full',
                     markeredgewidth=0.0)

    axes.set_xticks(())
    axes.set_yticks(())
    plt.title("Archive")

    target = f"plots/archives/archive_{exp_id}.pdf"
    fig.savefig(target, bbox_inches='tight', pad_inches=0)
    return fig
示例#12
0
def scatterplot(pca, x_train, y_train, n_classes, outputfile, imgsize=50):
    """Construct a grid of pairwise-class PCA scatter plots and save it.

    For every class pair ``i <= j`` the samples of both classes are projected
    with ``pca.compute_pca`` and drawn in cell ``(i, j)`` and its mirror
    ``(j, i)``. The first row and column are labelled with the class number.

    :param pca: object whose ``compute_pca(x)`` returns the projected data
    :param x_train: input data, iterated in step with ``y_train``
    :param y_train: input labels
    :param n_classes: number of classes; also the side length of the grid
    :param outputfile: output file to save the plot
    :param imgsize: size of the image in inches (both width and height)
    """
    # Size the grid from n_classes instead of a hard-coded 10x10;
    # squeeze=False keeps ``plots`` two-dimensional even for a 1x1 grid.
    fig, plots = plt.subplots(n_classes, n_classes, squeeze=False)
    fig.set_size_inches(imgsize, imgsize)
    plt.prism()

    for i in range(n_classes):
        for j in range(i, n_classes):
            print("Computing PCA for pair {}".format((i, j)))
            pair_x = numpy.asarray([
                sample for sample, label in zip(x_train, y_train)
                if label == i or label == j
            ])
            pair_y = [label for label in y_train if label == i or label == j]
            x_pca = pca.compute_pca(pair_x)

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(x_pca[:, 0], x_pca[:, 1], c=pair_y)
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    print("Saving figure...")
    plt.savefig(outputfile)
def train(trainx, trainy, name):
    """Save a 10x10 grid of pairwise-class projections from a Theano PCA graph.

    Builds a symbolic matrix, wraps it in the file's ``PCA`` class with two
    components and compiles a Theano function returning ``pca.output``. For
    every class pair ``i <= j`` the samples of both classes (divided by 255)
    are passed through that function and drawn in cell ``(i, j)`` and its
    mirror ``(j, i)``.

    :param trainx: sample matrix
    :param trainy: label array supporting element-wise ``==`` comparison
    :param name: destination passed to ``plt.savefig``
    """
    x = tensor.matrix('x')
    pca = PCA(x, components=2)
    train_model = theano.function(
        inputs=[x],
        outputs=pca.output,
    )

    X_train, y_train = trainx / 255., trainy

    # Single-argument print() calls emit the same text on Python 2 and 3;
    # the original print statements are a SyntaxError on Python 3.
    print("pca running...")

    n_classes = 10
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` replaces the Python-2-only ``xrange``; starting j at i visits
    # the same i <= j pairs in the same order.
    for i in range(n_classes):
        for j in range(i, n_classes):
            pair_mask = (y_train == i) | (y_train == j)
            X_ = X_train[pair_mask]
            y_ = y_train[pair_mask]
            X_transformed = train_model(X_)
            print("pca  %s  and  %s" % (i, j))

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_)
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig(name)
示例#14
0
    return (basin, (bigEnough or not willBeConsideredAgain))

# State variable for _capture() for caching.
_capture._markedSoFar = {}

if __name__ == '__main__':
    # Demo: run the watershed transform on a smoothed crop of a satellite
    # image and show the input next to the resulting basins.
    #
    # Alternative synthetic input:
    #   from RandomImage import RandomImage
    #   np.random.seed(32)
    #   i = RandomImage(200, 50, 95, (1000, 1000))

    # NOTE(review): recent SciPy releases no longer ship ``ndimage.imread``;
    # confirm the pinned version before switching image readers.
    from scipy.ndimage import imread, gaussian_filter
    i = imread("/home/bvr/SatData/2011.07.01.12.00.png")
    # Floor division keeps the slice bound an integer on Python 3 as well.
    i = gaussian_filter(i, 1)[300:800, 300:i.shape[1] // 2]
    # Single-argument print() calls emit the same text on Python 2 and 3.
    print("Shape: %s   DType: %s" % (i.shape, i.dtype))

    globs, basins = Watershed_Transform(i, 5, 250)
    print("Glob Cnt: %d" % len(globs))
    # Profiling hook:
    #   import cProfile
    #   cProfile.run("Watershed_Transform(i, 40, 1000)", "watershed_lg5_profile")

    # Hide everything that is not part of a basin (ids <= 0).
    basins = np.ma.masked_array(basins, mask=(basins <= 0))
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(1, 2, 1)
    # Name the colormaps explicitly: plt.gray() / plt.prism() return None and
    # only act through the global default colormap, which pyplot may also
    # apply to an image that has already been drawn.
    ax.imshow(i, cmap='gray', interpolation='none')
    ax = fig.add_subplot(1, 2, 2)
    ax.imshow(basins, vmin=0, cmap='prism', interpolation='none')
    plt.show()
示例#15
0
        X_train.append(XX_train[i])
        y_train.append(yy_train[i])
num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:
                           num_samples_to_plot], y_train[:
                                                         num_samples_to_plot]  # lets subsample a bit for a first impression

for digit in mytargets:
    instances = [i for i in y_train if i == digit]
    print "Digit", digit, "appears ", len(instances), "times"

transformer = Isomap(n_neighbors=10, n_components=2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits ")
for label, x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
    #Lets annotate every 1 out of 200 samples, otherwise graph will be cluttered with anotations
    if count % 200 == 0:
        plt.annotate(str(int(label)),
                     xy=(x, y),
                     color='black',
示例#16
0
import numpy as np
import matplotlib.pyplot as pl
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Import dataset
red = np.loadtxt("./red.txt")
blue = np.loadtxt("./blue.txt")

# Plot data. pyplot is imported as ``pl`` in this file, so the axis limits
# must go through ``pl`` as well (``plt`` is not defined here).
pl.prism()
pl.xlim(-1.2, 1.2)
pl.ylim(-1.2, 1.2)
pl.scatter(red[:, 0], red[:, 1], c='red')
pl.scatter(blue[:, 0], blue[:, 1], c='blue')

# Prepare data for analysis: append a label column (1 = red, 0 = blue),
# stack both sets, then split features from the label in the last column.
reds = np.hstack((red, [[1]] * len(red)))
blues = np.hstack((blue, [[0]] * len(blue)))
dots = np.concatenate((reds, blues), axis=0)
x = dots[:, :-1]
y = dots[:, -1]

# Train and test sets (80% / 20%). The split point must be an integer to be
# usable as a slice bound.
x, y = shuffle(x, y, random_state=1)
size = int(dots.shape[0] * 0.8)
x_train = x[:size]
y_train = y[:size]
x_test = x[size:]
y_test = y[size:]
def do_oasis_visualize_pca(args):
    """Load masked subject images and save a 4x4 grid of pairwise LDA plots.

    Steps, in order:
      1. read the mask and the subjects list,
      2. relabel non-integer scores to consecutive integers,
      3. build one feature row per subject from the voxels inside the mask,
      4. shuffle, scale by the global maximum and optionally run feature
         selection,
      5. for every class pair ``i < j`` in 0..3 fit LDA on those two classes
         and draw the transformed samples in cells ``(i, j)`` and ``(j, i)``.

    :param args: namespace providing ``infile``, ``outfile``, ``datadir``,
        ``mask``, ``fsmethod`` (strings) and ``verbosity``
    """
    subjsf = args.infile.strip()
    outfile = args.outfile.strip()
    datadir = args.datadir.strip()
    maskf = args.mask.strip()
    fsmethod = args.fsmethod.strip()

    verbose = args.verbosity

    # logging config
    au.setup_logger(verbose)

    # loading mask
    # NOTE(review): ``get_data()`` is deprecated in newer nibabel releases;
    # confirm the pinned version before changing it.
    msk = nib.load(maskf).get_data()
    in_mask = msk > 0
    nvox = np.sum(in_mask)

    # reading subjects list
    scores, subjs = parse_subjects_list(subjsf, datadir)
    scores = np.array(scores)

    imgsiz = nib.load(subjs[0]).shape
    nsubjs = len(subjs)

    # checking mask and first subject dimensions match
    if imgsiz != msk.shape:
        au.log.error("Subject image and mask dimensions should coincide.")
        exit(1)

    # relabeling scores to integers, if needed
    # (``np.int`` was an alias of the builtin ``int`` and no longer exists in
    # current NumPy, so the builtin is used directly.)
    if not np.all(scores.astype(int) == scores):
        unis = np.unique(scores)
        scs = np.zeros(scores.shape, dtype=int)
        for k, value in enumerate(unis):
            scs[scores == value] = k
        y = scs.copy()
    else:
        y = scores.copy()

    # loading data: one row per subject, one column per in-mask voxel
    au.log.info("Loading data...")
    X = np.zeros((nsubjs, nvox), dtype="float32")
    for row, imf in enumerate(subjs):
        au.log.info("Reading " + imf)
        X[row, :] = nib.load(imf).get_data()[in_mask]

    # Disabled demo input kept for reference:
    #   from sklearn.datasets import fetch_mldata
    #   mnist = fetch_mldata("MNIST original")
    #   X, y = mnist.data[:60000] / 255., mnist.target[:60000]
    #   X, y = shuffle(X, y)
    #   X, y = X[:5000], y[:5000]  # subsample for a first impression

    # lets start plotting
    au.log.info("Preparing plots...")
    X, y = shuffle(X, y)
    X = X / X.max()

    # reducing training and test data
    if fsmethod != "none":
        au.log.info("Feature selecion : " + fsmethod)
        selector = select_features(X, y, fsmethod)
        X = selector.transform(X)

    au.log.info("Linear Discriminant analysis")
    lda = LDA(n_components=2)

    n_classes = 4
    fig, plots = plt.subplots(n_classes, n_classes)
    fig.set_size_inches(50, 50)
    plt.prism()

    # ``range`` works on Python 2 and 3 (``xrange`` is Python 2 only);
    # starting j at i + 1 replaces the two ``continue`` guards.
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pair_mask = (y == i) | (y == j)
            X_ = X[pair_mask]
            y_ = y[pair_mask]

            # Colour value per sample; currently the class label itself.
            colors = y_.copy()
            for label in range(n_classes):
                colors[y_ == label] = label

            # NOTE(review): two columns are read from a two-class LDA
            # transform -- confirm the installed LDA provides them.
            X_trans = lda.fit(X_, y_).transform(X_)

            for axes in (plots[i, j], plots[j, i]):
                axes.scatter(X_trans[:, 0], X_trans[:, 1],
                             c=colors, marker="o")
                axes.set_xticks(())
                axes.set_yticks(())

            if i == 0:
                plots[i, j].set_title(j)
                plots[j, i].set_ylabel(j)

    plt.tight_layout()
    plt.savefig(outfile)
示例#18
0
def plot_pca(data,
             build_orders,
             pca=False,
             eps=0.3,
             min_samples=10,
             min_clusters=1):
    """Cluster points with DBSCAN, plot the clusters and save the figure as a PDF.

    Core and non-core members of every cluster are drawn as small dots in the
    cluster's colour (black for the noise label ``-1``). For each cluster
    with core samples, the core sample returned by ``closest_point`` for the
    cluster mean is highlighted, labelled with the cluster id, and its entry
    in ``build_orders`` is printed. The figure is written to
    ``plots/mds/<pca|t-sne>_eps-<eps>_sam-<min_samples>.pdf``.

    :param data: array whose first two columns are plotted
    :param build_orders: sequence aligned with the rows of ``data``
    :param pca: selects the ``"pca"`` or ``"t-sne"`` file-name prefix
    :param eps: DBSCAN ``eps``
    :param min_samples: DBSCAN ``min_samples``
    :param min_clusters: minimum number of clusters required to produce a plot
    :return: the figure, or ``None`` when fewer than ``min_clusters``
        clusters were found
    """
    fig, plot = plt.subplots()
    fig.set_size_inches(4, 4)
    plt.prism()

    # Clustering
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    labels = db.labels_
    core_samples_mask = np.zeros_like(labels, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    if n_clusters_ < min_clusters:
        # Nothing will be drawn or returned; release the empty figure so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
        return None

    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = [
        plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))
    ]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]
        face_color = tuple(col)

        class_member_mask = (labels == k)

        # Core samples of this cluster.
        xy = data[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0],
                 xy[:, 1],
                 'o',
                 markerfacecolor=face_color,
                 markersize=3,
                 fillstyle='full',
                 markeredgewidth=0.0)

        # Find centroids
        if len(xy) > 0:
            center = (sum(xy[:, 0]) / len(xy), sum(xy[:, 1]) / len(xy))
            centroid = closest_point(center, xy)
            # Row of ``data`` holding the centroid, needed to look up its
            # build order; -1 if no row matches.
            centroid_idx = next(
                (idx for idx, row in enumerate(data)
                 if np.array_equal(row, centroid)),
                -1)
            assert centroid_idx >= 0

            print("Centroid of cluster {} with color {} and position {},{}".
                  format(k, col, centroid[0], centroid[1]))
            print(build_orders[centroid_idx])
            plt.plot(centroid[0],
                     centroid[1],
                     'o',
                     markerfacecolor=face_color,
                     markeredgecolor='k',
                     markersize=8)

            bbox_props = dict(boxstyle="circle,pad=0.1",
                              fc="white",
                              ec="black",
                              lw=1)
            plot.text(centroid[0],
                      centroid[1],
                      k,
                      ha="center",
                      va="center",
                      rotation=0,
                      size=6,
                      bbox=bbox_props)

        # Non-core (border) samples of this cluster.
        xy = data[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0],
                 xy[:, 1],
                 'o',
                 markerfacecolor=face_color,
                 markersize=3,
                 fillstyle='full',
                 markeredgewidth=0.0)

    plot.set_xticks(())
    plot.set_yticks(())
    plt.tight_layout(pad=-0.5, w_pad=-0.5, h_pad=-0.5)

    fig.savefig("plots/mds/{}_eps-{}_sam-{}.pdf".format(
        "pca" if pca else "t-sne", eps, min_samples),
                bbox_inches='tight',
                pad_inches=0)
    return fig
示例#19
0
def visualize_dataset(X, y, ih, method='both'):
    """Visualise a dataset in two dimensions with MDS and/or t-SNE.

    Points are coloured with ``signed_label * (ih + offset)`` on the coolwarm
    colormap, where label 0 is mapped to -1: the sign separates the classes
    and the magnitude follows ``ih``. The offset keeps the product away from
    zero; it is 0.1 in the combined plot and 0.2 in the single plots.

    * ``method='both'``: MDS and t-SNE stacked in one figure, saved to
      ``visualizations//<dataset_name[:-4]>.png``.
    * ``method='MDS'`` or ``'t-SNE'``: one 8x8 inch plot shown with
      ``plt.show()``.

    ``dataset_name`` is read from module scope.

    :param X: samples handed to ``fit_transform``
    :param y: label array; left unmodified (a copy is recoded)
    :param ih: per-sample values combined with the signed labels
    :param method: ``'both'``, ``'MDS'`` or ``'t-SNE'``
    :raises ValueError: if ``method`` is not one of the supported values
    """
    if method not in ('both', 'MDS', 't-SNE'):
        # Previously this fell through to an unbound ``transformer``.
        raise ValueError(
            "method must be 'both', 'MDS' or 't-SNE', got %r" % (method,))

    # Recode on a copy so the caller's labels are not rewritten in place.
    signed_y = y.copy()
    signed_y[signed_y == 0] = -1

    if method == 'both':  # Draw both in same plot
        plt.figure(figsize=(4, 9))
        plt.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)
        point_colors = signed_y * (ih + 0.1)

        plt.subplot(2, 1, 1)
        plt.title("MDS of %s" % dataset_name[:-4])
        mds = MDS(n_components=2, max_iter=100, n_init=1)
        X_transformed = mds.fit_transform(X)
        plt.scatter(X_transformed[:, 0],
                    X_transformed[:, 1],
                    s=5,
                    c=point_colors,
                    vmin=-1,
                    vmax=1,
                    cmap=plt.cm.coolwarm)

        plt.subplot(2, 1, 2)
        plt.title("t-SNE of %s" % dataset_name[:-4])
        tsne = TSNE(n_components=2)
        X_transformed = tsne.fit_transform(X)
        plt.scatter(X_transformed[:, 0],
                    X_transformed[:, 1],
                    s=5,
                    c=point_colors,
                    vmin=-1,
                    vmax=1,
                    cmap=plt.cm.coolwarm)

        figname = 'visualizations//' + dataset_name[:-4] + '.png'
        plt.savefig(figname, bbox_inches='tight')
        return

    # Plot either MDS or t-SNE
    if method == 'MDS':
        transformer = MDS(n_components=2, max_iter=100, n_init=1)
        title = "MDS of " + dataset_name + " dataset"
    else:
        transformer = TSNE(n_components=2)
        title = "t-SNE of " + dataset_name + " dataset"

    fig, plot = plt.subplots()
    fig.set_size_inches(8, 8)
    plt.prism()

    X_transformed = transformer.fit_transform(X)
    plot.scatter(X_transformed[:, 0],
                 X_transformed[:, 1],
                 c=signed_y * (ih + 0.2),
                 vmin=-1,
                 vmax=1,
                 cmap=plt.cm.coolwarm)
    plot.set_xticks(())
    plot.set_yticks(())

    plt.tight_layout()
    plt.suptitle(title, fontsize=20)

    plt.show()
    return
示例#20
0
# Keep only the samples whose label is one of the requested targets.
for i, label in enumerate(yy_train):
    if label in mytargets:
        X_train.append(XX_train[i])
        y_train.append(label)

num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
# Subsample for a first impression.
X_train = X_train[:num_samples_to_plot]
y_train = y_train[:num_samples_to_plot]

# Report the class balance. A single-argument print() call emits the same
# text on Python 2 and 3; the original print statement is a SyntaxError on
# Python 3.
for digit in mytargets:
    occurrences = sum(1 for lbl in y_train if lbl == digit)
    print("Digit %s appears  %s times" % (digit, occurrences))

transformer = Isomap(n_neighbors=10, n_components=2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

plt.tight_layout()
plt.suptitle("Isomap for MNIST digits ")

# Annotate every 200th sample only; otherwise the graph is cluttered with
# annotations.
points = zip(y_train, X_transformed[:, 0], X_transformed[:, 1])
for position, (label, x, y) in enumerate(points):
    if position % 200 == 0:
        plt.annotate(str(int(label)),
                     xy=(x, y),
                     color='black',
                     weight='normal',
                     size=10,
                     bbox=dict(boxstyle="round4,pad=.5", fc="0.8"))
# plt.savefig("mnist_pca.png")
示例#21
0
    return (basin, (bigEnough or not willBeConsideredAgain))


# State variable for _capture() for caching.
_capture._markedSoFar = {}

if __name__ == '__main__':
    # Demo: run the watershed transform on a smoothed crop of a satellite
    # image and show the input next to the resulting basins.
    #
    # Alternative synthetic input:
    #   from RandomImage import RandomImage
    #   np.random.seed(32)
    #   i = RandomImage(200, 50, 95, (1000, 1000))

    # NOTE(review): recent SciPy releases no longer ship ``ndimage.imread``;
    # confirm the pinned version before switching image readers.
    from scipy.ndimage import imread, gaussian_filter
    i = imread("/home/bvr/SatData/2011.07.01.12.00.png")
    # Floor division keeps the slice bound an integer on Python 3 as well.
    i = gaussian_filter(i, 1)[300:800, 300:i.shape[1] // 2]
    # Single-argument print() calls emit the same text on Python 2 and 3.
    print("Shape: %s   DType: %s" % (i.shape, i.dtype))

    globs, basins = Watershed_Transform(i, 5, 250)
    print("Glob Cnt: %d" % len(globs))
    # Profiling hook:
    #   import cProfile
    #   cProfile.run("Watershed_Transform(i, 40, 1000)", "watershed_lg5_profile")

    # Hide everything that is not part of a basin (ids <= 0).
    basins = np.ma.masked_array(basins, mask=(basins <= 0))
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(1, 2, 1)
    # Name the colormaps explicitly: plt.gray() / plt.prism() return None and
    # only act through the global default colormap, which pyplot may also
    # apply to an image that has already been drawn.
    ax.imshow(i, cmap='gray', interpolation='none')
    ax = fig.add_subplot(1, 2, 2)
    ax.imshow(basins, vmin=0, cmap='prism', interpolation='none')
    plt.show()