Example No. 1
from scipy.spatial.distance import euclidean
from sklearn.manifold import Isomap
import matplotlib.pyplot as plt

def plotTrajectory(dfile):
    fin = open(dfile)

    Vsteps = []
    Vtarget = fin.readline().strip().split()
    Vtarget = list(map(float, Vtarget))
    Vsteps.append(Vtarget)
    for l in fin:
        l = l.strip().split()
        if len(l) != 26: continue
        l = list(map(float, l))
        Vsteps.append(l)


    distances = [euclidean(a,Vsteps[0]) for a in Vsteps[1:]]
    print(len(distances))

    # note: colormaps clip inputs to [0, 1], so large distances saturate the scale
    _map = plt.get_cmap("winter")
    distcolors = _map(distances)


    dimred = Isomap(n_components=2)
    Vsteps = dimred.fit_transform(Vsteps)



    #objective vector
    plt.scatter(Vsteps[0,0],Vsteps[0,1],color='red',s=30,marker=(5,1))
    #Optimization steps
    plt.scatter(Vsteps[1:,0],Vsteps[1:,1],color=distcolors,alpha=0.5)

    plt.show()
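# Usage sketch (hypothetical file): plotTrajectory expects a whitespace-separated
# file whose first line is the 26-dim objective vector and whose remaining lines
# are 26-dim optimization steps.
import numpy as np

np.savetxt('traj_demo.txt', np.random.rand(40, 26))
plotTrajectory('traj_demo.txt')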
Example No. 2
def plot_3d(dataset):
    """TODO: Docstring for plot_3d.
    :returns: TODO

    """
    from mpl_toolkits.mplot3d import Axes3D

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    iso = Isomap(n_components=3)
    projected = iso.fit_transform(dataset.data.toarray())

    print('projected: sample: %s, feature: %s'
          % (projected.shape[0], projected.shape[1]))

    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target==i,:]
        cur = ax.scatter(points[:,0], points[:,1], points[:,2],
                          color=colors[i], edgecolor='k', lw=0.1,
                          vmin=0, vmax=len(dataset.target_names))
        all_scatter.append(cur)
    ax.legend(all_scatter, dataset.target_names,
               loc='lower left', scatterpoints=1)
    plt.savefig('isomap3d', dpi=500)
    plt.show()

    return True
Example No. 3
def isomap(similarity, euclid=False):
    if not euclid:
        print('podvod')  # Czech for 'fraud': warns that the input is not Euclidean
    model = Isomap(n_neighbors=15)
    result = model.fit_transform(similarity)

    return result.T
Example No. 4
def iso_map(data, target, target_names):
    iso = Isomap(n_components=2)
    data_projected = iso.fit_transform(data)
    formatter = plt.FuncFormatter(lambda i, *args: target_names[int(i)])
    plt.figure(figsize=(8, 8))
    plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target,
                edgecolor='none', alpha=0.5,
                cmap=plt.cm.get_cmap('rainbow', len(target_names)))
    plt.colorbar(ticks=sorted(list(set(target))), format=formatter)
    #plt.clim(-200, 0)
    return iso, data_projected
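# Usage sketch (assumed context): iso_map takes a feature matrix, integer labels,
# and one display name per label; sklearn's digits dataset fits that shape.
from sklearn.datasets import load_digits

digits = load_digits()
iso_model, proj = iso_map(digits.data, digits.target, [str(d) for d in range(10)])
plt.show()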
Example No. 5
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'tsne':
        xy = tsne.run_tsne(dmat, no_dims=n_components, perplexity=kwargs['perplexity'])
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0], kernel='precomputed', eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print('Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead')
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=n_components, method='standard')
        xy = lle.fit_transform(dmat)
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed', random_state=0, perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    elif method == 'umap':
        umapObj = umap.UMAP(n_components=n_components, metric='precomputed', **kwargs)
        xy = umapObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components], index=dmatDf.index, columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are usually
        small so it shouldn't make a big difference"""
        setattr(xyDf, 'explained_variance_', pcaObj.lambdas_[:n_components]/pcaObj.lambdas_[pcaObj.lambdas_>0].sum())
    return xyDf
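# Usage sketch (hypothetical data, assumes the module's own imports of np/pd):
# wrap a precomputed pairwise-distance matrix in a DataFrame and pick a method
# that accepts precomputed input (e.g. 'mds'); methods like 'isomap' treat the
# distance rows as plain features.
from scipy.spatial.distance import pdist, squareform

pts = np.random.rand(50, 5)
dmatDf = pd.DataFrame(squareform(pdist(pts)))
xyDf = embedDistanceMatrix(dmatDf, method='mds')
print(xyDf.shape)  # (50, 2)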
Example No. 6
def isomap(file_name, dimension, num_neighbors, label):
    balls = np.loadtxt(file_name)
    matrix = balls[:, 0:dimension]
    new_matrix = convert_angles_to_cos_sin(matrix)
    imap = Isomap(n_neighbors=num_neighbors, n_components=2, eigen_solver='auto', tol=0, max_iter=None,
                  path_method='auto', neighbors_algorithm='auto')
    transformed_matrix = imap.fit_transform(new_matrix)
    ball_coords = np.zeros((balls.shape[0], dimension+3))
    for i in range(balls.shape[0]):
        ball_coords[i, 0:dimension] = balls[i, 0:dimension].tolist()
        ball_coords[i, dimension:dimension+2] = transformed_matrix[i]
        if label == 'cluster':
            ball_coords[i, dimension+2] = balls[i, dimension].tolist()
        elif label == 'eq':
            ball_coords[i, dimension+2] = (-0.0019872041*300*np.log(abs(balls[i, dimension+1]))).tolist()
        elif label == 'committor':
            ball_coords[i, dimension+2] = (balls[i, dimension+2]/abs(balls[i, dimension+1])).tolist()
        print(' '.join([str(x) for x in ball_coords[i, :]]))
Example No. 7
def isomap(self, data):
    print('Isomap neighbours :', self.parameters["n_neighbors"])
    print('Isomap components, ie final number of coordinates :', self.k)

    k_means_n_clusters = self.parameters['k_means_n_clusters']
    isomap_params = dict(self.parameters)
    del isomap_params["k_means_n_clusters"]
    m = Isomap(neighbors_algorithm='kd_tree', **isomap_params)  # eigen_solver='auto', tol=0, path_method='auto'
    x = m.fit_transform(data)

    error = m.reconstruction_error()
    geod_d = m.dist_matrix_.flatten()
    new_euclid_d = cdist(x, x, metric='euclidean').flatten()
    corr = 1 - pearsonr(geod_d, new_euclid_d)[0]**2

    new_data = x
    print(self.parameters)
    # `params` is a module-level lookup table in the original project
    return self.batch_kmeans(new_data, parameters=dict(zip(params["mini-batchk-means"], [k_means_n_clusters, 1000, 500, 1000, 'k-means++', 5])))
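Example No. 8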
def outputBin(data, ctrlSize,nbPheno, lPheno, binSize, sigma, nbDim=2, nbNeighbours=20):
    m = Isomap(n_neighbors=nbNeighbours, n_components=nbDim, eigen_solver='auto', tol=0, max_iter=None, path_method='auto', neighbors_algorithm='kd_tree')
    D = m.fit_transform(data)
    ctrl = D[:ctrlSize]
    ctrlTree = KDTree(ctrl, leafsize=10)
    length=ctrlSize
    
    mini = np.amin(D, 0)
    maxi = np.amax(D, 0)
    nbPointsX = int((maxi[0]-mini[0])/float(binSize))+1
    nbPointsY = int((maxi[1]-mini[1])/float(binSize))+1

    result = np.zeros(shape=(nbPheno, nbPointsX, nbPointsY))
    denomCtrl = np.zeros(shape=(nbPointsX, nbPointsY))

    for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
        x = mini[0]+(pointX+0.5)*binSize
        y = mini[1]+(pointY+0.5)*binSize
        ctrldou, ctrli = ctrlTree.query((x, y), ctrlSize, distance_upper_bound=binSize/sqrt(2))
        if min(ctrldou) < 100:
            ctrlPoint = filter(lambda t: t[1] < ctrl.shape[0] and np.all(np.abs(ctrl[t[1]]-(x, y)) < (binSize/2.0, binSize/2.0)), zip(ctrldou, ctrli))
            for distance, cPoint in ctrlPoint:
                denomCtrl[pointX, pointY] += dist((x, y), ctrl[cPoint], sigma)
                
    for ifilm in range(nbPheno):
        print('film ', ifilm)
        pheno = D[length:length+lPheno[ifilm]]
        phenoTree = KDTree(pheno, leafsize=10)

        for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
            x = mini[0]+(pointX+0.5)*binSize
            y = mini[1]+(pointY+0.5)*binSize
            denom = denomCtrl[pointX, pointY]
            phenodou, phenoi = phenoTree.query((x, y), data.shape[0]-ctrlSize, distance_upper_bound=binSize/sqrt(2))
            if min(phenodou) < 100:
                phenoPoint = filter(lambda t: t[1] < pheno.shape[0] and np.all(np.abs(pheno[t[1]]-(x, y)) < (binSize/2.0, binSize/2.0)), zip(phenodou, phenoi))
                for distance, pPoint in phenoPoint:
                    local = dist((x, y), pheno[pPoint], sigma)
                    result[ifilm, pointX, pointY] += local
                    denom += local
            # normalize each bin by its total control + phenotype weight
            if denom > 0:
                result[ifilm, pointX, pointY] /= denom
        length += lPheno[ifilm]
    plotMovies('/media/lalil0u/New/workspace2/Tracking/images', result, 'pattern_b{}_s{}'.format(binSize, sigma))
    return result
Example No. 9
def plot_2d(dataset):
    """TODO: Docstring for plot_2d.
    :returns: TODO

    """
    iso = Isomap(n_components=2)
    projected = iso.fit_transform(dataset.data.toarray())

    print('projected: sample: %s, feature: %s'
          % (projected.shape[0], projected.shape[1]))

    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target==i,:]
        cur = plt.scatter(points[:,0], points[:,1], color=colors[i],
                          edgecolor='k', lw=0.6,
                          vmin=0, vmax=len(dataset.target_names))
        all_scatter.append(cur)
    plt.legend(all_scatter, dataset.target_names,
               loc='lower left', scatterpoints=1)
    plt.clim(-0.5, 9.5)
    plt.savefig('isomap2d', dpi=500)
Example No. 10
def embedDistanceMatrix(dist,method='tsne'):
    """MDS embedding of sequence distances in dist, returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca"""
    if method == 'tsne':
        xy = tsne.run_tsne(dist, no_dims=2)
        #xy=pytsne.run_tsne(adist,no_dims=2)
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=2)
        xy = isoObj.fit_transform(dist)
    elif method == 'mds':
        mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=15,
                           dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dist).embedding_
        rot = PCA(n_components=2)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=2)
        xy = pcaObj.fit_transform(1-dist)
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=2, kernel='precomputed')
        xy = pcaObj.fit_transform(1-dist)
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=2, method='standard')
        xy = lle.fit_transform(dist)
    return xy
Example No. 11
num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:num_samples_to_plot], y_train[:num_samples_to_plot]  # let's subsample a bit for a first impression

for digit in mytargets:
    instances = [i for i in y_train if i == digit]
    print "Digit", digit, "appears ", len(instances), "times"

transformer = Isomap(n_neighbors=10, n_components=2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits ")
for label, x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
    # Annotate every 1 out of 200 samples; otherwise the graph will be cluttered with annotations
    if count % 200 == 0:
        plt.annotate(str(int(label)),
                     xy=(x, y),
                     color='black',
                     weight='normal',
                     size=10)
    count += 1
Example No. 12
for i,s in enumerate(spectra):
    spec= s.spectrum
    wavsi = s.wavelength()
    intpol = spi.interp1d(wavsi,spec,bounds_error=False,fill_value = 0.)
    spec = intpol(wavs)
    spec/=spec.max()
    data[i]= spec

#print data

iso = Isomap(n_neighbors=k, n_components=n)

#iso.fit(data)

print "projecting and fitting: "
proj = iso.fit_transform(data)

print "proj.shape"
print proj.shape

fig,axes = plt.subplots(2,3)


print(proj[:, 0])
print(proj[:, 1])
print(proj)

for prop,nprop,ax in zip(properties,nproperites,axes.flatten()):
    ax.set_title(nprop)
    ax.scatter(proj[:,0],proj[:,1],c=prop)
Example No. 13
def main(session_key, config_file, segment_size, step_size):    
    # Get audiofilename
    audio_dir = "static/uploads/" + session_key + "/"
    for file_name in os.listdir(audio_dir):
        if file_name[0] != ".":
            audio_name = file_name
            break
    # Get full path
    audio_path = audio_dir + file_name

    # If mp3, convert to wav
    if audio_path[-3:] == "mp3":
        wav_audio = AudioSegment.from_mp3(audio_path)
        audio_path = audio_path[:-3] + "wav"  # set new audio_path
        wav_audio.export(audio_path, format="wav")
    
    # Get metadata
    loaded_sound = AudioSegment.from_wav(audio_path)
    audio_duration = len(loaded_sound)
    frame_rate = loaded_sound.frame_rate


    # If duration is longer than 1 hour, segment into chunks
    if audio_duration > 3600000:
        chunks = []
        chunk_start_time = 0
        while chunk_start_time * 1000 < audio_duration:
            subprocess.call(["sox", audio_path, audio_dir + str(int((chunk_start_time / 3600)+1)) + ".wav", "trim", str(chunk_start_time), "3600"])
            chunks.append(audio_dir + str(int((chunk_start_time / 3600)+1)) + ".wav")
            chunk_start_time += 3600
    else:
        chunks = [audio_path]


    # Create dir for output and set filenames
    output_dir = "static/data/" + session_key + "/"
    subprocess.call(["mkdir", output_dir])
    output_path = output_dir + audio_name.split(".")[0] + ".mfcc.htk"

    if config_file == "spectrogram":
        waveform = wavfile.read(audio_path)[1]
        print(frame_rate)
        print(segment_size)
        print(int(frame_rate*segment_size))
        f, t, Sxx = signal.spectrogram(waveform, fs=frame_rate, nperseg=int(frame_rate*(segment_size/10000)), noverlap=0)
        Sxx_transpose = Sxx.transpose()
        print("scipy shape: ", Sxx_transpose.shape)

        # Reduce dimensionality to 39 with svd
        svd = TruncatedSVD(n_components=39)
        result = svd.fit_transform(Sxx_transpose)
        print("scipy shape2: ", result.shape)
    else:
        # Prepend path to config file
        config_file = config_dir + config_file

        # Update config file with segment and step length, divided by 10000 to get seconds
        update_config(config_file, str(segment_size/10000), str(step_size/10000))

        # Run opensmile to output features in output dir
        subprocess.call([smilextract, "-C", config_file, "-I", audio_path, "-O", output_path])

        # Read file, and return formatted data
        htk_reader = HTKFile()
        htk_reader.load(output_path)
        result = np.array(htk_reader.data)
    
    # Concatenate ten vectors at a time, giving 39*10 dimensions per snippet
    new_result = []
    temp_list = []
    for vec in result:
        temp_list.append(vec)
        if len(temp_list) == 10:
            new_result.append(np.concatenate(tuple(temp_list), axis=0))
            temp_list = []
    result = np.array(new_result)
    
    # Run data through t-SNE
    tsne = TSNE(n_components=2, perplexity=25)#, random_state=None)
    Y1 = convert_range(tsne.fit_transform(result))
    print("t-SNE done")

    # Run data through PCA
    pca = PCA(n_components=2)
    Y2 = convert_range(pca.fit_transform(result))
    print("PCA done")

    # Run data through SOM
    som = True
    if som:
        print("SOM-grid-size: ", int(len(result)**0.5))
        mapsize = [int(len(result)**0.5), int(len(result)**0.5)]
        if mapsize[0] > 100:
            mapsize = [100, 100]
        som = sompy.SOMFactory.build(result, mapsize, mask=None, mapshape='planar', lattice='rect', normalization='var', initialization='pca', neighborhood='gaussian', training='batch', name='sompy')  # this will use the default parameters, but i can change the initialization and neighborhood methods
        som.train(n_job=1, verbose='info')  # verbose='debug' will print more, and verbose=None wont print anything
        som_output = np.array(np.array([np.array(np.unravel_index(int(bmu), (mapsize[0],mapsize[0]))) for bmu in som._bmu[0]]))
        Y3 = convert_range(som_output)
        print("SOM done")
    else:
        Y3 = convert_range(np.array([np.array([random.randint(-50, 50), random.randint(-50, 50)]) for i in range(len(Y2))]))

    # Run data through UMAP
    run_umap = True
    if run_umap:
        Y4 = convert_range(umap.UMAP().fit_transform(result))
        print("UMAP done")
    else:
        Y4 = convert_range(np.array([np.array([random.randint(-50, 50), random.randint(-50, 50)]) for i in range(len(Y2))]))

    # Run data through isomap
    IM = Isomap(n_components=2)
    Y5 = convert_range(IM.fit_transform(result))
    print("Isomap done")

    # Experiment with autoencoder, bad results so commented for now
    # Run data through autoencoder
    # ae = False
    # if ae:
    #     Y5 = convert_range(AE(result))
    # else:
    #     Y5 = convert_range(np.array([np.array([random.randint(-50, 50), random.randint(-50, 50)]) for i in range(len(Y2))]))
    # print("Autoencoder done")



    # K-means on raw features
    kmeans2 = KMeans(n_clusters=2, random_state=0).fit(result)
    print("kmeans2 done")
    kmeans3 = KMeans(n_clusters=3, random_state=0).fit(result)
    print("kmeans3 done")
    kmeans4 = KMeans(n_clusters=4, random_state=0).fit(result)
    print("kmeans4 done")
    kmeans5 = KMeans(n_clusters=5, random_state=0).fit(result)
    print("kmeans5 done")
    kmeans6 = KMeans(n_clusters=6, random_state=0).fit(result)
    print("kmeans6 done")
    kmeans7 = KMeans(n_clusters=7, random_state=0).fit(result)
    print("kmeans7 done")
    kmeans8 = KMeans(n_clusters=8, random_state=0).fit(result)
    print("kmeans8 done")
    kmeans20 = KMeans(n_clusters=20, random_state=0).fit(result)
    print("kmeans20 done")


    # Format t-SNE output to correct dictionary format
    data = []
    i = 0
    for coord1, coord2, coord3, coord4, coord5, cluster_index2, cluster_index3, cluster_index4, cluster_index5, cluster_index6, cluster_index7, cluster_index8, cluster_index20 in zip(Y1, Y2, Y3, Y4, Y5, kmeans2.labels_, kmeans3.labels_, kmeans4.labels_, kmeans5.labels_, kmeans6.labels_, kmeans7.labels_, kmeans8.labels_, kmeans20.labels_):
        data.append({
            "id":i, 
            "tsneX":float(coord1[0]), 
            "tsneY":float(coord1[1]), 
            "pcaX":float(coord2[0]), 
            "pcaY":float(coord2[1]), 
            "somX":float(coord3[0]), 
            "somY":float(coord3[1]), 
            "umapX":float(coord4[0]), 
            "umapY":float(coord4[1]), 
            "aeX":float(coord5[0]), 
            "aeY":float(coord5[1]), 
            "start":int(i*step_size), 
            "active":1, 
            "color":"black", 
            "kcolor2":color_dict[str(cluster_index2)], 
            "kcolor3":color_dict[str(cluster_index3)], 
            "kcolor4":color_dict[str(cluster_index4)], 
            "kcolor5":color_dict[str(cluster_index5)], 
            "kcolor6":color_dict[str(cluster_index6)], 
            "kcolor7":color_dict[str(cluster_index7)], 
            "kcolor8":color_dict[str(cluster_index8)],
            "kcolor20":color_dict[str(cluster_index20)]})
        #data.append({"id":i, "tsneX":random.randint(1,99), "tsneY":random.randint(1,99), "pcaX":random.randint(1,99), "pcaY":random.randint(1,99), "start":int(i*step_size), "active":1, "color":"black"})
        i+=1

    # Save data as csv to be able to load later
    keys = data[0].keys()
    with open(output_dir + "data.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

    # Save metadata as csv to be able to load later
    metadata = [{"audio_duration":audio_duration, "audio_path":audio_path, "segment_size":segment_size, "step_size":step_size, "chunks":",".join(chunks)}]
    keys = metadata[0].keys()
    with open(output_dir + "metadata.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(metadata)
Example No. 14
# copy from https://blog.csdn.net/qq_42797457/article/details/100675654

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import Isomap

iris = datasets.load_iris()
X = iris.data
y = iris.target

fig, ax = plt.subplots(1, 3, figsize=(15, 5))

for idx, neighbor in enumerate([2, 20, 100]):
    isomap = Isomap(n_components=2, n_neighbors=neighbor)
    new_X_isomap = isomap.fit_transform(X)

    ax[idx].scatter(new_X_isomap[:, 0], new_X_isomap[:, 1], c=y)
    ax[idx].set_title("Isomap (n_neighbors=%d)" % neighbor)

plt.show()

# use `isomap.transform(X)` to embed new samples (see the sketch below)
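# A minimal sketch of embedding unseen samples with the last fitted model
# from the loop above (the perturbed rows are hypothetical new data):
import numpy as np

new_X = X[:5] + np.random.normal(scale=0.01, size=(5, 4))
print(isomap.transform(new_X).shape)  # (5, 2)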
Example No. 15
class CardiotocographyMainFrame(Tk.Frame):
    def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, console):
        Tk.Frame.__init__(self, master)
        self.evaluator = evaluator
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.new_estimator = None
        self.console = console
        self.evaluator.load_data(x_train, y_train, x_test, y_test)
        self.evaluator.train()
        self.x_train_r = self.evaluator.reduce(x_train)  # dimensionality reduction of the features

        # 0. "Optimize" button ('优化' = Optimize)
        self.button_opt = Tk.Button(self, text="优化", command=self.optimize_parameter)
        self.button_opt.pack(side=Tk.TOP, anchor=Tk.E)
        self.label_tips = Tk.Label(self)
        self.label_tips.pack(side=Tk.TOP, anchor=Tk.E)

        # 1. Scatter plot
        frame_train = Tk.Frame(self)
        frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)
        self.figure_train = Figure(figsize=(5, 4), dpi=100)
        self.subplot_train = self.figure_train.add_subplot(111)
        self.subplot_train.set_title('Cardiotocography High-Dimension Data Visualization (21-dim)')
        self.figure_train.tight_layout()  # must be called after add_subplot, otherwise it crashes
        self.last_line = None

        self.tsne = Isomap(n_components=2, n_neighbors=10)  # the attribute is named "tsne" but holds an Isomap
        np.set_printoptions(suppress=True)
        x_train_r = self.tsne.fit_transform(x_train)
        self.subplot_train.scatter(x_train_r[:, 0], x_train_r[:, 1], c=y_train, cmap=plt.cm.get_cmap("Paired"))
        self.attach_figure(self.figure_train, frame_train)

        y_pred = self.evaluator.pipeline.predict(x_train)
        accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)

        self.console.output("[CTG] INIT MODEL: ", str(self.evaluator.pipeline.named_steps['clf']) + "\n")
        self.console.output("[CTG] INIT ACCURACY: ", str(accuracy) + "\n")

        # 2. Probability output boxes
        frame_prob = Tk.Frame(self)
        frame_prob.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        Tk.Label(frame_prob, text="prob").pack(side=Tk.LEFT)
        self.strvar_prob1 = Tk.StringVar()
        Tk.Label(frame_prob, text="1.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob1, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        self.strvar_prob2 = Tk.StringVar()
        Tk.Label(frame_prob, text="2.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob2, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        self.strvar_prob3 = Tk.StringVar()
        Tk.Label(frame_prob, text="3.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob3, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        # 3. Sliders
        frame_slides = Tk.Frame(self)
        frame_slides.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        canv = Tk.Canvas(frame_slides, relief=Tk.SUNKEN)
        vbar = Tk.Scrollbar(frame_slides, command=canv.yview)
        canv.config(scrollregion=(0, 0, 300, 1500))
        canv.config(yscrollcommand=vbar.set)
        vbar.pack(side=Tk.RIGHT, fill=Tk.Y)
        canv.pack(side=Tk.LEFT, expand=Tk.YES, fill=Tk.BOTH)
        feature_num = x_train.shape[1]
        self.slides = [None] * feature_num  # one slider per feature
        for i in range(feature_num):
            canv.create_window(60, (i + 1) * 40, window=Tk.Label(canv, text=str(i + 1) + ". "))
            min_x = np.min(x_train[:, i])
            max_x = np.max(x_train[:, i])
            self.slides[i] = Tk.Scale(canv, from_=min_x, to=max_x, resolution=(max_x - min_x) / 100.0,
                                      orient=Tk.HORIZONTAL, command=self.predict)
            canv.create_window(200, (i + 1) * 40, window=self.slides[i])

    # Compute class-membership probabilities from the current slider (feature) values
    def predict(self, trivial):
        feature_num = self.x_train.shape[1]
        x = np.arange(feature_num, dtype='f').reshape((1, feature_num))
        for i in range(feature_num):
            x[0, i] = float(self.slides[i].get())
        result = self.evaluator.predict(x)
        self.strvar_prob1.set("%.2f%%" % (result[0, 0] * 100))  # probability of no disease
        self.strvar_prob2.set("%.2f%%" % (result[0, 1] * 100))  # probability of a doubtful case
        self.strvar_prob3.set("%.2f%%" % (result[0, 2] * 100))  # probability of a confirmed case
        self.plot_point(self.subplot_train, self.tsne.transform(x))
        self.figure_train.canvas.draw()

    # Redraw the marked point
    def plot_point(self, subplot, x):
        if self.last_line is not None:
            self.last_line.remove()
            del self.last_line
        lines = subplot.plot(x[:, 0], x[:, 1], "ro", label="case")
        self.last_line = lines.pop(0)
        subplot.legend(loc='lower right')

    # Attach the figure to the frame
    @staticmethod
    def attach_figure(figure, frame):
        canvas = FigureCanvasTkAgg(figure, master=frame)  # embed the scatter plot in the UI
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        toolbar = NavigationToolbar2TkAgg(canvas, frame)  # embed the plot toolbar in the UI
        toolbar.update()
        canvas.tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)

    # Search for the optimal hyper-parameters
    def optimize_parameter(self):

        self.console.output("[CTG] OPTIMIZATION START...", "\n")

        # Cross-validated accuracy of the old (i.e. initial) model
        old_scores = cross_validation.cross_val_score(estimator=self.evaluator.pipeline, X=self.x_train, y=self.y_train,
                                                      scoring='accuracy',
                                                      cv=10, n_jobs=-1)
        old_score = np.mean(old_scores)

        # Best cross-validated accuracy among the candidate new models
        new_score = -1.0
        self.new_estimator = None
        for clf, param_grid in RandomParameterSettings.possible_models:
            self.console.output("[CTG] SEARCH MODEL:", str(clf) + "\n")
            estimator = Pipeline([('scl', StandardScaler()), ('pca', PCA()), ('clf', clf)])
            gs = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, scoring='accuracy', cv=10,
                                    n_jobs=-1)
            gs = gs.fit(self.x_train, self.y_train)
            if new_score < gs.best_score_:
                new_score = gs.best_score_
                self.new_estimator = gs.best_estimator_

        if new_score > old_score:
            self.label_tips.config(
                text='Found a new model with improvement: %.2f%%' % (100.0 * (new_score - old_score) / old_score))
            self.button_opt.config(text='应用', command=self.apply_new_estimator)  # '应用' = Apply
        else:
            self.label_tips.config(text="No better model founded.")

        self.console.output("[CTG] OPTIMIZATION COMPLETE !", "\n")
        self.console.output("[CTG] RESULT: ", "old_model_accuracy=%f, new_model_accuracy=%f, improvement=%.2f%%\n" % (
        old_score, new_score, (100.0 * (new_score - old_score) / old_score)) + "\n")

    def apply_new_estimator(self):
        self.console.output("[CTG] APPLY NEW MODEL:",
                            "old_model=%s \n new_model=%s\n" % (self.evaluator.pipeline, self.new_estimator))
        self.evaluator.pipeline = self.new_estimator
        self.label_tips.config(text="New model has been applied.")
Example No. 16
cells = opts.high // opts.step  # integer division: used as an array dimension
isomap_gmm_results = np.zeros((cells,opts.iters))

D = scale(X)

n_samples, n_features = D.shape
# chosen by hyperparam search in a separate test.
n_neighbors = 10

# For the specified number of principal components, do the clustering
dimension_list = range(opts.low, opts.high + 1, opts.step)
data_files = []
for i in dimension_list:
    index = (i // opts.step) - 1
    isomap = Isomap(n_neighbors=n_neighbors, n_components=i)
    X_iso = isomap.fit_transform(D)
     
    for j in range(0,opts.iters,1): 
        gaussmix = GMM(n_components=true_k, covariance_type='tied', n_init=10, n_iter=1000)
        gaussmix.fit(X_iso)
        gaussmix_labels = gaussmix.predict(X_iso)
        homog = metrics.homogeneity_score(labels[:,0], gaussmix_labels)
        print "Homogeneity: %0.3f" % homog
        test_result = {"Model": 'Isomap', "Dimension": i, "Homogeneity": homog, "Trial": j}
        index = pd.Index([0], name='rows')
        data_files.append(pd.DataFrame(data=test_result,index=index))
        
print "...Done"
print "...rbinding DataFrames"
master_df = data_files[0]
for i in xrange(1,len(data_files)):
Example No. 17
plt.show()


fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)

plt.show()


# ISOMAP

from sklearn.manifold import Isomap

iso = Isomap(n_components=3, n_neighbors=15)
fdata = iso.fit_transform(digits["data"])
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)

plt.show()


# LLE

from sklearn.manifold import LocallyLinearEmbedding

lle = LocallyLinearEmbedding(n_neighbors=15, n_components=3, method="modified")
fig = plt.figure()
fdata = lle.fit_transform(digits["data"])
Example No. 18
from __future__ import division
import sys
from sklearn.manifold import Isomap
from sklearn.decomposition import PCA
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb


data = np.genfromtxt('data012.txt', delimiter=',')
isomap = Isomap()
data_xformed = isomap.fit_transform(data)
# pca = PCA(n_components=2)
# data_xformed = pca.fit_transform(data)
print(data.shape)
print(data_xformed.shape)
c = [(1,0,0)]*1000+[(0,1,0)]*1000+[(1,1,0)]*1000
plt.figure()
plt.scatter(data_xformed[:,0], data_xformed[:,1], c=c)
plt.show()
quit()

train_data = np.genfromtxt('training.txt', delimiter=',')
isomap = Isomap(n_components=4)
train_xformed = isomap.fit_transform(train_data)
test_data = np.genfromtxt('testing.txt', delimiter=',')
test_xformed = isomap.transform(test_data)
Example No. 19
#03-03.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=15, n_components=3)
X_proj = iso.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels, trim_outliers=True)
Example No. 20
'''
X_pca = pca.fit_transform(T)
'''
# No, the accuracy levels off at the same value as before from 7 components onwards.

# If you are not, then forget about PCA entirely, unless you want to visualize your data. However, if you are able to get a higher score,
# then be *sure* to keep that figure in mind, and comment out all the PCA code.
# In the same spot, run Isomap on the data, before sending it to the train / test split. Manually experiment with every inclusive
# combination of n_neighbors between 2 and 5, and n_components between 4 and 6. Are you able to get a better accuracy?
from sklearn.manifold import Isomap

# You're going to have to write nested for loops that wrap around everything from here on down!
best_score = 0
for k in range(2, 6):
    for l in range(4, 7):
        iso = Isomap(n_neighbors = k, n_components = l)
        X_iso = iso.fit_transform(T)

        # Perform a train/test split. 30% test group size, with a random_state equal to 7.
        from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer releases
        X_train, X_test, y_train, y_test = train_test_split(X_iso, y, test_size = 0.3, random_state = 7)

        # Create a SVC classifier. Don't specify any parameters, just leave everything as default.
        # Fit it against your training data and then score your testing data.
        from sklearn.svm import SVC
        # Lines below are for the first lab question:
        '''
        model = SVC()
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(score)
        '''
Example No. 21
from sklearn.manifold import Isomap
from matplotlib import pyplot as pl

X=np.load("NormalizedHRVdata.npy")
print(X)

k=3
print "-"*10,"k=%d"%(k),"-"*10
km =KMeans(k)
km.fit(X)
print "Labels:"
reduced_data = PCA(n_components=2).fit_transform(X)
kmRed = KMeans(k)
kmRed.fit(reduced_data)
imap=Isomap()
isomap_data=imap.fit_transform(X)
kmIso = KMeans(k)
kmIso.fit(isomap_data)
print(km.labels_)
print(kmRed.labels_)
print(kmIso.labels_)
print("Silhouette Score")
print(metrics.silhouette_score(X, km.labels_, metric="euclidean"))
print(metrics.silhouette_score(X, kmRed.labels_, metric="euclidean"))
print(metrics.silhouette_score(X, kmIso.labels_, metric="euclidean"))

pl.subplot(1,2,1)

# Step size of the mesh. Decrease to increase the quality of the VQ.
h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].
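Example No. 22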
import pylab
import numpy
import pandas
import matplotlib.pyplot as mpyplot

# [0: 'CLASSICAL', 1: 'METAL', 2: 'HIPHOP', 3: 'DANCE', 4: 'JAZZ']
# [5:'FOLK', 6: 'SOUL', 7: 'ROCK', 8: 'POP', 9: 'BLUES']

col_input=['genre', 'year', 'col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8', 'col9', 'col10', 'col11', 'col12', 'col13', 'col14', 'col15', 'col16', 'col17', 'col18', 'col19', 'col20', 'col21', 'col22', 'col23', 'col24', 'col25', 'col26', 'col27', 'col28', 'col29', 'col30', 'col31', 'col32', 'col33', 'col34', 'col35', 'col36', 'col37', 'col38', 'col39', 'col40', 'col41', 'col42', 'col43', 'col44', 'col45', 'col46', 'col47', 'col48', 'col49', 'col50', 'col51', 'col52', 'col53', 'col54', 'col55', 'col56', 'col57', 'col58', 'col59', 'col60', 'col61', 'col62', 'col63', 'col64', 'col65', 'col66', 'col67', 'col68', 'col69', 'col70', 'col71', 'col72']
df_input = pandas.read_csv('pandas_output_missing_data_fixed.csv', header=None, delimiter = ",", names=col_input)

# range(2,74) means its goes from col 2 to col 73
df_input_data = df_input[list(range(2, 74))]
df_input_target = df_input[list(range(0, 1))]

colors = numpy.random.rand(len(df_input_target))

# Manifold PCA
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=10, n_components=2) # n_neighbors = num of classes = 10 genres , components optimal n=6
proj1 = iso.fit_transform(df_input_data)

# Fitted internals: the geodesic distance matrix and the kernel PCA object
print(iso.dist_matrix_)
print(iso.kernel_pca_)

# Plotting
mpyplot.figure(1)
p1 = mpyplot.scatter(proj1[:, 0], proj1[:, 1], c=colors)
mpyplot.colorbar(p1)
mpyplot.show(p1)

Example No. 23
def apply_ISOMap(proj_data, proj_weights=None):
    model = Isomap(n_neighbors=4, n_components=2)
    norm_data = normalize_columns(proj_data)
    result = model.fit_transform(norm_data.T)
    return result
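# `normalize_columns` is not shown in this example; a plausible stand-in
# (hypothetical) that scales each column to unit Euclidean norm:
import numpy as np

def normalize_columns(m):
    norms = np.linalg.norm(m, axis=0, keepdims=True)
    return m / np.where(norms == 0, 1, norms)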
Example No. 24
from sklearn.datasets import make_s_curve
X,y=make_s_curve(n_samples=1000)
from mpl_toolkits.mplot3d import Axes3D
ax=plt.axes(projection='3d')
ax.scatter3D(X[:,0],X[:,1],X[:,2],c=y)
ax.view_init(10,-60)
# This is a 2D dataset embedded in 3D, but it is embedded in such a way that
# PCA can't discover the underlying data orientation.
from sklearn import decomposition
X_pca = decomposition.PCA(n_components=2).fit_transform(X)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
# Manifold learning algorithms, however, available in the sklearn.manifold
# submodule, are able to recover the underlying 2-dimensional manifold:
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=15, n_components=2)
X_iso = iso.fit_transform(X)
plt.scatter(X_iso[:, 0], X_iso[:, 1], c=y)
"""
Exercise: Compare the results of Isomap and PCA on a 5-class subset of the 
digits dataset (load_digits(5))
Bonus: Also compare to TSNE, another popular manifold learning technique.
"""
from sklearn.datasets import load_digits
digits = load_digits(n_class=5)
X = digits.data
isomap = Isomap(n_neighbors=15, n_components=2)
X_trans = isomap.fit_transform(X)
print(X_trans.shape)
plt.scatter(X_trans[:, 0], X_trans[:, 1], c=digits.target)
# Another method
from sklearn.manifold import TSNE
#Traintestsplit---------------------------
from sklearn.model_selection import train_test_split  # cross_validation was removed in newer scikit-learn
from sklearn.metrics import confusion_matrix

X_train, X_test, Y_train, Y_test = train_test_split(X, y)
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)
#print(confusion_matrix(Y_test,Y_pred))

#Training on the digits-------------------------------------------------

from sklearn.manifold import Isomap
from sklearn.datasets import load_digits
digits = load_digits()
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(digits.data)
#print(data_projected.shape)

#plot the data transformed from 64 dim to 2 dim.
'''plt.scatter(data_projected[:,0],data_projected[:,1],c=digits.target,
            edgecolors="k", lw=.1, alpha=.5,s=10, cmap=plt.cm.get_cmap("nipy_spectral",10))
plt.colorbar(label="digit label", ticks=range(10))
plt.clim(-.5,9.5)'''
#plt.show()

#Classification of the digits------------------------------

Xtrain, Xtest, Ytrain, Ytest = train_test_split(digits.data,
                                                digits.target,
                                                random_state=2)
#print(Xtrain.shape, Xtest.shape)
Example No. 26
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import Isomap

resourceFolder = '../res/'

dataframe = pd.read_csv(resourceFolder + 'EnergyMix.csv')

df = dataframe.loc[:, ['Oil', 'Gas', 'Coal', 'Nuclear', 'Hydro']]

print(df)

imap = Isomap()
df_reduced = imap.fit_transform(df)
print(df_reduced)

plt.plot(df_reduced[:, 0], df_reduced[:, 1], '.')
for index, country in enumerate(dataframe["Country"]):
    plt.text(df_reduced[index, 0], df_reduced[index, 1], country)

plt.savefig('../doc/EnergyMix_Reduced.png')
plt.show()
Example No. 27

if __name__ == '__main__':
    pth = './data.txt'
    data, label = load_data(pth)
    # print(data,label)
    # PCA
    pca = PCA(n_components=2)
    pca_ = pca.fit_transform(data)
    visual(pca_, label, "PCA")
    # LDA
    lda = LinearDiscriminantAnalysis()
    lda_ = lda.fit_transform(data, label)
    visual(lda_, label, "LDA")
    # KPCA
    kpca = KernelPCA(n_components=2, kernel='rbf')
    kpca_ = kpca.fit_transform(data)
    visual(kpca_, label, "KPCA")
    # Isomap
    iso = Isomap(n_components=2)
    iso_ = iso.fit_transform(data)
    visual(iso_, label, "Isomap")
    # LLE
    lle = LocallyLinearEmbedding(n_components=2)
    lle_ = lle.fit_transform(data)
    visual(lle_, label, "LLE")
    # Laplacian Eigenmaps
    le = SpectralEmbedding(n_components=2)
    le_ = le.fit_transform(data)
    visual(le_, label, "Laplacian Eigenmaps")
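# `load_data` and `visual` are not shown above; minimal hypothetical stand-ins
# so the script can run end to end:
import numpy as np
import matplotlib.pyplot as plt

def load_data(path):
    arr = np.loadtxt(path)          # assumes whitespace-separated numeric rows
    return arr[:, :-1], arr[:, -1]  # features, with the label in the last column

def visual(X2d, label, title):
    plt.figure()
    plt.scatter(X2d[:, 0], X2d[:, 1], c=label)
    plt.title(title)
    plt.show()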
Example No. 28
transformed_pca = pd.DataFrame(transformed_pca)
transformed_pca.columns = ('PCA 1', 'PCA 2', 'PCA 3', 'PCA 4', 'PCA 5', 'PCA 6')

## TSNE feature extraction

tsne = TSNE(method='exact', n_components=6)
transformed_TSNE = tsne.fit_transform(data)

transformed_TSNE = pd.DataFrame(transformed_TSNE)
transformed_TSNE.columns = ('TSNE 1', 'TSNE 2', 'TSNE 3', 'TSNE 4', 'TSNE 5', 'TSNE 6')

## ISOMAP feature extraction

isomap = Isomap(n_neighbors=6, n_components=6)
transformed_isomap = isomap.fit_transform(data)

transformed_isomap = pd.DataFrame(transformed_isomap)
transformed_isomap.columns = ('ISOMAP 1', 'ISOMAP 2', 'ISOMAP 3',
                              'ISOMAP 4', 'ISOMAP 5', 'ISOMAP 6')

## Combining all features into one data frame

features = pd.concat([ pd.DataFrame(transformed_pca),pd.DataFrame(transformed_TSNE), 
                      pd.DataFrame(transformed_isomap)], axis=1) 

### STANDARDIZING THE FEATURES

st = StandardScaler()   
st.fit(features)
features = st.transform(features)
Example No. 29
    colors.append("r")

#
# TODO: Convert the list to a dataframe
#
# .. your code here ..

df = pd.DataFrame(samples)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to three components, using K=6 for your neighborhood size
#
# .. your code here ..
transformedIsomap = Isomap(n_neighbors=6, n_components=3)
transformedIsomap = transformedIsomap.fit_transform(df)

#
# TODO: Create a 2D Scatter plot to graph your manifold. You
# can use either 'o' or '.' as your marker. Graph the first two
# isomap components
#
# .. your code here ..

figure = plt.figure()
figure.add_subplot("211").scatter(transformedIsomap[:, 0],
                                  transformedIsomap[:, 1],
                                  c=colors)

#
# TODO: Create a 3D Scatter plot to graph your manifold. You
Example No. 30
def get_iso_net(net, neighbours, comps):
    embedding = Isomap(n_neighbors=neighbours, n_components=comps)
    net_transformed = embedding.fit_transform(net)
    return net_transformed
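Example No. 31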
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''): 
	"""
	Plot data transformed into two dimensions by PCA. 
	PCA transforms into a new embedding dimension such that 
	the first dimension contains the maximal variance and following 
	dimensions maximal remaining variance. 
	This shoudl spread the observed n-dimensional data maximal. This 
	is unsupervised and will not consider target values. 
	"""
	if (scale): 
		scaler = StandardScaler()
		X = scaler.fit_transform(X)

	if (normalize): 
		normalizer = Normalizer(norm='l2')
		X = normalizer.fit_transform(X)
		
	if (embedding == 'pca'):
		pca = PCA(n_components=2)
		X_transformed = pca.fit_transform(X)
	elif (embedding == 'isomap'):
		isomap = Isomap(n_components=2, n_neighbors=20)
		X_transformed = isomap.fit_transform(X)
	elif (embedding == 'lle'):
		lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
		X_transformed = lle.fit_transform(X)
	elif (embedding == 'tsne'):
		t_sne = TSNE(n_components=2)
		X_transformed = t_sne.fit_transform(X)
	elif (embedding == 'spectral'):
		se = SpectralEmbedding(n_components=2)
		X_transformed = se.fit_transform(X)
	elif (embedding == 'mds'):
		mds = MDS(n_components=2)
		X_transformed = mds.fit_transform(X)
	elif (embedding == 'gallery'):
		plt.figure(1)
		
		plt.subplot(231)
		plt.title('pca')
		X_t = PCA(n_components=2).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(232)
		plt.title('isomap')
		X_t = Isomap(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(233)
		plt.title('lle')
		X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(234)
		plt.title('tsne')
		X_t = TSNE().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(235)
		plt.title('spectral')
		X_t = SpectralEmbedding().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(236)
		plt.title('mds')
		X_t = MDS().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.suptitle('Gallery transforms ' + title)

		return plt
	else:
		raise ValueError("Choose one of: pca, isomap, lle, tsne, spectral, mds, gallery")

	plt.title(title + ' ' + embedding + ' plot')
	sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
	plt.colorbar(sc)
	return plt
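# Usage sketch (hypothetical data, assumes the module's own sklearn/matplotlib
# imports): render all six embeddings side by side on the digits dataset.
from sklearn.datasets import load_digits

digits = load_digits()
plot2d(digits.data, digits.target, embedding='gallery', title='digits').show()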
Example No. 32
    def plot2D_classification(self, query=None, colors=None, markers=['*', 'v', 'o', '+', '_', '.', ',']):

        X, y = self.__check_data_available()
        n_row, n_col = X.shape

        import matplotlib.pyplot as plt
        import matplotlib as mpl

        c_map = plt.cm.get_cmap("hsv", self._nb_clazz + 1)
        colors = dict((self._clazz[idx], c_map(idx)) for idx in range(0, self._nb_clazz)) \
            if colors is None else colors
        markers = dict((self._clazz[idx], markers[idx]) for idx in range(0, self._nb_clazz))

        def plot_constraints(lower, upper, _linestyle="solid"):
            plt.plot([lower[0], lower[0], upper[0], upper[0], lower[0]],
                     [lower[1], upper[1], upper[1], lower[1], lower[1]],
                     linestyle=_linestyle)
            plt.grid()

        def plot2D_scatter(X, y):
            for row in range(0, len(y)):
                plt.scatter(X[row, 0], X[row, 1], marker=markers[y[row]], c=colors[y[row]])

        def plot_ellipse(splot, mean, cov, color):
            from scipy import linalg

            v, w = linalg.eigh(cov)
            u = w[0] / linalg.norm(w[0])
            angle = np.arctan(u[1] / u[0])
            angle = 180 * angle / np.pi
            ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                                      180 + angle, facecolor="none",
                                      edgecolor=color,
                                      linewidth=2, zorder=2)
            ell.set_clip_box(splot.bbox)
            ell.set_alpha(0.9)
            splot.add_artist(ell)

        if n_col == 2:
            for clazz in self._clazz:
                post_mean_lower = self._mean_lower[clazz]
                post_mean_upper = self._mean_upper[clazz]
                plot_constraints(post_mean_lower, post_mean_upper)
                mean = self.get_mean_by_clazz(clazz)
                prior_mean_lower = mean - self.ell
                prior_mean_upper = mean + self.ell
                plot_constraints(prior_mean_lower, prior_mean_upper, _linestyle="dashed")

            if query is not None:
                ml_mean, ml_cov, ml_prob = self.fit_max_likelihood(query)
                plt.plot([query[0]], [query[1]], marker='h', markersize=5, color="black")
                _, _bounds = self.evaluate(query)
                for clazz in self._clazz:
                    plt.plot([ml_mean[clazz][0]], [ml_mean[clazz][1]], marker='o', markersize=5, color=colors[clazz])
                    _, est_mean_lower = _bounds[clazz]['inf']
                    _, est_mean_upper = _bounds[clazz]['sup']
                    plt.plot([est_mean_lower[0]], [est_mean_lower[1]], marker='x', markersize=4, color="black")
                    plt.plot([est_mean_upper[0]], [est_mean_upper[1]], marker='x', markersize=4, color="black")

            cov, inv, det = self.__cov_group_sample()
            s_plot = plt.subplot()
            for clazz in self._clazz:
                mean = self.get_mean_by_clazz(clazz)
                plot_ellipse(s_plot, mean, cov, colors[clazz])

        elif n_col > 2:
            if query is not None:
                inference, _ = self.evaluate(query)
                X = np.vstack([X, query])
                y = np.append(y, inference[0])

            from sklearn.manifold import Isomap
            iso = Isomap(n_components=2)
            projection = iso.fit_transform(X)
            X = np.c_[projection[:, 0], projection[:, 1]]

            if query is not None:
                color_instance = colors[inference[0]] if len(inference) == 1 else 'black'
                plt.plot([X[n_row, 0]], [X[n_row, 1]], color='red', marker='o', mfc=color_instance)
        else:
            raise Exception("Not implemented for one feature yet.")

        plot2D_scatter(X, y)
        plt.show()
Example No. 33
labels.remove('y')
X_raw = df[list(labels)]
X_train, _, _ = one_hot_dataframe(X_raw, [
    'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
    'month', 'poutcome'
], replace=True)
y_train = [1 if i == 'yes' else 0 for i in df.y]

reductions = []
pca = PCA(n_components=2)
reductions.append(pca.fit_transform(X_train, y_train))
lda = LDA(n_components=2)
reductions.append(lda.fit_transform(X_train, y_train))
isomap = Isomap(n_components=2)
reductions.append(isomap.fit_transform(X_train, y_train))
lle = LocallyLinearEmbedding(n_components=2, method='standard')
reductions.append(lle.fit_transform(X_train, y_train))

for reduced_X in reductions:
    plt.figure()
    red_x = []
    red_y = []
    blue_x = []
    blue_y = []
    green_x = []
    green_y = []

    for i in range(len(reduced_X)):
        if y_train[i] == 0:
            red_x.append(reduced_X[i][0])
Example No. 34
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
digits_isomap = iso.fit_transform(digits.data)

plt.figure(figsize=(10, 10))
plt.xlim(digits_isomap[:, 0].min(), digits_isomap[:, 0].max() + 1)
plt.ylim(digits_isomap[:, 1].min(), digits_isomap[:, 1].max() + 1)
for i in range(len(digits.data)):
    # actually plot the digits as text instead of using scatter
    plt.text(digits_isomap[i, 0], digits_isomap[i, 1], str(digits.target[i]),
             color = colors[digits.target[i]],
             fontdict={'weight': 'bold', 'size': 9})
Example No. 35
    def fit_transform(self, X):
        """
        计算降维结果
        :param X: 高维数据矩阵,每一行是一个高维数据点
        :return:
        """
        (n, m) = X.shape
        print(self.parameters)

        # Classical dimensionality-reduction methods
        if self.affinity == 'PCA':  # return the PCA embedding directly
            print('Classical method: PCA...')
            pca = PCA(n_components=self.n_components)
            return pca.fit_transform(X)
        elif self.affinity == 'MDS':  # return the MDS embedding directly
            print('Classical method: MDS...')
            mds = MDS(n_components=self.n_components)
            return mds.fit_transform(X)
        elif self.affinity == 'Isomap':  # return the Isomap embedding directly
            print('Classical method: Isomap...')
            iso = Isomap(n_components=self.n_components,
                         n_neighbors=self.parameters['n_neighbors'])
            return iso.fit_transform(X)
        elif self.affinity == 't-SNE':  # return the t-SNE embedding directly
            print('Classical method: t-SNE...')
            tsne = TSNE(n_components=self.n_components,
                        perplexity=self.parameters['perplexity'])
            return tsne.fit_transform(X)
        elif self.affinity == 'cTSNE':  # use the non-accelerated t-SNE implementation
            print('Classical method: classical t-SNE...')
            from ArtDR import tsne
            return tsne.tsne(X,
                             perplexity=self.parameters['perplexity'],
                             path=self.path,
                             config_str='t-SNE ')
        elif self.affinity == 'LLE':  # return the LLE embedding directly
            print('Classical method: LLE...')
            lle = LocallyLinearEmbedding(
                n_components=self.n_components,
                n_neighbors=self.parameters['n_neighbors'])
            return lle.fit_transform(X)
        elif self.affinity == 'geo-t-SNE':  # t-SNE based on geodesic distances
            print('Geodesic t-SNE...')
            gtsne = geoTsne(n_neighbors=self.parameters['n_neighbors'],
                            perplexity=self.parameters['perplexity'])
            return gtsne.fit_transform(X, n_components=self.n_components)

        if self.parameters['use_skeleton']:  # skeleton-point approach
            return self.skeleton_fit_transform(X)

        # Our own dimensionality-reduction method
        if self.parameters['neighborhood_type'] == 'iter':  # iterative neighborhood construction
            W = self.iter_affinity_matrix(X)
        else:
            W = self.affinity_matrix(X)  # our standard construction
        if self.frame == 'MDS':
            print('Using MDS frame...')
            mds = MDS(n_components=self.n_components,
                      dissimilarity='precomputed')
            Y = mds.fit_transform(W)
            return Y
        elif self.frame == 't-SNE':
            print('Using t-SNE frame...')
            Y = tsneFrame.tsne_plus(W,
                                    self.parameters['perplexity'],
                                    path=self.path,
                                    config_str=self.config_str)
            return Y
        elif self.frame == 't-SNE+':
            print('Using t-SNE framework in sklearn...')
            tsne = tsneFramePlus.tsnePlus(
                n_components=self.n_components,
                perplexity=self.parameters['perplexity'])
            Y = tsne.fit_transform(W)
            return Y
        else:
            print("Wrong frame name!")
            return
Example No. 36
threes_data = (threes_data - threes_data.min()) / \
    (threes_data.max() - threes_data.min())
n_neighbors = 5
n_components = 4

# 1. Apply LLE

lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                             n_components=n_components)
lle_data = lle.fit_transform(threes_data)
lle_df = pd.DataFrame(lle_data)
plot_three("LLE", lle_df, 0, 1, threes_df, 0.45)

# 2. Apply ISOMAP
iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
iso_data = iso.fit_transform(threes_data)
iso_df = pd.DataFrame(iso_data)
plot_three("Isomap", iso_df, 0, 1, threes_df, 0.45)


# 3. Use the Naive Bayes classifier to classify the dataset based on the projected 4-dimensional representations from LLE and ISOMAP.
df_data = df.values[:, 1: len(df.columns) - 1]
test_size = 0.3


def calc_mean_accuracy(data, threshold=0.00015, miniter=500):
    print("Diff threshold {}".format(thresh))
    i = 0
    scores = []
    mean_accuracy = 0
    gnb = GaussianNB()
Example No. 38
df = pd.DataFrame(X,columns=feat_cols)

df['y'] = y
df['label'] = df['y'].apply(lambda i: str(i))
X, y = None, None

df_subset = df
X = df_subset[feat_cols].values
y = df_subset['y']

#n_components = 50
from sklearn.manifold import Isomap
pca_50 = Isomap(n_neighbors=5, n_components=15)  # the "pca_50" name is kept from the commented-out PCA variant below

#pca_50 = PCA(n_components=n_components)
pca_result_50 = pca_50.fit_transform(X)
#print('Cumulative explained variation for %d principal components: %3.f' \
#  % (n_components, np.sum(pca_50.explained_variance_ratio_)))

X = pca_result_50
y = df['y']

def run_silhouhette_analysis():
  range_n_clusters = [2, 3, 4]

  for n_clusters in range_n_clusters:

    if PLT_ALL: 
      fig, (ax1, ax2) = plt.subplots(1, 2)
      fig.set_size_inches(18, 7)
      ax1.set_xlim([-0.1, 1])
                     
#-------------------------------------------
# RUN CLASSIFIER WITH ISOMAP IMPLEMENTATION
#-------------------------------------------
'''ISOMAP is so slow that the value of n_components was adjusted manually;
the process in fact did not successfully run on the full dataset, and various subsets
of the data were created to generate results demonstrating that ISOMAP is disadvantageous
here (a subsampling sketch follows this example)'''
                     
classifier_condition = "Random Forest, ISOMAP"

x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size = 0.20, random_state=5)

start = time.time()

embedding = Isomap(n_components=6)
x_train = embedding.fit_transform(x_train)
x_test = embedding.transform(x_test)  # reuse the fitted embedding rather than refitting on the test set

rfclassifier = RandomForestClassifier(n_estimators=500, random_state=5, criterion = 'gini')
classifier = OneVsRestClassifier(rfclassifier, n_jobs=-1)
classifier.fit(x_train, y_train)

prediction = classifier.predict(x_test)

end = time.time()

save_data[f"{classifier_condition}_n = 6"] = (model_evaluation("RF", "6", x_test, y_test, prediction, classifier, end-start, n_classes))

save_data.to_csv("Random_Forest_ISO_6.csv")
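# A common mitigation for Isomap's cost (not part of the original script):
# fit on a random subsample, then place every row with .transform(), which
# reuses the fitted neighbourhood graph instead of rebuilding it.
import numpy as np
from sklearn.manifold import Isomap

rng = np.random.default_rng(5)
x_demo = rng.random((5000, 50))                          # hypothetical data
idx = rng.choice(len(x_demo), size=1000, replace=False)  # hypothetical subsample size
emb = Isomap(n_components=6).fit(x_demo[idx])
x_low = emb.transform(x_demo)                            # embeds all 5000 rows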
Example No. 40
def testMahalanobisMushroom():
    np.random.seed(0)
    N = 5000
    X = np.random.rand(N, 2)
    Y = getMushroom(X)
    fn_ellipsoid = lambda idx, delta, n_points: getMushroomEllipsoid(X, idx, delta, n_points)
    res = getMahalanobisDists(Y, fn_ellipsoid, 0.001, 400, 2)
    gamma = res["gamma"]
    dMaxSqrCoeff=0.5

    """
    Using https://github.com/jmbr/diffusion-maps
    """
    c = plt.get_cmap('magma_r')
    C1 = c(np.array(np.round(255.0*X[:, 1]/np.max(X[:, 1])), dtype=np.int32))
    C1 = C1[:, 0:3]
    C2 = c(np.array(np.round(255.0*X[:, 0]/np.max(X[:, 0])), dtype=np.int32))
    C2 = C2[:, 0:3]
    
    t = dMaxSqrCoeff*np.max(gamma)*0.001
    tic = time.time()
    YM = getDiffusionMap(gamma, t, distance_matrix=True, neigs=6, thresh=1e-10)
    print("Elapsed Time: %.3g"%(time.time()-tic))

    embedding = Isomap(n_components=2)
    YIso = embedding.fit_transform(Y)

    plt.figure(figsize=(16, 8))
    plt.subplot(241)
    plt.scatter(X[:, 1], X[:, 0], c=C1)
    plt.axis('equal')
    plt.title("Domain, Colored by x")

    plt.subplot(245)
    plt.scatter(X[:, 1], X[:, 0], c=C2)
    plt.axis('equal')
    plt.title("Domain, Colored by y")

    plt.subplot(242)
    plt.scatter(Y[:, 0], Y[:, 1], c=C1)
    plt.axis('equal')
    plt.title("Mushroom, Colored by x")

    plt.subplot(246)
    plt.scatter(Y[:, 0], Y[:, 1], c=C2)
    plt.axis('equal')
    plt.title("Mushroom, Colored by y")

    plt.subplot(243)
    plt.scatter(YIso[:, 0], YIso[:, 1], c=C1)
    plt.axis('equal')
    plt.title("ISOMAP, Colored by x")

    plt.subplot(247)
    plt.scatter(YIso[:, 0], YIso[:, 1], c=C2)
    plt.axis('equal')
    plt.title("ISOMAP, Colored by y")

    plt.subplot(244)
    plt.scatter(YM[:, 0], YM[:, 1], c=C1)
    plt.axis('equal')
    plt.title("Mahalanobis, Colored by x")

    plt.subplot(248)
    plt.scatter(YM[:, 0], YM[:, 1], c=C2)
    plt.axis('equal')
    plt.title("Mahalanobis, Colored by y")
    plt.savefig("Mushroom.png", bbox_inches='tight')
Exemplo n.º 41
0
try:
    data_res = np.load('feature_res.npz')
    pca_data = data_res['pca_data']
    tsne_data = data_res['tsne_data']
    iso_data = data_res['iso_data']
except IOError:
    data = images_as_matrix()
    pca = PCA(n_components=6)
    pca_data = pca.fit_transform(data)

    tsne = TSNE(n_components=6, method='exact')
    tsne_data = tsne.fit_transform(data)

    iso = Isomap(n_components=6)
    iso_data = iso.fit_transform(data)

    np.savez('feature_res.npz',
             pca_data=pca_data,
             tsne_data=tsne_data,
             iso_data=iso_data)

data_labels = np.loadtxt('labels.txt', delimiter=',')
stacked_features = np.concatenate((pca_data, tsne_data, iso_data), axis=1)
stacked_f, stacked_prob = f_classif(stacked_features[data_labels[:, 1] > 0, :],
                                    data_labels[data_labels[:, 1] > 0, 1])

plt.figure()
plt.bar(range(18),
        stacked_f,
        width=.2)  # remaining bar-style arguments were cut off in the source
'''
X_pca = pca.fit_transform(T)
'''
# No, the accuracy levels off at the same value as before from 7 components onwards.

# If you are not, then forget about PCA entirely, unless you want to visualize your data. However, if you are able to get a higher score,
# then be *sure* to keep that figure in mind, and comment out all the PCA code.
# In the same spot, run Isomap on the data before sending it to the train / test split. Manually experiment with every inclusive
# combination of n_neighbors between 2 and 5, and n_components between 4 and 6. Are you able to get a better accuracy?
from sklearn.manifold import Isomap

# You're going to have to write nested for loops that wrap around everything from here on down!
best_score = 0
for k in range(2, 6):
    for l in range(4, 7):
        iso = Isomap(n_neighbors=k, n_components=l)
        X_iso = iso.fit_transform(T)

        # Perform a train/test split. 30% test group size, with a random_state equal to 7.
        from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer scikit-learn
        X_train, X_test, y_train, y_test = train_test_split(X_iso,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=7)

        # Create a SVC classifier. Don't specify any parameters, just leave everything as default.
        # Fit it against your training data and then score your testing data.
        from sklearn.svm import SVC
        # Lines below are for the first lab question:
        '''
        model = SVC()
        model.fit(X_train, y_train)
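        '''
        # Illustrative completion (not recovered original code): close the
        # commented-out block above and continue the nested loops by scoring
        # the default SVC, tracking the best combination as the lab describes.
        model = SVC()
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        if score > best_score:
            best_score = score
            print("New best score:", score, "n_neighbors =", k, "n_components =", l)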
Exemplo n.º 43
0
newlabels=np.array(newlabels)

features=features.transpose(1,2,3,0)
features=np.reshape(features,(len(features),4,32,64))
print(features.shape)

'''
feats=fs.mutual_info_classif(features,newlabels,n_neighbors=5,random_state=0)

max_indices=sorted(range(len(feats)), key=lambda i: feats[i])[-64:]      #picking max 64 features
print(len(max_indices))


features=np.reshape(features,(len(features),-1))
newfeatures=[]
for f in features:
    newfeatures.append(f[max_indices])

features=np.array(newfeatures)
'''
features=np.reshape(features,(len(features),-1))
print(features.shape)

iso = Isomap(n_components=10, max_iter=60000, n_jobs=-1)

X_embedded = iso.fit_transform(features)

print(X_embedded.shape)

with open('../Manifold_features/isomap', 'wb') as fp:
    pickle.dump(X_embedded, fp)
Exemplo n.º 44
0
    X_train.append(XX_train[i])
    y_train.append(yy_train[i])
num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:num_samples_to_plot], y_train[:num_samples_to_plot]  # lets subsample a bit for a first impression

for digit in mytargets:
  instances = [i for i in y_train if i == digit]
  print("Digit", digit, "appears", len(instances), "times")

transformer = Isomap(n_neighbors = 10, n_components = 2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits ")
for label , x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
# Let's annotate every 1 out of 200 samples, otherwise the graph will be cluttered with annotations
  if count % 200 == 0:
    plt.annotate(str(int(label)), xy=(x, y), color='black', weight='normal', size=10,
                 bbox=dict(boxstyle="round4,pad=.5", fc="0.8"))
  count = count + 1
#plt.savefig("mnist_pca.png")
plt.show()
Exemplo n.º 45
0
def testMahalanobisCircle():
    dMaxSqrCoeff = 1.0
    np.random.seed(0)
    N = 1000
    t = np.linspace(0, 1, N+1)[0:N]
    t *= 2*np.pi
    Y = getPinchedCircleParam(t)
    fn_ellipsoid = lambda idx, delta, n_points: getPinchedCircleEllipsoid(t, idx, delta, n_points)
    np.random.seed(2)
    res = getMahalanobisDists(Y, fn_ellipsoid, delta=0.1, n_points=100, \
                                rank=1, maxeigs=2, jacfac=10)
    gamma = res["gamma"]
    mask = res["mask"]


    ## Step 1: Show the effect of the mask
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.imshow(res["DSqr"])
    plt.title("Original")
    plt.subplot(222)
    plt.imshow(res["gamma"])
    plt.title("Full Mahalanobis")
    plt.subplot(223)
    plt.imshow(mask)
    plt.title("Mask")
    plt.subplot(224)
    D = np.array(gamma)
    D[mask == 0] = np.inf
    plt.imshow(D)
    plt.title("Masked Mahalanobis")
    plt.savefig("PinchedCircle_Mask.png", bbox_inches='tight')


    c = plt.get_cmap('magma_r')
    C1 = c(np.array(np.round(255.0*t/np.max(t)), dtype=np.int32))
    C1 = C1[:, 0:3]
    t = dMaxSqrCoeff*np.max(gamma)*0.001
    tic = time.time()
    YMask = getDiffusionMap(gamma, t, mask=mask, distance_matrix=True, neigs=6, thresh=1e-10)
    YNoMask = getDiffusionMap(gamma, t, distance_matrix=True, neigs=6, thresh=1e-10)
    print("Elapsed Time Diffusion Maps: %.3g"%(time.time()-tic))
    
    embedding = Isomap(n_components=2)
    YIso = embedding.fit_transform(Y)

    plt.figure(figsize=(8, 8))

    plt.subplot(221)
    plt.scatter(Y[:, 0], Y[:, 1], c=C1)
    plt.axis('equal')
    plt.title("Warped, Colored by t")

    plt.subplot(222)
    plt.scatter(YIso[:, 0], YIso[:, 1], c=C1)
    plt.axis('equal')
    plt.title("ISOMAP, Colored by t")

    plt.subplot(223)
    plt.scatter(YNoMask[:, 0], YNoMask[:, 1], c=C1)
    plt.axis('equal')
    plt.title("Mahalanobis, Colored by t")

    plt.subplot(224)
    plt.scatter(YMask[:, 0], YMask[:, 1], c=C1)
    plt.axis('equal')
    plt.title("Masked Mahalanobis, Colored by t")

    plt.savefig("PinchedCircle.png", bbox_inches='tight')
Exemplo n.º 46
0
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

pca = PCA(n_components=2)
isomap = Isomap(n_components=2)
lle = LocallyLinearEmbedding(n_components=2)
data = np.genfromtxt('data01_small.txt', delimiter=',')
pca_xform = pca.fit_transform(data)
isomap_xform = isomap.fit_transform(data)
lle_xform = lle.fit_transform(data)
label = [0]*100+[1]*100
rgbs = [(0.5,0,0), (0,0.5,0)]


plt.figure()
xs = pca_xform[:,0]
ys = pca_xform[:,1]
ax = plt.subplot(111)
for i in xrange(len(xs)):
	ax.text(xs[i], ys[i], str(label[i]), color=rgbs[label[i]], fontdict={'weight': 'bold', 'size': 9})
t = (max(xs)-min(xs))*0.1
ax.axis([min(xs)-t, max(xs)+t, min(ys)-t, max(ys)+t])
plt.xticks([]), plt.yticks([])
plt.title('PCA')
Exemplo n.º 47
0
def dimensionality_reduction(df_sel_data, n_comp):
    print("[INFO] feature reduction of " + str(len(df_sel_data[0])) + " for " +
          str(n_comp))
    iso_map = Isomap(n_components=n_comp)
    sel_data = iso_map.fit_transform(pd.DataFrame(df_sel_data))
    return sel_data
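
# Illustrative usage of the helper above (array shape and n_comp are assumed
# values, not from the original):
#
# import numpy as np
# sel = dimensionality_reduction(np.random.rand(100, 10), n_comp=3)
# sel.shape  # -> (100, 3)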
Exemplo n.º 48
0
class Project(Standardize.Standard):
    '''
    
    The Project class projects data into a new feature space using linear and non-linear transformation methods.
    
    The currently supported projections are PCA, UMAP, t-SNE, PHATE, and Isomap.
    
    '''
    def __init__(self, standard_method=None):
        '''
        The class takes as input the valid standardization methods from the Standard class. Valid standardization 
        methods include:
        
            - 'standardize' = Mean center the data and scale by the standard deviation
            
            - 'center'      = Mean center the data
            
            - 'min_max'     = Scale the data to between 0 and 1
        
        '''

        self.standard_method = standard_method

        self.pca_fitted = False

        self.umap_fitted = False

        self.standard_data = None

        self.standard_obj = None

        self.project_obj = None

        self.project_data = None

        super().__init__(standard_method)

    def reset_params(self):
        '''
        
        Reset the standardization and projection parameters to their default values
        
        '''

        self.standard_fitted = False

        self.pca_fitted = False

        self.standard_data = None

        self.standard_obj = None

        self.project_obj = None

        self.project_data = None

        return self

    def fit_pca(self, x, n_comps=None):
        '''
        
        Fit a pca object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the pca object. If a standardization method is specified
                 then x is standardized prior to fitting the pca object. 
            
            - n_comps: int, default None, n_comps <= min( n_features, n_observations ) 
                       Specifies the number of principal components to fit to the dataset
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

        assert n_comps <= min(
            x.shape
        ), 'n_comps must be less than or equal to the minimum element of x.shape'

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = PCA(n_components=n_comps).fit(x)

        self.pca_fitted = True

        return self.project_obj

    def transform_pca(self, x=None, return_data=False):
        '''
        
        Project a dataset using the pca object fitted in the fit_pca() method: 
        
            - x: Array or dataframe, default = None, optional  
                 Contains the data to be transformed by the pca object. If x is None then the data used to fit 
                 the pca object in fit_pca() will be used in transform_pca(). 
                 
                 If x is not None then the fitted parameters of the pca object will be used to transform x. 
                 Furthermore, if a standardization method is specified and x is not None then the parameters of
                 the standardization object fitted in fit_pca() will be used to standardize x before projecting 
                 the data.
            
            - return_data: bool, default = False
                 If True the function will return the projected dataset as a numpy array
        
        '''

        assert self.pca_fitted == True

        if self.standard_method != None:

            if type(x) != type(None):

                x = self.transform_standard(x, return_data=True)

            else:

                x = self.standard_data

        self.project_data = self.project_obj.transform(x)

        if return_data == True:
            return self.project_data

    def fit_transform_pca(self, x, n_comps=None, return_data=False):
        '''
        
        Fit a pca object and project the dataset using the fitted pca object with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the pca object. If a standardization method is specified
                 then x is standardized prior to fitting the pca object. 
            
            - n_comps: int, default None, n_comps <= min( n_features, n_observations ) 
                       Specifies the number of principal components to fit to the dataset
                        
            - return_data: bool, default = False
                 If True the function will return the projected dataset as a numpy array
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

        assert n_comps <= min(
            x.shape
        ), 'n_comps must be less than or equal to the minimum element of x.shape'

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = PCA(n_components=n_comps).fit(x)

        self.project_data = self.project_obj.transform(x)

        self.pca_fitted = True

        if return_data == True:
            return self.project_data

    def fit_umap(self, x, n_comps=None, n_neighbors=50):
        '''
        
        Fit a UMAP object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the UMAP object. If a standardization method is specified
                 then x is standardized prior to fitting the UMAP object. 
            
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations, n_neighbors ) 
                Specifies the number of UMAP components to fit to the dataset
            
            - n_neighbors: int, default = 50
                This parameter controls how UMAP balances local versus global structure in the data. 
                It does this by constraining the size of the local neighborhood UMAP will look at when 
                attempting to learn the manifold structure of the data.
        
        
        Reference to the authors of UMAP: 
        McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction, 
        ArXiv e-prints 1802.03426, 2018
        
        '''

        if n_comps == None:

            n_comps = n_neighbors

        assert n_comps <= n_neighbors, 'n_comps must be less than or equal to n_neighbors'

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = umap.UMAP(n_neighbors=n_neighbors,
                                     min_dist=0.5,
                                     n_components=n_comps).fit(x)

        self.umap_fitted = True

        return self.project_obj

    def transform_umap(self, x=None, return_data=False):
        '''
        
        Project a dataset using the UMAP object fitted in the fit_umap() method: 
        
            - x: Array or dataframe, default = None, optional  
                 Contains the data to be transformed by the fitted umap object. If x is None then the data used to fit 
                 the umap object in fit_umap() will be used in transform_umap(). 
                 
                 If x is not None then the fitted parameters of the umap object will be used to transform x. 
                 Furthermore, if a standardization method is specified and x is not None then the parameters of
                 the standardization object fitted in fit_umap() will be used to standardize x before projecting 
                 the data.
            
            - return_data: bool, default = False
                 If True the function will return the projected dataset as a numpy array
        
        '''

        assert self.umap_fitted == True, 'No UMAP object has been fitted'

        if self.standard_method != None:

            if type(x) != type(None):

                x = self.transform_standard(x, return_data=True)

            else:

                x = self.standard_data

        self.project_data = self.project_obj.transform(x)

        if return_data == True:
            return self.project_data

    def fit_transform_umap(self,
                           x,
                           n_comps=None,
                           n_neighbors=50,
                           return_data=False):
        '''
        
        Fit a UMAP object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the UMAP object. If a standardization method is specified
                 then x is standardized prior to fitting the UMAP object. 
            
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations, n_neighbors ) 
                Specifies the number of UMAP components to fit to the dataset
            
            - n_neighbors: int, default = 50
                This parameter controls how UMAP balances local versus global structure in the data. 
                It does this by constraining the size of the local neighborhood UMAP will look at when 
                attempting to learn the manifold structure of the data.
                
            - return_data: bool, default = False
                If True the function will return the projected dataset as a numpy array
        
        
        Reference to the authors of UMAP: 
        McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction, 
        ArXiv e-prints 1802.03426, 2018
        
        '''

        if n_comps == None:

            n_comps = n_neighbors

        assert n_comps <= n_neighbors, 'n_comps must be less than or equal to n_neighbors'

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = umap.UMAP(n_neighbors=n_neighbors,
                                     min_dist=0.5,
                                     n_components=n_comps).fit(x)

        self.project_data = self.project_obj.transform(x)

        self.umap_fitted = True

        if return_data == True:
            return self.project_data

    def fit_transform_tsne(self, x, n_comps=None, return_data=False):
        '''

        Fit a TSNE object to a dataset with the inputs: 

            - x: Array or dataframe 
                 Contains the data to be fitted by the TSNE object. If a standardization method is specified
                 then x is standardized prior to fitting the TSNE object. 

            - n_comps: int, default = None, n_comps <= 3 
                Specifies the number of TSNE components to fit to the dataset

            - return_data: bool, default = False
                If True the function will return the projected dataset as a numpy array


        Reference to the authors of t-SNE: 
        van der Maaten, L, Hinton, G, Visualizing Data using t-SNE, 
        Journal of Machine Learning Research 9:2579-2605, 2008

        '''

        if n_comps == None:

            n_comps = 3

        assert n_comps <= 3, 'n_comps must be less than or equal to 3'

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = TSNE(n_components=n_comps, n_iter=1000)

        self.project_data = self.project_obj.fit_transform(x)

        self.tsne_fitted = True

        if return_data == True:
            return self.project_data

    def fit_phate(self,
                  x,
                  n_comps=None,
                  knn=5,
                  decay=40,
                  n_landmark=2000,
                  t='auto',
                  gamma=1,
                  n_pca=100,
                  mds_solver='sgd',
                  knn_dist='euclidean',
                  mds_dist='euclidean',
                  mds='metric',
                  n_jobs=1,
                  random_state=123,
                  verbose=1):
        '''
        
        Fit a PHATE object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the PHATE object. If a standardization method is specified
                 then x is standardized prior to fitting the PHATE object. 
            
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations, n_landmark ) 
                Specifies the number of PHATE components to fit to the dataset
        
        
        Reference to the authors of PHATE: 
        Moon KR, van Dijk D, Zheng W, et al. (2017), PHATE: A Dimensionality Reduction Method for 
        Visualizing Trajectory Structures in High-Dimensional Biological Data, BioRxiv.
        
        Documentation: https://phate.readthedocs.io/en/stable/api.html#id2
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

            n_comps = min((n_comps, n_landmark))

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = phate.PHATE(n_components=n_comps,
                                       knn=knn,
                                       decay=decay,
                                       n_landmark=n_landmark,
                                       t=t,
                                       gamma=gamma,
                                       n_pca=n_pca,
                                       mds_solver=mds_solver,
                                       knn_dist=knn_dist,
                                       mds_dist=mds_dist,
                                       mds=mds,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       verbose=verbose).fit(x)

        self.phate_fitted = True

        return self.project_obj

    def transform_phate(self, x=None, return_data=False):
        '''
        
        Project a dataset using the PHATE object fitted in the fit_phate() method: 
        
            - x: Array or dataframe, default = None, optional  
                 Contains the data to be transformed by the fitted PHATE object. If x is None then the data used to fit 
                 the PHATE object in fit_phate() will be used in transform_phate(). 
                 
                 If x is not None then the fitted parameters of the PHATE object will be used to transform x. 
                 Furthermore, if a standardization method is specified and x is not None then the parameters of
                 the standardization object fitted in fit_phate() will be used to standardize x before projecting 
                 the data.
            
            - return_data: bool, default = False
                 If True the function will return the projected dataset as a numpy array
        
        '''

        assert self.phate_fitted == True, 'No PHATE object has been fitted'

        if self.standard_method != None:

            if type(x) != type(None):

                x = self.transform_standard(x, return_data=True)

            else:

                x = self.standard_data

        self.project_data = self.project_obj.transform(x)

        if return_data == True:
            return self.project_data

    def fit_transform_phate(self,
                            x,
                            n_comps=None,
                            knn=5,
                            decay=40,
                            n_landmark=2000,
                            t='auto',
                            gamma=1,
                            n_pca=100,
                            mds_solver='sgd',
                            knn_dist='euclidean',
                            mds_dist='euclidean',
                            mds='metric',
                            n_jobs=1,
                            random_state=123,
                            verbose=1,
                            return_data=False):
        '''
        
        Fit a PHATE object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the PHATE object. If a standardization method is specified
                 then x is standardized prior to fitting the PHATE object. 
            
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations, n_landmark ) 
                Specifies the number of PHATE components to fit to the dataset
                
            - return_data: bool, default = False
                If True the function will return the projected dataset as a numpy array
        
        
        Reference to the authors of PHATE: 
        Moon KR, van Dijk D, Zheng W, et al. (2017), PHATE: A Dimensionality Reduction Method for 
        Visualizing Trajectory Structures in High-Dimensional Biological Data, BioRxiv.
        
        Documentation: https://phate.readthedocs.io/en/stable/api.html#id2
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

            n_comps = min((n_comps, n_landmark))

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = phate.PHATE(n_components=n_comps,
                                       knn=knn,
                                       decay=decay,
                                       n_landmark=n_landmark,
                                       t=t,
                                       gamma=gamma,
                                       n_pca=n_pca,
                                       mds_solver=mds_solver,
                                       knn_dist=knn_dist,
                                       mds_dist=mds_dist,
                                       mds=mds,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       verbose=verbose).fit(x)

        self.project_data = self.project_obj.transform(x)

        self.phate_fitted = True

        if return_data == True:
            return self.project_data

    def fit_isomap(self, x, n_comps=None, n_neighbors=5):
        '''
        
        Fit an Isomap object to a dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the Isomap object. If a standardization method is specified
                 then x is standardized prior to fitting the Isomap object. 
            
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations ) 
                Specifies the number of Isomap components to fit to the dataset
        
        
        Reference to the authors of Isomap: 
        Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric framework for nonlinear 
        dimensionality reduction. Science 290 (5500)
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = Isomap(n_components=n_comps,
                                  n_neighbors=n_neighbors).fit(x)

        self.isomap_fitted = True

        return self.project_obj

    def transform_isomap(self, x=None, return_data=False):
        '''
        
        Project a dataset using the Isomap object fitted in the fit_isomap() method: 
        
            - x: Array or dataframe, default = None, optional  
                 Contains the data to be transformed by the fitted Isomap object. If x is None then the data used to fit 
                 the Isomap object in fit_isomap() will be used in transform_isomap(). 
                 
                 If x is not None then the fitted parameters of the Isomap object will be used to transform x. 
                 Furthermore, if a standardization method is specified and x is not None then the parameters of
                 the standardization object fitted in fit_isomap() will be used to standardize x before projecting 
                 the data.
            
            - return_data: bool, default = False
                 If True the function will return the projected dataset as a numpy array
        
        '''

        assert self.isomap_fitted == True, 'No Isomap object has been fitted'

        if self.standard_method != None:

            if type(x) != type(None):

                x = self.transform_standard(x, return_data=True)

            else:

                x = self.standard_data

        self.project_data = self.project_obj.transform(x)

        if return_data == True:
            return self.project_data

    def fit_transform_isomap(self,
                             x,
                             n_comps=None,
                             n_neighbors=5,
                             return_data=False):
        '''
        
        Fit an Isomap object to a dataset and project the dataset with the inputs: 
        
            - x: Array or dataframe 
                 Contains the data to be fitted by the Isomap object. If a standardization method is specified
                 then x is standardized prior to fitting the Isomap object. 
        
            - n_comps: int, default = None, n_comps <= min( n_features, n_observations ) 
                        Specifies the number of Isomap components to fit to the dataset
            
            - return_data: bool, default = False
                            If True the function will return the projected dataset as a numpy array
        
        
        Reference to the authors of Isomap: 
        Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric framework for nonlinear 
        dimensionality reduction. Science 290 (5500)
        
        '''

        if n_comps == None:

            n_comps = min(x.shape)

        if self.standard_method != None:

            x = self.fit_transform_standard(x, return_data=True)

        self.project_obj = Isomap(n_components=n_comps,
                                  n_neighbors=n_neighbors).fit(x)

        self.project_data = self.project_obj.transform(x)

        self.isomap_fitted = True

        if return_data == True:
            return self.project_data
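
# Illustrative usage of the Project class above (hypothetical data; assumes the
# Standardize base module is importable):
#
# import numpy as np
# x = np.random.rand(200, 30)                                    # 200 samples, 30 features
# proj = Project(standard_method='min_max')
# pca_scores = proj.fit_transform_pca(x, n_comps=5, return_data=True)       # (200, 5)
# iso_embed = proj.fit_transform_isomap(x, n_comps=2, n_neighbors=10,
#                                       return_data=True)                   # (200, 2)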
Exemplo n.º 49
0
#
# The format is: Plot2D(T, title, x, y, num_to_plot=40):
# T is your transformed data, NDArray.
# title is your chart title
# x is the principal component you want displayed on the x-axis; can be 0 or 1
# y is the principal component you want displayed on the y-axis; can be 1 or 2
#
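# A minimal Plot2D consistent with the signature described above (an assumed
# implementation shown for self-containment, kept commented out so it never
# shadows the course's real helper):
#
# def Plot2D(T, title, x, y, num_to_plot=40):
#     fig = plt.figure()
#     ax = fig.add_subplot(111)
#     ax.set_title(title)
#     ax.set_xlabel('Component: {0}'.format(x))
#     ax.set_ylabel('Component: {0}'.format(y))
#     ax.scatter(T[:, x], T[:, y], marker='.', alpha=0.7)
#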
pca = PCA(n_components=3)
T = pca.fit_transform(df)
Plot2D(T, 'PCA', 0, 1)
Plot2D(T, 'PCA', 1, 2)

#%%
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
iso = Isomap(n_components=3)
T_iso = iso.fit_transform(df)
Plot2D(T_iso, 'Isomap', 0, 1)
Plot2D(T_iso, 'Isomap', 1, 2)
#%%
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#

ax = plt.subplot(111, projection='3d')
ax.scatter(T_iso[:, 0], T_iso[:, 1], T_iso[:, 2])

plt.show()
def original_clean():
    """
    Method to clean the data
    :return: data and labels
    """
    # load the data
    dataset = np.genfromtxt("wdbc.data", dtype=float, delimiter=',',
                            usecols=tuple(range(2, 32)), encoding=None)
    labels = np.genfromtxt("wdbc.data", dtype=None, delimiter=',', usecols=(1), encoding=None)
    temp_labels = np.zeros(len(labels))
    for i in range(len(labels)):
        if labels[i] == 'B':
            temp_labels[i] = 0
        else:
            temp_labels[i] = 1
    # normalize
    temp_data = normalize(dataset)
    return temp_data, temp_labels


x, y = original_clean()
size = 2  # assumed value; `size` was never defined in this snippet (only the first two components are used below)
model = Isomap(n_components=size, n_neighbors=30)
out = model.fit_transform(x)
out = out[:, 0:2]
plt.scatter(out[:, 0], out[:, 1], c=y, marker='o')
plt.show()
model_2 = DBSCAN()
predicted = model_2.fit_predict(out)
score = v_measure_score(predicted, y)
print(score)
Exemplo n.º 51
0
def apply_ISOMap(proj_data, proj_weights=None):
    model = Isomap(n_neighbors=4, n_components=2)
    norm_data = normalize_columns(proj_data)
    result = model.fit_transform(norm_data.T)
    return result
    raw_df.to_csv("./Data/raw_mat_vectors.csv", index=False)

    ############################################################################
    ################################ ISOMAP SSMs ###############################
    ############################################################################

    print("Generating ISOMAP Matrices")

    # initialize embedding
    iso = Isomap(n_neighbors=3, n_components=1)

    # generate SSMs for each gesture
    max_sz = 0  # track size to determine largest
    iso_ssm_lst = [np.zeros(shape=(a.shape[0], a.shape[0])) for a in arrays]
    for n, a in enumerate(arrays):
        embed = iso.fit_transform(a)
        if embed.shape[0] > max_sz:
            max_sz = embed.shape[0]  # track the largest embedding once per gesture, not inside the inner loop
        for i in range(embed.shape[0]):
            for j in range(embed.shape[0]):
                iso_ssm_lst[n][i, j] = cumulated_ts_2(embed[i, :], embed[j, :])

    # smooth SSM images
    for r, s in enumerate(iso_ssm_lst):
        iso_ssm_lst[r] = gaussian_filter(s, sigma=1)

    # zero pad images
    shape = (max_sz, max_sz)
    pad_img = [
        np.pad(a, [(0, shape[0] - a.shape[0]), (0, shape[1] - a.shape[1])],
               'constant', constant_values=0)
        for a in iso_ssm_lst
    ]
# where each dimension represents the brightness of 1 pixel.

# visualizing such relationships (given the number of dimensions) is hard
# One approach is to use 'dimensionality reduction', such as manifold learning
# dimensionality reduction is an example of unsupervised machine learning. neato!

# there is a later chapter dedicated to machine learning, so without getting into the
# discussion of how it works, here's a simple dimensionality reduction in action
# to achieve our desired goal:

# ----------------------------------------

# project the digits into 2 dimensions using IsoMap
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
projection = iso.fit_transform(digits.data)

# use discrete colormap to view results, set "ticks" / "clim" for aesthetics
plt.scatter(projection[:, 0],
            projection[:, 1],
            lw=0.1,
            c=digits.target,
            cmap=plt.cm.get_cmap('cubehelix', 6))
plt.colorbar(ticks=range(6), label='digit value')
plt.clim(-0.5, 5.5)

# ----------------------------------------

# projection also gives insights on relationships within dataset
# i.e. note that '5' and '3' clusters are very close together
# while '0' and '1' clusters are extremely far apart
Exemplo n.º 54
0
show_figure(fdata, labels, ulabs, 'PCA')

# Sparse PCA
print('Sparse PCA')
from sklearn.decomposition import SparsePCA

spca = SparsePCA(n_components=3)
fdata = spca.fit_transform(authors)
show_figure(fdata, labels, ulabs, 'Sparse PCA')

# ISOMAP

print('ISOMAP')
from sklearn.manifold import Isomap
iso = Isomap(n_components=3, n_neighbors=7)
fdata = iso.fit_transform(authors)

show_figure(fdata, labels, ulabs, 'ISOMAP')

# LLE
print('LLE')
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=7, n_components=3, method='standard')

fdata = lle.fit_transform(authors)

print(lle.reconstruction_error_)

show_figure(fdata, labels, ulabs, 'LLE')

# MDS
Exemplo n.º 55
0
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1,
            edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.subplot(122)
plt.scatter(X[:, 2], X[:, 3], c=y, cmap=plt.cm.Set1,
            edgecolor='k')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# +
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=5, n_components=2)
proj = iso.fit_transform(X)

plt.figure(figsize=(15, 9))
plt.scatter(proj[:, 0], proj[:, 1], c=y)
plt.colorbar()
plt.show()
# -

# ## blobs
#

# +
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Generate 3 blobs with 2 classes where the second blob contains
y = df.iloc[:, -1:].values

####################################################################

# # # ###### randomized principal component analysis for dimensionality reduction of alt set ########
# # # The purpose is to find a way to effectively label our data, since labeling based solely on 
# # # perceptual criteria (meaning, just listening to the sounds and judging to which instrument they should
# # # be assigned) does not work well enough.
# # from sklearn.decomposition import RandomizedPCA as RandPCA

# # pca = RandPCA(n_components = 30)

# # X = pca.fit_transform(X)
from sklearn.manifold import Isomap
isomap = Isomap(n_components=30)
X = isomap.fit_transform(X)


####################################################################

############ cluster the alternative set into 17 clusters, using KMeans ##########
clstrer = KMeans(n_clusters = 17)
clstr = clstrer.fit_predict(X)


####################################################################

########### names will be filled with the wav files' filenames ################
pardir = '../database/all_recorded_and_downloaded_alt_sounds_processed'
names = np.array([])
folders = os.listdir(pardir)[1:]
Exemplo n.º 57
0
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_olivetti_faces
from sklearn.manifold import Isomap

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Create the dataset
    faces = fetch_olivetti_faces()

    # Train Isomap
    isomap = Isomap(n_neighbors=5, n_components=2)
    X_isomap = isomap.fit_transform(faces['data'])

    # Plot the result
    fig, ax = plt.subplots(figsize=(18, 10))

    for i in range(100):
        ax.scatter(X_isomap[i, 0], X_isomap[i, 1], marker='o', s=100)
        ax.annotate('%d' % faces['target'][i],
                    xy=(X_isomap[i, 0] + 0.5, X_isomap[i, 1] + 0.5))

    ax.set_xlabel(r'$x_0$')
    ax.set_ylabel(r'$x_1$')
    ax.grid()

    plt.show()
Exemplo n.º 58
0
exit(0)


min_max_scaler = MinMaxScaler()
x_benign = min_max_scaler.fit_transform(np.load("./hidden_repre/ben_hid_emd_4_50_8_200_r0.npy"))
x_vandal = min_max_scaler.fit_transform(np.load("./hidden_repre/val_hid_emd_4_50_8_200_r0.npy"))

x_benign = sample_shuffle_uspv(x_benign)
x_vandal = sample_shuffle_uspv(x_vandal)

X = x_benign[0:3000].tolist() + x_vandal[0:3000].tolist()
y = np.zeros(3000).tolist() + np.ones(3000).tolist()
X, y = np.array(X), np.array(y)

model_2D = Isomap(n_components=2)
X_2D = model_2D.fit_transform(X)


draw_2D(X_2D, y)



exit(0)
Exemplo n.º 59
0
data_pca = pca.fit_transform(data)
plt.scatter(data_pca[:, 0], data_pca[:, 1], c=target, edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2))
plt.colorbar();

## PCA explained variance
sb.set()
pca_ = PCA().fit(data)
plt.plot(np.cumsum(pca_.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance');
plt.xlim(0,5)

## Isomap dimensionality reduction
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(data)
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target,edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2));
plt.colorbar(label='Cancer', ticks=range(2))
plt.clim(-200, 0)

### KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed in newer scikit-learn

clf = KNeighborsClassifier()
n_neighbors = [1,2,3,5,8,10,15,20,25,30,35,40]
weights = ['uniform','distance']
param_grid = [{'n_neighbors': n_neighbors, 'weights': weights}]
grid_search = GridSearchCV(clf, param_grid=param_grid, cv=10)
grid_search.fit(data, target)
grid_search.cv_results_
Exemplo n.º 60
0
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap
from analogy import Analogy
from vstore import VStore

a = Analogy(VStore("vectors.lmdb", "big-glove"))

buf = ""
linebuf = input("Please enter some words to plot, or empty for a canned list: ")
while linebuf:
    buf += linebuf + " "
    linebuf = input("... ")


labels = buf.split() \
    or "doctor nurse politician senator lawyer barrister defend accuse heal treat cure elect vote".split() 

vs = [a.w(x) for x in labels if a.w(x) is not None]
flatplot = Isomap(n_components=2)  # Isomap's first positional argument is n_neighbors, so name the kwarg explicitly
ps = flatplot.fit_transform(vs)

plt.title("Reduced vector space model")
plt.xlabel("First Isomap Component")
plt.ylabel("Second Isomap Component")
plt.scatter(ps[:, 0], ps[:, 1])
for (x, y), label in zip(ps, labels):
    print "plotting %f, %f, %s" %(x, y, label)
    plt.annotate(label, xy = (x, y), xytext = (0, 0), textcoords = 'offset points')

plt.show()