Exemplo n.º 1
0
def get_candidate(X, dim, k_min, k_max, verbose=0):
    """Select candidate Isomap neighborhood sizes for a target dimensionality.

    An Isomap model is fitted for every ``k`` in ``[k_min, k_max]`` and its
    reconstruction error is recorded. Every ``k`` whose error is a strict
    local minimum of that curve (lower than both of its neighbours) becomes a
    candidate. If the curve has no such minimum, ``k_max`` is returned as the
    only candidate.

    :param X: data passed to ``Isomap.fit``
    :param dim: number of components of the embedding
    :param k_min: smallest neighborhood size to try (inclusive)
    :param k_max: largest neighborhood size to try (inclusive)
    :param verbose: when equal to 2, print the candidates and plot the
        reconstruction error against the neighborhood size
    :return: list of candidate neighborhood sizes
    :raises ValueError: if ``k_min`` is greater than ``k_max``
    """
    if k_min > k_max:
        # Without this guard the fallback below would run on an empty search
        # range and report a neighborhood size that was never evaluated.
        raise ValueError(
            'k_min (%s) must not be greater than k_max (%s)' % (k_min, k_max))

    k_values = range(k_min, k_max + 1)
    errs = []
    k_candidates = []
    for k in k_values:
        isomap = Isomap(n_neighbors=k, n_components=dim).fit(X)
        errs.append(isomap.reconstruction_error())
        # With at least three errors known, the second-to-last one (for k - 1)
        # can be compared against both of its neighbours.
        if len(errs) > 2 and errs[-2] < errs[-3] and errs[-2] < errs[-1]:
            k_candidates.append(k - 1)

    if not k_candidates:
        # No interior local minimum: fall back to the largest size tried.
        k_candidates.append(k_max)

    if verbose == 2:
        # %-formatting keeps the output identical under Python 2 and Python 3.
        print('k_candidates:  %s' % (k_candidates,))

        plt.figure()
        plt.rc("font", size=12)
        plt.plot(list(k_values), errs, '-o')
        plt.xlabel('Neighborhood size')
        plt.ylabel('Reconstruction error')
        plt.title('Select candidates of neighborhood size')
        plt.show()

    return k_candidates
def isomap_embedded_data_frame(bearing_df: pd.DataFrame, verbose: bool = False, n_components: int = 5):
    """
    Embed all observations of a bearing time series using Isomap.

    :param bearing_df: Data frame which contains computed features or raw features.
    :param verbose: If true, print the reconstruction error of the fitted model.
    :param n_components: Number of dimensions of the embedding.
    :return: Tuple of the embedded observations (as a new data frame) and the
        fitted Isomap model.
    """
    model = Isomap(n_components=n_components)
    embedding = model.fit_transform(X=bearing_df)

    if verbose:
        error = model.reconstruction_error()
        print("Reconstruction error: ", error)

    frame = pd.DataFrame(embedding)
    return frame, model
def do_isomap(X, n_comp, n_neighbors=5, n_jobs=2):
    '''
    Reduces the dimensions of data to n_comp using Isomap.

    Prints a progress message before fitting and the reconstruction error
    of the fitted model afterwards.

    :param X: data
    :param n_comp: number of dimensions to reduce to
    :param n_neighbors: number of neighbors passed to Isomap (default 5)
    :param n_jobs: number of parallel jobs passed to Isomap (default 2)
    :return: Dimension reduced data
    '''
    print("Performing isomap")
    imap = Isomap(n_components=n_comp,
                  n_neighbors=n_neighbors,
                  n_jobs=n_jobs,
                  neighbors_algorithm='auto')
    X_n = imap.fit_transform(X)
    print("Reconstruction Error:", imap.reconstruction_error())
    return X_n
Exemplo n.º 4
0
 def isomap(self, data):
     """
     Embed ``data`` with Isomap, then cluster the embedding with
     ``self.batch_kmeans``.

     ``self.parameters`` supplies the Isomap keyword arguments plus the
     entry ``k_means_n_clusters``, which is removed before the Isomap
     model is built and handed to the clustering step instead.

     :param data: observations passed to ``Isomap.fit_transform``
     :return: whatever ``self.batch_kmeans`` returns for the embedded data
     :raises KeyError: if ``self.parameters`` has no ``k_means_n_clusters``
     """
     # %-formatting keeps the output identical under Python 2 and Python 3.
     print('Isomap neighbours : %s' % (self.parameters["n_neighbors"],))
     print('Isomap components, ie final number of coordinates : %s' % (self.k,))

     # Work on a copy so that self.parameters itself is left unchanged.
     isomap_params = dict(self.parameters)
     k_means_n_clusters = isomap_params.pop('k_means_n_clusters')

     m = Isomap(neighbors_algorithm='kd_tree', **isomap_params)
     new_data = m.fit_transform(data)

     print(self.parameters)
     # NOTE(review): `params` is not defined in this method; presumably a
     # module-level mapping of parameter names -- verify. The values below are
     # paired positionally with the names in params["mini-batchk-means"].
     kmeans_values = [k_means_n_clusters, 1000, 500, 1000, 'k-means++', 5]
     return self.batch_kmeans(new_data, parameters=dict(zip(params["mini-batchk-means"], kmeans_values)))
Exemplo n.º 5
0
def estimate_dim(data, verbose=0):
    """Estimate the intrinsic dimensionality of ``data``.

    The data is standardized, then an Isomap model is fitted for each
    dimensionality 1, 2, ... up to the number of columns, using the
    neighborhood size chosen by ``pick_k``. The search stops at the first
    dimensionality whose drop in reconstruction error is less than half of
    the previous drop; the estimate is then one less than that
    dimensionality. If no such point is found the estimate is 1.

    data: input data
    verbose: forwarded to ``get_k_range`` and ``pick_k``; when equal to 2 the
        reconstruction error is also plotted against the dimensionality

    Reference:
    "Samko, O., Marshall, A. D., & Rosin, P. L. (2006). Selection of the optimal parameter
    value for the Isomap algorithm. Pattern Recognition Letters, 27(9), 968-979."
    """
    # Center every feature to zero mean and scale it to unit variance.
    data = scale(data)
    k_min, k_max = get_k_range(data, verbose=verbose)

    # The reconstruction error is expected to decrease as n_components grows,
    # until n_components reaches the intrinsic dimensionality.
    errs = []
    for dim in range(1, data.shape[1] + 1):
        k_opt = pick_k(data, dim, k_min, k_max, verbose=verbose)
        model = Isomap(n_neighbors=k_opt, n_components=dim).fit(data)
        errs.append(model.reconstruction_error())

        if dim <= 2:
            # Two consecutive drops are needed before they can be compared.
            continue

        last_drop = errs[-2] - errs[-1]
        prev_drop = errs[-3] - errs[-2]
        if last_drop < .5 * prev_drop:
            intr_dim = dim - 1
            break
    else:
        # The loop finished without the error curve flattening out.
        intr_dim = 1

    # Alternatives that are not in use: find_gap(errs, method='difference')
    # or find_gap(errs, method='percentage', threshold=.9).

    if verbose == 2:
        plt.figure()
        plt.rc("font", size=12)
        plt.plot(range(1, dim + 1), errs, '-o')
        plt.xlabel('Dimensionality')
        plt.ylabel('Reconstruction error')
        plt.title('Select intrinsic dimension')
        plt.show()

    return intr_dim
Exemplo n.º 6
0
    def run_isomap(self, n_neighbors, low_dim_size):
        """
        Embed the values of ``self.get_x()`` with Isomap.

        The embedding is stored in ``self.X_low`` and the reconstruction
        error of the fitted model is printed with three decimals.

        Parameters
        ----------
        n_neighbors : int
            number of neighbors used for the Isomap run
        low_dim_size : int
            number of dimensions of the resulting embedding

        Returns
        -------
        None
        """
        print("Run isomap")
        embedder = Isomap(n_components=low_dim_size, n_neighbors=n_neighbors)
        features = self.get_x().values
        self.X_low = embedder.fit_transform(features)

        error = embedder.reconstruction_error()
        print("Done. Reconstruction error: {:.3f}".format(error))
Exemplo n.º 7
0
 def compute_isomap_explain_power(self, model, num_dims=2):
     '''
     Given a computed course vectors embedding model, extract the
     vectors, standardize them, fit an Isomap embedding with num_dims
     dimensions, and return the reconstruction error of that embedding.

     @param model: course context model as trained by neural net
     @type model: gensim.model.word_vectors
     @param num_dims: number of dimensions of the Isomap embedding
     @type num_dims: int
     @return: reconstruction error of the fitted Isomap embedding
     @rtype: float
     '''
     vectors_standardized = preprocessing.scale(model.wv.vectors)

     # Only the fitted model is needed; the embedded coordinates are not used.
     isomap = Isomap(n_components=num_dims).fit(vectors_standardized)
     return isomap.reconstruction_error()
# Finish and show the 2-D Isomap scatter plot, colouring each observation by `y`.
# NOTE(review): `iso_dim` and `y` are defined outside this excerpt.
plt.ylabel('Latent Variable 2 (explains second most variance)')
plt.title('Isomap 2-Dimension Plot with Observation Class')
plt.scatter(iso_dim[:, 0], iso_dim[:, 1], c=y)
plt.colorbar()
plt.show()

# Apply Isomap for several choices of the number of latent dimensions and
# record the reconstruction error of each fit.
# Limitation: the number of dimensions must be < the number of original features.
nbr_dim = range(3)
iso_dim_nbr = []
iso_reconstruction_errors = []
for nd in nbr_dim:
    # nd runs over 0, 1, 2, so n_components runs over 1, 2, 3.
    iso_model = Isomap(n_neighbors=5, n_components=nd + 1)
    iso_model.fit_transform(x_std)
    iso_dim_nbr.append(nd + 1)
    iso_reconstruction_errors.append(iso_model.reconstruction_error())
# Collect the results as a two-column table: number of dimensions and error.
iso_results = {'nbr_dim': iso_dim_nbr, 'error': iso_reconstruction_errors}
iso_results = pd.DataFrame.from_dict(iso_results)
# Plot the error against the number of dimensions to see which number of
# dimensions has the lowest reconstruction error.
plt.plot(iso_results['nbr_dim'], iso_results['error'])
plt.xlabel('Number of Latent Dimensions')
plt.ylabel('Reconstruction Error')
plt.title('Plot of Error by Number of Latent Variables')
plt.show()

# Use iso_model.transform(x_test) to apply the Isomap fitted on the training
# set to the test set. After the loop, iso_model is the last model fitted
# (n_components=3).
'''
-------------------------------------------------------------------------------
-------------------------------Modified LLE------------------------------------
-------------------------------------------------------------------------------
'''