Exemplo n.º 1
0
def dimension_reduce():
    """Compare three 2-D embeddings of the digits data side by side.

    PCA, spectral embedding and Isomap are each applied to ``digits.data``
    and drawn in one panel of a 1x3 figure, colored by ``digits.target``.
    Afterwards the PCA mean and the first two principal components are
    shown as 8x8 images.
    """
    # Panel 1: PCA projection onto two components.
    pca = PCA(n_components=2)
    pca.fit(digits.data)
    pca_points = pca.transform(digits.data)
    plt.subplot(1, 3, 1)
    plt.scatter(pca_points[:, 0], pca_points[:, 1], c=digits.target)

    # Panel 2: spectral embedding with the estimator's default settings.
    spectral_points = SpectralEmbedding().fit_transform(digits.data)
    plt.subplot(1, 3, 2)
    plt.scatter(spectral_points[:, 0], spectral_points[:, 1], c=digits.target)

    # Panel 3: Isomap built from 20 neighbors per sample.
    iso = Isomap(n_components=2, n_neighbors=20)
    iso.fit(digits.data)
    iso_points = iso.transform(digits.data)
    plt.subplot(1, 3, 3)
    plt.scatter(iso_points[:, 0], iso_points[:, 1], c=digits.target)
    plt.show()

    # One matshow figure each for the PCA mean and the two fitted components.
    for image in (pca.mean_, pca.components_[0], pca.components_[1]):
        plt.matshow(image.reshape(8, 8))
    plt.show()
Exemplo n.º 2
0
def calculate_geodesic_distance(df_for_Box_Plot_features, points):
    """
    Computes pairwise geodesic distances in the original and embedded spaces.

    An Isomap model is fitted on each input only to obtain its geodesic
    distance matrix (``dist_matrix_``). The strict upper triangle of each
    matrix is returned as a flat vector, in row-major order.

    Parameters
    ----------
    df_for_Box_Plot_features : list
        original features
    points : nD array
        embedding

    Returns
    ----------
    geo_distance_original : 1D array
        geodesic distances in the original dataset
    geo_distance_embeddings : 1D array
        geodesic distances in the embedding
    """
    def condensed(dist_matrix):
        # Pick the strict upper triangle by position rather than by
        # "non-zero value": a value-based selection drops pairs whose
        # distance is exactly 0, which made the two returned vectors differ
        # in length and pairing whenever the original data had such pairs.
        return dist_matrix[np.triu_indices_from(dist_matrix, k=1)]

    original_model = Isomap(n_components=2)
    original_model.fit(df_for_Box_Plot_features)
    geo_distance_original = condensed(original_model.dist_matrix_)

    embedding_model = Isomap(n_components=2)
    embedding_model.fit(points)
    geo_distance_embeddings = condensed(embedding_model.dist_matrix_)

    return geo_distance_original, geo_distance_embeddings
Exemplo n.º 3
0
def isomap():
    """Embed ``corr_dataframe`` in two dimensions with Isomap.

    The model uses 7 nearest neighbors per sample. It is fitted on
    ``corr_dataframe`` and the transform of that same data is returned.
    """
    reducer = Isomap(n_components=2, n_neighbors=7)
    reducer.fit(corr_dataframe)
    return reducer.transform(corr_dataframe)
 def isomap(X, n_neighbors=5, n_components=2):
     """Fit an Isomap model on ``X`` and return the fitted estimator.

     ``X`` is converted with ``np.asarray``; a one-dimensional input is
     reshaped into a single column before fitting.
     """
     samples = np.asarray(X)
     if samples.ndim == 1:
         samples = samples.reshape(-1, 1)
     model = Isomap(n_neighbors=n_neighbors, n_components=n_components)
     model.fit(samples)
     return model
Exemplo n.º 5
0
def df_isomap(df, n_comp = 2, n_jobs = 1, n_neighbors = 5, max_iter = 1000):
    """Normalize a dataframe and reduce it with Isomap.

    Parameters
    ----------
    df : input passed through ``normalize_dataframe`` before modelling.
    n_comp : number of embedding dimensions.
    n_jobs : number of parallel jobs forwarded to Isomap.
    n_neighbors : number of neighbors forwarded to Isomap.
    max_iter : iteration limit forwarded to Isomap.

    Returns
    -------
    pandas.DataFrame holding the embedded coordinates, with default integer
    row and column labels.
    """
    normalized = normalize_dataframe(df)
    reducer = Isomap(n_components=n_comp,
                     n_neighbors=n_neighbors,
                     max_iter=max_iter,
                     n_jobs=n_jobs)
    # Fit on the same normalized data that is transformed below. Fitting on
    # `caracteristicas_df` referenced a name this function never defines.
    reducer.fit(normalized)
    embedded = reducer.transform(normalized)
    return pd.DataFrame(embedded)
Exemplo n.º 6
0
def iso_map(d: pd.DataFrame):
    """Project ``d`` onto two Isomap components and save them as CSV.

    The result keeps the index of ``d`` and names its columns ``comp1`` and
    ``comp2``. It is written to ``ISOMAP_FILE`` (index included) and returned.
    """
    model = Isomap(n_components=2, n_jobs=-1)
    model.fit(d)
    coords = model.transform(d)

    projected = pd.DataFrame(coords, index=d.index, columns=['comp1', 'comp2'])
    projected.to_csv(ISOMAP_FILE, index=True)
    return projected
Exemplo n.º 7
0
    def ML( self ):
        """Fit a two-component Isomap on min-max-scaled data, then break into pdb.

        Every column of ``self.data`` except the last three is scaled with
        ``MinMaxScaler`` and used to fit the model. Nothing is returned: the
        method ends in a ``pdb`` breakpoint, leaving ``X`` and ``isomap`` in
        scope for interactive inspection.
        """
        # Drop the last three columns before modelling.
        # NOTE(review): presumably those hold labels/metadata rather than features — confirm.
        data = self.data.values[ :, :-3 ]
        scaler = MinMaxScaler()
        # Alternative scaler, currently disabled:
        #scaler = StandardScaler()
        X = scaler.fit_transform( data )
        # Disabled option to skip scaling altogether:
        #X = data

        isomap = Isomap( n_components = 2 )
        isomap.fit( X )
        # Disabled Python 2 print that refers to a `pca` object not defined here:
        #print pca.explained_variance_ratio_
        # Debugging breakpoint: execution stops here every time the method runs.
        import pdb; pdb.set_trace()
def reduce_features_to_two_dimensions(features):
    """
    Project ``features.data`` onto two Isomap components.

    According to the original author's note the input has 784 features per
    sample; the two-column result is intended for the visualize_features
    function.
    """
    samples = features.data
    reducer = Isomap(n_components=2)
    reducer.fit(samples)
    return reducer.transform(samples)
Exemplo n.º 9
0
    def ML(self):
        """Fit a two-component Isomap on min-max-scaled data, then break into pdb.

        Every column of ``self.data`` except the last three is scaled with
        ``MinMaxScaler`` and used to fit the model. Nothing is returned: the
        method ends in a ``pdb`` breakpoint, leaving ``X`` and ``isomap`` in
        scope for interactive inspection.
        """
        # Drop the last three columns before modelling.
        # NOTE(review): presumably those hold labels/metadata rather than features — confirm.
        data = self.data.values[:, :-3]
        scaler = MinMaxScaler()
        # Alternative scaler, currently disabled:
        #scaler = StandardScaler()
        X = scaler.fit_transform(data)
        # Disabled option to skip scaling altogether:
        #X = data

        isomap = Isomap(n_components=2)
        isomap.fit(X)
        # Disabled Python 2 print that refers to a `pca` object not defined here:
        #print pca.explained_variance_ratio_
        # Debugging breakpoint: execution stops here every time the method runs.
        import pdb
        pdb.set_trace()
Exemplo n.º 10
0
class IsomapImpl:
    """Thin adapter that forwards ``fit`` and ``transform`` to a wrapped ``Op``."""

    def __init__(self, **hyperparams):
        # Keep the raw keyword arguments and build the wrapped model from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)
Exemplo n.º 11
0
    def plot(self,
             n_components=2,
             n_neighbors=5,
             transform="log",
             switch_x=False,
             switch_y=False,
             switch_z=False,
             colors=None,
             max_features=500,
             show_plot=True):
        """Embed the scaled data with Isomap and plot pairs of components.

        The embedding of ``data.T`` is stored in ``self.Xr`` (after any axis
        flips) and the fitted estimator is returned.

        :param n_components: number of embedding dimensions, forwarded to
            Isomap. With 3 or more, components 0 vs 2 and 1 vs 2 are drawn
            in two additional figures.
        :param n_neighbors: number of neighbors, forwarded to Isomap.
        :param transform: forwarded to ``scale_data`` as
            ``transform_method``; can be 'log' or 'anscombe', log is just
            log10. count with zeros, are set to 1
        :param switch_x: multiply the first component by -1.
        :param switch_y: multiply the second component by -1.
        :param switch_z: multiply the third component by -1; ignored when
            the embedding has fewer than three components.
        :param colors: forwarded to ``_plot``.
        :param max_features: forwarded to ``scale_data``.
        :param show_plot: when False, no figure is drawn.
        :return: the fitted Isomap estimator.
        """
        from sklearn.manifold import Isomap

        pylab.clf()

        # The second value returned by scale_data is not needed here.
        data, _ = self.scale_data(transform_method=transform,
                                  max_features=max_features)

        iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
        iso.fit(data.T)
        Xr = iso.transform(data.T)
        self.Xr = Xr

        # Flip the requested axes in place (self.Xr is the same array).
        # The bounds check keeps e.g. switch_z from raising IndexError on
        # the default two-component embedding.
        for axis, flip in enumerate((switch_x, switch_y, switch_z)):
            if flip and axis < Xr.shape[1]:
                Xr[:, axis] *= -1

        # Component 0 vs component 1
        if show_plot:
            pylab.figure(1)
            self._plot(Xr, pca=None, pc1=0, pc2=1, colors=colors)

        # Remaining pairs only exist for three or more components.
        if show_plot and n_components >= 3:
            pylab.figure(2)
            self._plot(Xr, pca=None, pc1=0, pc2=2, colors=colors)
            pylab.figure(3)
            self._plot(Xr, pca=None, pc1=1, pc2=2, colors=colors)
        return iso
Exemplo n.º 12
0
    def classify_concat_isomap_data(self, vis_data, sem_data, labels):
        """Cross-validate a linear SVM on Isomap-embedded concatenated features.

        For every stratified fold the visual and semantic rows are
        L2-normalized, the test-side semantic rows are degraded with
        ``SemanticDegradation.kill_semantic_attributes`` at
        ``self.degradation_rate`` and normalized again, and both parts are
        stacked side by side. Isomap is fitted on the training stack and a
        StandardScaler + linear SVC pipeline is trained on its embedding.

        :return: list with the balanced accuracy of each fold.
        """
        def unit_rows(matrix):
            # L2-normalize every row into a new array.
            return normalize(matrix, norm='l2', axis=1, copy=True)

        embedder = Isomap(n_components=sem_data.shape[1],
                          n_neighbors=20,
                          eigen_solver='auto')
        splitter = StratifiedKFold(n_splits=self.n_folds,
                                   random_state=None,
                                   shuffle=True)

        scores = []
        for fold, (train_index, test_index) in enumerate(
                splitter.split(vis_data, labels)):
            logging.info('Running ISO classification for fold %d' % fold)

            tr_vis = unit_rows(vis_data[train_index])
            te_vis = unit_rows(vis_data[test_index])
            tr_sem = unit_rows(sem_data[train_index])

            # Test-side semantic rows: normalize, degrade, normalize again.
            degraded = SemanticDegradation.kill_semantic_attributes(
                unit_rows(sem_data[test_index]), self.degradation_rate)
            te_sem = unit_rows(degraded)

            tr_data = np.hstack((tr_vis, tr_sem))
            te_data = np.hstack((te_vis, te_sem))
            tr_labels = labels[train_index][:, 0]
            te_labels = labels[test_index][:, 0]

            clf = make_pipeline(StandardScaler(),
                                SVC(gamma='auto', C=1.0, kernel='linear'))

            # The embedding is learned from the training rows only.
            embedder.fit(tr_data)
            clf.fit(embedder.transform(tr_data), tr_labels)
            prediction = clf.predict(embedder.transform(te_data))

            scores.append(balanced_accuracy_score(te_labels, prediction))

        return scores
def create_isomap(dissim_mat, embed_dimensions, neighbor_factor=2, **kwargs):
    """Fit Isomap on a precomputed dissimilarity matrix and return the fitted model.

    The neighbor count is ``n // neighbor_factor`` for ``n`` rows, raised to
    at least 5 and then capped at ``n - 1``. Extra keyword arguments are
    forwarded to ``Isomap``.
    """
    # https://scikit-learn.org/stable/modules/manifold.html#multidimensional-scaling says isomap better suited than MDS, but DESC15 say they compared it and it's worse ([15] of [DESC15])!
    n_rows = dissim_mat.shape[0]
    n_neighbors = max(5, n_rows // neighbor_factor)
    n_neighbors = min(n_neighbors, n_rows - 1)
    print(f"Running Isomap with {get_ncpu(ignore_debug=True)} jobs for max {n_neighbors} neighbors.")
    model = Isomap(
        n_jobs=get_ncpu(ignore_debug=True),
        n_neighbors=n_neighbors,
        n_components=embed_dimensions,
        metric="precomputed",
        **kwargs
    )
    return model.fit(dissim_mat)
Exemplo n.º 14
0
class FloorplanEstimator:
    """
    Rough floorplan estimator built on a 2-D Isomap embedding of fingerprints
    """
    def __init__(self):
        """
        Create the Isomap reducer (25 neighbors, 2 components); nothing is fitted yet
        """
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = self._label = None

    def fit(self, fingerprints, label):
        """
        Fit the reducer and remember the training data for drawing
        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        self._fingerprints, self._label = fingerprints, label

    def transform(self, fingerprints):
        """
        Map fingerprints to x,y coordinates on the floorplan
        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        reducer = self.dimred
        return reducer.transform(fingerprints)

    def draw(self):
        """
        Draw the estimated floorplan in the current figure
        """
        coords = self.dimred.transform(self._fingerprints)
        xs, ys = coords[:, 0], coords[:, 1]

        # Grid with unit spacing spanning the embedded training points.
        grid_x, grid_y = np.meshgrid(np.arange(xs.min(), xs.max(), 1.0),
                                     np.arange(ys.min(), ys.max(), 1.0))

        # Label every grid cell from training points within radius 3;
        # cells without any such point get label 0.
        classifier = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        classifier.fit(coords, self._label)
        grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
        grid_label = classifier.predict(grid_points).reshape(grid_x.shape)

        plt.pcolormesh(grid_x, grid_y, grid_label)
        plt.scatter(xs, ys, c=self._label, vmin=0)
Exemplo n.º 15
0
class FloorplanEstimator:
    """
    Estimates a rough floorplan by embedding fingerprints in two dimensions
    """
    def __init__(self):
        """
        Prepare an unfitted Isomap reducer using 25 neighbors and 2 components
        """
        self.dimred = Isomap(n_components=2, n_neighbors=25)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Fit the embedding on labeled fingerprints and keep both for draw()
        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        self._label = label
        self._fingerprints = fingerprints

    def transform(self, fingerprints):
        """
        Return the floorplan coordinates of the given fingerprints
        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        located = self.dimred.transform(fingerprints)
        return located

    def draw(self):
        """
        Draw the estimated floorplan in the current figure
        """
        points = self.dimred.transform(self._fingerprints)
        col_x = points[:, 0]
        col_y = points[:, 1]

        # Unit-spaced axes covering the range of the embedded points.
        x_axis = np.arange(col_x.min(), col_x.max(), 1.0)
        y_axis = np.arange(col_y.min(), col_y.max(), 1.0)
        mesh_x, mesh_y = np.meshgrid(x_axis, y_axis)

        # Cells with no training point within radius 3 are labelled 0.
        neighbors_clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        neighbors_clf.fit(points, self._label)
        flat_cells = np.c_[mesh_x.ravel(), mesh_y.ravel()]
        cell_labels = neighbors_clf.predict(flat_cells).reshape(mesh_x.shape)

        plt.pcolormesh(mesh_x, mesh_y, cell_labels)
        plt.scatter(col_x, col_y, c=self._label, vmin=0)
Exemplo n.º 16
0
def example_04():
    """Classify the digits data with Gaussian naive Bayes and plot its confusion matrix.

    A two-component Isomap projection is computed as well, but the code that
    would plot it is currently commented out. The accuracy on the held-out
    split is printed before the confusion-matrix heatmap is shown.
    """
    from sklearn.manifold import Isomap
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix

    digits = load_digits()
    # Disabled preview of the first 100 digit images with their labels:
    # fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks': [], 'yticks': []},
    #                          gridspec_kw=dict(hspace=0.1, wspace=0.1))
    #
    # # axes.flat is a one-dimensional iterator
    # for i, ax in enumerate(axes.flat):
    #     ax.imshow(digits.images[i], cmap='binary')
    #     ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')
    # plt.show()

    features, targets = digits.data, digits.target

    projector = Isomap(n_components=2)
    projector.fit(features)
    data_projected = projector.transform(features)

    # Disabled scatter plot of the projection:
    # plt.scatter(data_projected[:, 0], data_projected[:, 1], c=y, edgecolors='none', alpha=0.5,
    #             cmap=plt.cm.get_cmap('Spectral', 10))
    # plt.colorbar(label='digit label', ticks=range(10))
    # plt.clim(-0.5, 9.5)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, random_state=1)

    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    y_model = classifier.predict(X_test)

    # Quantitative score
    print(accuracy_score(y_model, y_test))

    # The original author reports about 85% accuracy here, which alone does
    # not show where the errors are; the confusion matrix below does.
    matrix = confusion_matrix(y_test, y_model)
    sns.heatmap(matrix, square=True, annot=True, cbar=True)
    plt.xlabel('predict value')
    plt.ylabel('True value')
    plt.show()
Exemplo n.º 17
0
def perform_isomap(M):
    """Run a three-component Isomap on a precomputed metric.

    Input:
    ======
         M    :  Metric as a matrix of shape (n_samples, n_samples); it is
                 handed to Isomap with metric='precomputed'
    Output:
    =======
        emb_isomap  :  Embedding stored on the fitted model, an array of
                       shape (n_samples, 3)
        explained_variance : Result of `_get_explained_variance` applied to
                       the model's distance matrix and the embedding
    """
    model = Isomap(n_components=3, n_neighbors=5, metric='precomputed')
    model.fit(M)
    emb_isomap = model.embedding_
    explained_variance = _get_explained_variance(model.dist_matrix_, emb_isomap)
    return emb_isomap, explained_variance
Exemplo n.º 18
0
def isomap10FoldClf(X, y, nclf):
    """Score classifier ``nclf`` on 2-D Isomap features over 10 shuffled folds.

    For each fold, Isomap (30 neighbors, 2 components) is fitted on the
    training rows only; ``nclf`` is trained on the embedded training rows
    and evaluated on the embedded test rows.

    Returns the mean and the standard deviation of the per-fold accuracy.
    """
    n_neighbors = 30
    fold_accuracy = []
    # NOTE(review): KFold is called with the (n_samples, n_folds=...) form and
    # iterated directly — confirm which KFold implementation the file imports.
    folds = KFold(X.shape[0], n_folds=10, shuffle=True)
    for train_index, test_index in folds:
        embedder = Isomap(n_neighbors=n_neighbors, n_components=2)
        embedder.fit(X[train_index])
        train_embedded = embedder.transform(X[train_index])
        test_embedded = embedder.transform(X[test_index])

        nclf.fit(train_embedded, y[train_index])
        predicted = nclf.predict(test_embedded)

        expected = y[test_index]
        fold_accuracy.append(np.sum(predicted == expected) * 1.0 / expected.shape[0])
    return np.mean(fold_accuracy), np.std(fold_accuracy)
Exemplo n.º 19
0
def main():
    """Explore the digits data: Isomap plot, naive Bayes, confusion matrix, prediction grid.

    Steps, each shown in its own figure:
    1. scatter plot of a two-component Isomap projection, colored by digit;
    2. confusion matrix of a Gaussian naive Bayes classifier on a held-out
       split (its accuracy is printed);
    3. a 10x10 grid of held-out images annotated with the predicted digit,
       green when correct and red otherwise.
    """
    digits = load_digits()
    print(digits.images.shape)

    # get the 2D representation of the images [n_samples, n_features]
    X = digits.data
    y = digits.target

    # reduce dimensionality
    iso = Isomap(n_components=2)
    iso.fit(X)
    data_prj = iso.transform(X)

    # NOTE(review): plt.cm.get_cmap is deprecated in recent Matplotlib
    # releases — confirm against the Matplotlib version in use.
    plt.scatter(data_prj[:, 0], data_prj[:, 1], c=y,
                edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('Accent', 10))

    plt.colorbar(label='digit label', ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)

    # create the model
    model = GaussianNB()
    model.fit(Xtrain, ytrain)
    y_model = model.predict(Xtest)
    # Report the score instead of discarding it.
    print(accuracy_score(ytest, y_model))

    mat = confusion_matrix(ytest, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=False)
    plt.xlabel('predicted value')
    plt.ylabel('true value')
    plt.show()

    fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                            subplot_kw={'xticks':[], 'yticks':[]}, gridspec_kw=dict(hspace=0.1, wspace=0.1))

    # Show the held-out samples themselves: y_model and ytest are in
    # test-split order, so indexing digits.images (full-dataset order) would
    # pair each image with another sample's prediction.
    test_images = Xtest.reshape(-1, *digits.images.shape[1:])
    for ax, image, predicted, actual in zip(axes.flat, test_images, y_model, ytest):
        ax.imshow(image, cmap='binary', interpolation='nearest')
        ax.text(0.05, 0.05, str(predicted), transform=ax.transAxes,
                color='green' if actual == predicted else 'red')
    plt.show()
Exemplo n.º 20
0
 def compute_iso_map(self, original_features):
   """Reduce the feature table with Isomap and rescale each component to [0, 1].

   The 'file' column is dropped and NaNs are replaced via ``np.nan_to_num``.
   Isomap (``self.n_components`` dimensions) is fitted on a random sample of
   ``int(self.ratio * n_rows)`` rows and then applied to all rows.

   :param original_features: DataFrame containing a 'file' column plus the
       feature columns.
   :return: array of reduced features, min-max scaled per component.
   """
   # `.values` with a keyword `axis` replaces `DataFrame.as_matrix()` and the
   # positional axis argument of `drop`, both of which were removed from
   # pandas; this form works on old and new versions alike.
   feature_matrix = original_features.drop('file', axis=1).values
   feature_matrix = np.nan_to_num(feature_matrix)

   dimen_reductor = Isomap(n_components=self.n_components)

   full_size = feature_matrix.shape[0]
   train_size = int(self.ratio * full_size)

   # NOTE(review): rows are drawn with replacement (np.random.choice
   # default), so the training sample may contain duplicates — confirm this
   # is intended.
   row_indices = list(range(full_size))
   feature_training_indices = np.random.choice(row_indices, size = train_size)
   training_feature_matrix = feature_matrix[feature_training_indices, :]

   dimen_reductor.fit(training_feature_matrix)
   reduced_features = dimen_reductor.transform(feature_matrix)

   # Min-max scaling per component: shift to zero, then divide by the range.
   reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
   reduced_normalized_features /= reduced_normalized_features.max(axis=0)

   return reduced_normalized_features
Exemplo n.º 21
0
    def _OnClick3(self, event):
        """Toggle ``var3``; when it switches from "On" to "Off", run Isomap.

        On that transition the labels (``self.labelVar``) and the data
        (``self.dfLabel``) are read as header-less CSV files, the data is
        embedded in two components, the scatter plot is saved as
        ``ISOMAP.png`` and shown, and the components are written to
        ``ISOMAP_COMPONENTS.xlsx``.

        :param event: the triggering event; not used.
        """
        state = self.var3.get()
        if state == "Off":
            self.var3.set("On")
            return
        if state != "On":
            return

        self.var3.set("Off")
        print("Isomap is running...")
        label = pd.read_csv(self.labelVar, header=None)[0].tolist()
        df = pd.read_csv(self.dfLabel, header=None)

        iso = Isomap(n_components=2)
        iso.fit(df)
        components = pd.DataFrame(iso.transform(df),
                                  columns=['Component 1', 'Component 2'])

        unique = np.unique(label)

        try:
            plt.scatter(components['Component 1'],
                        components['Component 2'],
                        c=label)
        except Exception:
            # Best effort: keep going so the files below are still written,
            # but no longer swallow KeyboardInterrupt/SystemExit.
            print(
                "data matrix does not match label matrix (Select input file and label, remove headers)"
            )

        #plt.legend(unique, loc=8, ncol=5,fontsize='x-small')
        name = 'ISOMAP'  # change the output file name here

        plt.title(name + " Clusters: " + str(len(unique)))
        plt.savefig(name + ".png")
        plt.show()
        plt.clf()
        # The two components are exported to Excel here.
        components.to_excel("ISOMAP_COMPONENTS.xlsx")
Exemplo n.º 22
0
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    """Evaluate rbf and linear SVMs on Isomap projections of several sizes.

    For each component count in ``comp_range`` an Isomap with ``n_neigh``
    neighbors is fitted on the training data, and both splits are projected.
    The two-component projections are additionally saved with ``np.save``.
    Each kernel's mean score is collected, and one result file per kernel
    lists every score plus the best component count and its score.

    Returns the rbf score list and the linear score list.
    """
    rbf_scores = []
    linear_scores = []
    per_kernel = (('rbf', rbf_scores), ('linear', linear_scores))

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = Isomap(n_neighbors=n_neigh,
                             n_components=n_comp,
                             n_jobs=8)
        transformer.fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)
        if n_comp == 2:
            np.save('X_train_proj_2d_Isomap_' + str(n_neigh), X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + str(n_neigh), X_test_proj)
        for kernel, collected in per_kernel:
            score = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam(kernel), kernel)
            collected.append(score.mean())

    for kernel, scores in per_kernel:
        best_idx = np.argmax(scores)
        best_n_comp = comp_range[best_idx]
        best_acc = scores[best_idx]
        report_path = 'res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt'
        with open(report_path, 'w') as report:
            for n_comp, acc in zip(comp_range, scores):
                report.write(kernel + ": n_comp = %f, acc = %f\n" %
                             (n_comp, acc))
            report.write(kernel + ": Best n_comp = %f\n" % (best_n_comp))
            report.write(kernel + ": acc = %f\n" % (best_acc))
    return rbf_scores, linear_scores
Exemplo n.º 23
0
class ISO_Reducer(Reducer):
    '''Reducer backed by an Isomap embedding'''
    def __init__(self, dimensionality=2500):
        # Estimator settings; only the number of components is configurable.
        settings = dict(n_neighbors=5,
                        n_components=dimensionality,
                        eigen_solver='auto',
                        tol=0,
                        max_iter=None,
                        path_method='auto',
                        neighbors_algorithm='auto',
                        n_jobs=-1)
        self.iso = Isomap(**settings)

    def reduced(self, A):
        '''Fit on A and return the embedding transposed'''
        fitted = self.iso.fit(A)
        return np.transpose(fitted.embedding_)
Exemplo n.º 24
0
class Isomap(MapAlgorithm):
    """Map algorithm that positions molecules with a fitted Isomap model."""

    name = "Isomap"
    parameters = {
        "n_neighbors": {
            "type": ModelParameter.INTEGER,
            "defaultValue": 5,
        },
    }

    def __init__(self, builder, callback=None):
        super().__init__(builder, callback)

        # Fall back to 5 neighbors when the parameter was not supplied.
        if "n_neighbors" not in self.params:
            self.params["n_neighbors"] = 5

        # Imported locally under an alias because this class has the same
        # name as the estimator.
        from sklearn.manifold import Isomap as _ManifoldIsomap
        self._model = _ManifoldIsomap(n_neighbors=self.params['n_neighbors'])

    def getPoints(self, mols, X: DataFrame) -> [Point]:
        """Create one Point per molecule from the first two transformed columns."""
        coords = self.predict(X)
        return [
            Point.objects.create(
                map=self.builder.instance,
                molecule=mol,
                x=coords[row, 0],
                y=coords[row, 1],
            )
            for row, mol in enumerate(mols)
        ]

    def fit(self, X: DataFrame, y=None):
        """Fit the underlying model on ``X``; ``y`` is not used."""
        fitted = self._model.fit(X)
        self._model = fitted

    def predict(self, X: DataFrame) -> DataFrame:
        """Return the model's transform of ``X``."""
        return self.model.transform(X)

    @property
    def model(self):
        """The underlying model object."""
        return self._model
Exemplo n.º 25
0
    def isomap(self, n_components=2, n_neighbors=3, show=False):
        """
        Computes lower-dimensional coordinates with Isomap and stores them in
        ``self.pos``.

        :param n_components: dimensionality of the reduced space
        :type n_components: int, optional

        :param n_neighbors: Used by isomap to determine the number of neighbors
            for each point. Large neighbor size tends to produce a denser map.
        :type n_neighbors: int, optional

        :param show: Returns the calculated coordinates if true; otherwise
            nothing is returned.
        :type show: boolean, optional
        """
        # NOTE(review): self.dismat is handed to Isomap without
        # metric="precomputed" — confirm it is meant to be read as features.
        fitted = Isomap(n_neighbors=n_neighbors,
                        n_components=n_components).fit(self.dismat)
        self.pos = fitted.embedding_
        return self.pos if show else None
Exemplo n.º 26
0
    def isomap(self, n_components=2, n_neighbors=3, show=False):
        """
        Runs Isomap on ``self.dismat`` and keeps the resulting coordinates in
        ``self.pos``.

        :param n_components: dimensionality of the reduced space
        :type n_components: int, optional

        :param n_neighbors: Used by isomap to determine the number of neighbors
            for each point. Large neighbor size tends to produce a denser map.
        :type n_neighbors: int, optional

        :param show: When true the calculated coordinates are returned;
            when false the method returns None.
        :type show: boolean, optional
        """
        # NOTE(review): no metric="precomputed" is passed although the input
        # is named like a dissimilarity matrix — confirm this is intended.
        reducer = Isomap(n_components=n_components, n_neighbors=n_neighbors)
        coords = reducer.fit(self.dismat).embedding_
        self.pos = coords

        if not show:
            return None
        return self.pos
Exemplo n.º 27
0
def mult_scl(X, labels):
    """Plot Isomap and MDS embeddings of ``X`` next to a location plot.

    The labels are first printed with 1-based numbers. NaNs in ``X`` are
    replaced via ``np.nan_to_num`` before each embedding. The figure has
    three panels: Isomap, MDS, and whatever ``plot_location`` draws.
    """
    print('labels:')
    for number, label in enumerate(labels, start=1):
        print('{}: {}'.format(number, label))

    isomap_points = Isomap().fit(np.nan_to_num(X)).embedding_
    _, (isomap_ax, mds_ax, location_ax) = plt.subplots(1, 3)
    plot_location(labels, location_ax)
    isomap_ax.scatter(isomap_points[:, 0], isomap_points[:, 1], s=20, c='r')
    isomap_ax.set_title('Isomap')
    add_labels(labels, isomap_points, isomap_ax)

    mds_points = MDS().fit(np.nan_to_num(X)).embedding_
    mds_ax.scatter(mds_points[:, 0], mds_points[:, 1], s=20, c='g')
    mds_ax.set_title('MDS')
    add_labels(labels, mds_points, mds_ax)

    plt.show()
Exemplo n.º 28
0
def exec_isomap(X, Y, mmpno):
    """Plot a 2-D Isomap embedding of ``X`` colored by ``Y``, with pair links.

    ``Y`` is rescaled to the 0..1 range and mapped through the RdYlGn
    colormap (reversed). Three figures are shown in turn: an empty 1x8
    figure, the colored scatter plot, and the same scatter plot with a blue
    line between the two points named by each row of ``mmpno``.
    """
    def finish_axes():
        # Shared axis limits and labels, then display the current figure.
        plt.ylim(-20, 20)
        plt.xlim(-20, 20)
        plt.xlabel("z1")
        plt.ylabel("z2")
        plt.show()

    #n_neighbors=20
    reducer = Isomap(n_neighbors=10, n_components=2, eigen_solver='dense')
    X_iso = reducer.fit(X).transform(X)
    y_high = np.max(Y)
    y_low = np.min(Y)
    Y0to1 = (Y - y_low) / (y_high - y_low)

    # An empty narrow figure is opened and shown first; its scatter call is
    # disabled.
    plt.figure(figsize=(1, 8))
    #plt.scatter(, X_iso[:, 1], c=cm.RdYlGn(1-y),s=30)
    plt.show()

    plt.figure(figsize=(8, 8))
    plt.rcParams["font.size"] = 18
    plt.rcParams["font.family"] = "Serif"
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=cm.RdYlGn(1 - Y0to1), s=30)
    finish_axes()

    #plt.figure(figsize=figure.figaspect(1))
    plt.figure(figsize=(8, 8))
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=cm.RdYlGn(1 - Y0to1), s=30)
    for i, (no_a, no_b) in enumerate(zip(mmpno[:, 0], mmpno[:, 1])):
        # Rows whose second index is 3025 or more are reported by position.
        if no_b >= 3025:
            print(i)
        plt.plot([X_iso[no_a, 0], X_iso[no_b, 0]],
                 [X_iso[no_a, 1], X_iso[no_b, 1]],
                 color='blue')
    finish_axes()
Exemplo n.º 29
0
            0.05,
            str(digits.target[i]),
            transform=ax.transAxes,
            color='green')

# Treat each pixel as a feature: the image arrays are flattened so that every
# digit is represented by a length-64 array of pixel values.
X = digits.data
X.shape  # bare expression: only displays a value in an interactive session
y = digits.target
y.shape  # bare expression: only displays a value in an interactive session

# Unsupervised learning: dimensionality reduction with Isomap
from sklearn.manifold import Isomap

# Fit a two-component Isomap on the pixel features and project the same data.
iso = Isomap(n_components=2)
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
data_projected.shape  # bare expression: only displays a value in an interactive session

# Scatter the projection, one discrete color per digit label.
# NOTE(review): plt.cm.get_cmap is deprecated in recent Matplotlib releases —
# confirm against the Matplotlib version in use.
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolor='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap('Spectral', 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)
# Author's observation: generally good separation in parameter space.

# Classification: hold out part of the data with a fixed random seed.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
Exemplo n.º 30
0
# Notebook export; bare expressions such as the next line only display a
# value when run interactively.
bears[:2]

# In[25]:

# Wrap the collected samples in a DataFrame.
bears = pd.DataFrame(bears)

# In[26]:

bears.shape

# Neighborhood size used for the Isomap graph and in the plot title below.
num_neighbors = 6

# In[29]:

# Reduce the samples to three Isomap components.
iso = Isomap(n_components=3, n_neighbors=num_neighbors)
iso.fit(bears)
T = iso.transform(bears)

T.shape

# Name the three components 'a', 'b' and 'c' for plotting.
isodf = pd.DataFrame(T, columns=['a', 'b', 'c'])

isodf.head()

# 2D view: first component against the second.
# `colors` is defined outside this excerpt.
fig1 = plt.figure(figsize=(12, 10))
ax1 = fig1.add_subplot(111)
ax1.set_title("2D projection with {} neighbors".format(num_neighbors))
ax1.scatter(isodf.a, isodf.b, c=colors)

# 3D axes for the three components; the plotting calls are not part of this
# excerpt.
fig2 = plt.figure(figsize=(12, 10))
ax2 = fig2.add_subplot(111, projection='3d')
Exemplo n.º 31
0
# Plot2D arguments, as described by the exercise text:
# title is your chart title
# x is the principal component you want displayed on the x-axis, can be 0 or 1
# y is the principal component you want displayed on the y-axis, can be 1 or 2
#
# Reduce df to three principal components and plot components 1 and 2.
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, "PCA 1 2", 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# NOTE(review): the call below passes indices 1 and 2; if Plot2D indexes
# columns from 0, that is the second and third component rather than the
# first two — confirm.
from sklearn.manifold import Isomap
imap = Isomap(n_neighbors=8, n_components=3)
imap.fit(df)
T2 = imap.transform(df)
Plot2D(T2, "Isomap", 1, 2)
#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# (not implemented in this excerpt)


plt.show()
Exemplo n.º 32
0
    samples.append(img.reshape(-1))

# Wrap the flattened images in a DataFrame.
# NOTE(review): the Isomap below is fitted on the `samples` list, not on
# `df` — confirm which one is intended.
df = pd.DataFrame(samples)

#
# Optional: Resample the image down by a factor of two if you
# have a slower computer. You can also convert the image from
# 0-255  to  0.0-1.0  if you'd like, but that will have no
# effect on the algorithm's results.
#
# (not implemented in this excerpt)
#%%
from sklearn.manifold import Isomap

# Reduce the samples to three components using 6 neighbors per sample.
iso = Isomap(n_neighbors=6, n_components=3)
iso.fit(samples)
T = iso.transform(samples)


def Plot2D(T, title, x, y):
    """Scatter column ``x`` of ``T`` against column ``y`` on a new figure.

    The axes are titled ``title`` and each axis is labelled with the index
    of the component it shows.
    """
    axes = plt.figure().add_subplot(111)
    axes.set_title(title)
    for set_label, component in ((axes.set_xlabel, x), (axes.set_ylabel, y)):
        set_label('Component: {0}'.format(component))
    axes.scatter(T[:, x], T[:, y], marker='.', alpha=0.7)


def Plot3D(T, title, x, y, z):
    # Open a new figure holding a single 3D subplot.
    # NOTE(review): none of the parameters are used by the lines below; the
    # body looks truncated — confirm against the full source.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
Exemplo n.º 33
0
    # Optional overview: summary statistics on top, a slice of the raw data below.
    if basic_plots:
        ax = pp.subplot(2, 1, 1)
        # Plot every row of describe() except the first one.
        train.describe()[1:].plot(legend=False, ax=ax)
        pp.title("Description of training data.")

        ax = pp.subplot(2, 1, 2)
        # NOTE(review): .loc slicing includes its end label, so `:5` may select
        # six columns rather than the five named in the title — confirm.
        train.loc[:,:5].plot(legend=False, ax=ax)
        pp.title("First 5 series plotted.")

        pp.show()

    # Optional PCA: reduce to three components, scatter the first two colored
    # by the target values.
    if do_pca:
        x = train.values
        pca = PCA(n_components=3)
        pca.fit(x)
        y = pca.transform(x)
        # Python 2 print statement.
        print 'Orig shape: ', x.shape, 'New shape: ', y.shape

        pp.scatter(y[:,0], y[:,1], c=target.values)
        pp.show()

    # Optional Isomap: two components from 20 neighbors per sample, plotted
    # the same way.
    if do_isomap:
        x = train.values
        from sklearn.manifold import Isomap
        isomap = Isomap(n_components=2, n_neighbors=20)
        isomap.fit(x)
        y = isomap.transform(x)

        pp.scatter(y[:,0], y[:,1], c=target.values)
        pp.show()
Exemplo n.º 34
0
# Reduce df to three principal components.
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)

# Plot the components at indices 1 and 2.
Plot2D(T, 'chart title', 1,2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# NOTE(review): the Plot2D call below passes indices 1 and 2; if Plot2D
# indexes columns from 0 that is not the first two components — confirm.

from sklearn.manifold import Isomap
# n_neighbors is left at the estimator's default.
im = Isomap(n_components=3)
im.fit(df)
# T is reassigned here, replacing the PCA result.
T = im.transform(df)

Plot2D(T, 'chart title', 1,2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#

# 3D axes with the axis labels set to the component indices.
fig = plt.figure()
ax = fig.add_subplot(111,projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
ax.set_zlabel('2')
Exemplo n.º 35
0
            samples.append(img.reshape(-1))
            color_sample.append('r')
#
# TODO: Convert the list to a dataframe
#
# One DataFrame row per flattened image collected in `samples`.
df_images = pd.DataFrame(samples)
#df_images_t = df_images.transpose()

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to three components, using K=6 for your neighborhood size
#
# Fit on the image DataFrame and project the same data.
iso_bear=Isomap(n_components=3,n_neighbors=6)
iso_bear.fit(df_images)
T_iso_bear = iso_bear.transform(df_images)

#
# TODO: Create a 2D Scatter plot to graph your manifold. You
# can use either 'o' or '.' as your marker. Graph the first two
# isomap components
#
# Components 0 and 1, colored per sample by `color_sample`.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Manifold Scatterplot')
ax.set_xlabel('Component: {0}'.format(0))
ax.set_ylabel('Component: {0}'.format(1))
ax.scatter(T_iso_bear[:,0],T_iso_bear[:,1], marker='.',alpha=0.7, c=color_sample)
Exemplo n.º 36
0
def main():
    """Grid-search an Isomap + polynomial SVC pipeline and score it on the test set.

    Loads ``baseline2.mat``. For every (n_components, n_neighbors) pair an
    Isomap is fitted on the training features, and a polynomial-kernel SVC is
    cross-validated (5 folds, accuracy) over a grid of C / gamma / coef0
    values given as powers of two. The best-scoring combination overall is
    refitted on the embedded training set, its test predictions are written
    to ``predicted_iso.mat``, and the test accuracy plus the per-configuration
    best cross-validation scores are printed.
    """
    from itertools import product

    # Load the dataset from Matlab
    data = sio.loadmat('baseline2.mat')
    n_train = int(data['n_train'])
    n_test = int(data['n_test'])
    train_x = np.array(data['train_x'])
    train_t = np.array(data['train_t']).reshape(n_train)
    test_x = np.array(data['test_x'])
    # Use the size stored in the file instead of a hard-coded 800 so the
    # script keeps working if the test split changes.
    test_t = np.array(data['test_t']).reshape(n_test)

    # Exponents (base 2) searched for the SVC's C, gamma and coef0.
    C = [-10, 5, 10]
    G = [-10, 5, 10]
    CF = [-10, 5, 10]

    # Isomap neighbourhood sizes and output dimensionalities to try.
    NG = [10, 20, 50, 100, 200]
    components = (10, 20, 50, 100, 200)

    # One entry per (components, neighbours) pair, kept index-aligned.
    scores = list()
    svcs = list()
    isos = list()

    for cc in components:
        for nn in NG:
            # Initialised up front so a configuration whose scores never beat
            # -inf (e.g. all NaN) appends None instead of a stale/unbound SVC.
            best_svm = None
            max_score = -np.inf

            # The embedding depends only on (cc, nn); fit it once and reuse it
            # for the whole SVC grid.
            iso = Isomap(n_components=cc, n_neighbors=nn)
            iso.fit(train_x)
            train = iso.transform(train_x)

            # Same visiting order as three nested loops over C, G, CF.
            for c, g, cf in product(C, G, CF):
                svc = svm.SVC(C=2**c, gamma=2**g, coef0=2**cf, degree=3, kernel='poly', max_iter=1000000)
                this_scores = cross_validation.cross_val_score(svc, train, train_t, n_jobs=-1, cv=5, scoring='accuracy')
                mean_score = sum(this_scores)/len(this_scores)

                print("C: "+str(c)+" G: "+str(g)+" CMPS: "+str(cc)+" A: "+str(mean_score) + " CF: " +str(cf) + "N: "+str(nn))

                if mean_score > max_score:
                    max_score = mean_score
                    best_svm = svc

            svcs.append(best_svm)
            isos.append(iso)
            scores.append(max_score)

    # Pick the configuration with the highest cross-validation accuracy.
    m_ind = scores.index(max(scores))
    best_s = svcs[m_ind]
    iso = isos[m_ind]

    # Test final model: cross_val_score works on clones, so the chosen SVC
    # still has to be fitted on the embedded training data.
    test = iso.transform(test_x)
    train = iso.transform(train_x)
    best_s.fit(train, train_t)

    pred = best_s.predict(test)
    sio.savemat('predicted_iso.mat', dict(x=range(n_test), pred_t=pred))

    final_score = best_s.score(test, test_t)
    print(best_s)
    print("Final Accuracy: "+str(final_score))
    print(scores)
Exemplo n.º 37
0
#maxabsscaler = pp.MaxAbsScaler()
#maxabsscaler.fit(X)
#X = maxabsscaler.transform(X)
#print('MaxAbsScaler\n========')

#X = pp.normalize(X)
#print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14

# Target dimensionality; used by the Isomap below (and by the disabled PCA).
nc = 5
#pca = PCA(n_components=nc)
#pca.fit(X)
#X = pca.transform(X)
#print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
# NOTE(review): the embedding is fitted on all of X before the train/test
# split below, so held-out rows influence the transform -- confirm this is
# intended.
nn = 4
im = Isomap(n_neighbors=nn, n_components=nc)
im.fit(X)
X = im.transform(X)
print('Isomap: ',nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7

# 70/30 split of the embedded data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
# findMaxSVC is defined elsewhere and is called without arguments;
# presumably it reads the split variables above from module scope -- verify.
# Its result is indexed with the key 'score'.
result = findMaxSVC()
print(result['score'])
Exemplo n.º 38
0
# Standardise features: statistics are learned on the training split only
# and then applied to both splits.
scaler = preprocessing.StandardScaler() #0.966101694915

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#pcaComponent = 4
#pca = PCA(n_components=pcaComponent)
#pca.fit(X_train)
#X_train = pca.transform(X_train)
#X_test = pca.transform(X_test)

# Isomap fitted on the training split only; both splits are then embedded
# with it.
neighbors = 2
components = 4
isomap = Isomap(n_neighbors=neighbors, n_components=components)
isomap.fit(X_train)
X_train = isomap.transform(X_train)
X_test = isomap.transform(X_test)

#svc = SVC()
#svc.fit(X_train, y_train)
#print svc.score(X_test, y_test)

# Exhaustive search over C (0.05 steps) and gamma (0.001 steps): a new SVC
# is fitted for every pair.
# NOTE(review): each candidate is scored on X_test, so any parameters picked
# from these scores are tuned against the test split -- confirm that is
# acceptable.
# NOTE(review): best_score / best_C / best_gamma are initialised here but not
# updated in the lines visible in this section.
best_score = 0
best_C = 0
best_gamma = 0
for C in np.arange(0.05, 2.05, 0.05):
    for gamma in np.arange(0.001, 1.001, 0.001):
        svc = SVC(C = C, gamma = gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
    return features_train_transformed, lables, vectorizer, selector, le, features

# nFeatures = np.arange(50, 1000, 50)
# Isomap output dimensionalities to try: 20, 40, ..., 180.
nISOMAP = np.arange(20, 200, 20)

data = {}

for k in nISOMAP:

    # NOTE(review): preprocess is called with the same arguments on every
    # iteration; it looks loop-invariant -- confirm it has no side effects
    # before hoisting it.
    features, labels, vectorizer, selector, le, features_data = preprocess("pkl/article_2_people.pkl", "pkl/lable_2_people.pkl")
    # Hold out 10% of the rows with a fixed seed.
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(features, labels, test_size=0.1, random_state=42)

    # Time only the Isomap fit; the transforms below are not included.
    t0 = time()
    iso = Isomap(n_neighbors=15, n_components=k, eigen_solver='auto')
    iso.fit(features_train)
    print ("Dimension Reduction time:", round(time()-t0, 3), "s")


    # Embed both splits with the Isomap fitted on the training split.
    features_train = iso.transform(features_train)
    features_test = iso.transform(features_test)

    # (display name, unfitted estimator) pairs to evaluate on the embedded
    # features.
    for name, clf in [
        ('AdaBoostClassifier', AdaBoostClassifier(algorithm='SAMME.R')),
        ('BernoulliNB', BernoulliNB(alpha=1)),
        ('GaussianNB', GaussianNB()),
        ('DecisionTreeClassifier', DecisionTreeClassifier(min_samples_split=100)),
        ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=50, algorithm='ball_tree')),
        ('RandomForestClassifier', RandomForestClassifier(min_samples_split=100)),
        ('SVC', SVC(kernel='linear', C=1))
    ]:
plt.show()
# -

# Podemos ver que ahora la reducción es distinta a la de PCA. Si bien sigue viéndose un Roll, esta vez podemos apreciar el "ancho" del mismo.
#
# Veamos ahora qué sucede con ISOMAP.
#
# ## ISOMAP
#
# Para ISOMAP va a ser necesario definir el hiper-parámetro <i>n_neighbors</i> que indica la cantidad de vecinos a observar a la hora de construir el grafo. De este valor dependerá en gran parte la proyección resultante.

# +

# Fit a 2-component Isomap with 15 neighbours on X, then embed the same X.
iso = Isomap(n_neighbors=15, n_components=2)
iso.fit(X)
manifold_2Da = iso.transform(X)

# +
# Scatter the two embedding columns on a white 10x10 figure; points are
# coloured by `color` through the Spectral colormap.
fig1 = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig1.add_subplot(1, 1, 1)
ax.set_facecolor('white')
plt.scatter(
    manifold_2Da[:, 0], manifold_2Da[:, 1], c=color, marker='o', cmap=plt.cm.Spectral
)
# plt.scatter(principalComponents[df_train['Survived']==0,0], principalComponents[df_train['Survived']==0,1], color='r', s=10)

plt.show()
# -

# Veamos ahora qué sucede para un valor menor de cantidad de vecinos a observar
Exemplo n.º 41
0
class ClusterPrinter:
    """Draw a 2-D overview of clustered feature vectors and save it to a file.

    Features are read from ``sink_features``, embedded with ``self.reducer``
    and plotted (first two embedding columns) as a scatter plot with one
    convex-hull patch per cluster and a few thumbnail images on top. The
    output path is read from ``sink_filename`` and thumbnails from
    ``sink_image``.
    """

    def __init__(self, num_images=20):
        """Set up the reducer and the three input sinks.

        Args:
            num_images: approximate total number of thumbnails to overlay;
                the budget is divided between clusters with at least one
                thumbnail per cluster.
        """
        # Alternative reducer left from earlier experiments: SpectralEmbedding()
        self.reducer = Isomap()
        self.sink_features = ports.StateSink()
        self.sink_filename = ports.StateSink()
        self.sink_image = ports.StateSink()
        self.num_images = num_images

    def __call__(self, clusters):
        """Plot the given clustering and write the figure to disk.

        Args:
            clusters: object exposing a ``labels_`` array aligned with the
                rows of the features; label ``-1`` marks unclustered rows.

        Returns:
            Always ``None``. Nothing is drawn when ``clusters`` or the
            features are ``None`` or when no row has a label other than -1.
        """
        features = self.sink_features.get()
        if clusters is None or features is None:
            return None
        valid = clusters.labels_ != -1
        view_data = features[valid]
        labels = clusters.labels_
        valid_labels = labels[valid]
        if len(valid_labels) == 0:
            return None

        # Fit the reducer on at most 2000 randomly chosen clustered rows,
        # then embed every row (clustered or not) with it.
        choice = np.random.choice(range(len(valid_labels)),
                                  size=min(2000, len(valid_labels)),
                                  replace=False)
        view_data = self.reducer.fit(view_data[choice, :]).transform(features)
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print(view_data.shape)

        fig, ax = plt.subplots(figsize=(15, 15), dpi=300)

        # NOTE(review): the loops below assume cluster labels are exactly
        # 0 .. num_clusters - 1 -- confirm for the clusterer in use.
        num_clusters = len(set(valid_labels))
        patches = []
        for l in range(num_clusters):
            cluster = view_data[labels == l, :]
            try:
                hull = ConvexHull(cluster)
                patches.append(Polygon(cluster[hull.vertices, :]))
            except Exception:
                # Best effort: a cluster for which no hull can be built
                # (e.g. too few or degenerate points) simply gets no outline.
                # Unlike a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                pass
        p = PatchCollection(patches, cmap=matplotlib.cm.rainbow, alpha=0.4)
        ax.add_collection(p)

        # Unclustered rows in white, clustered rows coloured by label.
        invalid = np.invert(valid)
        plt.scatter(view_data[invalid, 0], view_data[invalid, 1], c='w', s=0.1)
        ax.set_facecolor('black')
        plt.scatter(view_data[valid, 0],
                    view_data[valid, 1],
                    c=valid_labels,
                    s=0.1,
                    cmap='rainbow')

        # Add a few images to the figure: pick up to `imgs_per_label` random
        # rows from every cluster.
        choices = []
        imgs_per_label = max(1, int(self.num_images / num_clusters))
        for l in range(num_clusters):
            cluster_ind = np.where(labels == l)[0]
            choices += np.random.choice(cluster_ind,
                                        size=min(imgs_per_label,
                                                 len(cluster_ind)),
                                        replace=False).tolist()

        # Large square markers behind the thumbnails, coloured by label.
        plt.scatter(view_data[choices, 0],
                    view_data[choices, 1],
                    c=labels[choices],
                    s=180,
                    marker='s',
                    cmap='rainbow')

        # Get the x and y data and transform it into pixel coordinates
        xy_pixels = ax.transData.transform(
            np.vstack([view_data[choices, 0], view_data[choices, 1]]).T)
        xpix, ypix = xy_pixels.T

        for i, c in enumerate(choices):
            img = self.sink_image.get(c)
            if img is None:
                continue
            # Scale so the largest entry of img.shape becomes 50, convert
            # BGR -> RGB and map values to floats by dividing by 255.
            scale = 50.0 / np.max(img.shape)
            img = cv2.cvtColor(cv2.resize(
                img, dsize=(0, 0), fx=scale, fy=scale),
                               code=cv2.COLOR_BGR2RGB).astype(np.float32) / 255
            # Offset by half the 50-pixel target size so the thumbnail is
            # centred on its data point.
            plt.figimage(img,
                         xo=int(xpix[i]) - 25,
                         yo=int(ypix[i]) - 25,
                         zorder=10)

        pylab.savefig(self.sink_filename.get(), dpi=fig.dpi)
        plt.close('all')
Exemplo n.º 42
0
# Per-sample plot colours, appended in step with `samples`.
colors = []
# First image set: scale pixel values by 1/255, flatten each image to a
# vector and tag it blue.
# NOTE(review): `misc` is presumably scipy.misc; `imread` was removed from
# SciPy 1.2 and later -- confirm the pinned SciPy version.
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('b')

# Second image set: lives in the directory whose name is the first one with
# an 'i' appended; same processing, tagged red.
folder += 'i'
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('r')

# One row per flattened image.
df = pd.DataFrame(samples)

# Fit a 3-component Isomap with 6 neighbours and embed the same rows.
iso = Isomap(n_components=3, n_neighbors=6)
iso.fit(df)
T = iso.transform(df)

# 2D view: embedding columns 0 and 1, coloured by image set.
import matplotlib.pyplot as plt
plt.figure()
plt.scatter(T[:, 0], T[:, 1], c=colors)
plt.show()

# 3D view of all three embedding columns.
fig = plt.figure()
ax  = fig.add_subplot(111, projection='3d')

ax.set_title('...')
ax.set_xlabel('component 0')
ax.set_ylabel('component 1')
ax.set_zlabel('component 2')
ax.scatter(T[:, 0], T[:, 1], T[:, 2], c=colors, marker='.', alpha=0.75)
  # decision surface / boundary. In the wild, you'd probably leave in a lot
  # more dimensions, but wouldn't need to plot the boundary; simply checking
  # the results would suffice.
  #
  # Your model should only be trained (fit) against the training data (data_train)
  # Once you've done this, you need to use the model to transform both data_train
  # and data_test from their original high-D image feature space, down to 2D

  #
  # Implement Isomap here. ONLY train against your training data, but
  # transform both your training + test data, storing the results back into
  # data_train, and data_test.
  #
  iso = Isomap(n_neighbors=6, n_components=2)
  print("iso map fit start ")
  iso.fit(data_train)
  print("iso map fit end ")
  data_train = iso.transform(data_train)
  data_test= iso.transform(data_test)




#
# Implement KNeighborsClassifier here. You can use any K value from 1
# through 20, so play around with it and attempt to get good accuracy.
# This is the heart of this assignment: Looking at the 2D points that
# represent your images, along with a list of "answers" or correct class
# labels that those 2d representations should be.
#
for i in range(1,21):
Exemplo n.º 44
0
def isoMap(X, y):
    """Return X embedded onto a single Isomap component.

    The Isomap uses 20 neighbours and the dense eigen-solver; it is fitted
    on X and then applied to the same X. ``y`` is accepted but not used.
    """
    reducer = Isomap(n_neighbors=20, n_components=1, eigen_solver="dense")
    return reducer.fit(X).transform(X)
Exemplo n.º 45
0
    print 'offset1: '  , offset1 
    print 'offset2: '  , offset2 
    
    #HERE structures must have only atoms of selected chain
    TM_align = rcu.TM_aligned_residues(pdb1,pdb2,offset1, offset2)
    
    
    individualjammings1 = np.asarray(get_permutations(nj1['individual'],TM_align['alignedList1']))
    individualjammings2 = np.asarray(get_permutations(nj2['individual'],TM_align['alignedList2']))
    
    PValsScore = scoreFromPvalues(individualjammings1,individualjammings2)
    print 'PValsScore: ', PValsScore
    
    
    clf = Isomap(n_components=2)#Isomap(n_components=2)
    clf.fit(individualjammings1)
    ij1 = clf.transform(individualjammings1)
    ij2 = clf.transform(individualjammings2)
    print ij1
    f, (ax1, ax2,ax3) = pl.subplots(1,3, sharex=True, sharey=True)
    pl.ioff()
    pl.title('ensemble correlation: %.4f'%PValsScore)
    #pl.subplot(1,2,1)
    ax1.scatter(ij1[:,0],ij1[:,1],marker='o',s=45,facecolor='0.6',edgecolor='r')

    #pl.subplot(1,2,2)
    ax2.scatter(ij2[:,0],ij2[:,1],marker='o',s=45,facecolor='0.6',edgecolor='r')
    ax3.scatter(ij2[:,0],ij2[:,1],marker='o',s=25,facecolor='y',edgecolor='0.05',alpha=0.6)
    ax3.scatter(ij1[:,0],ij1[:,1],marker='o',s=25,facecolor='b',edgecolor='0.05',alpha=0.5)
    ax1.axes.get_xaxis().set_visible(False)
    ax2.axes.get_xaxis().set_visible(False)
Exemplo n.º 46
0
# y is the principal component you want displayed on the y-axis, Can be 1 or 2
#
# Reduce `df` to three principal components and plot components 0 and 1.
pca_data =PCA(n_components=3)
pca_data.fit(df)
T_pca = pca_data.transform(df)
Plot2D(T_pca,'PCA Transformed Data PC0VsPC1',0,1)
#Plot2D(T_pca,'PCA Transformed Data PC0VsPC2',0,2)
#Plot2D(T_pca,'PCA Transformed Data PC1VsPC2',1,2)
#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#

# Same reduction with a 3-neighbour Isomap; the result is kept in its own
# variable so the PCA projection stays available.
iso_data = Isomap(n_neighbors=3,n_components=3)
iso_data.fit(df)
T_iso = iso_data.transform(df)
Plot2D(T_iso,'Isomap Transformed Data Ax0VsAx1',0,1)
#Plot2D(T_iso,'Isomap Transformed Data Ax0VsAx2',0,2)
#Plot2D(T_iso,'Isomap Transformed Data Ax1VsAx2',1,2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#

# The 3D plot below is disabled.
#fig = plt.figure()
#ax = fig.add_subplot(111, projection='3d')
#ax.set_xlabel('Principal Component 0')
#ax.set_ylabel('Principal Component 1')
#ax.set_zlabel('Principal Component 2')
Exemplo n.º 47
0
# Load the .mat file:
mat = scipy.io.loadmat('datasets/face_data.mat')
# Get the img data; after the transpose each row is one image:
pics = mat['images'].transpose()
num_images = pics.shape[0]
# Side length of an image, taken as the square root of the row length
# (assumes the images are square -- TODO confirm).
num_pixels = int(np.sqrt(pics.shape[1]))
# Transpose the pictures: each row is viewed as a square image, transposed
# and written back flattened, in place.
for i in range(num_images):
    pics[i, :] = pics[i, :].reshape(num_pixels,
                                    num_pixels).transpose().flatten()
# Load up your face_labels dataset as a series (first CSV column, no header):
labels = pd.read_csv('datasets/face_labels.csv', header=None)[0]
# Do train_test_split (15% held out, fixed seed):
X_train, X_test, Y_train, Y_test = train_test_split(pics,
                                                    labels,
                                                    test_size=.15,
                                                    random_state=7)
# Implement Isomap: fitted on the training split only, then applied to both
# splits.
iso = Isomap(n_components=2, n_neighbors=5)
iso.fit(X_train)
X_train = iso.transform(X_train)
X_test = iso.transform(X_test)
# Implement KNeighborsClassifier:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, Y_train)
# Print the accuracy of the testing set:
print(f"Accuracy: {knn.score(X_test, Y_test)}")
# Plot the decision boundary, the training data and testing images:
plot_2d_boundary(knn, X_train, Y_train, X_test, Y_test)
# Show graph:
plt.show()