def ISOMAP_transform(train_feature, test_feature, n_components, n_neighbors=5):
    """Embed train and test features with an Isomap fitted on the train set.

    The manifold is learned from ``train_feature`` only; ``test_feature`` is
    then projected into that same embedding.

    :param train_feature: samples used to fit the embedding
    :param test_feature: samples projected with the fitted embedding
    :param n_components: number of dimensions of the embedding
    :param n_neighbors: number of neighbors used by Isomap
    :return: tuple ``(train_feature_transformed, test_feature_transformed)``
    """
    from sklearn.manifold import Isomap

    # Hyper-parameters are passed by keyword: recent scikit-learn releases
    # make the Isomap constructor arguments keyword-only, so the previous
    # positional call raised a TypeError there.
    isomap = Isomap(n_neighbors=n_neighbors, n_components=n_components)
    isomap.fit(train_feature)

    train_feature_transformed = isomap.transform(train_feature)
    test_feature_transformed = isomap.transform(test_feature)

    return train_feature_transformed, test_feature_transformed
Exemplo n.º 2
0
def dimension_reduce():
    """Show PCA, spectral and Isomap 2-D embeddings of ``digits`` side by side.

    A second set of figures then displays the PCA mean and the first two
    principal components, each reshaped to 8x8.
    """
    features, targets = digits.data, digits.target

    # The PCA model is kept because its mean/components are plotted below.
    pca = PCA(n_components=2)
    pca.fit(features)

    projections = [
        pca.transform(features),
        SpectralEmbedding().fit_transform(features),
        Isomap(n_components=2, n_neighbors=20).fit(features).transform(features),
    ]

    # One scatter panel per method, coloured by the digit label.
    for panel, points in enumerate(projections, start=1):
        plt.subplot(1, 3, panel)
        plt.scatter(points[:, 0], points[:, 1], c=targets)
    plt.show()

    # Mean image followed by the first and second principal components.
    for vector in (pca.mean_, pca.components_[0], pca.components_[1]):
        plt.matshow(vector.reshape(8, 8))
    plt.show()
def test_isomap():
    """Chained KNeighborsTransformer + precomputed Isomap must match plain Isomap.

    Both estimators are compared on the data they were fitted on and on a
    second, unseen set of blobs.
    """
    n_neighbors = 10
    algorithm = 'auto'

    X_fit, _ = make_blobs(random_state=0)
    X_new, _ = make_blobs(random_state=1)

    # Chained version: the neighbors graph is computed by a separate step.
    knn_step = KNeighborsTransformer(n_neighbors=n_neighbors,
                                     algorithm=algorithm,
                                     mode='distance')
    chained = make_pipeline(
        knn_step,
        Isomap(n_neighbors=n_neighbors, metric='precomputed'))
    # Compact version: Isomap computes the neighbors itself.
    compact = Isomap(n_neighbors=n_neighbors,
                     neighbors_algorithm=algorithm)

    assert_array_almost_equal(chained.fit_transform(X_fit),
                              compact.fit_transform(X_fit))
    assert_array_almost_equal(chained.transform(X_new),
                              compact.transform(X_new))
Exemplo n.º 4
0
def process_sylvine(Xtrain, ytrain, Xval, Xtest):
    """Train ExtraTrees on selected + Isomap features and label val/test.

    A fixed 20-entry boolean mask selects a subset of the input columns. A
    3-component Isomap is fitted on the first six selected training columns;
    its output is appended to the selected columns of every split, and an
    ExtraTrees classifier trained on the result predicts the validation and
    test sets. Progress and elapsed minutes are printed along the way.

    :param Xtrain: training feature matrix (indexed as ``Xtrain[:, mask]``)
    :param ytrain: training labels
    :param Xval: validation feature matrix
    :param Xtest: test feature matrix
    :return: ``(yvalfinal, ytestfinal)`` -- rounded ``predict_proba`` column 1
        for the validation and test sets
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statements used previously.
    print('ITS A SYLVINE TIME')
    print('')

    goods = np.array([False, False, False, False, False, False, True, False, True, True, False, False,
                      False, False, True, True, False, False, False, True])

    Xnewtrain = np.array(Xtrain[:, goods])
    Xnewtest = np.array(Xtest[:, goods])
    Xnewval = np.array(Xval[:, goods])

    t0 = time.time()

    iso = Isomap(n_neighbors=20, n_components=3).fit(Xnewtrain[:, :6])

    print('ISOSTAS !!!')
    print((time.time() - t0) / 60.)

    t0 = time.time()

    Xisotrain = iso.transform(Xnewtrain[:, :6])
    Xisotest = iso.transform(Xnewtest[:, :6])
    Xisoval = iso.transform(Xnewval[:, :6])

    print('ISOSTAS RETURNED !!!')
    print((time.time() - t0) / 60.)

    # Append the embedding to the selected raw columns.
    Xnewtrain = np.hstack((Xnewtrain, Xisotrain))
    Xnewtest = np.hstack((Xnewtest, Xisotest))
    Xnewval = np.hstack((Xnewval, Xisoval))

    modelrf = ExtraTreesClassifier(n_estimators=10000, n_jobs=-1)
    modelrf.fit(Xnewtrain, ytrain)

    # Timer is not reset here, so this also includes the transform time above.
    print('STASON ET DONE')
    print((time.time() - t0) / 60.)

    ytestrf = modelrf.predict_proba(Xnewtest)[:, 1]
    yvalrf = modelrf.predict_proba(Xnewval)[:, 1]

    ytestfinal = np.round(ytestrf)
    yvalfinal = np.round(yvalrf)

    return yvalfinal, ytestfinal
Exemplo n.º 5
0
class FloorplanEstimator:
    """
    Simple estimator for rough floorplans

    Fingerprints are embedded into two dimensions with Isomap; the labelled
    embedding can then be drawn as a coloured map.
    """
    def __init__(self):
        """
        Instantiate floorplan estimator
        """
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        # Training data is kept so draw() can re-embed and colour it.
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Estimate floorplan from labeled fingerprints
        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        :return: this estimator, so that calls can be chained
        """
        self.dimred.fit(fingerprints)
        self._fingerprints = fingerprints
        self._label = label
        return self

    def transform(self, fingerprints):
        """
        Get x,y coordinates of fingerprints on floorplan
        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """
        Draw the estimated floorplan in the current figure

        Uses the fingerprints and labels stored by fit(), so fit() must have
        been called first.
        """
        xy = self.dimred.transform(self._fingerprints)

        # Grid with step 1.0 spanning the bounding box of the embedding.
        x_min, x_max = xy[:, 0].min(), xy[:, 0].max()
        y_min, y_max = xy[:, 1].min(), xy[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        # Every grid point is labelled from the fingerprints within radius
        # 3.0; outlier_label=0 is the label used for outliers.
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
class FloorplanEstimator:
    """
    Simple estimator for rough floorplans

    An Isomap embedding maps fingerprints to two dimensions; draw() renders
    the labelled embedding as a coloured map.
    """
    def __init__(self):
        """
        Instantiate floorplan estimator
        """
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        # Filled in by fit(); read again by draw().
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Estimate floorplan from labeled fingerprints
        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        :return: the estimator itself, which allows ``fit(...).transform(...)``
        """
        self.dimred.fit(fingerprints)
        self._fingerprints = fingerprints
        self._label = label
        return self

    def transform(self, fingerprints):
        """
        Get x,y coordinates of fingerprints on floorplan
        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """
        Draw the estimated floorplan in the current figure

        Relies on the data stored by fit(); call fit() beforehand.
        """
        xy = self.dimred.transform(self._fingerprints)

        # Regular grid (step 1.0) over the extent of the embedded points.
        x_min, x_max = xy[:, 0].min(), xy[:, 0].max()
        y_min, y_max = xy[:, 1].min(), xy[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        # Grid points are classified from fingerprints within radius 3.0;
        # outlier_label=0 is the label assigned to outliers.
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
def df_isomap(df, n_comp=2, n_jobs=1, n_neighbors=5, max_iter=1000):
    """Return the Isomap embedding of a normalized dataframe.

    :param df: input dataframe; it is passed through ``normalize_dataframe``
        before the embedding is fitted
    :param n_comp: number of embedding dimensions
    :param n_jobs: number of parallel jobs handed to Isomap
    :param n_neighbors: number of neighbors used by Isomap
    :param max_iter: ``max_iter`` value handed to Isomap
    :return: a new DataFrame holding the embedded rows (default integer
        index and column labels)
    """
    rd_df = normalize_dataframe(df)
    # n_jobs used to be accepted but never forwarded to the estimator.
    rd = Isomap(n_components=n_comp,
                n_neighbors=n_neighbors,
                max_iter=max_iter,
                n_jobs=n_jobs)
    # Fit on the normalized frame that is transformed below; the previous
    # code fitted on `caracteristicas_df`, a name this function never defines.
    rd.fit(rd_df)
    caracteristicas_rd = rd.transform(rd_df)
    return pd.DataFrame(caracteristicas_rd)
Exemplo n.º 8
0
def iso_map(d: pd.DataFrame):
    """Embed ``d`` in two Isomap components, save them to CSV and return them.

    The result keeps the index of ``d``, names its columns ``comp1`` and
    ``comp2``, and is written to ``ISOMAP_FILE`` with the index included.
    """
    embedding = Isomap(n_components=2, n_jobs=-1).fit(d).transform(d)

    result = pd.DataFrame(embedding, columns=['comp1', 'comp2'], index=d.index)
    result.to_csv(ISOMAP_FILE, index=True)
    return result
Exemplo n.º 9
0
def isomap10FoldClf(X, y, nclf):
    """Score ``nclf`` on 2-D Isomap features with shuffled 10-fold CV.

    For each fold an Isomap (30 neighbors, 2 components) is fitted on the
    training rows only, both splits are projected, ``nclf`` is fitted on the
    projected training rows and its accuracy on the projected test rows is
    recorded.

    :param X: feature matrix, indexable by arrays of row indices
    :param y: labels, indexable the same way
    :param nclf: classifier exposing ``fit`` and ``predict``
    :return: ``(mean accuracy, standard deviation)`` over the folds
    """
    n_neighbors = 30
    acc = []
    # NOTE(review): this is the legacy KFold(n, n_folds=...) call signature,
    # iterated directly -- confirm which KFold the module imports.
    kf = KFold(X.shape[0], n_folds=10, shuffle=True)
    for train_index, test_index in kf:
        yTest = y[test_index]
        yTrain = y[train_index]

        # Keyword arguments: recent scikit-learn makes Isomap's constructor
        # parameters keyword-only, so a positional n_neighbors fails there.
        clf = Isomap(n_neighbors=n_neighbors, n_components=2)
        clf.fit(X[train_index])
        newRepTrain = clf.transform(X[train_index])
        newRepTest = clf.transform(X[test_index])

        nclf.fit(newRepTrain, yTrain)
        XPred = nclf.predict(newRepTest)
        acc.append(np.sum(XPred == yTest) * 1.0 / yTest.shape[0])
    return np.mean(acc), np.std(acc)
Exemplo n.º 10
0
 def _exec_pca(self):
     """Replace training and prediction data by their scaled Isomap embedding.

     The embedding size is one tenth of the number of training rows. Isomap
     and the StandardScaler are fitted on the training data only and then
     applied to the prediction data; both results keep their original index.
     """
     n_dims = int(self._training_data.shape[0] / 10)
     reducer = Isomap(n_components=n_dims)
     scaler = StandardScaler()

     train_embedded = reducer.fit_transform(self._training_data)
     self._training_data = pd.DataFrame(
         scaler.fit_transform(train_embedded),
         index=self._training_data.index)

     pred_embedded = reducer.transform(self._pred_data)
     self._pred_data = pd.DataFrame(
         scaler.transform(pred_embedded),
         index=self._pred_data.index)
Exemplo n.º 11
0
class IsomapClassifier(BaseEstimator):
    """Classifier that clusters an Isomap embedding and maps clusters to targets.

    ``fit`` learns an Isomap embedding, runs KMeans on it and pairs clusters
    with targets via the Munkres assignment computed on the confusion matrix.
    ``predict`` embeds new samples, assigns them to the nearest centroid and
    returns the target paired with that cluster.
    """

    def __init__(self,
                 n_neighbors=5,
                 n_components=2,
                 n_clusters=2,
                 eigen_solver='auto',
                 random_state=3319):
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.n_clusters = n_clusters
        # Was accepted but never stored, so fit() failed with AttributeError
        # when it read self.eigen_solver.
        self.eigen_solver = eigen_solver
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the embedding, the centroids and the cluster -> target mapping.

        :param X: training samples
        :param y: training targets
        :return: this estimator
        """
        # creating a manifold on training data
        self.model = Isomap(n_neighbors=self.n_neighbors,
                            n_components=self.n_components,
                            eigen_solver=self.eigen_solver).fit(X, y)
        # The embedded training set is computed once and reused below.
        embedded = self.model.transform(X)
        # determining centroids for given classes
        self.centroids = KMeans(n_clusters=self.n_clusters,
                                random_state=self.random_state).fit(embedded)
        # Every point is assigned to a certain cluster.
        labels = self.centroids.predict(embedded)
        # assigning each centroid to the correct cluster
        confusion_m = confusion_matrix(y, labels)
        cost_m = make_cost_matrix(confusion_m)
        # (target, cluster) assignment pairs.
        target_cluster = Munkres().compute(cost_m)
        # saving mapping for predictions
        # NOTE(review): `target` is a row index of the confusion matrix;
        # confirm that targets are encoded as 0..n-1 by the callers.
        self.mapping = {cluster: target for target, cluster in target_cluster}
        return self

    def predict(self, X_test):
        """Return the mapped target for each sample of ``X_test`` as a list."""
        # transforming test set using manifold learning method
        X_trans = self.model.transform(X_test)
        # assigning each of the points to the closest centroid
        labels = self.centroids.predict(X_trans)
        y_pred = list(map(self.mapping.get, labels))
        return y_pred
def reduce_features_to_two_dimensions(features):
    '''
    Project ``features.data`` onto two Isomap components (from 784
    dimensions, according to the original note) so that the
    visualize_features function can plot the data in two dimensions.
    '''
    samples = features.data
    reducer = Isomap(n_components=2).fit(samples)
    return reducer.transform(samples)
Exemplo n.º 13
0
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    """Evaluate RBF and linear SVMs on Isomap projections of varying size.

    For every entry of ``comp_range`` an Isomap is fitted on ``X_train`` and
    both splits are projected (the 2-D projections are also saved with
    ``np.save``). Each kernel is then scored through ``SVMmodel.runSVM`` and
    the mean score is recorded. Finally one text report per kernel is
    written, listing every score and the best one.

    :return: ``(rbf_scores, linear_scores)``, one mean score per entry of
        ``comp_range``
    """
    rbf_scores, linear_scores = [], []
    per_kernel = (('rbf', rbf_scores), ('linear', linear_scores))

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = Isomap(n_neighbors=n_neigh,
                             n_components=n_comp,
                             n_jobs=8).fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)
        if n_comp == 2:
            suffix = str(n_neigh)
            np.save('X_train_proj_2d_Isomap_' + suffix, X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + suffix, X_test_proj)
        for kernel, bucket in per_kernel:
            score = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train,
                                    y_test, SVMmodel.getBestParam(kernel),
                                    kernel)
            bucket.append(score.mean())

    for kernel, scores in per_kernel:
        best = np.argmax(scores)
        best_n_comp = comp_range[best]
        best_acc = scores[best]
        report = 'res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt'
        with open(report, 'w') as f:
            for n_comp, acc in zip(comp_range, scores):
                f.write(kernel + ": n_comp = %f, acc = %f\n" % (n_comp, acc))
            f.write(kernel + ": Best n_comp = %f\n" % (best_n_comp))
            f.write(kernel + ": acc = %f\n" % (best_acc))
    return rbf_scores, linear_scores
Exemplo n.º 14
0
def PlotAllMethods(X, k, reducedFeatures):
    """Plot K-means elbow curves for the raw, PCA and Isomap versions of ``X``.

    ``Kmeans`` is run on the original data and on both reduced
    representations; the inertia it returns is handed to ``PlotElbow``.
    """
    pca = PCA(reducedFeatures).fit(X)
    isomap = Isomap(n_components=reducedFeatures, n_neighbors=1).fit(X)

    # Same order as the plots: original, PCA, Isomap.
    representations = [X, pca.transform(X), isomap.transform(X)]

    inertias = []
    for points in representations:
        _labels, inertia = Kmeans(points, k)
        inertias.append(inertia)

    for inertia in inertias:
        PlotElbow(inertia, k)
Exemplo n.º 15
0
    def plot(self,
             n_components=2,
             n_neighbors=5,
             transform="log",
             switch_x=False,
             switch_y=False,
             switch_z=False,
             colors=None,
             max_features=500,
             show_plot=True):
        """Embed the scaled data with Isomap and plot pairs of components.

        :param n_components: number of Isomap components to compute. The
            plots use components 0 and 1, plus component 2 when
            ``n_components >= 3``.
        :param n_neighbors: number of neighbors used by Isomap
        :param transform: forwarded to ``scale_data`` as ``transform_method``;
            can be 'log' or 'anscombe', log is just log10. Counts with zeros
            are set to 1
        :param switch_x: flip the sign of component 0
        :param switch_y: flip the sign of component 1
        :param switch_z: flip the sign of component 2; ignored when fewer
            than three components were computed
        :param colors: forwarded to ``_plot``
        :param max_features: forwarded to ``scale_data``
        :param show_plot: when False, compute the embedding without drawing
        :return: the fitted Isomap instance; the embedding itself is stored
            in ``self.Xr``
        """
        from sklearn.manifold import Isomap

        pylab.clf()

        data, kept = self.scale_data(transform_method=transform,
                                     max_features=max_features)

        iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
        iso.fit(data.T)
        Xr = iso.transform(data.T)
        # The sign flips below are in place, so self.Xr reflects them too.
        self.Xr = Xr

        if switch_x:
            Xr[:, 0] *= -1
        if switch_y:
            Xr[:, 1] *= -1
        # Guarded: with only two components there is no column 2 and the
        # unguarded flip raised IndexError.
        if switch_z and Xr.shape[1] > 2:
            Xr[:, 2] *= -1

        # component 0 vs component 1
        if show_plot:
            pylab.figure(1)
            self._plot(Xr, pca=None, pc1=0, pc2=1, colors=colors)

        if n_components >= 3:
            if show_plot:
                pylab.figure(2)
                self._plot(Xr, pca=None, pc1=0, pc2=2, colors=colors)
                pylab.figure(3)
                self._plot(Xr, pca=None, pc1=1, pc2=2, colors=colors)
        return iso
Exemplo n.º 16
0
class IsomapImpl:
    """Thin wrapper that forwards ``fit`` and ``transform`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        """Store the hyper-parameters and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given; return self."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        model = self._wrapped_model
        return model.transform(X)
Exemplo n.º 17
0
def spectral_embedding(train,
                       val,
                       method,
                       plot_folder,
                       neighbors,
                       classes,
                       dimensions=2):
    """Project the ``'data'`` entry of two datasets with the chosen method.

    ``train`` and ``val`` are copied (``.copy()``) and the ``'data'`` entry
    of each copy is replaced by its embedding. Isomap and the LLE variants
    are fitted on the training data and then applied to both sets; TSNE and
    laplacian eigenmaps are fitted on each set separately. For any other
    ``method`` the data is left as it is. Both results are passed to
    ``visualize_groundTruth`` before being returned.

    :return: ``(projected_train, projected_val)``
    """
    projected_train = train.copy()
    projected_val = val.copy()

    # Estimators fitted on the training data, then applied to both sets.
    fit_then_transform = {
        "Isomap": lambda: Isomap(n_neighbors=neighbors,
                                 n_components=dimensions),
        "LLE": lambda: LocallyLinearEmbedding(n_neighbors=neighbors,
                                              n_components=dimensions),
        "modified_LLE": lambda: LocallyLinearEmbedding(n_neighbors=neighbors,
                                                       n_components=dimensions,
                                                       method="modified"),
        "hessian_LLE": lambda: LocallyLinearEmbedding(n_neighbors=neighbors,
                                                      n_components=dimensions,
                                                      method="hessian"),
    }
    # Estimators fitted independently on each set.
    fit_per_split = {
        "TSNE": lambda: TSNE(n_components=dimensions),
        "laplacian_eigenmaps": lambda: SpectralEmbedding(
            n_components=dimensions),
    }

    if method in fit_then_transform:
        embedding = fit_then_transform[method]().fit(train['data'])
        projected_train['data'] = embedding.transform(train['data'])
        projected_val['data'] = embedding.transform(val['data'])
    elif method in fit_per_split:
        embedding = fit_per_split[method]()
        projected_train['data'] = embedding.fit_transform(train['data'])
        projected_val['data'] = embedding.fit_transform(val['data'])

    for dataset, suffix in ((projected_train, "_training"),
                            (projected_val, "_validation")):
        visualize_groundTruth(dataset, method + suffix, plot_folder, classes)

    return projected_train, projected_val
Exemplo n.º 18
0
    def _exec_pca(self):
        """Replace training and prediction data by their scaled Isomap embedding.

        The embedding size is one tenth of the number of training rows. The
        scaler depends on ``self._scaler_type``: 1 selects StandardScaler,
        2 selects MinMaxScaler, anything else a RobustScaler with the
        (25, 75) quantile range. Isomap and the scaler are fitted on the
        training data only; both results keep their original index.
        """
        reducer = Isomap(n_components=int(self._training_data.shape[0] / 10))

        kind = self._scaler_type
        scaler = (StandardScaler() if kind == 1
                  else MinMaxScaler() if kind == 2
                  else RobustScaler(quantile_range=(25., 75.)))

        train_embedded = reducer.fit_transform(self._training_data)
        self._training_data = pd.DataFrame(
            scaler.fit_transform(train_embedded),
            index=self._training_data.index)

        pred_embedded = reducer.transform(self._pred_data)
        self._pred_data = pd.DataFrame(
            scaler.transform(pred_embedded),
            index=self._pred_data.index)
Exemplo n.º 19
0
def example_04():
    """Embed the digits with Isomap, then train and evaluate a Gaussian NB.

    Prints the accuracy on a held-out split and shows the confusion matrix
    as an annotated heatmap.
    """
    from sklearn.manifold import Isomap
    from sklearn.metrics import accuracy_score, confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    digits = load_digits()
    X, y = digits.data, digits.target

    # Optional preview of the first 100 digit images (disabled):
    # fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks': [], 'yticks': []},
    #                          gridspec_kw=dict(hspace=0.1, wspace=0.1))
    #
    # # axes.flat is a one-dimensional iterator
    # for i, ax in enumerate(axes.flat):
    #     ax.imshow(digits.images[i], cmap='binary')
    #     ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')
    # plt.show()

    # 2-D projection; only consumed by the disabled scatter plot below.
    data_projected = Isomap(n_components=2).fit(X).transform(X)

    # plt.scatter(data_projected[:, 0], data_projected[:, 1], c=y, edgecolors='none', alpha=0.5,
    #             cmap=plt.cm.get_cmap('Spectral', 10))
    # plt.colorbar(label='digit label', ticks=range(10))
    # plt.clim(-0.5, 9.5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    y_model = classifier.predict(X_test)

    # Quantitative score
    print(accuracy_score(y_model, y_test))

    # The accuracy comes out around 85%, but that does not tell where the
    # errors are; the confusion matrix answers that question.
    mat = confusion_matrix(y_test, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=True)
    plt.xlabel('predict value')
    plt.ylabel('True value')
    plt.show()
Exemplo n.º 20
0
def main():
    """Explore the digits dataset: Isomap view, Gaussian NB, error display.

    Shows a 2-D Isomap scatter of all digits, trains a Gaussian naive Bayes
    classifier on a train/test split, prints its accuracy, shows the
    confusion matrix and finally draws the first 100 test images with their
    predicted label (green when correct, red otherwise).
    """
    digits = load_digits()
    print(digits.images.shape)

    # get the 2D representation of the images [n_samples, n_features]
    X = digits.data
    y = digits.target

    # reduce dimensionality
    iso = Isomap(n_components=2)
    iso.fit(X)
    data_prj = iso.transform(X)

    # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib removed.
    plt.scatter(data_prj[:, 0], data_prj[:, 1], c=y,
                edgecolor='none', alpha=0.5, cmap=plt.get_cmap('Accent', 10))

    plt.colorbar(label='digit label', ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)

    # create the model
    model = GaussianNB()
    model.fit(Xtrain, ytrain)
    y_model = model.predict(Xtest)
    # The score used to be computed and thrown away.
    print(accuracy_score(ytest, y_model))

    mat = confusion_matrix(ytest, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=False)
    plt.xlabel('predicted value')
    plt.ylabel('true value')
    plt.show()

    fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                             subplot_kw={'xticks': [], 'yticks': []},
                             gridspec_kw=dict(hspace=0.1, wspace=0.1))

    # Show the *test* images: y_model[i] and ytest[i] describe Xtest[i], not
    # digits.images[i], so the previous grid paired labels with the wrong
    # pictures.
    test_images = Xtest.reshape(-1, 8, 8)
    for i, ax in enumerate(axes.flat):
        ax.imshow(test_images[i], cmap='binary', interpolation='nearest')
        ax.text(0.05, 0.05, str(y_model[i]), transform=ax.transAxes,
                color='green' if (ytest[i] == y_model[i]) else 'red')
    plt.show()
Exemplo n.º 21
0
 def compute_iso_map(self, original_features):
   """Return the min-max normalized Isomap embedding of the feature table.

   The 'file' column is dropped, NaNs are replaced via ``np.nan_to_num``,
   an Isomap with ``self.n_components`` dimensions is fitted on a random
   subset of the rows (``self.ratio`` of them) and then applied to all rows.

   :param original_features: DataFrame with a 'file' column plus the
       feature columns
   :return: array of embedded rows, each component rescaled to [0, 1]
   """
   # drop(..., axis=1).values works on old and new pandas alike; as_matrix()
   # and the positional axis argument were removed from pandas.
   feature_matrix = original_features.drop('file', axis=1).values
   feature_matrix = np.nan_to_num(feature_matrix)

   dimen_reductor = Isomap(n_components=self.n_components)

   full_size = feature_matrix.shape[0]
   train_size = int(self.ratio * full_size)

   # Sample distinct rows: the default (with replacement) put duplicated
   # rows into the training subset.
   feature_training_indices = np.random.choice(full_size, size=train_size,
                                               replace=False)
   training_feature_matrix = feature_matrix[feature_training_indices, :]

   dimen_reductor.fit(training_feature_matrix)
   reduced_features = dimen_reductor.transform(feature_matrix)

   reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
   span = reduced_normalized_features.max(axis=0)
   # A constant component has zero range; leave it at 0 instead of dividing
   # by zero.
   span[span == 0] = 1.0
   reduced_normalized_features /= span

   return reduced_normalized_features
Exemplo n.º 22
0
    def _OnClick3(self, event):
        """Toggle the Isomap switch and run Isomap when it goes from On to Off.

        When ``self.var3`` reads "Off" it is only set to "On". When it reads
        "On" it is set back to "Off" and the data file (``self.dfLabel``) and
        label file (``self.labelVar``) are read, the data is embedded in two
        Isomap components, plotted, saved as 'ISOMAP.png' and exported to
        'ISOMAP_COMPONENTS.xlsx'.

        :param event: the triggering event (unused)
        """
        state = self.var3.get()
        if state == "Off":
            self.var3.set("On")
            return
        if state != "On":
            return

        self.var3.set("Off")
        print("Isomap is running...")
        label = pd.read_csv(self.labelVar, header=None)[0].tolist()
        df = pd.read_csv(self.dfLabel, header=None)

        iso = Isomap(n_components=2)
        iso.fit(df)
        components = pd.DataFrame(iso.transform(df),
                                  columns=['Component 1', 'Component 2'])

        unique = np.unique(label)

        try:
            plt.scatter(components['Component 1'],
                        components['Component 2'],
                        c=label)
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way the bare `except:` did.
            print(
                "data matrix does not match label matrix (Select input file and label, remove headers)"
            )

        # Base name of the saved figure and of the plot title.
        name = 'ISOMAP'

        plt.title(name + " Clusters: " + str(len(unique)))
        plt.savefig(name + ".png")
        plt.show()
        plt.clf()
        # First and second components are exported to Excel here.
        components.to_excel("ISOMAP_COMPONENTS.xlsx")
Exemplo n.º 23
0
class KDEIsomapGen(GenBase):
    """KDE generator working in an Isomap embedding (currently disabled).

    The constructor always raises ``NotImplementedError``: sampling needs to
    map embedded points back to the input space and, per the TODO below,
    Isomap offers no inverse transformation.
    """
    # TODO: Isomap has no inverse transformation, maybe we can solve that in the future.
    # TODO: Look at the work "Inverse Methods for Manifold Learning" by Kathleen Kay
    def __init__(self,
                 kernel="gaussian",
                 bandwidth=0.1,
                 n_components=None,
                 n_neighbors=20):
        """Store the settings, then raise ``NotImplementedError``."""
        super().__init__()
        # Keyword arguments: on scikit-learn versions with keyword-only
        # Isomap parameters a positional n_neighbors raised TypeError here,
        # hiding the intended NotImplementedError.
        self.transformer = Isomap(n_neighbors=n_neighbors,
                                  n_components=n_components)
        self.bandwidth = bandwidth
        self.kernel = kernel
        self.manifold = None
        raise NotImplementedError

    def fit(self, x):
        """Fit the Isomap embedding on ``x`` and a KDE on the embedded data."""
        x_pca = self.transformer.fit_transform(x)
        self.manifold = KDEGen(kernel=self.kernel,
                               bandwidth=self.bandwidth).fit(x_pca)
        return self

    def sample_radius(self,
                      x_exp,
                      n_min_kernels=20,
                      r=None,
                      n_samples=1,
                      random_state=None):
        """Sample around ``x_exp`` in the embedding and map the samples back.

        All keyword arguments are forwarded to the KDE's ``sample_radius``.
        """
        x_exp_pca = self.transformer.transform(x_exp)
        x_sample_pca = self.manifold.sample_radius(x_exp_pca,
                                                   n_min_kernels=n_min_kernels,
                                                   r=r,
                                                   n_samples=n_samples,
                                                   random_state=random_state)
        # Relies on an inverse transform of the embedding (see class TODO).
        x_sample = self.transformer.inverse_transform(x_sample_pca)
        return x_sample

    def sample(self, n_samples=1, random_state=None):
        """Draw samples from the KDE and map them back to the input space."""
        x_sample_pca = self.manifold.sample(n_samples=n_samples,
                                            random_state=random_state)
        # Relies on an inverse transform of the embedding (see class TODO).
        x_sample = self.transformer.inverse_transform(x_sample_pca)
        return x_sample
Exemplo n.º 24
0
# Notebook export: embed `bears` with Isomap and set up 2-D / 3-D figures.
# In[25]:

# Wrap the previously built `bears` object in a DataFrame.
bears = pd.DataFrame(bears)

# In[26]:

# Bare expression left over from the notebook (no effect in a script).
bears.shape

# Neighborhood size used by Isomap and shown in the figure titles.
num_neighbors = 6

# In[29]:

# Fit a 3-component Isomap on the data and project the same data.
iso = Isomap(n_components=3, n_neighbors=num_neighbors)
iso.fit(bears)
T = iso.transform(bears)

# Bare expression left over from the notebook (no effect in a script).
T.shape

# Name the three embedding components 'a', 'b' and 'c'.
isodf = pd.DataFrame(T, columns=['a', 'b', 'c'])

# Bare expression left over from the notebook (no effect in a script).
isodf.head()

# 2-D view: components 'a' and 'b'; `colors` is defined outside this excerpt.
fig1 = plt.figure(figsize=(12, 10))
ax1 = fig1.add_subplot(111)
ax1.set_title("2D projection with {} neighbors".format(num_neighbors))
ax1.scatter(isodf.a, isodf.b, c=colors)

# 3-D axes are prepared here; nothing is drawn on them within this excerpt.
fig2 = plt.figure(figsize=(12, 10))
ax2 = fig2.add_subplot(111, projection='3d')
ax2.set_title("3D projection with {} neighbors".format(num_neighbors))
Exemplo n.º 25
0
# Tabular copy of the samples; the Isomap below is fitted on `samples` itself.
df = pd.DataFrame(samples)

#
# Optional: Resample the image down by a factor of two if you
# have a slower computer. You can also convert the image from
# 0-255  to  0.0-1.0  if you'd like, but that will have no
# effect on the algorithm's results.
#
# (Exercise placeholder -- no resampling is done in this script.)
#%%
from sklearn.manifold import Isomap

# Fit a 3-component Isomap with 6 neighbors and project the same samples.
iso = Isomap(n_neighbors=6, n_components=3)
iso.fit(samples)
T = iso.transform(samples)


def Plot2D(T, title, x, y):
    """Scatter column ``x`` of ``T`` against column ``y`` on a new figure.

    ``title`` becomes the chart title and the axis labels name the two
    component indices.
    """
    axes = plt.figure().add_subplot(111)
    axes.set_title(title)
    axes.set_xlabel('Component: {0}'.format(x))
    axes.set_ylabel('Component: {0}'.format(y))
    axes.scatter(T[:, x], T[:, y], marker='.', alpha=0.7)


def Plot3D(T, title, x, y, z):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title(title)
Exemplo n.º 26
0
# Plot2D arguments (continuation of the exercise notes):
# title is your chart title
# x is the principal component you want displayed on the x-axis, Can be 0 or 1
# y is the principal component you want displayed on the y-axis, Can be 1 or 2
#
# Reduce `df` to three principal components and plot two of them.
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
# NOTE(review): indices 1 and 2 select the second and third components,
# while the exercise text below asks for the first two -- confirm intent.
Plot2D(T, "PCA 1 2", 1, 2)

#
# Exercise text: implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# Isomap with 8 neighbors, three components, fitted and applied to `df`.
from sklearn.manifold import Isomap
imap = Isomap(n_neighbors=8, n_components=3)
imap.fit(df)
T2 = imap.transform(df)
# NOTE(review): same component indices (1, 2) as the PCA plot above.
Plot2D(T2, "Isomap", 1, 2)
#
# Exercise text: if you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# (No 3-D plot is drawn in this script.)


# Display every figure created above.
plt.show()
# Embedding sizes to evaluate: 20, 40, ..., 180.
nISOMAP = np.arange(20, 200, 20)

# One list per classifier name, created the first time the name is seen.
data = {}

for k in nISOMAP:

    # The dataset is reloaded and re-split for every embedding size because
    # features_train / features_test are overwritten by their projections below.
    features, labels, vectorizer, selector, le, features_data = preprocess("pkl/article_2_people.pkl", "pkl/lable_2_people.pkl")
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(features, labels, test_size=0.1, random_state=42)

    t0 = time()
    iso = Isomap(n_neighbors=15, n_components=k, eigen_solver='auto')
    iso.fit(features_train)
    print ("Dimension Reduction time:", round(time()-t0, 3), "s")

    # Project both splits with the embedding learned on the training split.
    features_train = iso.transform(features_train)
    features_test = iso.transform(features_test)

    for name, clf in [
        ('AdaBoostClassifier', AdaBoostClassifier(algorithm='SAMME.R')),
        ('BernoulliNB', BernoulliNB(alpha=1)),
        ('GaussianNB', GaussianNB()),
        ('DecisionTreeClassifier', DecisionTreeClassifier(min_samples_split=100)),
        ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=50, algorithm='ball_tree')),
        ('RandomForestClassifier', RandomForestClassifier(min_samples_split=100)),
        ('SVC', SVC(kernel='linear', C=1))
    ]:

        # dict.has_key() does not exist on Python 3; setdefault creates the
        # list on first use and works on Python 2 as well.
        data.setdefault(name, [])
Exemplo n.º 28
0
def main():
    """Grid-search Isomap + polynomial SVC on 'baseline2.mat' and test the best.

    For every (n_components, n_neighbors) pair an Isomap is fitted on the
    training data and every (C, gamma, coef0) combination of a degree-3
    polynomial SVC is scored with 5-fold cross-validation. The best
    configuration overall is refitted on the embedded training data, its
    test predictions are saved to 'predicted_iso.mat' and its test accuracy
    is printed together with the best score of every pair.
    """
    # Load the dataset from Matlab
    data = sio.loadmat('baseline2.mat')
    n_train = int(data['n_train'])
    n_test = int(data['n_test'])
    train_x = np.array(data['train_x'])
    train_t = np.array(data['train_t']).reshape(n_train)
    test_x = np.array(data['test_x'])
    # Use the size stored in the file instead of a hard-coded 800.
    test_t = np.array(data['test_t']).reshape(n_test)

    # SVM hyper-parameter grids, expressed as exponents of 2
    C = [-10, 5, 10]
    G = [-10, 5, 10]
    CF = [-10, 5, 10]

    # Isomap grids: neighborhood sizes and embedding dimensions
    NG = [10, 20, 50, 100, 200]
    components = (10, 20, 50, 100, 200)
    scores = list()
    svcs = list()
    isos = list()

    for cc in components:
        for nn in NG:
            # Reset per pair so a stale model from an earlier pair can never
            # be recorded for this one.
            best_svm = None
            best_iso = None
            max_score = -np.inf

            iso = Isomap(n_components=cc, n_neighbors=nn)
            iso.fit(train_x)
            train = iso.transform(train_x)

            for c in C:
                for g in G:
                    for cf in CF:
                        # Find best C, gamma, coef0
                        svc = svm.SVC(C=2**c, gamma=2**g, coef0=2**cf, degree=3, kernel='poly', max_iter=1000000)
                        this_scores = cross_validation.cross_val_score(svc, train, train_t, n_jobs=-1, cv=5, scoring='accuracy')
                        mean_score = sum(this_scores)/len(this_scores)

                        print("C: "+str(c)+" G: "+str(g)+" CMPS: "+str(cc)+" A: "+str(mean_score) + " CF: " +str(cf) + "N: "+str(nn))

                        if mean_score > max_score:
                            max_score = mean_score
                            best_svm = svc
                            best_iso = iso
            svcs.append(best_svm)
            isos.append(best_iso)
            scores.append(max_score)

    m_ind = scores.index(max(scores))
    best_s = svcs[m_ind]
    iso = isos[m_ind]

    # Test final model
    test = iso.transform(test_x)
    train = iso.transform(train_x)
    best_s.fit(train, train_t)

    pred = best_s.predict(test)
    sio.savemat('predicted_iso.mat', dict(x=np.arange(n_test), pred_t=pred))

    final_score = best_s.score(test, test_t)
    print(best_s)
    print("Final Accuracy: "+str(final_score))
    print(scores)
Exemplo n.º 29
0
# Script: classify face images with Isomap (2 components) + k-nearest neighbors.
# Load the .mat file:
mat = scipy.io.loadmat('datasets/face_data.mat')
# Get the img data; after the transpose each row is indexed as one picture:
pics = mat['images'].transpose()
num_images = pics.shape[0]
# Side length of a picture, which is reshaped below to a square
# (num_pixels x num_pixels):
num_pixels = int(np.sqrt(pics.shape[1]))
# Transpose the pictures: each row is reshaped to a square, transposed and
# flattened back in place.
for i in range(num_images):
    pics[i, :] = pics[i, :].reshape(num_pixels,
                                    num_pixels).transpose().flatten()
# Load up your face_labels dataset as a series (first column of the CSV):
labels = pd.read_csv('datasets/face_labels.csv', header=None)[0]
# Do train_test_split (15% of the samples held out, fixed seed):
X_train, X_test, Y_train, Y_test = train_test_split(pics,
                                                    labels,
                                                    test_size=.15,
                                                    random_state=7)
# Implement Isomap: fitted on the training split only, then applied to both
# splits.
iso = Isomap(n_components=2, n_neighbors=5)
iso.fit(X_train)
X_train = iso.transform(X_train)
X_test = iso.transform(X_test)
# Implement KNeighborsClassifier on the embedded training data:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, Y_train)
# Print the accuracy of the testing set:
print(f"Accuracy: {knn.score(X_test, Y_test)}")
# Plot the decision boundary, the training data and testing images:
plot_2d_boundary(knn, X_train, Y_train, X_test, Y_test)
# Show graph:
plt.show()
Exemplo n.º 30
0
# Script fragment: `folder`, `samples` and `colors` are defined before this
# excerpt.
# NOTE(review): `misc` is presumably scipy.misc, whose imread was removed in
# newer SciPy releases -- confirm against the imports.
# First folder: every image is scaled by 1/255, flattened and coloured blue.
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('b')

# Second folder: same path with an 'i' appended; its images are coloured red.
folder += 'i'
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('r')

# One row per image.
df = pd.DataFrame(samples)

# Embed all images in three Isomap components (6 neighbors).
iso = Isomap(n_components=3, n_neighbors=6)
iso.fit(df)
T = iso.transform(df)

# 2-D view of components 0 and 1, coloured by source folder.
import matplotlib.pyplot as plt
plt.figure()
plt.scatter(T[:, 0], T[:, 1], c=colors)
plt.show()

# 3-D view of all three components.
fig = plt.figure()
ax  = fig.add_subplot(111, projection='3d')

ax.set_title('...')
ax.set_xlabel('component 0')
ax.set_ylabel('component 1')
ax.set_zlabel('component 2')
ax.scatter(T[:, 0], T[:, 1], T[:, 2], c=colors, marker='.', alpha=0.75)
plt.show()
Exemplo n.º 31
0
    if basic_plots:
        ax = pp.subplot(2, 1, 1)
        train.describe()[1:].plot(legend=False, ax=ax)
        pp.title("Description of training data.")

        ax = pp.subplot(2, 1, 2)
        train.loc[:,:5].plot(legend=False, ax=ax)
        pp.title("First 5 series plotted.")

        pp.show()

    if do_pca:
        x = train.values
        pca = PCA(n_components=3)
        pca.fit(x)
        y = pca.transform(x)
        print 'Orig shape: ', x.shape, 'New shape: ', y.shape

        pp.scatter(y[:,0], y[:,1], c=target.values)
        pp.show()

    if do_isomap:
        x = train.values
        from sklearn.manifold import Isomap
        isomap = Isomap(n_components=2, n_neighbors=20)
        isomap.fit(x)
        y = isomap.transform(x)

        pp.scatter(y[:,0], y[:,1], c=target.values)
        pp.show()
Exemplo n.º 32
0
class CardiotocographyMainFrame(Tk.Frame):
    """Tk frame for exploring a cardiotocography classifier.

    The frame stacks, top to bottom: an "optimize" button with a status
    label, a 2-D Isomap scatter plot of the training data, three read-only
    style probability entries, and one slider per input feature. Moving a
    slider re-runs the evaluator on the slider values and re-plots the
    resulting point on the scatter plot.
    """

    def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, console):
        """Build the widgets and train the initial model.

        Args:
            master: parent Tk widget.
            x_train: training features; indexed as ``x_train[:, i]`` and
                queried for ``shape[1]``, so a 2-D NumPy-style array is
                expected.
            y_train: training labels, also used to colour the scatter plot.
            x_test: test features, only forwarded to ``evaluator.load_data``.
            y_test: test labels, only forwarded to ``evaluator.load_data``.
            evaluator: object providing ``load_data``, ``train``, ``reduce``,
                ``predict`` and a ``pipeline`` attribute.
            console: object providing ``output(prefix, text)`` for logging.
        """
        Tk.Frame.__init__(self, master)
        self.evaluator = evaluator
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.new_estimator = None
        self.console = console
        self.evaluator.load_data(x_train, y_train, x_test, y_test)
        self.evaluator.train()
        self.x_train_r = self.evaluator.reduce(x_train)  # feature dimensionality reduction

        # 0. Optimize button
        self.button_opt = Tk.Button(self, text="优化", command=self.optimize_parameter)
        self.button_opt.pack(side=Tk.TOP, anchor=Tk.E)
        self.label_tips = Tk.Label(self)
        self.label_tips.pack(side=Tk.TOP, anchor=Tk.E)

        # 1. Scatter plot
        frame_train = Tk.Frame(self)
        frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)
        self.figure_train = Figure(figsize=(5, 4), dpi=100)
        self.subplot_train = self.figure_train.add_subplot(111)
        self.subplot_train.set_title('Cardiotocography High-Dimension Data Visualization (21-dim)')
        self.figure_train.tight_layout()  # must come after add_subplot, otherwise it crashes
        self.last_line = None

        # Despite the attribute name, the embedding model is an Isomap.
        self.tsne = Isomap(n_components=2, n_neighbors=10)
        np.set_printoptions(suppress=True)
        x_train_r = self.tsne.fit_transform(x_train)
        self.subplot_train.scatter(x_train_r[:, 0], x_train_r[:, 1], c=y_train, cmap=plt.cm.get_cmap("Paired"))
        self.attach_figure(self.figure_train, frame_train)

        # Log the initial classifier and its accuracy on the training data.
        y_pred = self.evaluator.pipeline.predict(x_train)
        accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)

        self.console.output("[CTG] INIT MODEL: ", str(self.evaluator.pipeline.named_steps['clf']) + "\n")
        self.console.output("[CTG] INIT ACCURACY: ", str(accuracy) + "\n")

        # 2. Probability output boxes
        frame_prob = Tk.Frame(self)
        frame_prob.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        Tk.Label(frame_prob, text="prob").pack(side=Tk.LEFT)
        self.strvar_prob1 = Tk.StringVar()
        Tk.Label(frame_prob, text="1.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob1, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        self.strvar_prob2 = Tk.StringVar()
        Tk.Label(frame_prob, text="2.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob2, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        self.strvar_prob3 = Tk.StringVar()
        Tk.Label(frame_prob, text="3.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob3, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)

        # 3. Sliders, placed on a scrollable canvas
        frame_slides = Tk.Frame(self)
        frame_slides.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        canv = Tk.Canvas(frame_slides, relief=Tk.SUNKEN)
        vbar = Tk.Scrollbar(frame_slides, command=canv.yview)
        canv.config(scrollregion=(0, 0, 300, 1500))
        canv.config(yscrollcommand=vbar.set)
        vbar.pack(side=Tk.RIGHT, fill=Tk.Y)
        canv.pack(side=Tk.LEFT, expand=Tk.YES, fill=Tk.BOTH)
        feature_num = x_train.shape[1]
        self.slides = [None] * feature_num  # one slider per feature
        for i in range(feature_num):
            canv.create_window(60, (i + 1) * 40, window=Tk.Label(canv, text=str(i + 1) + ". "))
            # Each slider spans the observed range of its feature in 100 steps.
            min_x = np.min(x_train[:, i])
            max_x = np.max(x_train[:, i])
            self.slides[i] = Tk.Scale(canv, from_=min_x, to=max_x, resolution=(max_x - min_x) / 100.0,
                                      orient=Tk.HORIZONTAL, command=self.predict)
            canv.create_window(200, (i + 1) * 40, window=self.slides[i])

    # Compute the class-membership probabilities from the current feature values
    def predict(self, trivial):
        """Refresh the probability fields and the plotted point.

        Reads every slider, passes the resulting single-row feature array to
        ``self.evaluator.predict`` and shows the first three columns of the
        result as percentages. ``trivial`` is the argument supplied by the
        slider callback and is not used.
        """
        feature_num = self.x_train.shape[1]
        # float32 row used as a buffer; every entry is overwritten below.
        x = np.arange(feature_num, dtype='f').reshape((1, feature_num))
        for i in range(feature_num):
            x[0, i] = float(self.slides[i].get())
        result = self.evaluator.predict(x)
        self.strvar_prob1.set("%.2f%%" % (result[0, 0] * 100))  # probability of being healthy
        self.strvar_prob2.set("%.2f%%" % (result[0, 1] * 100))  # probability of being suspect
        self.strvar_prob3.set("%.2f%%" % (result[0, 2] * 100))  # probability of a confirmed diagnosis
        self.plot_point(self.subplot_train, self.tsne.transform(x))
        self.figure_train.canvas.draw()

    # Redraw the point
    def plot_point(self, subplot, x):
        """Replace the previously drawn "case" marker with one at ``x``.

        ``x`` is indexed as ``x[:, 0]`` / ``x[:, 1]``, i.e. 2-D coordinates.
        """
        if self.last_line is not None:
            self.last_line.remove()
            del self.last_line
        lines = subplot.plot(x[:, 0], x[:, 1], "ro", label="case")
        self.last_line = lines.pop(0)
        subplot.legend(loc='lower right')

    # Attach the figure to the frame
    @staticmethod
    def attach_figure(figure, frame):
        """Embed ``figure`` and a navigation toolbar into ``frame``.

        NOTE(review): ``canvas.show()`` and ``NavigationToolbar2TkAgg`` look
        like the pre-3.0 Matplotlib Tk API (later ``draw()`` and
        ``NavigationToolbar2Tk``) -- confirm against the installed version.
        """
        canvas = FigureCanvasTkAgg(figure, master=frame)  # embed the scatter plot in the UI
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        toolbar = NavigationToolbar2TkAgg(canvas, frame)  # embed the scatter plot toolbar in the UI
        toolbar.update()
        canvas.tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)

    # Search for the best parameters
    def optimize_parameter(self):
        """Search the candidate models for one that beats the current pipeline.

        Compares the 10-fold cross-validated accuracy of the current pipeline
        with the best randomized-search score over
        ``RandomParameterSettings.possible_models``. When a better model is
        found it is stored in ``self.new_estimator`` and the button is
        re-labelled and re-wired to ``apply_new_estimator``.

        NOTE(review): ``cross_validation`` is presumably the old
        ``sklearn.cross_validation`` module (superseded by
        ``sklearn.model_selection``) -- confirm against the installed version.
        """

        self.console.output("[CTG] OPTIMIZATION START...", "\n")

        # Cross-validated accuracy of the old (i.e. initial) model
        old_scores = cross_validation.cross_val_score(estimator=self.evaluator.pipeline, X=self.x_train, y=self.y_train,
                                                      scoring='accuracy',
                                                      cv=10, n_jobs=-1)
        old_score = np.mean(old_scores)

        # Best cross-validated accuracy among the new candidate models
        new_score = -1.0
        self.new_estimator = None
        for clf, param_grid in RandomParameterSettings.possible_models:
            self.console.output("[CTG] SEARCH MODEL:", str(clf) + "\n")
            estimator = Pipeline([('scl', StandardScaler()), ('pca', PCA()), ('clf', clf)])
            gs = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, scoring='accuracy', cv=10,
                                    n_jobs=-1)
            gs = gs.fit(self.x_train, self.y_train)
            if new_score < gs.best_score_:
                new_score = gs.best_score_
                self.new_estimator = gs.best_estimator_

        if new_score > old_score:
            # Improvement is reported relative to the old score.
            self.label_tips.config(
                text='Found a new model with improvement: %.2f%%' % (100.0 * (new_score - old_score) / old_score))
            self.button_opt.config(text='应用', command=self.apply_new_estimator)
        else:
            self.label_tips.config(text="No better model founded.")

        self.console.output("[CTG] OPTIMIZATION COMPLETE !", "\n")
        self.console.output("[CTG] RESULT: ", "old_model_accuracy=%f, new_model_accuracy=%f, improvement=%.2f%%\n" % (
        old_score, new_score, (100.0 * (new_score - old_score) / old_score)) + "\n")

    def apply_new_estimator(self):
        """Replace the evaluator's pipeline with the model found by the search."""
        self.console.output("[CTG] APPLY NEW MODEL:",
                            "old_model=%s \n new_model=%s\n" % (self.evaluator.pipeline, self.new_estimator))
        self.evaluator.pipeline = self.new_estimator
        self.label_tips.config(text="New model has been applied.")
Exemplo n.º 33
0
from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Embed the digits data in two Isomap dimensions and plot the result,
    # coloured by digit label.
    digits = load_digits()
    X = digits.data
    y = digits.target
    print('feature data shape:', X.shape)
    iso = Isomap(n_components=2)
    # The estimator's class name doubles as the figure title below.
    iso_name = iso.__class__.__name__
    # Fit and transform the same array; `X` is `digits.data`.
    iso.fit(X)
    data_projected = iso.transform(X)
    print('project data to 2D:', data_projected.shape)
    project_1 = data_projected[:, 0]
    project_2 = data_projected[:, 1]
    plt.figure(iso_name)
    plt.scatter(project_1,
                project_2,
                c=y,
                edgecolor='none',
                alpha=0.5,
                cmap='jet')
    # One colour-bar tick per digit; the limits centre each tick on its colour band.
    plt.colorbar(label='digit label', ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()
Exemplo n.º 34
0
def isoMap(X, y):
	"""Project ``X`` onto a single Isomap component.

	An Isomap model (20 neighbours, dense eigen-solver) is fitted on ``X``
	and the same data is then passed through ``transform``. ``y`` is
	accepted but not used.
	"""
	model = Isomap(n_neighbors=20, eigen_solver="dense", n_components=1)
	return model.fit(X).transform(X)
Exemplo n.º 35
0
from sklearn.manifold import Isomap
from sklearn.decomposition import PCA
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb


# Embed the comma-separated data with a default-configured Isomap and plot
# the first two embedding coordinates.
data = np.genfromtxt('data012.txt', delimiter=',')
isomap = Isomap()
data_xformed = isomap.fit_transform(data)
# pca = PCA(n_components=2)
# data_xformed = pca.fit_transform(data)
# Single-argument print(...) gives the same output on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(data.shape)
print(data_xformed.shape)
# One colour per row: 1000 red, 1000 green, 1000 yellow, so the scatter call
# expects the input to have exactly 3000 rows.
c = [(1,0,0)]*1000+[(0,1,0)]*1000+[(1,1,0)]*1000
plt.figure()
plt.scatter(data_xformed[:,0], data_xformed[:,1], c=c)
plt.show()
# NOTE(review): quit() ends the script here, so the train/test export below
# never runs. Remove this call if the export is wanted.
quit()

# Fit a 4-component Isomap on the training file, project the testing file
# through the same model and save both embeddings as CSV.
train_data = np.genfromtxt('training.txt', delimiter=',')
isomap = Isomap(n_components=4)
train_xformed = isomap.fit_transform(train_data)
test_data = np.genfromtxt('testing.txt', delimiter=',')
test_xformed = isomap.transform(test_data)
np.savetxt("isomap_training_reduced4.txt", train_xformed, delimiter=',')
np.savetxt("isomap_testing_reduced4.txt", test_xformed, delimiter=',')
Exemplo n.º 36
0
            str(digits.target[i]),
            transform=ax.transAxes,
            color='green')

#Treat each pixel as a feature - flatten out the array so we have length-64 array of pixel values representing each digit
# The bare `.shape` expressions below have no effect when this runs as a
# script; they only display a value in an interactive session.
X = digits.data
X.shape
y = digits.target
y.shape

#Unsupervised learning: Dimensionality reduction - Isomap
from sklearn.manifold import Isomap

# Fit a 2-component Isomap on the digit features and embed the same data.
iso = Isomap(n_components=2)
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
data_projected.shape

# Scatter the two embedding coordinates, coloured by digit label with a
# 10-colour 'Spectral' map; one colour-bar tick per digit.
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolor='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap('Spectral', 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)
#generally good separation in parameter space

#classification
# Split the original (un-embedded) features and labels with a fixed seed.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
#Gaussian naive Bayes
Exemplo n.º 37
0
            color_sample.append('r')
#
# TODO: Convert the list to a dataframe
#
# .. your code here .. 
# One DataFrame row per entry of `samples`.
df_images = pd.DataFrame(samples)
#df_images_t = df_images.transpose()

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to three components, using K=6 for your neighborhood size
#
# .. your code here .. 
# Fit on the image frame, then embed the same rows in three dimensions.
iso_bear=Isomap(n_components=3,n_neighbors=6)
iso_bear.fit(df_images)
T_iso_bear = iso_bear.transform(df_images)

#
# TODO: Create a 2D Scatter plot to graph your manifold. You
# can use either 'o' or '.' as your marker. Graph the first two
# isomap components
#
# .. your code here .. 
# Plot component 0 against component 1, one colour per sample taken from
# `color_sample`.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Manifold Scatterplot')
ax.set_xlabel('Component: {0}'.format(0))
ax.set_ylabel('Component: {0}'.format(1))
ax.scatter(T_iso_bear[:,0],T_iso_bear[:,1], marker='.',alpha=0.7, c=color_sample)

plt.show()
# -

# Podemos ver que ahora la reducción es distinta a la de PCA. Si bien sigue viéndose un Roll, esta vez podemos apreciar el "ancho" del mismo.
#
# Veamos ahora qué sucede con ISOMAP
#
# ## ISOMAP
#
# Para ISOMAP va a ser necesario definir el hiper-parámetro <i>n_neighbors</i> que indica la cantidad de vecinos a observar a la hora de construir el grafo. De este valor dependerá en gran parte la proyección resultante.

# +

# Fit a 2-component Isomap with 15 neighbours on X and embed the same data.
iso = Isomap(n_neighbors=15, n_components=2)
iso.fit(X)
manifold_2Da = iso.transform(X)

# +
# Scatter the two embedding coordinates on a white 10x10 figure, coloured by
# the values in `color` through the Spectral colour map.
fig1 = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig1.add_subplot(1, 1, 1)
ax.set_facecolor('white')
plt.scatter(
    manifold_2Da[:, 0], manifold_2Da[:, 1], c=color, marker='o', cmap=plt.cm.Spectral
)
# plt.scatter(principalComponents[df_train['Survived']==0,0], principalComponents[df_train['Survived']==0,1], color='r', s=10)

plt.show()
# -

# Veamos ahora qué sucede para un valor menor de cantidad de vecinos a observar
  # the results would suffice.
  #
  # Your model should only be trained (fit) against the training data (data_train)
  # Once you've done this, you need use the model to transform both data_train
  # and data_test from their original high-D image feature space, down to 2D

  #
  # Implement Isomap here. ONLY train against your training data, but
  # transform both your training + test data, storing the results back into
  # data_train, and data_test.
  #
  # Fit Isomap (6 neighbours, 2 components) on the training split only, then
  # project both splits through the fitted model. The two prints bracket the
  # fit call so its start and end are visible on the console.
  iso = Isomap(n_neighbors=6, n_components=2)
  print("iso map fit start ")
  iso.fit(data_train)
  print("iso map fit end ")
  data_train = iso.transform(data_train)
  data_test= iso.transform(data_test)




#
# Implement KNeighborsClassifier here. You can use any K value from 1
# through 20, so play around with it and attempt to get good accuracy.
# This is the heart of this assignment: Looking at the 2D points that
# represent your images, along with a list of "answers" or correct class
# labels that those 2d representations should be.
#
for i in range(1,21):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(data_train, label_train)
Exemplo n.º 40
0
# `pca` is created before this excerpt; fit it on df and project df through it.
pca.fit(df)
T = pca.transform(df)

# Scatter PCA components 1 and 2 against each other.
Plot2D(T, 'chart title', 1,2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..

from sklearn.manifold import Isomap
# Fit a 3-component Isomap on df and embed the same rows.
im = Isomap(n_components=3)
im.fit(df)
T = im.transform(df)

# The task above asks for the first two components, i.e. columns 0 and 1
# (the previous call passed 1 and 2, which skips the first component).
# NOTE(review): assumes Plot2D's x/y arguments are zero-based column
# indices into T -- confirm against its definition.
Plot2D(T, 'chart title', 0, 1)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..

# Prepare 3-D axes labelled with the component indices.
fig = plt.figure()
ax = fig.add_subplot(111,projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
ax.set_zlabel('2')
Exemplo n.º 41
0
# Two 2-D views (components 0/1 and 1/2) and one 3-D view of the transformed
# data in T, titled as the PCA result.
Plot2D(T, title='PCA 2D', x=0, y=1, num_to_plot=40)
Plot2D(T, title='PCA 2D', x=1, y=2, num_to_plot=40)

Plot3D(T, title='PCA 3D', x=0, y=1, z=2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap

# Fit a 3-component Isomap (8 neighbours) on df and embed the same rows.
iso = Isomap(n_neighbors=8, n_components=3)
iso.fit(df)
T_iso = iso.transform(df)
# Same pair of 2-D views as above, now for the Isomap embedding.
Plot2D(T_iso, title='ISO 3D', x=0, y=1, num_to_plot=40)
Plot2D(T_iso, title='ISO 3D', x=1, y=2, num_to_plot=40)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..

# Axes3D is imported but not referenced by name in the visible code.
# NOTE(review): presumably imported for its side effect of enabling 3-D
# projections in older Matplotlib -- confirm it is still needed.
from mpl_toolkits.mplot3d import Axes3D

Plot3D(T_iso, title='ISO 3D', x=0, y=1, z=2)

plt.show()
Exemplo n.º 42
0
# In[ ]:

# Isomap

from sklearn.manifold import Isomap

# Embedding settings: 5 neighbours, 10 output components, 4 parallel jobs.
n_neighbors = 5
n_components = 10
n_jobs = 4

isomap = Isomap(n_neighbors=n_neighbors,
                n_components=n_components,
                n_jobs=n_jobs)

# Fit on a subset of the training rows only, then project every training row.
# NOTE(review): `.loc[0:5000, :]` slices by label and includes both end
# points -- confirm that X_train's index makes this the intended subset.
isomap.fit(X_train.loc[0:5000, :])
X_train_isomap = isomap.transform(X_train)
X_train_isomap = pd.DataFrame(data=X_train_isomap, index=train_index)

# Project the validation rows through the same fitted model.
X_validation_isomap = isomap.transform(X_validation)
X_validation_isomap = pd.DataFrame(data=X_validation_isomap,
                                   index=validation_index)

scatterPlot(X_train_isomap, y_train, "Isomap")

# In[ ]:

# Multidimensional Scaling
from sklearn.manifold import MDS

# Settings for the MDS cell; these rebind n_components used above.
n_components = 2
n_init = 12
Exemplo n.º 43
0
    print 'offset2: '  , offset2 
    
    #HERE structures must have only atoms of selected chain
    TM_align = rcu.TM_aligned_residues(pdb1,pdb2,offset1, offset2)
    
    
    individualjammings1 = np.asarray(get_permutations(nj1['individual'],TM_align['alignedList1']))
    individualjammings2 = np.asarray(get_permutations(nj2['individual'],TM_align['alignedList2']))
    
    PValsScore = scoreFromPvalues(individualjammings1,individualjammings2)
    print 'PValsScore: ', PValsScore
    
    
    clf = Isomap(n_components=2)#Isomap(n_components=2)
    clf.fit(individualjammings1)
    ij1 = clf.transform(individualjammings1)
    ij2 = clf.transform(individualjammings2)
    print ij1
    f, (ax1, ax2,ax3) = pl.subplots(1,3, sharex=True, sharey=True)
    pl.ioff()
    pl.title('ensemble correlation: %.4f'%PValsScore)
    #pl.subplot(1,2,1)
    ax1.scatter(ij1[:,0],ij1[:,1],marker='o',s=45,facecolor='0.6',edgecolor='r')

    #pl.subplot(1,2,2)
    ax2.scatter(ij2[:,0],ij2[:,1],marker='o',s=45,facecolor='0.6',edgecolor='r')
    ax3.scatter(ij2[:,0],ij2[:,1],marker='o',s=25,facecolor='y',edgecolor='0.05',alpha=0.6)
    ax3.scatter(ij1[:,0],ij1[:,1],marker='o',s=25,facecolor='b',edgecolor='0.05',alpha=0.5)
    ax1.axes.get_xaxis().set_visible(False)
    ax2.axes.get_xaxis().set_visible(False)
    ax3.axes.get_xaxis().set_visible(False)
Exemplo n.º 44
0
# Fit the scaler (created before this excerpt) on the training split only and
# apply it to both splits.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#pcaComponent = 4
#pca = PCA(n_components=pcaComponent)
#pca.fit(X_train)
#X_train = pca.transform(X_train)
#X_test = pca.transform(X_test)

# Reduce to 4 Isomap components using 2 neighbours; as with the scaler, the
# model is fitted on the training split and applied to both splits.
neighbors = 2
components = 4
isomap = Isomap(n_neighbors=neighbors, n_components=components)
isomap.fit(X_train)
X_train = isomap.transform(X_train)
X_test = isomap.transform(X_test)

#svc = SVC()
#svc.fit(X_train, y_train)
#print svc.score(X_test, y_test)

# Running best score and the C / gamma values that produced it, updated by
# the parameter sweep that follows.
best_score = 0
best_C = 0
best_gamma = 0
for C in np.arange(0.05, 2.05, 0.05):
    for gamma in np.arange(0.001, 1.001, 0.001):
        svc = SVC(C = C, gamma = gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
        if score > best_score:
Exemplo n.º 45
0
#maxabsscaler = pp.MaxAbsScaler()
#maxabsscaler.fit(X)
#X = maxabsscaler.transform(X)
#print('MaxAbsScaler\n========')

#X = pp.normalize(X)
#print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14

# Number of output components, shared by the disabled PCA code and Isomap.
nc = 5
#pca = PCA(n_components=nc)
#pca.fit(X)
#X = pca.transform(X)
#print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
# NOTE(review): Isomap is fitted on all of X before the train/test split
# below, so the test rows influence the embedding -- confirm this is intended.
nn = 4
im = Isomap(n_neighbors=nn, n_components=nc)
im.fit(X)
X = im.transform(X)
print('Isomap: ',nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
# findMaxSVC is defined outside this excerpt; its result is indexed by 'score'.
result = findMaxSVC()
print(result['score'])
Exemplo n.º 46
0
def getIso(X, neighbs):
    """Return the 2-component Isomap projection of ``X``.

    The model is fitted on ``X`` using ``neighbs`` neighbours and the same
    data is then passed through ``transform``.
    """
    model = Isomap(n_neighbors=neighbs, n_components=2)
    model.fit(X)
    return model.transform(X)
Exemplo n.º 47
0
#
# Fit a 3-component PCA on df, project df through it and plot the first two
# components.
pca_data =PCA(n_components=3)
pca_data.fit(df)
T_pca = pca_data.transform(df)
Plot2D(T_pca,'PCA Transformed Data PC0VsPC1',0,1)
#Plot2D(T_pca,'PCA Transformed Data PC0VsPC2',0,2)
#Plot2D(T_pca,'PCA Transformed Data PC1VsPC2',1,2)
#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#

# Same steps with a 3-component Isomap built from 3 neighbours.
iso_data = Isomap(n_neighbors=3,n_components=3)
iso_data.fit(df)
T_iso = iso_data.transform(df)
Plot2D(T_iso,'Isomap Transformed Data Ax0VsAx1',0,1)
#Plot2D(T_iso,'Isomap Transformed Data Ax0VsAx2',0,2)
#Plot2D(T_iso,'Isomap Transformed Data Ax1VsAx2',1,2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#

#fig = plt.figure()
#ax = fig.add_subplot(111, projection='3d')
#ax.set_xlabel('Principal Component 0')
#ax.set_ylabel('Principal Component 1')
#ax.set_zlabel('Principal Component 2')
#ax.scatter(T_pca[:,0], T_pca[:,1], T_pca[:,2], c='r', marker='.')