Python Manifold 예제들, yellowbrick.features.Manifold Python 예제들

예제 #1

0

파일 보기

파일: gallery.py 프로젝트: tylerhuntington222/yellowbrick

def manifold(dataset, manifold):
    """Fit a Manifold visualizer on a named dataset and save the figure.

    ``dataset`` must be ``"concrete"`` or ``"occupancy"``; any other value
    raises ``ValueError``. ``manifold`` is forwarded to the ``Manifold``
    visualizer and is also embedded in the name handed to ``savefig``.
    """
    # Reject unsupported names up front, before any data is loaded.
    if dataset not in ("concrete", "occupancy"):
        raise ValueError("unknown dataset")

    # Each loader is unpacked into a features/target pair.
    loader = load_concrete if dataset == "concrete" else load_occupancy
    X, y = loader()

    visualizer = Manifold(manifold=manifold, ax=newfig())
    visualizer.fit_transform(X, y)
    savefig(visualizer, "{}_{}_manifold".format(dataset, manifold))

예제 #2

0

파일 보기

파일: visualization_functions.py 프로젝트: davidfgold/runtimeDiagnostics

def plot_IsoMap(objectives, n_neighbors, ax, name):
    """Draw an Isomap embedding of ``objectives`` on ``ax`` as a single class.

    Every row receives the same dummy label (0) and ``name`` is passed as
    the only class label, so all points are drawn as one class.
    """
    # One zero per row of ``objectives``: a constant target vector.
    single_class = np.zeros(len(objectives))

    isomap_options = dict(
        manifold='isomap',
        n_neighbors=n_neighbors,
        classes=[name],
        ax=ax,
    )
    embedding = Manifold(**isomap_options)
    embedding.fit_transform(objectives, single_class)
    embedding.show()

예제 #3

0

파일 보기

파일: user_methods.py 프로젝트: IeAT-ASPIDE/Event-Detection-Engine

    def manifold_embeding(data,
                          name=name,
                          location=location,
                          target=target,
                          manifold=manifold,
                          n_neighbors=n_neighbors):
        """Plot a manifold embedding of ``data`` coloured by ``target`` and save it.

        Parameters
        ----------
        data : table indexed by column name (``data[target]``,
            ``data.drop([...], axis=1)``); it is no longer modified in place.
        name : used in the output file name.
        location : directory the PNG is written to.
        target : name of the label column in ``data``.
        manifold : manifold method forwarded to ``Manifold``.
        n_neighbors : forwarded to ``Manifold``.

        The image is written to ``<location>/Manifold_<manifold>_<name>.png``.
        """
        # Replace integer 0 with the string "0" on a copy of the label column;
        # presumably this avoids mixed int/str labels in the encoder - confirm.
        # The caller's frame is left untouched.
        labels = data[target].replace(0, "0")

        le = preprocessing.LabelEncoder()
        y = le.fit_transform(labels)
        # LabelEncoder numbers classes in sorted order, so take the display
        # names from ``classes_`` to keep them aligned with the encoded values
        # (``unique()`` returns order of appearance, which can differ).
        classes = list(le.classes_)

        features = data.drop([target], axis=1)
        ax = plt.axes()
        visualizer = Manifold(classes=classes,
                              manifold=manifold,
                              n_neighbors=n_neighbors,
                              ax=ax)
        visualizer.fit_transform(features, y)
        plot_name = f"Manifold_{manifold}_{name}.png"
        visualizer.show(outpath=os.path.join(location, plot_name))
        plt.close()

예제 #4

0

파일 보기

파일: visualization_functions.py 프로젝트: davidfgold/runtimeDiagnostics

def plot_MDS(objectives, ax, name):
    """Draw an MDS embedding of ``objectives`` on ``ax`` as a single class.

    All rows share one dummy label (0); ``name`` is the only class label
    handed to the visualizer.
    """
    # Constant target vector, one entry per row of ``objectives``.
    constant_target = np.zeros(len(objectives))

    mds_plot = Manifold(ax=ax, classes=[name], manifold='mds')
    mds_plot.fit_transform(objectives, constant_target)
    mds_plot.show()

예제 #5

0

파일 보기

# Imports are placed before their first use in this section; previously
# ``pd`` was referenced before ``import pandas as pd`` was executed.
import pandas as pd
from sklearn import preprocessing
from yellowbrick.features import Manifold

# Pearson correlation ranking over the first 50 columns.
visualizer = Rank2D(algorithm='pearson')
visualizer.fit(dfrad.iloc[:, :50],
               dfrad['activities'])  # Fit the data to the visualizer
visualizer.transform(dfrad.iloc[:, :50])  # Transform the data
visualizer.show()  # Finalize and render the figure

# MANIFOLD - not balanced
classes = [1, 0]
# The label encoder maps the 'activities' labels to integer codes.
label_encoder = preprocessing.LabelEncoder()
dfrad['activities'] = label_encoder.fit_transform(
    dfrad['activities'])  # Encode labels in place
dfrad['activities'].unique()  # inspection only; the result is not used
viz = Manifold(manifold="tsne", classes=classes)  # Instantiate the visualizer
viz.fit_transform(dfrad.iloc[:, :100],
                  dfrad['activities'])  # Fit the data to the visualizer
viz.show()  # Finalize and render the figure

# =============================================================================
# #CLASS BALANCE - Balanced (DO NOT USE. Draft)
# =============================================================================
#m2_train_s_bk_bal #dataframe
#m2_test_s_bk_bal #dataframe
#ai_train_rav_bal = np.ravel(y_rus)
#ai_test_rav_bal = np.ravel(y_rus2)
y_rus_df = pd.DataFrame(y_rus)

frames_bal = [m2_train_s_bk_bal, y_rus_df]

예제 #6

0

파일 보기

파일: app.py 프로젝트: L0xGames/MLV

def get_plots():
    """Render the diagnostic plots as HTML fragments.

    Uses names resolved outside this function (``X_train``, ``y_train``,
    ``X_test``, ``y_test``, ``model_def``, ``Enc``, ``ML_ALG_nr``, ``app``).

    A t-SNE manifold plot is always produced. When ``ML_ALG_nr == 1``
    (classification) a class prediction error plot and a confusion matrix
    are added, provided the class names can be decoded. When
    ``ML_ALG_nr == 0`` (regression) a prediction error plot and a residuals
    plot are added.

    Returns
    -------
    list of str
        One HTML fragment per plot. The list is returned for every value of
        ``ML_ALG_nr``, so the manifold plot is never discarded.
    """
    all_plots = []

    def _collect_current_figure(title):
        """Append the current pyplot figure as titled HTML, then clear it."""
        figure_html = mpld3.fig_to_html(plt.gcf())
        all_plots.append("<h4 align='center'>" + title + "</h4>" +
                         figure_html)
        plt.clf()

    # FEATURE visualization
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    viz.fit_transform(X_train, y_train)
    _collect_current_figure("Manifold Visualization")

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes; without them the class-based
        # plots are skipped.
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Class prediction error, using a fresh decision tree.
            visualizer = ClassPredictionError(DecisionTreeClassifier(),
                                              classes=classes)
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            _collect_current_figure("Class Prediction Error")

            # The ConfusionMatrix visualizer takes a model. Fit fits the
            # passed model; score runs predict() on the test data and
            # builds the confusion matrix from it.
            cm = ConfusionMatrix(model_def, classes=classes)
            cm.fit(X_train, y_train)
            cm.score(X_test, y_test)
            _collect_current_figure("Confusion Matrix")

    elif ML_ALG_nr == 0:
        # regression

        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _collect_current_figure("Prediction Error Plot")

        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _collect_current_figure("Residuals Plot")

    return all_plots

예제 #7

0

파일 보기

def visualize_features(classes, problem_type, curdir, default_features,
                       balance_data, test_size):
    """Write clustering, feature-ranking and model-selection plots to disk.

    The function changes into ``curdir`` and (re)creates a
    ``visualization_session`` folder there; an existing folder of that name
    is deleted first. PNG files are written into the sub-folders
    ``clustering``, ``feature_ranking`` and ``model_selection``. The process
    working directory is changed and is left at the session folder on return.

    Parameters
    ----------
    classes : class names; forwarded to ``get_features`` and used as the
        class labels of the visualizers.
    problem_type, default_features, balance_data : forwarded unchanged to
        ``get_features``.
    curdir : directory in which the session folder is created.
    test_size : forwarded to ``train_test_split``.

    Returns
    -------
    str
        Always the empty string.
    """
    features, feature_labels, class_labels = get_features(
        classes, problem_type, default_features, balance_data)

    # Encode the class labels as integers for the plots below.
    os.chdir(curdir)
    le = preprocessing.LabelEncoder()
    le.fit(class_labels)
    tclass_labels = le.transform(class_labels)

    # Standardized features, used for the ranking plots.
    se = preprocessing.StandardScaler()
    t_features = se.fit_transform(features)

    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        tclass_labels,
                                                        test_size=test_size,
                                                        random_state=42)

    # Built once and reused by every visualizer that takes the raw features.
    feature_matrix = np.array(features)
    # NOTE(review): a set has no defined order, so class names may not line
    # up with the encoded labels in legends - confirm whether an ordered
    # list (e.g. le.classes_) is wanted here.
    class_names = set(classes)

    def _save_manifold(method):
        """Fit a Manifold embedding with ``method`` and save '<method>.png'."""
        plt.figure()
        embedding = Manifold(manifold=method, classes=class_names)
        embedding.fit_transform(feature_matrix, tclass_labels)
        embedding.poof(outpath=method + ".png")
        plt.close()

    ##################################
    # Session folder (start from a clean one)
    ##################################
    visualization_dir = 'visualization_session'
    try:
        os.mkdir(visualization_dir)
    except FileExistsError:
        # Only an already-existing folder is replaced; other errors surface.
        shutil.rmtree(visualization_dir)
        os.mkdir(visualization_dir)
    os.chdir(visualization_dir)

    ##################################
    # Counts per class (quick plot)
    ##################################
    objects = tuple(set(class_labels))
    y_pos = np.arange(len(objects))
    performance = [class_labels.count(label) for label in objects]

    plt.bar(y_pos, performance, align='center', alpha=0.5)
    plt.xticks(y_pos, objects)
    plt.xticks(rotation=90)
    plt.title('Counts per class')
    plt.ylabel('Count')
    plt.xlabel('Class')
    plt.tight_layout()
    plt.savefig('classes.png')
    plt.close()

    # From here on ``curdir`` is the session folder.
    curdir = os.getcwd()

    ##################################
    # CLUSTERING
    ##################################
    # Manifold type options:
    #   "lle"      Locally Linear Embedding: many local linear decompositions
    #              that preserve globally non-linear structure.
    #   "ltsa"     Local tangent space alignment; similar to LLE.
    #   "hessian"  Hessian LLE: hessian-based quadratic form per neighborhood.
    #   "modified" Modified LLE: LLE with a regularization parameter.
    #   "isomap"   Embedding that maintains geometric distances.
    #   "mds"      Multi-dimensional scaling based on similarity.
    #   "spectral" Spectral embedding using a graph representation.
    #   "tsne"     t-SNE (default): similarities converted to probabilities.
    os.mkdir('clustering')
    os.chdir('clustering')

    _save_manifold("tsne")

    # PCA
    plt.figure()
    visualizer = PCADecomposition(scale=True, classes=class_names)
    visualizer.fit_transform(feature_matrix, tclass_labels)
    visualizer.poof(outpath="pca.png")
    plt.close()

    # "ltsa" and "hessian" are not generated. Each remaining method is run
    # once (the spectral embedding used to be computed and saved twice).
    for method in ("spectral", "lle", "modified", "isomap", "mds"):
        _save_manifold(method)

    # UMAP embedding
    # NOTE(review): this one is fitted on the raw ``class_labels`` rather
    # than the encoded ``tclass_labels`` - kept as is, confirm intent.
    plt.figure()
    umap_viz = UMAPVisualizer(metric='cosine',
                              classes=class_names,
                              title="UMAP embedding")
    umap_viz.fit_transform(feature_matrix, class_labels)
    umap_viz.poof(outpath="umap.png")
    plt.close()

    #################################
    # FEATURE RANKING
    #################################
    os.chdir(curdir)
    os.mkdir('feature_ranking')
    os.chdir('feature_ranking')

    # Feature importances from an ExtraTrees model; the 20 largest are drawn.
    plt.figure(figsize=(12, 12))
    model = ExtraTreesClassifier()
    model.fit(feature_matrix, tclass_labels)
    feat_importances = pd.Series(model.feature_importances_,
                                 index=feature_labels[0])
    top_importances = feat_importances.nlargest(20)
    top_importances.plot(kind='barh')
    # Only this title was ever visible: an earlier plt.title call was
    # immediately overwritten by it and has been dropped.
    plt.title('Feature importances with %s features' % (str(len(features[0]))))
    plt.tight_layout()
    plt.savefig('feature_importance.png')
    plt.close()

    # Restrict both the standardized and the raw features to the top 20.
    selectedlabels = list(dict(top_importances))
    new_features, new_labels = restructure_features(selectedlabels, t_features,
                                                    feature_labels[0])
    new_features_, new_labels_ = restructure_features(selectedlabels, features,
                                                      feature_labels[0])

    # Shapiro rank algorithm (1D)
    # The title is passed to the visualizer so it ends up in the saved file;
    # setting it with plt.title after poof() had no effect on the image.
    plt.figure(figsize=(28, 12))
    visualizer = Rank1D(algorithm='shapiro',
                        classes=class_names,
                        features=new_labels,
                        title='Shapiro plot (top 20 features)')
    visualizer.fit(np.array(new_features), tclass_labels)
    visualizer.transform(np.array(new_features))
    visualizer.poof(outpath="shapiro.png")
    plt.close()

    # Pearson ranking algorithm (2D)
    plt.figure(figsize=(12, 12))
    visualizer = Rank2D(algorithm='pearson',
                        classes=class_names,
                        features=new_labels,
                        title='Pearson ranking plot (top 20 features)')
    visualizer.fit(np.array(new_features), tclass_labels)
    visualizer.transform(np.array(new_features))
    plt.tight_layout()
    visualizer.poof(outpath="pearson.png")
    plt.close()

    # Feature importances of the top 20 features with Lasso
    plt.figure(figsize=(12, 12))
    viz = FeatureImportances(Lasso(), labels=new_labels_)
    viz.fit(np.array(new_features_), tclass_labels)
    plt.tight_layout()
    viz.poof(outpath="lasso.png")
    plt.close()

    # Correlation heatmaps, before and after dropping correlated features
    # https://towardsdatascience.com/feature-selection-correlation-and-p-value-da8921bfb3cf
    data = new_features
    corr = data.corr()

    plt.figure(figsize=(12, 12))
    heatmap_fig = sns.heatmap(corr).get_figure()
    plt.title('Heatmap with correlated features (top 20 features)', size=16)
    heatmap_fig.tight_layout()
    heatmap_fig.savefig('heatmap.png')
    plt.close(heatmap_fig)

    # Drop the later column of every pair whose correlation is >= 0.9.
    n_columns = corr.shape[0]
    columns = np.full((n_columns, ), True, dtype=bool)
    for i in range(n_columns):
        for j in range(i + 1, n_columns):
            if corr.iloc[i, j] >= 0.9:
                columns[j] = False
    selected_columns = data.columns[columns]
    data = data[selected_columns]
    corr = data.corr()

    plt.figure(figsize=(12, 12))
    heatmap_fig = sns.heatmap(corr).get_figure()
    plt.title('Heatmap without correlated features (top 20 features)', size=16)
    heatmap_fig.tight_layout()
    heatmap_fig.savefig('heatmap_clean.png')
    plt.close(heatmap_fig)

    # RadViz
    # NOTE(review): show() after poof(outpath=...) renders the figure a
    # second time; kept, but confirm it is wanted in a batch run.
    plt.figure(figsize=(12, 12))
    visualizer = RadViz(classes=classes, features=new_labels)
    visualizer.fit(np.array(new_features), tclass_labels)
    visualizer.transform(np.array(new_features))
    visualizer.poof(outpath="radviz.png")
    visualizer.show()
    plt.close()

    # Feature correlation plot
    plt.figure(figsize=(28, 12))
    visualizer = feature_correlation(np.array(new_features),
                                     tclass_labels,
                                     labels=new_labels)
    visualizer.poof(outpath="correlation.png")
    visualizer.show()
    plt.tight_layout()
    plt.close()

    # Violin plot per feature (raw values)
    os.mkdir('feature_plots')
    os.chdir('feature_plots')

    newdata = new_features_
    newdata['classes'] = class_labels  # adds a column to new_features_

    for j, label in enumerate(new_labels_):
        violin_fig = sns.violinplot(x=newdata['classes'],
                                    y=newdata[label]).get_figure()
        violin_fig.tight_layout()
        violin_fig.savefig('%s_%s.png' % (str(j), label))
        plt.close(violin_fig)

    # Violin plot per feature (standardized values)
    # NOTE(review): this folder is created inside 'feature_plots', not next
    # to it - kept as is, confirm the intended layout.
    os.mkdir('feature_plots_transformed')
    os.chdir('feature_plots_transformed')

    newdata = new_features
    newdata['classes'] = class_labels  # adds a column to new_features

    for j, label in enumerate(new_labels):
        violin_fig = sns.violinplot(x=newdata['classes'],
                                    y=newdata[label]).get_figure()
        violin_fig.tight_layout()
        violin_fig.savefig('%s_%s.png' % (str(j), label))
        plt.close(violin_fig)

    ##################################################
    # MODEL SELECTION
    ##################################################
    os.chdir(curdir)
    os.mkdir('model_selection')
    os.chdir('model_selection')

    # NOTE(review): 'precision-recall.png' is written again further down,
    # which overwrites this file.
    plt.figure()
    visualizer = precision_recall_curve(GaussianNB(), feature_matrix,
                                        tclass_labels)
    visualizer.poof(outpath="precision-recall.png")
    plt.close()

    plt.figure()
    visualizer = roc_auc(LogisticRegression(), feature_matrix, tclass_labels)
    visualizer.poof(outpath="roc_curve_train.png")
    plt.close()

    plt.figure()
    visualizer = discrimination_threshold(
        LogisticRegression(multi_class="auto", solver="liblinear"),
        feature_matrix, tclass_labels)
    visualizer.poof(outpath="thresholds.png")
    plt.close()

    plt.figure()
    visualizer = residuals_plot(Ridge(),
                                feature_matrix,
                                tclass_labels,
                                train_color="maroon",
                                test_color="gold")
    visualizer.poof(outpath="residuals.png")
    plt.close()

    plt.figure()
    visualizer = prediction_error(Lasso(), feature_matrix, tclass_labels)
    visualizer.poof(outpath='prediction_error.png')
    plt.close()

    # Outlier detection
    plt.figure()
    visualizer = cooks_distance(feature_matrix,
                                tclass_labels,
                                draw_threshold=True,
                                linefmt="C0-",
                                markerfmt=",")
    visualizer.poof(outpath='outliers.png')
    plt.close()

    # Cluster numbers (one cluster per encoded class)
    n_clusters = len(set(tclass_labels))
    plt.figure()
    visualizer = silhouette_visualizer(KMeans(n_clusters, random_state=42),
                                       feature_matrix)
    visualizer.poof(outpath='siloutte.png')
    plt.close()

    # Cluster distance
    plt.figure()
    visualizer = intercluster_distance(KMeans(n_clusters, random_state=777),
                                       feature_matrix)
    visualizer.poof(outpath='cluster_distance.png')
    plt.close()

    # Cross-validated score as a function of the percentile of features kept.
    # ``features`` is rebound to its min-max scaled version from here on.
    features = preprocessing.MinMaxScaler().fit_transform(features)
    clf = Pipeline([('anova', SelectPercentile(chi2)),
                    ('scaler', StandardScaler()),
                    ('logr', LogisticRegression())])
    score_means = list()
    score_stds = list()
    percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100)

    for percentile in percentiles:
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, np.array(features), class_labels)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title(
        'Performance of the LogisticRegression-Anova varying the percent features selected'
    )
    plt.xticks(np.linspace(0, 100, 11, endpoint=True))
    plt.xlabel('Percentile')
    plt.ylabel('Accuracy Score')
    plt.axis('tight')
    plt.savefig('logr_percentile_plot.png')
    plt.close()

    # PCA explained variance
    pca = PCA(random_state=1)
    pca.fit(X_train)
    skplt.decomposition.plot_pca_component_variance(pca)
    plt.savefig('pca_explained_variance.png')
    plt.close()

    # Learning curve
    rf = RandomForestClassifier()
    skplt.estimators.plot_learning_curve(rf, X_train, y_train)
    plt.title('Learning Curve (Random Forest)')
    plt.savefig('learning_curve.png')
    plt.close()

    # Elbow plot
    kmeans = KMeans(random_state=1)
    skplt.cluster.plot_elbow_curve(kmeans,
                                   X_train,
                                   cluster_ranges=range(1, 30),
                                   title='Elbow plot (KMeans clustering)')
    plt.savefig('elbow.png')
    plt.close()

    # KS statistic (intended for 2 classes only)
    # NOTE(review): there is no guard for the multi-class case here.
    lr = LogisticRegression()
    lr = lr.fit(X_train, y_train)
    y_probas = lr.predict_proba(X_test)
    skplt.metrics.plot_ks_statistic(y_test, y_probas)
    plt.savefig('ks.png')
    plt.close()

    # Precision-recall on the held-out split
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    y_probas = nb.predict_proba(X_test)
    skplt.metrics.plot_precision_recall(y_test, y_probas)
    plt.tight_layout()
    plt.savefig('precision-recall.png')
    plt.close()

    # Calibration curves for a set of untuned classifiers
    calibration_models = [
        ('Random Forest', RandomForestClassifier()),
        ('Logistic Regression', LogisticRegression()),
        ('Gaussian NB', GaussianNB()),
        ('Decision Tree', DecisionTreeClassifier(random_state=0)),
        ('Adaboost', AdaBoostClassifier(n_estimators=100)),
        ('Gradient Boost',
         GradientBoostingClassifier(n_estimators=100,
                                    learning_rate=1.0,
                                    max_depth=1,
                                    random_state=0)),
        ('KNN', KNeighborsClassifier(n_neighbors=7)),
    ]
    clf_names = [model_name for model_name, _ in calibration_models]
    probas_list = [
        estimator.fit(X_train, y_train).predict_proba(X_test)
        for _, estimator in calibration_models
    ]

    skplt.metrics.plot_calibration_curve(y_test, probas_list, clf_names)
    # Lay out before saving so the adjusted layout is what gets written.
    plt.tight_layout()
    plt.savefig('calibration.png')
    plt.close()

    # Pick classifier type by ROC (without optimization), using the
    # probability column at index 1 of each model.
    probs = [probas[:, 1] for probas in probas_list]
    plot_roc_curve(y_test, probs, clf_names)
    # more elaborate ROC example with CV = 5 fold
    # https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html#sphx-glr-auto-examples-model-selection-plot-roc-crossval-py

    os.chdir(curdir)

    return ''

예제 #8

0

파일 보기

# Features: the first 55 columns; target: the column at position 55.
X = df.iloc[:, :55].astype(float)
y = df.iloc[:, 55].astype(float)

# Distribution plot for the first feature column only.
for col in X.columns[:1]:
    print(col)
    sns.distplot(X[col])
    png_name = 'dist' + col + '.png'
    plt.savefig(fname=png_name, format='png')
    plt.show()

# Pairwise feature ranking.
visualizer = rank2d(X)
visualizer.show()

#-------------------------------------------
# Isomap embedding with a continuous target.
viz = Manifold(manifold="isomap", n_neighbors=20, target_type="continuous")

viz.fit_transform(X, y)
viz.show()

# Hold out 20% of the rows for scoring.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Residuals of a ridge regression: fit on the training split, score on the
# test split, then render.
model = Ridge()
visualizer = ResidualsPlot(model)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

from sklearn.gaussian_process import GaussianProcessRegressor

예제 #9

0

파일 보기

파일: PlotDistribution.py 프로젝트: adamnemecek/VAEChordEstimation

import numpy as np
import util as U
import matplotlib.pyplot as plt
from chainer.functions import gumbel_softmax
from librosa.display import specshow
import chord
from yellowbrick.features import Manifold

# Fixed seed; every np.random call below draws from this seeded state.
np.random.seed(10)
idx_rand = np.random.permutation(220)
fold = np.load("folds_320files.npy")[0][:5]
idx_billboard = np.load("idx_non_billboard.npy") + 320
dset = D.ChordDataset([855, 472, 1003])

model = net_generative.GenerativeChordnet()
model.load("chromavae_beta_anneal.model")

# Switch the configuration to inference mode (no training, no backprop).
config.train = False
config.enable_backprop = False

# Latent vectors for the first 512 entries of the first dataset item.
feat, labs, aligns = dset[0]
z = model.generator.getzs([feat[:512]], [labs[aligns[:512]]])[0].data

# Standard-normal samples with the same shape, for comparison.
z_random = np.random.normal(0, 1, size=z.shape)

# Stack both point sets; label 0 marks model latents, label 1 random draws.
visualizer = Manifold(manifold="tsne")
stacked_points = np.concatenate((z, z_random))
origin_flags = np.concatenate((np.zeros(len(z)), np.ones(len(z_random))))
visualizer.fit_transform(stacked_points, y=origin_flags.astype(np.int32))
visualizer.poof()

예제 #10

0

파일 보기

visualizer.fit(X[features], y)  # Fit the data to the visualizer
visualizer.show()  # Finalize and render the figure

#%%
from yellowbrick.features import JointPlotVisualizer

visualizer = JointPlotVisualizer()

visualizer.fit_transform(X["grade"], y)  # Fit and transform the data
visualizer.show()  # Finalize and render the figure

#%%
from yellowbrick.features import Manifold

viz = Manifold(manifold="tsne", classes=class_labels)

viz.fit_transform(X[features], y)  # Fit the data to the visualizer
viz.show()  # Finalize and render the figure

#%%
from yellowbrick.features import Rank2D

visualizer = Rank2D(algorithm='pearson')

# Rank the selected features together with the "eventdeath" column; the
# column list is built once and used for both fit and transform.
rank_columns = np.append(features, ["eventdeath"])
visualizer.fit(df[rank_columns], y)  # Fit the data to the visualizer
visualizer.transform(df[rank_columns])  # Transform the data
visualizer.show()  # Finalize and render the figure

#%%
# Import from the public namespace; ``scipy.stats.stats`` is deprecated.
from scipy.stats import pearsonr