Example #1
def test():
    X, y = load_data(return_X_y=True)

    nb = GaussianNB()
    nb.fit(X, y)
    probas = nb.predict_proba(X)
    plot_precision_recall_curve_with_cv(nb, X, y)
    plt.show()
Example #2
 def test_two_classes(self):
     np.random.seed(0)
     # Iris has 3 classes, so the two-class-only lift curve should raise ValueError
     X, y = load_data(return_X_y=True)
     clf = LogisticRegression()
     clf.fit(X, y)
     probas = clf.predict_proba(X)
     self.assertRaises(ValueError, plot_lift_curve, y, probas)
Example #3
 def test_biplot(self):
     np.random.seed(0)
     clf = PCA()
     clf.fit(self.X)
     ax = plot_pca_2d_projection(clf,
                                 self.X,
                                 self.y,
                                 biplot=True,
                                 feature_labels=load_data().feature_names)
Example #4
def run_example(depth: int = 2):
    features, label = load_data(return_X_y=True)
    p = features.shape[1]
    column_names = ["x{0}".format(i) for i in range(p)]
    data = pd.DataFrame(data=features, columns=column_names)
    data["label"] = label

    test_indices = np.random.randint(0,
                                     data.shape[0],
                                     size=(int(data.shape[0] * 0.2), ))
    train_indices = [
        i for i in range(0, data.shape[0]) if i not in test_indices
    ]
    train = data.iloc[train_indices].reset_index()
    test = data.iloc[test_indices].reset_index()

    print(train.shape)

    # Use sklearn
    train_features_sklearn = features[train_indices, :]
    train_label_sklearn = label[train_indices]
    test_features_sklearn = features[test_indices, :]
    test_label_sklearn = label[test_indices]
    cart_model = DecisionTreeClassifier(max_depth=depth, min_samples_leaf=1)
    clf = cart_model.fit(train_features_sklearn, train_label_sklearn)
    predicted_y = clf.predict(test_features_sklearn)

    # Use PyOptree
    model = OptimalHyperTreeModel(column_names,
                                  "label",
                                  tree_depth=depth,
                                  N_min=1)
    model.train(train, train_method="mio")

    test = model.predict(test)

    print(model.a)

    print("PyOptree Library Tree Prediction Accuracy: {}".format(
        sum(test["prediction"] == test["label"]) / len(test["label"])))

    print("SKLearn Library Tree Prediction Accuracy: {}".format(
        sum(predicted_y == test_label_sklearn) / len(test_label_sklearn)))
Example #5
"""
An example showing the plot_roc_curve method
used by a scikit-learn classifier
"""
from __future__ import absolute_import
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits as load_data
import scikitplot as skplt


X, y = load_data(return_X_y=True)
nb = GaussianNB()
nb.fit(X, y)
probas = nb.predict_proba(X)
skplt.metrics.plot_roc(y_true=y, y_probas=probas)
plt.show()
Example #6
 def setUp(self):
     np.random.seed(0)
     self.X, self.y = load_data(return_X_y=True)
     p = np.random.permutation(len(self.X))
     self.X, self.y = self.X[p], self.y[p]
Example #7
 def test_two_classes(self):
     clf = LogisticRegression()
     scikitplot.classifier_factory(clf)
     X, y = load_data(return_X_y=True)
     self.assertRaises(ValueError, clf.plot_ks_statistic, X, y)
Example #8
and all pairs of variables (based on columns) are considered.

To better display the values of the correlation coefficient, the colors
used for the annotation of the values in the plot can be selected
with the parameter ``textcolors`` of the
:py:meth:`psynlig.heatmap.plot_heatmap` method (please see the
:ref:`documentation <api-heatmap>` for more information).
"""
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap

plt.style.use('seaborn-talk')

data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])

kwargs = {
    'text': {
        'fontsize': 'large',
    },
    'heatmap': {
        'vmin': -1,
        'vmax': 1,
        'cmap': 'viridis',
    },
    'figure': {
        'figsize': (14, 10)
    },
}
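
# A hedged sketch of the final call (assumption): plot_correlation_heatmap
# accepts the `textcolors` argument described in the docstring above and
# forwards the remaining styling kwargs to the underlying heatmap plotter.
plot_correlation_heatmap(data, textcolors=['white', 'black'], **kwargs)
plt.show()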
Example #9
# visualization

# run benchmark

# t-SNE and PCA with variance
# run clustering on t-SNE
# check feature importance against the 50 others


#feature importance
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X, y = load_data(return_X_y=True)
print(X.shape, y.shape)
rf = classifier_factory(RandomForestClassifier(random_state=1))
rf.fit(X, y)
rf.plot_feature_importances(
    feature_names=["feature" + str(i) for i in range(X.shape[1])])
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X, y)
skplt.plot_feature_importances(rf, feature_names=['petal length', 'petal width',
                                                  'sepal length', 'sepal width'])
Example #10
 def test_biplot(self):
     np.random.seed(0)
     clf = PCA()
     clf.fit(self.X)
     ax = plot_pca_2d_projection(clf, self.X, self.y, biplot=True,
                                 feature_labels=load_data().feature_names)
Example #11
def getData():
    data_map = load_data()
    X = data_map['data']
    y = data_map['target']
    y[y == 0] = -1  # recode class 0 as -1 to get a +/-1 target
    return scale(X), y
Example #12
import numpy as np
from sklearn.datasets import fetch_20newsgroups as load_data
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dataset = load_data(
    categories=["alt.atheism", "soc.religion.christian", "talk.politics.guns"],
    shuffle=True,
)
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

bow = TfidfVectorizer().fit(X_train)
X_train_bow = bow.transform(X_train)
X_test_bow = bow.transform(X_test)

logreg = LogisticRegression(multi_class="auto").fit(X_train_bow, y_train)
y_test_hat = logreg.predict_proba(X_test_bow)
print(accuracy_score(y_test, np.argmax(y_test_hat, axis=1)))
Example #13
import time

from sklearn.datasets import load_iris as load_data
from sklearn.kernel_approximation import RBFSampler
from sklearn.utils import shuffle

from libifbtsvm import iFBTSVM
from libifbtsvm.models.ifbtsvm import Hyperparameters


if __name__ == '__main__':

    dataset = load_data()
    dataset.data, dataset.target = shuffle(dataset.data, dataset.target)

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=1e-9,
        kernel=RBFSampler(gamma=1, n_components=20),
        forget_score=5,
    )

    # Initialise the iFBTSVM model
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)
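
    # Hedged completion (assumption): iFBTSVM exposes a scikit-learn-style
    # fit/score API; the otherwise unused `time` import suggests timing it.
    before = time.monotonic()
    ifbtsvm.fit(X=dataset.data, y=dataset.target)
    print('Training time: {:.3f}s'.format(time.monotonic() - before))
    print('Accuracy: {:.3f}'.format(
        ifbtsvm.score(X=dataset.data, y=dataset.target)))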
Example #14
from sklearn.datasets import load_iris as load_data
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import MachineLearning.DecisionTree.decisionTree as decisionTree
import MachineLearning.DecisionTree.tool as tool
from MachineLearning.DecisionTree.drawTree import *
if __name__ == '__main__':
    print('load_data......')
    dataSet = load_data()
    data = dataSet.data
    target = dataSet.target
    dataframe = pd.DataFrame(data=data, dtype=np.float32)
    dataframe.insert(4, 'label', target)
    dataMat = np.mat(dataframe)

    # split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(dataMat[:, 0:-1], dataMat[:, -1], test_size=0.3, random_state=0)
    data_train = np.hstack((X_train, y_train))
    data_train = data_train.tolist()
    X_test = X_test.tolist()
    tree = decisionTree.decision_tree()
    tree_root = tree.build_tree(data_train)
    predictions = tree.predcit_samples(X_test, tree_root)
    pres = []
    for i in predictions:
        pres.append(list(i.keys()))

    y_test = y_test.tolist()
    accuracy = 0
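    # Hedged completion (assumption): each entry of `pres` is a one-element
    # list of predicted labels and each row of `y_test` a one-element list,
    # so compare the first elements to count correct predictions.
    for pred, true in zip(pres, y_test):
        if pred[0] == true[0]:
            accuracy += 1
    print('accuracy: {}'.format(accuracy / len(y_test)))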