# Example 1
def make_dataset(dataset, n_rows, n_cols, n_classes=2):
    """Generate a synthetic dataset and return a train/test split.

    Parameters
    ----------
    dataset : str
        One of 'classification1', 'classification2', 'gaussian', 'blobs'.
    n_rows : int
        Number of samples to generate.
    n_cols : int
        Number of features per sample.
    n_classes : int, optional
        Number of target classes (default 2).

    Returns
    -------
    X_train, X_test, y_train, y_test : ndarray
        Train/test split of the generated data.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    np.random.seed(137)  # fixed seed so generated data is reproducible
    if dataset == 'classification1':
        X, y = make_classification(n_rows,
                                   n_cols,
                                   n_informative=2,
                                   n_redundant=0,
                                   n_classes=n_classes,
                                   n_clusters_per_class=1)
    elif dataset == 'classification2':
        X, y = make_classification(n_rows,
                                   n_cols,
                                   n_informative=2,
                                   n_redundant=0,
                                   n_classes=n_classes,
                                   n_clusters_per_class=2)
    elif dataset == 'gaussian':
        X, y = make_gaussian_quantiles(n_samples=n_rows,
                                       n_features=n_cols,
                                       n_classes=n_classes)
    elif dataset == 'blobs':
        X, y = make_blobs(n_samples=n_rows,
                          n_features=n_cols,
                          centers=n_classes)
    else:
        # Previously an unknown name fell through every branch and raised a
        # confusing NameError on X below; fail fast with a clear message.
        raise ValueError("unknown dataset: {!r}".format(dataset))
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    # Ensure every class appears in the training set: relabel the first
    # n_classes training samples so downstream fitting sees all classes.
    if np.unique(y_train).size < n_classes:
        for i in range(n_classes):
            y_train[i] = i
    return X_train, X_test, y_train, y_test
# Example 2
def get_gaussian_quantiles(n_samples=1000):
    """Generate a 2-feature, 2-class Gaussian-quantiles dataset.

    Parameters
    ----------
    n_samples : int, optional
        Number of samples to generate (default 1000).

    Returns
    -------
    x : ndarray of float32, shape (n_samples, 2)
        Feature matrix.
    y : ndarray of float32, shape (n_samples, 2)
        One-hot labels: class 0 -> [0., 1.], class 1 -> [1., 0.].
    """
    x, y = make_gaussian_quantiles(n_samples=n_samples, n_features=2, n_classes=2)
    # Vectorized one-hot encoding. The original built this with a Python
    # list comprehension and had a stray comma in the [1., 0,] literal.
    # eye(2)[1] == [0, 1] covers class 0; eye(2)[0] == [1, 0] covers class 1.
    y = np.eye(2, dtype=np.float32)[1 - y]

    x = x.astype(np.float32)
    return x, y
# Example 3
import matplotlib.pyplot as plt

# Linearly separable multi-class classification dataset ---------------
X, y = make_classification(n_samples=1000,
                           n_redundant=0,
                           n_features=2,
                           n_classes=3,
                           n_clusters_per_class=1)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

# Generate non-linearly separable classification dataset --------------
# NOTE: sklearn.datasets.samples_generator was a private module removed in
# scikit-learn 0.24; the supported public location is sklearn.datasets.
from sklearn.datasets import make_gaussian_quantiles

X, y = make_gaussian_quantiles(n_samples=1000,
                               n_features=2,
                               n_classes=3,
                               mean=[10, 5],
                               cov=2)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

# XOR problem dataset: one cluster of n points in each quadrant
np.random.seed(0)
n = 100
x1 = np.random.rand(n, 2) * (-1)   # third quadrant  (x < 0, y < 0)
x2 = np.random.rand(n, 2)
x2[:, 1] *= (-1)                   # fourth quadrant (x > 0, y < 0)
x3 = np.random.rand(n, 2)
x3[:, 0] *= (-1)                   # second quadrant (x < 0, y > 0)
x4 = np.random.rand(n, 2)          # first quadrant  (x > 0, y > 0)
x = np.concatenate((x1, x2, x3, x4))
# Example 4
    # Shade the decision regions from the mesh-grid predictions Z, then
    # clamp the axes to the mesh extent so no empty margin is shown.
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Overlay the actual samples, one scatter call per class so each class
    # gets its own color, marker, and legend label.
    for index, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[index],
                    marker=markers[index],
                    label=cl,
                    edgecolor='black')


# Choose ONE dataset. The original executed all three generators in a row
# and kept only the last result, silently discarding the first two.
# (X, y) = make_blobs(n_samples=1000, n_features=2, centers=2, cluster_std=1.05)
# (X, y) = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3)
(X, y) = make_moons(n_samples=1000)

# Stratified split keeps the class proportions equal in train and test.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    stratify=y)

# Standardize using statistics fitted on the training set only, so no
# information leaks from the test set into preprocessing.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# RBF-kernel SVM on the standardized features, then visualize its
# decision regions on the held-out test data.
svm = SVC(kernel='rbf', C=1.0, gamma=0.5)
svm.fit(X_train_std, y_train)
plot_decision_regions(X_test_std, y_test, svm)