def make_dataset(dataset, n_rows, n_cols, n_classes=2):
    """Generate a synthetic classification dataset and split it into train/test.

    NumPy's global RNG is reseeded with 137 on every call; no explicit
    ``random_state`` is passed to the generators or to the split.

    Args:
        dataset: Name of the generator to use. One of
            ``'classification1'`` (``make_classification`` with one cluster
            per class), ``'classification2'`` (``make_classification`` with
            two clusters per class), ``'gaussian'``
            (``make_gaussian_quantiles``) or ``'blobs'`` (``make_blobs``).
        n_rows: Number of samples to generate.
        n_cols: Number of features per sample.
        n_classes: Number of classes (used as the number of centers for
            ``'blobs'``).

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` produced by
        ``train_test_split`` with its default split settings.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    np.random.seed(137)

    if dataset == 'classification1':
        X, y = make_classification(n_samples=n_rows, n_features=n_cols,
                                   n_informative=2, n_redundant=0,
                                   n_classes=n_classes,
                                   n_clusters_per_class=1)
    elif dataset == 'classification2':
        X, y = make_classification(n_samples=n_rows, n_features=n_cols,
                                   n_informative=2, n_redundant=0,
                                   n_classes=n_classes,
                                   n_clusters_per_class=2)
    elif dataset == 'gaussian':
        X, y = make_gaussian_quantiles(n_samples=n_rows, n_features=n_cols,
                                       n_classes=n_classes)
    elif dataset == 'blobs':
        X, y = make_blobs(n_samples=n_rows, n_features=n_cols,
                          centers=n_classes)
    else:
        # Previously an unknown name fell through and failed later with an
        # unrelated unbound-variable error; fail fast with a clear message.
        raise ValueError(
            "Unknown dataset {!r}; expected one of 'classification1', "
            "'classification2', 'gaussian', 'blobs'".format(dataset)
        )

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Correct the case when not all classes made it into the training set:
    # the first n_classes training labels are overwritten with 0..n_classes-1
    # so every class appears at least once. Only the labels are changed; the
    # corresponding rows of X_train are left as they are.
    if np.unique(y_train).size < n_classes:
        for i in range(n_classes):
            y_train[i] = i

    return X_train, X_test, y_train, y_test
def get_gaussian_quantiles(n_samples=1000):
    """Return a two-feature, two-class Gaussian-quantiles dataset as float32.

    Args:
        n_samples: Number of samples requested from
            ``make_gaussian_quantiles``.

    Returns:
        A tuple ``(x, y)``: ``x`` holds the generated features cast to
        ``float32``; ``y`` holds one two-element ``float32`` row per sample,
        ``[0, 1]`` for label 0 and ``[1, 0]`` for any other label.
    """
    features, labels = make_gaussian_quantiles(
        n_samples=n_samples, n_features=2, n_classes=2
    )
    # Two-column encoding: label 0 -> (0, 1), every other label -> (1, 0).
    encoded = np.asarray(
        [(1., 0.) if label != 0 else (0., 1.) for label in labels]
    )
    return features.astype(np.float32), encoded.astype(np.float32)
import matplotlib.pyplot as plt
# Import from the public package path: the private module
# sklearn.datasets.samples_generator is no longer available in current
# scikit-learn releases, while this path works on old and new versions alike.
from sklearn.datasets import make_gaussian_quantiles

# Three-class, two-feature dataset from make_classification, one cluster per
# class; plotted with points coloured by label.
X, y = make_classification(n_samples=1000, n_redundant=0, n_features=2,
                           n_classes=3, n_clusters_per_class=1)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

# Generate non-linearly separable classification dataset --------------
X, y = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3,
                               mean=[10, 5], cov=2)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

# XOR problem dataset
np.random.seed(0)
n = 100
# np.random.rand draws from [0, 1); sign flips place each group of n points
# in a different quadrant of the plane.
x1 = np.random.rand(n, 2) * (-1)   # both coordinates negated
x2 = np.random.rand(n, 2)
x2[:, 1] *= (-1)                   # second coordinate negated
x3 = np.random.rand(n, 2)
x3[:, 0] *= (-1)                   # first coordinate negated
x4 = np.random.rand(n, 2)          # both coordinates left non-negative
# Stack the four groups in order x1, x2, x3, x4 into one (4 * n, 2) array.
x = np.concatenate((x1, x2, x3, x4))
# NOTE(review): the statements up to and including the scatter loop use xx1,
# xx2, Z, cmap, colors and markers, none of which are defined in view --
# presumably this is the tail of a plotting helper (plot_decision_regions is
# called below) whose header is outside this chunk; confirm before moving it.
# Draw the filled contour of Z over the (xx1, xx2) grid and clamp the axes to
# the grid extent.
plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
plt.xlim(xx1.min(), xx1.max())
plt.ylim(xx2.min(), xx2.max())
# One scatter call per distinct label so each class gets its own colour,
# marker and legend entry.
for index, cl in enumerate(np.unique(y)):
    plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8, c=colors[index], marker=markers[index], label=cl, edgecolor='black')
# Three candidate datasets; each assignment rebinds X and y, so only the last
# one (make_moons) is used by the code below.
(X, y) = make_blobs(n_samples=1000, n_features=2, centers=2, cluster_std=1.05)
(X, y) = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3)
(X, y) = make_moons(n_samples=1000)
# Hold out 30% of the samples for testing, stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
# Fit the scaler on the training split only, then apply it to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
# RBF-kernel SVC trained on the standardised training data.
svm = SVC(kernel='rbf', C=1.0, gamma=0.5)
svm.fit(X_train_std, y_train)
# Plot the classifier's decision regions using the standardised test split.
plot_decision_regions(X_test_std, y_test, svm)