def main3():
    X1, y1 = make_gaussian_quantiles(cov=2.,
                                     n_samples=200, n_features=2,
                                     n_classes=2, random_state=1)
    X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5,
                                     n_samples=300, n_features=2,
                                     n_classes=2, random_state=1)
    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43)

    graph(x_train[:, 0], x_train[:, 1], y_train, 'gaussian_train')
    boosting = REBEL(max_iteration=301)
    boosting.fit(x_train, y_train, x_test, y_test)
    y_predict_test = boosting.get_prediction()
    print(y_predict_test)
    graph(x_test[:, 0], x_test[:, 1], y_predict_test, 'gaussian_test')

    resultats = np.array(boosting.get_resultats())
    plt.plot(resultats[:, [0]], resultats[:, [1]], label='Loss')
    plt.plot(resultats[:, [0]], resultats[:, [2]], label='train_error')
    plt.plot(resultats[:, [0]], resultats[:, [3]], label='test_error')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.show()
Example #2
def main():
    #baseline_clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
    datasets = []
    experiments = []
    query_strat = 'RandomSampling'

    # datasets.append((make_gaussian_quantiles(n_samples=500, n_features=10, n_classes=2),
    #                  make_gaussian_quantiles(n_samples=500, n_features=10, n_classes=2)))
    # experiments.append('hastie_10_2_vs_gauss_quant_10_2')
    # datasets.append((make_moons(n_samples=1000), make_moons(n_samples=1000)))

    # experiments.append('moons')
    # datasets.append((u.hastie(1000), u.hastie(1000)))

    datasets.append((make_gaussian_quantiles(n_samples=500,
                                             n_features=5,
                                             n_classes=3),
                     make_gaussian_quantiles(n_samples=500,
                                             n_features=5,
                                             n_classes=3)))
    experiments.append('gauss')

    #datasets.append((mnist.load_mnist(), mnist.load_mnist_rotated()))
    #experiments.append('MNIST_vs_MNIST_Rotated')

    #baseline_active(classifiers=clfs, datasets=datasets, experiments=experiments, query_strat=query_strat)
    bsda_active(datasets=datasets)
Example #3

def create_data(show_scatter=False):
    # Covariance cov=2, n_features=2, n_classes=2, feature mean=(0, 0)
    X1, y1 = make_gaussian_quantiles(mean=(0, 0),
                                     cov=2.,
                                     n_samples=200,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)
    # Covariance cov=1.5, n_features=2, n_classes=2, feature mean=(3, 3)
    X2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                     cov=1.5,
                                     n_samples=300,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)
    # Merge X1 and X2
    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))
    # Plot the sample distribution
    if show_scatter:
        fig, axs = plt.subplots(1, 2)
        axs[0].scatter(X1[:, 0], X1[:, 1], c=y1)
        axs[0].set_title('Distribution of class 0', fontproperties=myfont)
        axs[1].scatter(X2[:, 0], X2[:, 1], c=y2)
        axs[1].set_title('Distribution of class 1', fontproperties=myfont)
        fig.show()
    return X, y
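A minimal usage sketch, assuming the imports above are in scope: the -y2 + 1 flip inverts the second blob's labels so the two quantile rings interleave rather than align.

X, y = create_data(show_scatter=False)
print(X.shape, y.shape)  # (500, 2) (500,)
print(np.unique(y))      # [0 1]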
Example #4
def sklearn_test():
    """ AdaBoost test takes from sklearn
   https://scikit-learn.org/stable/auto_examples/ensemble/plot_adaboost_twoclass.html#sphx-glr-auto-examples-ensemble-plot-adaboost-twoclass-py"""

    # Construct dataset
    x1_samples = 200
    x2_samples = 300
    X1, y1 = make_gaussian_quantiles(cov=2.,
                                     n_samples=x1_samples,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)
    X2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                     cov=1.5,
                                     n_samples=x2_samples,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)
    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))
    y = 2 * y - 1

    # Create and fit an AdaBoosted decision tree
    my_learner = lambda: DecisionTreeClassifier(max_depth=1)
    bdt = AdaBoostClassifier(my_learner, n_estimators=200)

    scores, exp_losses = bdt.fit(X, y)
    print('Final Accuracy', scores[-1])
    fig, ax = plt.subplots(1, 2, figsize=(12, 10))
    ax[0].plot(exp_losses, 'b--', label='exp loss')
    ax[0].plot(1.0 - np.array(scores), 'm--', label='0-1 Loss')
    ax[0].legend(fontsize=15)
    ax[0].set_title('Loss Per Iteration for AdaBoost', fontsize=20)

    #plot_colors = "br"
    plot_step = 0.02
    #class_names = "AB"

    # Plot the decision boundaries
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))

    Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = ax[1].contourf(xx, yy, Z, cmap=plt.cm.Paired)

    ax[1].scatter(X[:, 0],
                  X[:, 1],
                  c=y,
                  s=20,
                  cmap=plt.cm.Paired,
                  edgecolor='k')
    ax[1].set_title('AdaBoost Decision Boundary', fontsize=20)
    plt.show()
Example #5
def generate_evil_2d_set():
    X1, y1 = datasets.make_gaussian_quantiles(cov=2.,
                                              n_samples=200, n_features=2,
                                              n_classes=2, random_state=1)
    X2, y2 = datasets.make_gaussian_quantiles(mean=(3, 3), cov=1.5,
                                              n_samples=300, n_features=2,
                                              n_classes=2, random_state=1)
    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))
    return X, y
Example #7
    def create_data(self):
        # Generate a 2-D normal distribution split into two classes by quantiles:
        # 500 samples, 2 features
        x1, y1 = make_gaussian_quantiles(n_samples=500,
                                         n_features=2,
                                         n_classes=2)
        # Same, but 500 samples with feature means of (3, 3)
        x2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                         n_samples=500,
                                         n_features=2,
                                         n_classes=2)
        # Merge the two datasets into one
        self.x_data = np.concatenate((x1, x2))
        self.y_data = np.concatenate((y1, -y2 + 1))
Example #8
def create_data():
    # Build a dataset that follows a Gaussian distribution
    X1, y1 = make_gaussian_quantiles(cov=2.,
                                     n_samples=200, n_features=2,
                                     n_classes=2, random_state=1)
    X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5,
                                     n_samples=300, n_features=2,
                                     n_classes=2, random_state=1)

    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))

    return X, y
Example #9
def gauss_easy(n_samples_bsm, n_samples_sm):
    #Dataset creation
    X1, y1 = make_gaussian_quantiles(mean=(2, 0),
                                     cov=1.5,
                                     n_samples=n_samples_bsm,
                                     n_features=2,
                                     n_classes=1,
                                     random_state=1)
    X2, y2 = make_gaussian_quantiles(mean=(-2, 0),
                                     cov=1.5,
                                     n_samples=n_samples_sm,
                                     n_features=2,
                                     n_classes=1,
                                     random_state=1)
    X12 = np.concatenate((X1, X2))
    X = np.concatenate((X12, X12))
    n_sample_size = len(X12)

    # Generating labels
    y1 = np.zeros(n_sample_size)
    y2 = np.ones(n_sample_size)
    y = np.concatenate((y1, y2))

    #Generating weights
    n_alt_hyp_size = len(X1)  #BSM
    n_hyp_size = len(X2)  #SM
    EPSILON = 0.01

    #in case of y=0, SM
    w_alt_hyp = np.zeros(n_alt_hyp_size)
    w_alt_hyp += (EPSILON / n_alt_hyp_size)
    w_hyp = np.ones(n_hyp_size)
    w_hyp /= n_hyp_size
    w0 = np.concatenate((w_alt_hyp, w_hyp))

    #in case of y=1, BSM
    w_alt_hyp = np.ones(n_alt_hyp_size)
    w_alt_hyp /= n_alt_hyp_size
    w_hyp = np.zeros(n_hyp_size)
    w_hyp += (EPSILON / n_hyp_size)
    w1 = np.concatenate((w_alt_hyp, w_hyp))

    #final weights
    w = np.concatenate((w0, w1))

    #calculate the minimal weights, to avoid division by zero
    w_min = min((2.0 * EPSILON) / n_hyp_size, (2.0 * EPSILON) / n_alt_hyp_size)

    return X, y, w, w_min
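A quick sanity check of the weighting scheme above, with hypothetical sample counts: each label's weight vector sums to 1 + EPSILON, since the cross-hypothesis copy contributes only EPSILON in total.

X, y, w, w_min = gauss_easy(n_samples_bsm=100, n_samples_sm=100)
print(X.shape, y.shape, w.shape)           # (400, 2) (400,) (400,)
print(w[y == 0].sum(), w[y == 1].sum())    # 1.01 1.01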
Example #10
def gauss_quantiles_dataset(samples_amount: int,
                            features_amount: int,
                            classes_amount: int,
                            full_shuffle=True,
                            **kwargs):
    """
    Generates a random dataset for n-class classification problem
    based on multi-dimensional gaussian distribution quantiles
    using scikit-learn API.

    :param samples_amount: Total amount of samples in the resulting dataset.
    :param features_amount: Total amount of features per sample.
    :param classes_amount: The amount of classes in the dataset.
    :param full_shuffle: If True, all features and samples are shuffled.
    :param kwargs: Optional params: \
        - 'gauss_params': mean and covariance values of the distribution.
    :return: features and target as numpy-arrays.
    """
    if 'gauss_params' in kwargs:
        mean, cov = kwargs['gauss_params']
    else:
        mean, cov = None, 1.

    features, target = datasets.make_gaussian_quantiles(
        n_samples=samples_amount,
        n_features=features_amount,
        n_classes=classes_amount,
        shuffle=full_shuffle,
        mean=mean,
        cov=cov)
    return features, target
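A hypothetical call showing the 'gauss_params' kwarg handled above (mean=None lets scikit-learn default to the origin):

features, target = gauss_quantiles_dataset(300, 4, 3, gauss_params=(None, 2.0))
print(features.shape, target.shape)  # (300, 4) (300,)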
Example #11
def iterate_data():
    """Yields numpy iterator
    """
    # Yield for each epoch
    for epoch in range(NUM_EPOCHS):
        examples = [
            get_biased_data(
                possible_actions_and_rewards=datasets.make_gaussian_quantiles(
                    n_samples=numpy.random.choice(a=range(
                        MIN_NUM_ACTIONS, MAX_NUM_ACTIONS), ),
                    n_features=NUM_FEATURES,
                    n_classes=2,
                ),
                epoch=epoch,
            ) for _ in range(NUM_EXAMPLES)
        ]
        yield {
            # Shape = (num_examples, num_actions_possible, num_features)
            'possible_actions':
            [example['possible_actions'] for example in examples],
            # Shape = (num_examples, num_features)
            'chosen_actions':
            [example['chosen_action'] for example in examples],
            # Shape = (num_examples, 1)
            'rewards': [example['reward'] for example in examples],
        }
Example #12
def main():

    # create data...
    plt.figure(figsize=(8, 8))

    print("'kernel', 'round', 'score', 'vectors'")
    for kernel in ['linear', 'rbf']:
        for i in range(10):

            #X, y = make_blobs(n_samples=300, centers=2)

            X, y = make_gaussian_quantiles(n_samples=300, n_features=2, n_classes=2)

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

            # create an SVM
            clf = svm.SVC(kernel=kernel)

            # train the classifier on the training set
            #print("Training Classifier...")
            clf.fit(X_train, y_train)

            print("['{}',{},{},{}], ".format(
                kernel, i,
                clf.score(X_test, y_test),
                clf.n_support_[0] + clf.n_support_[1]))
Example #13
def gen_data():
    # N clusters:
    # data, targets = datasets.make_classification(
    #     n_samples=n, n_features=2, n_informative=2, n_redundant=0, n_classes=num_classes, class_sep=3.0, n_clusters_per_class=1)

    data, targets = datasets.make_gaussian_quantiles(mean=(0, 0),
                                                     cov=1,
                                                     n_samples=n,
                                                     n_classes=num_classes)

    # Circles:
    # data, targets = datasets.make_circles(
    #     n_samples=n, shuffle=True, noise=0.1, random_state=None, factor=0.1)

    # Moons:
    # data, targets = datasets.make_moons(n_samples=n, shuffle=True, noise=0.05)

    # print data
    # print targets

    targets = [to_one_hot_vect(target, num_classes) for target in targets]

    train = list(zip(
        np.array(data[:n * 9 // 10]).astype(float),
        np.array(targets[:n * 9 // 10]).astype(float)))
    test = list(zip(
        np.array(data[n // 10:]).astype(float),
        np.array(targets[n // 10:]).astype(float)))

    return train, test
Example #14
def make_dataset(dataset, n_rows, n_cols, n_classes=2):
    np.random.seed(137)
    if n_rows*0.25 < 4000:
        # Use at least 4000 test samples
        n_test = 4000
        if n_rows > 1000:
            # To avoid a large increase in test time (which is between
            # O(n_rows^2) and O(n_rows^3)).
            n_rows = int(n_rows * 0.75)
        n_rows += n_test
    else:
        n_test = int(n_rows * 0.25)
    if dataset == 'classification1':
        X, y = make_classification(
            n_rows, n_cols, n_informative=2, n_redundant=0,
            n_classes=n_classes, n_clusters_per_class=1)
    elif dataset == 'classification2':
        X, y = make_classification(
            n_rows, n_cols, n_informative=2, n_redundant=0,
            n_classes=n_classes, n_clusters_per_class=2)
    elif dataset == 'gaussian':
        X, y = make_gaussian_quantiles(n_samples=n_rows, n_features=n_cols,
                                       n_classes=n_classes)
    elif dataset == 'blobs':
        X, y = make_blobs(n_samples=n_rows, n_features=n_cols,
                          centers=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test)
    # correct case when not all classes made it into the training set
    if np.unique(y_train).size < n_classes:
        for i in range(n_classes):
            y_train[i] = i
    return X_train, X_test, y_train, y_test
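A hypothetical call tracing the sizing logic above: with n_rows=1000, the 25% test share would fall under 4000, so 4000 test rows are appended instead and the split yields 1000 training rows.

X_train, X_test, y_train, y_test = make_dataset('gaussian', n_rows=1000, n_cols=5)
print(X_train.shape, X_test.shape)  # (1000, 5) (4000, 5)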
Example #15
def make_multiview_gaussian_quantiles(n_classes=3,
                                      n_views=3,
                                      n_features=3,
                                      n_samples='auto',
                                      rotate=True,
                                      shuffle=True,
                                      seed=None):
    np.random.seed(seed)
    n_samples = n_classes * 20 if n_samples == 'auto' else n_samples
    X_0, y = make_gaussian_quantiles(n_features=n_features,
                                     n_samples=n_samples,
                                     n_classes=n_classes,
                                     random_state=seed)
    std = np.std(X_0)
    Xs = [X_0]
    for i in range(n_views - 1):
        X_i = X_0 + np.tile(
            np.random.normal(loc=0.0, scale=10 * std, size=n_features),
            (len(X_0), 1))
        X_i += np.random.normal(loc=0.0, scale=std / 100, size=X_i.shape)
        if rotate:
            X_i = X_i @ ortho_group.rvs(n_features)
        Xs.append(X_i)
    if shuffle:
        indexes = np.random.permutation(np.arange(len(y)))
        y = y[indexes]
        for v in range(n_views):
            Xs[v] = Xs[v][indexes, :]
    if n_views > 1:
        return [torch.tensor(X).float() for X in Xs], torch.from_numpy(y)
    return torch.tensor(Xs).squeeze(0).float(), torch.from_numpy(y)
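A small usage sketch (assuming torch and scipy are installed, since the function requires both):

Xs, y = make_multiview_gaussian_quantiles(n_classes=3, n_views=2, n_features=3, seed=0)
print([X.shape for X in Xs], y.shape)  # two (60, 3) views and 60 labels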
Example #16
def backward_test():
    "Test for backward flow"
    sampled_z = datasets.make_gaussian_quantiles(n_samples=1000)[0].astype(
        np.float32)
    backward_test_loader = torch.utils.data.DataLoader(sampled_z,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       **kwargs)

    plt.subplot(2, 2, 4)
    plt.scatter(sampled_z[:, 0], sampled_z[:, 1], c='b', s=10)
    plt.title("INPUT: z ~ p(z)")

    model.eval()
    z_all = np.array([[]]).reshape(0, 2)

    with torch.no_grad():
        for i, data in enumerate(backward_test_loader):
            z = model.backward(data)
            z_all = np.concatenate((z_all, z.numpy()))

    plt.subplot(2, 2, 3)
    plt.scatter(z_all[:, 0], z_all[:, 1], c='b', s=10)
    plt.title("OUTPUT: x = f^(-1)(z)")
    # plt.show()
    plt.savefig("result.png")
Example #17
def gaussian_data_generator(dim=2, cls=5, objs_size=None, cov=None):
    """
  init necessary parameters
  """
    if cov is None:
        cov = [random.randrange(100, 500, 100) for _ in range(cls)]

    if objs_size is None:
        # random size for each cluster (100 to 450, step 50)
        objs_size = [random.randrange(100, 500, 50) for _ in range(cls)]
        # print("random object size = ", objs_size)

    means = [[random.randrange(100, 200, 20) for __ in range(dim)]
             for _ in range(cls)]
    # print("object's mean = ", means)

    point = []
    label = []
    for i in range(cls):
        tmp_point, tmp_label = make_gaussian_quantiles(mean=means[i],
                                                       cov=cov[i],
                                                       n_features=dim,
                                                       n_classes=1,
                                                       n_samples=objs_size[i])

        point.extend(tmp_point)
        label.extend(tmp_label + i)

    # convert to np.array before standardizing
    return standardize_data(np.array(point)), np.array(label)
Example #18
def dataset(n, random_seed, classes):
    if random_seed:
        np.random.seed(random_seed)
    X, y = make_gaussian_quantiles(n_samples=n,
                                   n_features=2,
                                   n_classes=classes)
    # map {0, 1} labels to {-1, +1}
    return X, y * 2 - 1
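A quick hypothetical check that the labels come out as the ±1 targets AdaBoost-style learners expect:

X, y = dataset(n=200, random_seed=42, classes=2)
assert set(np.unique(y)) == {-1, 1}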
Example #19
def main():
    # Example 1
    def load_simple_data():
        features = ([[1.0, 2.1], [2.0, 1.1], [1.3, 1.0], [1.0, 1.0],
                     [2.0, 1.0]])
        labels = [1.0, 1.0, -1.0, -1.0, 1.0]
        return np.array(features), np.array(labels)

    X, y = load_simple_data()
    model = AdaBoostClassifier(n_estimators=5)
    model.fit(X, y)

    y_pred = model.predict(X)
    print(y_pred)
    accuracy = calculate_accuracy_score(y, y_pred)
    print("Accuracy Score: {:.2%}".format(accuracy))

    # Example 2
    X, y = make_gaussian_quantiles(n_samples=1300, n_features=10, n_classes=2)

    n_split = 300
    X_train, X_test = X[:n_split], X[n_split:]
    y_train, y_test = y[:n_split], y[n_split:]

    model = AdaBoostClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = calculate_accuracy_score(y_test, y_pred)
    print("Accuracy Score: {:.2%}".format(accuracy))
Example #21
def gaussian_dataset(n_classes=3,
                     n_views=3,
                     n_features=3,
                     n_samples='auto',
                     rotate=True,
                     shuffle=True,
                     seed=154):
    np.random.seed(seed)
    n_samples = n_classes * 20 if n_samples == 'auto' else n_samples
    X_ori, y = make_gaussian_quantiles(cov=4.5,
                                       n_features=3,
                                       n_samples=n_samples,
                                       n_classes=n_classes,
                                       random_state=156)
    Xs = [X_ori]
    for i in range(n_views - 1):
        X_new_view = X_ori + np.random.randn(n_features) * np.random.randint(
            7, 30)
        X_new_view = np.array([
            x + np.random.rand(len(x.shape)) * np.random.randint(1, 3)
            for x in X_new_view
        ])
        if rotate:
            X_new_view = X_new_view @ rvs(n_features, seed)
        Xs.append(X_new_view)
    if shuffle:
        indexes = np.random.permutation(np.arange(len(y)))
        y = y[indexes]
        for v in range(n_views):
            Xs[v] = Xs[v][indexes, :]
    return [torch.tensor(X).float() for X in Xs], y
Example #22
def load_non_linearly_separable_data():
    """
    Generates non-linearly separable data and returns the samples and class labels
    :return:
    """
    x, y = make_gaussian_quantiles(n_features=2, n_classes=2, random_state=1)
    assert np.bitwise_or(y == 0, y == 1).all()
    return x, y
Example #23
def generate_data():
    '''
    generate data
    :return: X: input data, y: given labels
    '''
    np.random.seed(0)
    X, y = datasets.make_gaussian_quantiles(n_features=2, n_classes=3)
    return X, y
Example #24
def main():
    # Generate a 2-D normal distribution split into two classes by quantiles:
    # 500 samples, 2 features, covariance 2
    X1, y1 = make_gaussian_quantiles(cov=2.0,
                                     n_samples=500,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)

    # Generate a 2-D normal distribution split into two classes by quantiles:
    # 400 samples, feature means of (3, 3), covariance 1.5
    X2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                     cov=1.5,
                                     n_samples=400,
                                     n_features=2,
                                     n_classes=2,
                                     random_state=1)

    # merge the two datasets into one
    X = np.concatenate((X1, X2))
    y = np.concatenate((y1, -y2 + 1))
    # plot
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
    plt.show()

    # Fit an AdaBoost classifier with decision-tree base estimators
    dt = DecisionTreeClassifier(max_depth=2,
                                min_samples_split=20,
                                min_samples_leaf=5)
    adb = AdaBoostClassifier(base_estimator=dt,
                             n_estimators=300,
                             learning_rate=0.8,
                             algorithm="SAMME")

    adb.fit(X, y)

    print("Score:", adb.score(X, y))

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    Z = adb.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
    plt.show()
Example #25

def make_toy_dataset(n: int = 100, random_seed: int = None):
    """ Generate a toy dataset for evaluating AdaBoost classifiers """

    if random_seed:
        np.random.seed(random_seed)

    x, y = make_gaussian_quantiles(n_samples=n, n_features=2, n_classes=2)

    return x, y * 2 - 1
Example #26
def Makedata():
    x1, y1 = make_gaussian_quantiles(cov=2.,
                                     n_samples=200,
                                     n_features=2,
                                     n_classes=2,
                                     shuffle=True,
                                     random_state=1)
    x2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                     cov=1.5,
                                     n_samples=300,
                                     n_features=2,
                                     n_classes=2,
                                     shuffle=True,
                                     random_state=1)

    X = np.vstack((x1, x2))
    Y = np.hstack((y1, 1 - y2))
    return X, Y
Example #27
    def get_nonlinear_separable_dataset(self, random_state=10, ext=True):
        from sklearn.datasets import make_gaussian_quantiles
        X, Y = make_gaussian_quantiles(n_samples=50, n_features=2, n_classes=2, random_state=random_state)
        if ext:
            # prepend a bias column of ones
            unos = np.array([np.ones(X.shape[0])])
            X = np.concatenate((unos.T, X), axis=1)
            Y = Y.reshape(np.size(Y), 1)
        return (X, Y)
Example #28
def data_creation():

    # Construct dataset
    # Gaussian 1
    X1, y1 = make_gaussian_quantiles(cov=3., n_samples=100, n_features=2, n_classes=1)
    X1 = pd.DataFrame(X1, columns=['x', 'y'])
    y1 = pd.Series(y1)

    # Gaussian 2
    X2, y2 = make_gaussian_quantiles(mean=(4, 4), cov=1, n_samples=100, n_features=2, n_classes=1)
    X2 = pd.DataFrame(X2, columns=['x', 'y'])
    y2 = pd.Series(y2)

    # Gaussian 3
    X3, y3 = make_gaussian_quantiles(mean=(-6, -1), cov=3., n_samples=100, n_features=2, n_classes=1)
    X3 = pd.DataFrame(X3, columns=['x', 'y'])
    y3 = pd.Series(y3)

    # Gaussian 4
    X4, y4 = make_gaussian_quantiles(mean=(3, -2), cov=3., n_samples=100, n_features=2, n_classes=1)
    X4 = pd.DataFrame(X4, columns=['x', 'y'])
    y4 = pd.Series(y4)

    # Combine the gaussians
    X = pd.DataFrame(np.concatenate((X1, X2, X3, X4)))
    y = pd.Series(np.concatenate((y1, -y2 + 1, y3, y4)))

    plt.figure()
    plt.plot(X[0][0:100], X[1][0:100], 'ro')
    plt.plot(X[0][100:200], X[1][100:200], 'yo')
    plt.plot(X[0][200:300], X[1][200:300], 'go')
    plt.plot(X[0][300:400], X[1][300:400], 'o')
    plt.show()

    return X
Example #29
def generate_nonlin_data(num_features, num_samples):
    plt.figure()
    csfont = {'fontname': 'Times New Roman'}

    X1, Y1 = make_gaussian_quantiles(mean=(1, 1), cov=5, n_samples=num_samples, n_features=num_features, n_classes=2)
    
    plt.scatter(X1[:, 0], X1[:, 1], marker='.', c=Y1, cmap=plt.cm.Paired)
    plt.savefig('non_lin_data', bbox_inches='tight', pad_inches=0.1)
    plt.show()
    return X1, Y1
Example #30

def load_extra_datasets():
    N = 200
    gaussian_quantiles = datasets.make_gaussian_quantiles(mean=None,
                                                          cov=0.7,
                                                          n_samples=N,
                                                          n_features=2,
                                                          n_classes=2,
                                                          shuffle=True,
                                                          random_state=None)
    return gaussian_quantiles
Example #31
def generate_data():
    N = 700
    gq = skd.make_gaussian_quantiles(mean=None,
                                     cov=0.7,
                                     n_samples=N,
                                     n_features=3,
                                     n_classes=2,
                                     shuffle=True,
                                     random_state=None)
    return gq
Example #32
    def _generate(self, random_state):
        rows = api.payload['rows']
        features = api.payload['features']
        classes = api.payload['classes']

        return make_gaussian_quantiles(random_state=random_state,
                                       n_samples=rows,
                                       n_features=features,
                                       n_classes=classes,
                                       cov=1.0)
Example #33
    def make_gaussian_data(self):

        X1, y1 = make_gaussian_quantiles(cov=0.75,
                                         n_samples=self.N // 2,
                                         n_features=2,
                                         n_classes=2)
        X2, y2 = make_gaussian_quantiles(mean=(3, 3),
                                         cov=0.75,
                                         n_samples=self.N // 2,
                                         n_features=2,
                                         n_classes=2)
        X = np.concatenate((X1, X2))
        y = np.concatenate((y1, -y2 + 1))

        yinds = np.random.choice(range(len(y)), size=int(round(len(y)/6)), replace=False)
        yshuff = np.random.choice(y[yinds], size=len(yinds), replace=False)
        y[yinds] = yshuff



        X = X * self.scaling
        return X, y
Example #34
def test_binary_classification_with_classification_pipeline():
    # generate the dataset
    n_samples = 100
    n_features = 20
    x, y = datasets.make_gaussian_quantiles(mean=None, cov=1.0, n_samples=n_samples, n_features=n_features, n_classes=2,
                                            shuffle=True, random_state=1)

    # -- test with darwin
    classifier_name = 'RBFSVC' #'linsvm'
    cvmethod = '10'
    #n_feats = x.shape[1]

    pipe = ClassificationPipeline(clfmethod=classifier_name, cvmethod=cvmethod)
    results, metrics = pipe.cross_validation(x, y)
    assert results is not None
Example #35
from sklearn.datasets import make_blobs, make_classification,make_gaussian_quantiles
import matplotlib.pyplot as plt
import numpy as np
import sklearn.preprocessing as p

def saveFile(name, dataset):
    np.save(name, dataset)


#X1, Y1 = make_blobs(n_samples=100, n_features=2, centers=2)
X1, Y1 = make_gaussian_quantiles(n_samples=500, n_features=2, n_classes=2)
plt.figure()
plt.subplot(1, 2, 1)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
print('X.shape is', X1.shape, 'y is', Y1)

plt.subplot(1, 2, 2)

scaler = p.StandardScaler().fit(X1)
X1 = scaler.transform(X1)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
plt.show()

sp = int(0.8 * X1.shape[0])
X1train = X1[0:sp]
Y1train = Y1[0:sp]

X1test = X1[sp:]
Y1test = Y1[sp:]
saveFile("tempXtrain2", X1train)
Example #36

import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_boston, load_breast_cancer, load_iris, make_moons, make_gaussian_quantiles
from sklearn.metrics import mean_squared_error

from mlxtend.evaluate import plot_decision_regions
import matplotlib.pyplot as plt

from pines.estimators import DecisionTreeRegressor, DecisionTreeClassifier
from pines.tree_builders import TreeType

if __name__ == '__main__':
    model = DecisionTreeClassifier(max_n_splits=3, max_depth=10, tree_type=TreeType.OBLIVIOUS)
    X, y = make_gaussian_quantiles(n_samples=10000, n_classes=4)

    model.fit(X, y)
    print(model.tree_)
    plot_decision_regions(X, y, clf=model, res=0.02, legend=2)
    plt.savefig('decision_boundary.png')
Example #37


X, y = make_gaussian_quantiles(n_samples=13000, n_features=10,
                               n_classes=3, random_state=1)

n_split = 3000

X_train, X_test = X[:n_split], X[n_split:]
y_train, y_test = y[:n_split], y[n_split:]

bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")  # truncated in the source; closed here as in the sklearn multiclass example
Example #38

__author__ = 'darya'

import numpy as np
from sklearn import svm, datasets

from darwin.pipeline import ClassificationPipeline


# generate the dataset
n_samples = 100
n_features = 20
x, y = datasets.make_gaussian_quantiles(mean=None, cov=1.0, n_samples=n_samples, n_features=n_features, n_classes=2, shuffle=True, random_state=1)

# another way to generate the data
# x, y = datasets.make_hastie_10_2(n_samples=10, random_state=1)

# -- test with darwin
classifier_name = 'linsvm'
cvmethod = '10'
n_feats = x.shape[1]


pipe = ClassificationPipeline(n_feats=n_feats, clfmethod=classifier_name, cvmethod=cvmethod)

results, metrics = pipe.cross_validation(x, y)

Example #39
## sklearn: make regression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
# X: sample features, y: targets, coef: regression coefficients; 1000 samples, 1 feature each
X, y, coef = make_regression(n_samples=1000, n_features=1, noise=10, coef=True)
# plot
plt.scatter(X, y, color='black')
plt.plot(X, X * coef, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()

## sklearn: make classification
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
# X1: sample features, Y1: class labels; 400 samples, 2 features each, 3 classes,
# no redundant features, one cluster per class
X1, Y1 = make_classification(n_samples=400, n_features=2, n_redundant=0, n_clusters_per_class=1, n_classes=3)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
plt.show()

## sklearn: make blobs
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# X: sample features, y: cluster labels; 1000 samples, 2 features each, 3 clusters
# centered at [-1,-1], [1,1], [2,2] with stds 0.4, 0.5 and 0.2
X, y = make_blobs(n_samples=1000, n_features=2, centers=[[-1, -1], [1, 1], [2, 2]], cluster_std=[0.4, 0.5, 0.2])
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

## sklearn: make gaussian quantiles
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
# Generate a 2-D normal distribution split into 3 quantile groups:
# 1000 samples, feature means of 1 and 2, covariance 2
X1, Y1 = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3, mean=[1, 2], cov=2)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
Example #40
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_gaussian_quantiles

if __name__ == '__main__':
    fig = plt.figure(figsize=(9,7))
    ax = fig.add_subplot(111)

    X, y = make_gaussian_quantiles(n_features=2, n_classes=1)
    ax.scatter(X[:, 0], X[:, 1], marker='o', c='k', alpha=0.6)

    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    ax.set_xticks([])
    ax.set_yticks([])

    plt.title("Cluster Analysis")
    plt.savefig('/Users/benjamin/Desktop/cluster.png')

    kx = np.random.uniform(-3, 3, 5)
    ky = np.random.uniform(-3, 3, 5)
    plt.scatter(kx, ky, c=list('rbgyc'), s=50)
    plt.savefig('/Users/benjamin/Desktop/kpoints.png')
    plt.show()
Example #41

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification, make_blobs, make_gaussian_quantiles

plt.subplot(323)
plt.title("Two informative features, two clusters per class",
          fontsize='small')
X2, Y2 = make_classification(n_features=2, n_redundant=0, n_informative=2)
plt.scatter(X2[:, 0], X2[:, 1], marker='o', c=Y2,
            s=25, edgecolor='k')

plt.subplot(324)
plt.title("Multi-class, two informative features, one cluster",
          fontsize='small')
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=2,
                             n_clusters_per_class=1, n_classes=3)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1,
            s=25, edgecolor='k')

plt.subplot(325)
plt.title("Three blobs", fontsize='small')
X1, Y1 = make_blobs(n_features=2, centers=3)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1,
            s=25, edgecolor='k')

plt.subplot(326)
plt.title("Gaussian divided into three quantiles", fontsize='small')
X1, Y1 = make_gaussian_quantiles(n_features=2, n_classes=3)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1,
            s=25, edgecolor='k')

plt.show()
Example #42
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt


X, Y = make_gaussian_quantiles(n_features=2, n_samples=2000, n_classes=2)

plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y)
plt.show()

skf = StratifiedKFold(n_splits=2)

#X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
#y = np.array([0, 0, 1, 1])

for train_index, test_index in skf.split(X, Y):
    print("TRAIN:", len(train_index), "TEST:", len(test_index))
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]
    
plt.scatter(X_train[:, 0], X_train[:, 1], marker='o', c=Y_train)
plt.show()
plt.scatter(X_test[:, 0], X_test[:, 1], marker='o', c=Y_test)
plt.show()


#clf = GBC(n_estimators=25, learning_rate=0.18,min_samples_leaf=6, max_features=0.8,subsample=0.9,verbose=2,max_depth=10)  
trainscore = list()
testscore = list()
Example #43
# Author: Noel Dawe <*****@*****.**>
#
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles


# Construct dataset
X1, y1 = make_gaussian_quantiles(cov=2.,
                                 n_samples=200, n_features=2,
                                 n_classes=2, random_state=1)
X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5,
                                 n_samples=300, n_features=2,
                                 n_classes=2, random_state=1)
X = np.concatenate((X1, X2))
y = np.concatenate((y1, -y2 + 1))

# Create and fit an AdaBoosted decision tree
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME",
                         n_estimators=200)

bdt.fit(X, y)

plot_colors = "br"
Example #44
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification, make_gaussian_quantiles

plt.subplot(311)
plt.title("Eight informative features, four clusters per class", fontsize='small')
X, Y = make_classification(n_samples=2000, n_features=8, n_redundant=0, n_informative=8,
                           n_clusters_per_class=4, random_state=13)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y)

plt.subplot(312)
plt.title("Two informative features, one cluster per class", fontsize='small')
X, Y = make_classification(n_samples=300,n_features=3, n_redundant=0, n_informative=3,
                             n_clusters_per_class=2,random_state=13)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y, cmap=plt.cm.Paired)

plt.subplot(313)
plt.title("Gaussian divided into three quantiles", fontsize='small')
X, Y = make_gaussian_quantiles(n_samples=500,n_features=2, n_classes=2, 
								mean=None,cov=1.0,random_state=13)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y)

# ========================== import real data ===========================
data = scipy.io.loadmat('breastdata.mat')
X = data['X']; Y = data['Y']

data = scipy.io.loadmat('sonar.mat')
X = data['X']; Y = data['Y']

# ========================== standardize data ===========================
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_mean = np.mean(X,axis=0)