示例#1
0
def test_numpy_inplace():
    """Presumably meant to check that ``standardize`` rewrites column 1 of ``ary``.

    NOTE(review): as written the final assertion is a tautology (see the
    inline comments), so this test cannot fail on a wrong result. The
    intended contract should be confirmed before the check is repaired.
    """
    ary = np.array([[1, 10], [2, 9], [3, 8], [4, 7], [5, 6], [6, 5]])

    # The return value is discarded; only a side effect on `ary` could matter.
    standardize(ary, [1])

    # NOTE(review): `ary` is rebound here to the very same object as
    # `ary_expc`, so the array that was passed to standardize() is never
    # inspected afterwards.
    ary = ary_expc = np.array([[1, 1.46385], [2, 0.87831], [3, 0.29277],
                               [4, -0.29277], [5, -0.87831], [6, -1.46385]])

    # Compares the expected array with itself.
    np.testing.assert_allclose(ary, ary_expc, rtol=1e-03)
def test_numpy_inplace():
    """Presumably meant to check that ``standardize`` rewrites column 1 of ``ary``.

    NOTE(review): as written the final assertion is a tautology (see the
    inline comments), so this test cannot fail on a wrong result. The
    intended contract should be confirmed before the check is repaired.
    """
    ary = np.array([[1, 10], [2, 9], [3, 8], [4, 7], [5, 6], [6, 5]])

    # The return value is discarded; only a side effect on `ary` could matter.
    standardize(ary, [1])

    # NOTE(review): `ary` is rebound here to the very same object as
    # `ary_expc`, so the array that was passed to standardize() is never
    # inspected afterwards.
    ary = ary_expc = np.array([[1, 1.46385],
                               [2, 0.87831],
                               [3, 0.29277],
                               [4, -0.29277],
                               [5, -0.87831],
                               [6, -1.46385]])

    # Compares the expected array with itself.
    np.testing.assert_allclose(ary, ary_expc, rtol=1e-03)
示例#3
0
def test_numpy_single_feat():
    """Selecting only column 1 is expected to return a 6x1 standardized array."""
    data = np.array([[1, 10], [2, 9], [3, 8], [4, 7], [5, 6], [6, 5]])
    wanted_column = [1.46385, 0.87831, 0.29277, -0.29277, -0.87831, -1.46385]
    expected = np.array(wanted_column).reshape(-1, 1)

    result = standardize(data, [1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_standardize_all_columns_ndarray():
    """With ``columns=None`` both columns come back; the all-zero one stays 0.0."""
    data = np.array([[0, value] for value in (10, 9, 8, 7, 6, 5)])
    second_col = (1.46385, 0.87831, 0.29277, -0.29277, -0.87831, -1.46385)
    expected = np.array([[0.0, z] for z in second_col])

    result = standardize(data, columns=None)

    np.testing.assert_allclose(result, expected, rtol=1e-03)
示例#5
0
def test_zero_division_numpy():
    """A constant (all-zero) column is expected to come back as 0.0, not NaN."""
    data = np.column_stack((np.zeros(6, dtype=int), np.arange(10, 4, -1)))
    second_col = np.array([1.46385, 0.87831, 0.29277,
                           -0.29277, -0.87831, -1.46385])
    expected = np.column_stack((np.zeros(6), second_col))

    result = standardize(data, columns=[0, 1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
示例#6
0
def test_numpy_single_dim():
    """A 1-D input with ``columns=[0]`` is expected to yield a 6x1 result."""
    data = np.arange(1, 7)
    wanted_column = [-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]
    expected = np.array(wanted_column).reshape(-1, 1)

    result = standardize(data, [0])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
示例#7
0
def test_numpy_standardize():
    """Both columns of a 6x2 integer array are standardized.

    Column 0 ascends and column 1 descends over the same spacing, so the
    expected columns are exact negatives of each other.
    """
    data = np.column_stack((np.arange(1, 7), np.arange(10, 4, -1)))
    descending = np.array([1.46385, 0.87831, 0.29277,
                           -0.29277, -0.87831, -1.46385])
    expected = np.column_stack((-descending, descending))

    result = standardize(data, columns=[0, 1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
示例#8
0
def test_zero_division_pandas():
    """DataFrame input: the constant column 's1' is expected to come back as 0.0."""
    frame = pd.DataFrame({'s1': [0, 0, 0, 0, 0, 0],
                          's2': [10, 9, 8, 7, 6, 5]},
                         columns=['s1', 's2'])
    second_col = (1.46385, 0.87831, 0.29277, -0.29277, -0.87831, -1.46385)
    expected = np.array([[0.0, z] for z in second_col])

    standardized = standardize(frame, ['s1', 's2'])

    np.testing.assert_allclose(standardized.values, expected, rtol=1e-03)
示例#9
0
def test_pandas_standardize():
    """DataFrame input: both named columns are standardized."""
    frame = pd.DataFrame({'s1': [1, 2, 3, 4, 5, 6],
                          's2': [10, 9, 8, 7, 6, 5]},
                         columns=['s1', 's2'])
    descending = np.array([1.46385, 0.87831, 0.29277,
                           -0.29277, -0.87831, -1.46385])
    # 's1' ascends where 's2' descends, so its expected values are negated.
    expected = np.column_stack((-descending, descending))

    standardized = standardize(frame, ['s1', 's2'])

    np.testing.assert_allclose(standardized.values, expected, rtol=1e-03)
def test_zero_division_numpy():
    """A constant (all-zero) column is expected to come back as 0.0, not NaN."""
    data = np.array([[0, value] for value in range(10, 4, -1)])
    second_col = [1.46385, 0.87831, 0.29277, -0.29277, -0.87831, -1.46385]
    expected = np.array([[0.0, z] for z in second_col])

    result = standardize(data, columns=[0, 1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_numpy_standardize():
    """Both columns of a 6x2 integer array are standardized."""
    data = np.array(list(zip(range(1, 7), range(10, 4, -1))))
    ascending = np.array([-1.46385, -0.87831, -0.29277,
                          0.29277, 0.87831, 1.46385])
    # Column 1 runs in the opposite direction, hence the sign flip.
    expected = np.column_stack((ascending, -ascending))

    result = standardize(data, columns=[0, 1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_numpy_single_feat():
    """Selecting only column 1 is expected to return a 6x1 standardized array."""
    data = np.array(list(zip(range(1, 7), range(10, 4, -1))))
    expected = np.array([1.46385, 0.87831, 0.29277,
                         -0.29277, -0.87831, -1.46385])[:, np.newaxis]

    result = standardize(data, [1])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_standardize_all_columns_ndarray():
    """With ``columns=None`` both columns come back; the all-zero one stays 0.0."""
    data = np.column_stack((np.zeros(6, dtype=int), np.arange(10, 4, -1)))
    second_col = np.array([1.46385, 0.87831, 0.29277,
                           -0.29277, -0.87831, -1.46385])
    expected = np.column_stack((np.zeros(6), second_col))

    result = standardize(data, columns=None)

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_numpy_single_dim():
    """A 1-D input with ``columns=[0]`` is expected to yield a 6x1 result."""
    data = np.array(list(range(1, 7)))
    expected = np.array([-1.46385, -0.87831, -0.29277,
                         0.29277, 0.87831, 1.46385])[:, np.newaxis]

    result = standardize(data, [0])

    np.testing.assert_allclose(result, expected, rtol=1e-03)
def test_zero_division_pandas():
    """DataFrame input: the constant column 's1' is expected to come back as 0.0."""
    zeros = pd.Series([0] * 6, index=(range(6)))
    countdown = pd.Series(list(range(10, 4, -1)), index=(range(6)))
    frame = pd.DataFrame(zeros, columns=['s1'])
    frame['s2'] = countdown

    second_col = [1.46385, 0.87831, 0.29277, -0.29277, -0.87831, -1.46385]
    expected = np.column_stack((np.zeros(6), np.array(second_col)))

    standardized = standardize(frame, ['s1', 's2'])
    np.testing.assert_allclose(standardized.values, expected, rtol=1e-03)
def test_pandas_standardize():
    """DataFrame input: both named columns are standardized."""
    count_up = pd.Series(list(range(1, 7)), index=(range(6)))
    countdown = pd.Series(list(range(10, 4, -1)), index=(range(6)))
    frame = pd.DataFrame(count_up, columns=['s1'])
    frame['s2'] = countdown

    ascending = np.array([-1.46385, -0.87831, -0.29277,
                          0.29277, 0.87831, 1.46385])
    # 's2' runs in the opposite direction, hence the sign flip.
    expected = np.column_stack((ascending, -ascending))

    standardized = standardize(frame, ['s1', 's2'])
    np.testing.assert_allclose(standardized.values, expected, rtol=1e-03)
示例#17
0

# Load the data, shuffle features and labels together with a fixed seed, then
# take the first 500 samples for training and everything after for testing.
X, y = mnist_data()
X, y = shuffle_arrays_unison((X, y), random_seed=1)
X_train, y_train = X[:500], y[:500]
X_test, y_test = X[500:], y[500:]

def plot_digit(X, y, idx):
    """Display sample ``idx`` of ``X`` as a 28x28 greyscale image.

    The plot title shows ``y[idx]`` as the true label.
    """
    # Each row is a flat 784-element vector; fold it back into 28 x 28.
    image = X[idx].reshape(28, 28)
    plt.imshow(image, interpolation='nearest', cmap='Greys')
    caption = 'true label: %d' % y[idx]
    plt.title(caption)
    plt.show()

# Show one sample as a visual sanity check.
plot_digit(X, y, 3500)

# Standardize every column of the training split and keep the returned
# parameters so the test split is transformed with those same parameters.
X_train_std, params = standardize(X_train, columns=range(X_train.shape[1]), return_params=True)
X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params)

# MLP with a single entry (150) in hidden_layers; l1, l2, momentum and
# decrease_const are all set to zero here.
nn1 = MLP(hidden_layers=[150],
          l2=0.00,
          l1=0.0,
          epochs=100,
          eta=0.005,
          momentum=0.0,
          decrease_const=0.0,
          minibatches=100,
          random_seed=1,
          print_progress=3)

nn1.fit(X_train_std, y_train)
# Plot the values recorded in nn1.cost_ against their position in the list.
plt.plot(range(len(nn1.cost_)), nn1.cost_)
# Sebastian Raschka 2014-2019
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from numpy.testing import assert_almost_equal
from numpy.testing import assert_allclose
from mlxtend.utils import assert_raises
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize

# Module-level fixture: load the data once at import time and standardize it;
# the tests below all read X_std.
X, y = iris_data()
X_std = standardize(X)


def test_default_components():
    """A PCA built with default arguments yields 4 output columns for X_std."""
    model = PCA().fit(X_std)
    projected = model.transform(X_std)
    n_columns = projected.shape[1]
    assert n_columns == 4


def test_whitening():
    """Check the summed variance of a 2-component projection of X_std.

    NOTE(review): the visible snippet ends right after the whitening PCA is
    constructed, so the whitening assertions themselves are not shown here.
    """
    pca = PCA(n_components=2)
    res = pca.fit(X_std).transform(X_std)
    # Diagonal of the covariance matrix of the projected columns, i.e. the
    # per-column variances, added together.
    diagonals_sum = np.sum(np.diagonal(np.cov(res.T)))
    assert round(diagonals_sum, 1) == 3.9, diagonals_sum

    # Second model with whitening enabled; not used in the visible lines.
    pca = PCA(n_components=2, whitening=True)
args = parser.parse_args()

train_data = pd.read_csv("train.data.csv")
# Kept as a plain list so any later code that reads `label_list` still works.
label_list = list(train_data['class'])
# Encode the target as +1 for rows whose class is ' >50K' (the raw value
# carries a leading space) and -1 for every other row. np.where replaces the
# former index loop and yields the same float array.
label = np.where(train_data['class'] == ' >50K', 1.0, -1.0)

# Every column except the last is a feature. DataFrame.as_matrix() was removed
# in pandas 1.0; `.values` returns the same array on old and new versions.
feature = train_data.values[:, :-1]
# Preprocess the data
feature = preprocess(feature)
# Standardize columns in train_feature
feature = standardize(feature)
# Train-validation split: 10% of the rows are held back for validation
train_feature, val_feature, train_label, val_label = train_test_split(
    feature, label, test_size=0.1)

plt.figure()
for lamda in [0.001, 0.01, 0.1, 1]:
    # Initialize a, b
    a = np.random.randn(train_feature.shape[1], 1)  # shape=(14, 1)
    b = 0
    accuracy_list = []
    mag_list = []
    loss_list = []
    for epoch in range(args.num_epochs):
        actual_train_feature, held_out_feature, actual_train_label, held_out_label = train_test_split(
            train_feature,
示例#20
0
# Standardizing with StandardScaler
# (presumably sklearn.preprocessing.StandardScaler; its import is not shown here)
scaler = StandardScaler()

wine_subset = wine[['ash', 'alcalinity', 'magnesium']]

wine_subset.apply(np.var)  # variance of each selected column before scaling

wine_subset_scaled = scaler.fit_transform(wine_subset)

# Variance after scaling.
# NOTE(review): without an axis argument np.var reduces an ndarray over all
# of its elements, giving one number rather than one per column -- confirm
# that this is the intended check.
np.var(wine_subset_scaled)

# Standardizing using `mlxtend`
## Also creates a Z-score, but easier when working w/ Pandas DF

from mlxtend.preprocessing import standardize

# Standardize only the named columns of the full frame ...
standardize(wine, columns=['ash', 'alcalinity', 'magnesium'])

# ... or pass the pre-selected subset without naming columns.
standardize(wine_subset)

# MinMaxScaler
## X_norm = (X - X_min) / (X_max - X_min)

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

wine_minmax_scaled = scaler.fit_transform(wine_subset)

# Same caveat as above: this is a single variance over all elements.
np.var(wine_minmax_scaled)

# Using mlxtend version of MinMaxScaler
示例#21
0
import matplotlib.pyplot as plt


def plot_digit(X, y, idx):
    """Display sample ``idx`` of ``X`` as a 28x28 greyscale image.

    The plot title shows ``y[idx]`` as the true label.
    """
    image = X[idx].reshape(28, 28)
    plt.imshow(image, interpolation='nearest', cmap='Greys')
    caption = 'true label: %d' % y[idx]
    plt.title(caption)
    plt.show()


# Show one sample as a visual sanity check.
plot_digit(X, y, 3500)

from mlxtend.preprocessing import standardize

# Standardize every column of the training split and keep the returned
# parameters so the test split is transformed with those same parameters.
X_train_std, params = standardize(X_train,
                                  columns=range(X_train.shape[1]),
                                  return_params=True)
X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params)
# MLP with a single entry (150) in hidden_layers; l1, l2, momentum and
# decrease_const are all set to zero here.
nn1 = MLP(hidden_layers=[150],
          l2=0.00,
          l1=0.0,
          epochs=100,
          eta=0.005,
          momentum=0.0,
          decrease_const=0.0,
          minibatches=100,
          random_seed=1,
          print_progress=3)
nn1.fit(X_train_std, y_train)

# Plot the values recorded in nn1.cost_ against their position in the list.
plt.plot(range(len(nn1.cost_)), nn1.cost_)
示例#22
0
import pandas as pd
import numpy as np
from mlxtend.preprocessing import standardize

# Demo of standardize() on a DataFrame and on an ndarray.
# The former Python 2 `print x` statements are a SyntaxError on Python 3;
# single-argument print(...) calls behave the same on both versions.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=(range(6)))
s2 = pd.Series([10, 9, 8, 7, 6, 5], index=(range(6)))
df = pd.DataFrame(s1, columns=['s1'])
df['s2'] = s2
print(df)

print(standardize(df, columns=['s1', 's2']))


X = np.array([[1, 10], [2, 9], [3, 8], [4, 7], [5, 6], [6, 5]])
print(standardize(X, columns=[0, 1]))


# re-using parameters: keep the params returned for the training data and
# pass them in when transforming the test data

X_train = np.array([[1, 10], [4, 7], [3, 8]])
X_test = np.array([[1, 2], [3, 4], [5, 6]])

X_train_std, params = standardize(X_train, columns=[0, 1], return_params=True)
X_test_std = standardize(X_test, columns=[0, 1], params=params)

print(params)
print(X_train_std)
print(X_test_std)
# Sebastian Raschka 2014-2017
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from numpy.testing import assert_almost_equal
from numpy.testing import assert_allclose
from mlxtend.utils import assert_raises
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize

# Module-level fixture: load the data once at import time and replace X with
# its standardized version; the tests below all read X.
X, y = iris_data()
X = standardize(X)


def test_default_components():
    """A PCA built with default arguments yields 4 output columns for X."""
    model = PCA().fit(X)
    projected = model.transform(X)
    n_columns = projected.shape[1]
    assert n_columns == 4


def test_default_2components():
    """Requesting ``n_components=2`` yields exactly 2 output columns."""
    model = PCA(n_components=2).fit(X)
    projected = model.transform(X)
    n_columns = projected.shape[1]
    assert n_columns == 2


def test_eigen_vs_svd():
# Sebastian Raschka 2014-2017
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from numpy.testing import assert_almost_equal
from numpy.testing import assert_allclose
from mlxtend.utils import assert_raises
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize

# Module-level fixture: load the data once at import time and replace X with
# its standardized version; the tests below all read X.
X, y = iris_data()
X = standardize(X)


def test_default_components():
    """A PCA built with default arguments yields 4 output columns for X."""
    fitted = PCA().fit(X)
    transformed = fitted.transform(X)
    _, width = transformed.shape[0], transformed.shape[1]
    assert width == 4


def test_default_2components():
    """Requesting ``n_components=2`` yields exactly 2 output columns."""
    fitted = PCA(n_components=2).fit(X)
    transformed = fitted.transform(X)
    width = transformed.shape[1]
    assert width == 2


def test_eigen_vs_svd():
# Sebastian Raschka 2014-2019
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from numpy.testing import assert_almost_equal
from numpy.testing import assert_allclose
from mlxtend.utils import assert_raises
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize

# Module-level fixture: load the data once at import time and standardize it;
# the tests below all read X_std.
X, y = iris_data()
X_std = standardize(X)


def test_default_components():
    """A PCA built with default arguments yields 4 output columns for X_std."""
    fitted = PCA().fit(X_std)
    transformed = fitted.transform(X_std)
    width = transformed.shape[1]
    assert width == 4


def test_whitening():
    """Check the summed variance of a 2-component projection of X_std.

    NOTE(review): the visible snippet ends right after the whitening PCA is
    constructed, so the whitening assertions themselves are not shown here.
    """
    pca = PCA(n_components=2)
    res = pca.fit(X_std).transform(X_std)
    # Diagonal of the covariance matrix of the projected columns, i.e. the
    # per-column variances, added together.
    diagonals_sum = np.sum(np.diagonal(np.cov(res.T)))
    assert round(diagonals_sum, 1) == 3.9, diagonals_sum

    # Second model with whitening enabled; not used in the visible lines.
    pca = PCA(n_components=2, whitening=True)