示例#1
0
def train(path="data"):
    """Train a PNN classifier on wavelet features extracted from *path*.

    Loads audio features and labels via ``wavelet_data``, performs an
    80/20 train/test split, fits a PNN (std=2), persists the trained
    model to ``pnn-model.dill`` and prints how many test samples were
    classified correctly.

    Parameters
    ----------
    path : str
        Directory handed to ``wavelet_data`` (default: "data").
    """
    audio_data, audio_label = wavelet_data(path)

    X = np.float32(audio_data)
    print('shape X:', str(X.shape))

    Y = audio_label
    print('shape Y: ', str(len(Y)))

    # 80/20 split into training and test data.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2)
    print('X_train shape: ', X_train.shape)
    print('X_test shape', X_test.shape)

    pnn = PNN(std=2, verbose=False)
    pnn.train(X_train, Y_train)

    # Persist the trained network so it can be reloaded without retraining.
    with open('pnn-model.dill', 'wb') as f:
        dill.dump(pnn, f)

    result = pnn.predict(X_test)

    n_predicted_correctly = np.sum(result == Y_test)
    n_test_samples = X_test.shape[0]

    print("Guessed {} out of {}".format(n_predicted_correctly, n_test_samples))
    # NOTE(review): ``start_time`` is assumed to be a module-level timestamp
    # captured before calling train() -- confirm it exists at module scope.
    # Fixed typo in the original message ("Processiing").
    print("Processing time : %s seconds" % (time.time() - start_time))
示例#2
0
    def neuron(self, p_class, age, sib_sp, par_ch, fare, sex_female, sex_male,
               embarked_c, embarked_q, embarked_s):
        """Fit a PNN on the module-level training data and classify a
        single passenger described by the given feature values.

        Returns the predicted class as a float.
        """
        features = [
            p_class, age, sib_sp, par_ch, fare, sex_female, sex_male,
            embarked_c, embarked_q, embarked_s
        ]
        network = PNN(verbose=False, std=10)
        # NOTE(review): X_train / y_train appear to be module-level globals
        # -- confirm against the rest of the file.
        network.fit(X_train, y_train)
        prediction = network.predict(np.array([features]))
        return float(prediction)
示例#3
0
    def test_digit_prediction(self):
        """PNN should reach ~98.89% accuracy on a 70/30 digits split."""
        digits = datasets.load_digits()
        samples, test_samples, labels, test_labels = train_test_split(
            digits.data, digits.target, train_size=0.7
        )

        network = PNN(standard_deviation=10)
        network.train(samples, labels)
        predictions = network.predict(test_samples)

        accuracy = metrics.accuracy_score(test_labels, predictions)
        self.assertAlmostEqual(accuracy, 0.9889, places=4)
示例#4
0
    def test_predict_probability(self):
        """predict_prob must return one probability row per input sample,
        with each row summing (to 10 decimal places) to exactly 1."""
        digits = datasets.load_digits()
        samples, test_samples, labels, _ = train_test_split(
            digits.data, digits.target, train_size=0.7
        )
        n_classes = len(np.unique(digits.target))

        network = PNN(standard_deviation=10)
        network.train(samples, labels)
        probabilities = network.predict_prob(test_samples)

        n_inputs = test_samples.shape[0]
        self.assertEqual(probabilities.shape, (n_inputs, n_classes))

        # Rounding guards against tiny floating-point drift in the sums.
        row_sums = np.round(probabilities.sum(axis=1), 10)
        self.assertTrue(np.all(row_sums == np.ones((n_inputs, 1))))
示例#5
0
    def test_simple_pnn(self):
        """Stratified 10-fold PNN on iris should average 14.4 hits/fold."""
        iris = datasets.load_iris()
        features = iris.data
        labels = iris.target

        n_folds = 10
        # Old-style StratifiedKFold API: constructed with (labels, n_folds)
        # and iterated directly.
        folds = StratifiedKFold(labels, n_folds)
        total_correct = 0

        for train_idx, test_idx in folds:
            network = PNN(standard_deviation=0.1)
            network.train(features[train_idx], labels[train_idx])
            predictions = network.predict(features[test_idx])
            total_correct += sum(labels[test_idx] == predictions)

        self.assertEqual(total_correct / n_folds, 14.4)
示例#6
0
    def test_handle_errors(self):
        """PNN.train / PNN.predict should reject malformed inputs."""
        # Target vector length differs from the number of input rows.
        with self.assertRaises(ValueError):
            PNN().train(np.array([[0], [0]]), np.array([0]))

        # Target must be a 1-D vector; a 2-D target is invalid.
        with self.assertRaises(ValueError):
            PNN().train(np.array([[0], [0]]), np.array([[0]]))

        # PNN is a one-shot algorithm: no epoch-based training exists.
        with self.assertRaises(AttributeError):
            PNN().train_epoch()

        # Prediction input must carry the same feature count as the
        # training data.
        with self.assertRaises(ValueError):
            network = PNN()
            network.train(np.array([[0], [0]]), np.array([0]))
            network.predict(np.array([[0]]))
示例#7
0
import numpy as np
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold

from neupy.algorithms import PNN

# Stratified 10-fold evaluation of a PNN on the iris dataset.
dataset = datasets.load_iris()
data = dataset.data
target = dataset.target

test_data_size = 10
skfold = StratifiedKFold(n_splits=test_data_size)
avarage_result = 0

print("> Start classify iris dataset")

for i, (train, test) in enumerate(skfold.split(data, target), start=1):
    # Per-fold train/test partition.
    x_train, y_train = data[train], target[train]
    x_test, y_test = data[test], target[test]

    pnn_network = PNN(std=0.1, verbose=False)
    pnn_network.train(x_train, y_train)
    result = pnn_network.predict(x_test)

    n_correct = np.sum(result == y_test)
    print("Test #{:<2}: Guessed {} out of {}".format(i, n_correct, test.size))
示例#8
0
skfold = StratifiedKFold(mesothelioma_target, kfold_number, shuffle=True)
avarage_result = 0

accu_sum = 0
mcc_sum = 0
specificity_sum = 0
thisF1_sum = 0
sensitivity_sum = 0

print("> Start classify mesothelioma dataset")

for i, (train, test) in enumerate(skfold, start=1):
    x_train, x_test = mesothelioma_data[train], mesothelioma_data[test]
    y_train, y_test = mesothelioma_target[train], mesothelioma_target[test]

    pnn_network = PNN(std=0.1, step=0.2, verbose=True)  # BEST
    #pnn_network = PNN(std=0.1, step=0.2,  verbose=True, batch_size=20)

    # pnn_network.train(x_train, y_train)
    # predictions = pnn_network.predict(x_test)
    pnn_network.train(mesothelioma_data[train], mesothelioma_target[train])
    predictions = pnn_network.predict(mesothelioma_data[test])

    # print(predictions)
    #print(mesothelioma_target[test])

    tn, fp, fn, tp = confusion_matrix(mesothelioma_target[test],
                                      predictions).ravel()
    print("tn, fp, fn, tp")
    print(
        tn,
import pandas as pd
from neupy.algorithms import PNN

# Read data from the csv files. pd.read_csv already returns a DataFrame,
# so the original redundant pd.DataFrame(...) wrappers were dropped.
train_data = pd.read_csv('processed_input/train.csv')
test_data = pd.read_csv('processed_input/test.csv')

# Split into features (X) and target (y); keep the IDs for the output file.
# (`columns=` already selects the column axis, so the redundant axis=1
# argument was removed.)
target_column = 'Made Donation in March 2007'
X_train = train_data.drop(columns=target_column)
y_train = train_data[target_column]
X_test = test_data.drop(columns=target_column)
IDs = test_data['ID']

# Instantiate the PNN model.
pnn = PNN(verbose=True)

# Fit to training data and then predict class probabilities.
prediction = pnn.fit(X_train, y_train).predict_proba(X_test)

# Concatenate IDs and the prediction column.
pred = pd.concat(
    [IDs,
     pd.DataFrame(prediction.astype(float), columns=[target_column])],
    axis=1)

# Write prediction to csv.
pred.to_csv('output/pnn_prediction.csv', index=False)
示例#10
0
File: pnn_iris.py  Project: itdxer/neupy
import numpy as np
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold

from neupy.algorithms import PNN


# Evaluate a PNN on iris with stratified 10-fold splits.
dataset = datasets.load_iris()
data = dataset.data
target = dataset.target

test_data_size = 10
skfold = StratifiedKFold(n_splits=test_data_size)
avarage_result = 0

print("> Start classify iris dataset")

fold_number = 1
for train, test in skfold.split(data, target):
    x_train, x_test = data[train], data[test]
    y_train, y_test = target[train], target[test]

    pnn_network = PNN(std=0.1, verbose=False)
    pnn_network.train(x_train, y_train)
    result = pnn_network.predict(x_test)

    print("Test #{:<2}: Guessed {} out of {}".format(
        fold_number, np.sum(result == y_test), test.size
    ))
    fold_number += 1
示例#11
0
#
#xgb_estimator.fit(X_train,y_train)
#
#joblib.dump(xgb_estimator,'xgb_0.01_500_5_11_0.1_0_0.7_0.7.pkl')
#
#xgb_predictions = xgb_estimator.predict(X_valid)
#
#xgb_accuracy = accuracy_score(np.array(y_valid),np.array(xgb_predictions))
#
#xgb_report = classification_report(np.array(y_valid),np.array(xgb_predictions))
#
#xgb_confusion = confusion_matrix(np.array(y_valid),np.array(xgb_predictions))
""" MODEL 6: PNN """

#Initialise model
pnn = PNN(std=10, verbose=True)

#Parameters to grid search
X_range = np.amax(X_train) - np.amin(X_train)

std = []
for xx in [0.25, 0.5, 1, 2]:
    std.append(X_range * xx)
param_grid = {'std': std}

#Perform grid search, return best parameters and estimator
pnn_parameters, pnn_estimator = model_param_selector(X_train, y_train, X_valid,
                                                     y_valid, pnn, param_grid,
                                                     'pnn')

#Save best model
示例#12
0
# data preprocessing
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Fit the scaler on the training data only, to avoid test-set leakage.
scaler.fit(X_train)

# Apply the same transformation to both splits.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

pnn_network = PNN(std=1,
                  shuffle_data=True,
                  batch_size=10,
                  step=0.01,
                  verbose=False)
pnn_network.train(X_train, y_train)

# Predict the test split once. The original code called predict(X_test)
# twice (once into an unused `result`, once into `predictions`); both
# names are kept for compatibility but now share a single computation.
result = pnn_network.predict(X_test)
predictions = result

train_predictions = pnn_network.predict(X_train)
train_accuracy = (train_predictions == y_train).sum() / len(y_train)
print('train accuracy %s' % train_accuracy)  #0.958907605921

# prediction and evaluation
test_accuracy = (predictions == y_test).sum() / len(y_test)
print('test accuracy %s' % test_accuracy)  #0.617346938776
from sklearn.metrics import classification_report, confusion_matrix
示例#13
0
                       activation='tanh',
                       hidden_layer_sizes=(groups.sum(), ),
                       random_state=1)
 nnclf.fit(X_train, Y_train)
 endtime = time.time()
 nn_pred = nnclf.predict(X_test)
 nn_predictions = RBF_ISCC.target2matrix(nn_pred,
                                         len(dataset.label_dict),
                                         pos_note=1,
                                         neg_note=0)
 nn_score = performance_measure(nn_predictions, true_label)
 print("NN:{score}".format(score=nn_score))
 print("Time:{}s".format(endtime - starttime))
 #%%
 starttime = time.time()
 rbfclf = PNN(std=0.5, batch_size="all")
 rbfclf.train(X_train, Y_train)
 endtime = time.time()
 rbf_pred = rbfclf.predict(X_test)
 rbf_predictions = RBF_ISCC.target2matrix(rbf_pred,
                                          len(dataset.label_dict),
                                          pos_note=1,
                                          neg_note=0)
 rbf_score = performance_measure(rbf_predictions, true_label)
 print("RBF:{score}".format(score=rbf_score))
 print("Time:{}s".format(endtime - starttime))
 #%%
 #    starttime=time.time()
 #    krr_clf = KernelRidge().fit(X_train, Y_train)
 #    endtime=time.time()
 #    krr_clf_pred = krr_clf.predict(X_test)
示例#14
0
df2 = pd.DataFrame({'x': x2, 'y': y2, 'target': 1})
df3 = pd.DataFrame({'x': x3, 'y': y3, 'target': 2})


def split_df(df):
    """Partition *df* into train/valid/test dataframes.

    70% of the rows go to train; the remaining 30% is split again so
    that valid receives 70% of it and test the final 30%. Random states
    are fixed so the partition is reproducible.
    """
    x_train, holdout = train_test_split(df,
                                        test_size=0.3,
                                        shuffle=True,
                                        random_state=21)
    x_valid, x_test = train_test_split(holdout,
                                       test_size=0.3,
                                       shuffle=True,
                                       random_state=14)
    return x_train, x_valid, x_test


# Partition each class dataframe, then stitch the per-class pieces of
# every split back together.
train_parts, valid_parts, test_parts = zip(
    *(split_df(df) for df in (df1, df2, df3)))

train = pd.concat(train_parts)
valid = pd.concat(valid_parts)
test = pd.concat(test_parts)

build_model(PNN(std=0.1), train, test, valid)
# One class label per prototype for each of the three letter classes.
letter_classA = ['A'] * prototypes
letter_classB = ['B'] * prototypes
letter_classC = ['C'] * prototypes

# Stack center of clusters as the training data, labels alongside.
X_train = np.vstack((center_classA, center_classB, center_classC))
Y_train = np.hstack((letter_classA, letter_classB, letter_classC))

method = input('Gaussian Naive Bayes (G) or PNN (P):')

# Number of cross-validation folds.
cross_validation = 3

if method in ('P', 'p'):
    pnn = PNN(std=0.1)
    pnn.train(X_train, Y_train)

    # Cross-validated accuracy over the training prototypes.
    score = cross_val_score(pnn,
                            X_train,
                            Y_train,
                            scoring='accuracy',
                            cv=cross_validation)
    print("")
    print("Cross Validation: {0} (+/- {1})".format(
        abs(score.mean().round(2)), (score.std() * 2).round(2)))
    print("")

    # Classify the held-out samples.
    Y_predict = pnn.predict(X_test)