def test_predict_fishlength():
    """Predict the length of a fish that is 28 days old at 25C.

    Loads the model stored in 'fish_classifier.pkl', feeds it one
    min-max-normalized, polynomial-expanded sample and prints the
    denormalized model output.
    """
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import fishlength

    # The dataset provides the bounds used for (de)normalization
    inputs, outputs = fishlength.load()

    # Load the model from disk
    fish_classifier = pk.load('fish_classifier.pkl')

    # Scale the query sample with the bounds of the dataset inputs
    in_min, in_max = pk.get_minmax(inputs)
    sample = np.array([28, 25])
    sample_norm = pk.normalize_minmax(sample, in_min, in_max)

    # Expand the scaled sample into polynomial features
    sample_poly = pk.polynomial(sample_norm)

    # Run the model on the sample and fetch its output
    fish_classifier.feed(sample_poly)
    prediction = fish_classifier.get_output()

    # Map the output back using the bounds of the dataset outputs
    out_min, out_max = pk.get_minmax(outputs)
    prediction = pk.denormalize_minmax(prediction, out_min, out_max)

    # Show the result
    print(prediction)
def test_normalize():
    """Check that normalizing then denormalizing `eg_array` round-trips.

    The array is normalized with its own min/max bounds, denormalized with
    the same bounds, and compared to the original with `np.allclose`.
    """
    lower, upper = pk.get_minmax(eg_array)

    # Forward and inverse transforms share the same bounds
    scaled = pk.normalize_minmax(eg_array, lower, upper)
    restored = pk.denormalize_minmax(scaled, lower, upper)

    assert np.allclose(restored, eg_array)
def test_predict_banknote():
    """Predict banknote validity for a single sample and print the output.

    The sample has variance, skewness, curtosis and entropy of
    -2.3, -9.3, 9.37 and -0.86. It is normalized with the bounds of the
    training inputs, expanded into polynomial features and fed to the
    model stored in 'banknote_classifier.pkl'.
    """
    import os.path
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import banknote

    # Only the training inputs are used here, for the normalization bounds
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Load the model from disk
    banknote_classifier = pk.load('banknote_classifier.pkl')

    # Scale the query sample with the bounds of the training inputs
    train_min, train_max = pk.get_minmax(inputs_train)
    sample = np.array([-2.3, -9.3, 9.37, -0.86])
    sample_norm = pk.normalize_minmax(sample, train_min, train_max)

    # Expand the scaled sample into polynomial features
    sample_poly = pk.polynomial(sample_norm)

    # Run the model on the sample and fetch its output
    banknote_classifier.feed(sample_poly)
    prediction = banknote_classifier.get_output()

    # Show the result
    print(prediction)
def test_fishlength():
    """Train a linear regression model on the fishlength dataset.

    Inputs are min-max normalized and expanded into polynomial features,
    outputs are min-max normalized. The trained model is saved to
    'fish_classifier.pkl', its performance is plotted, its r2 score is
    printed, and finally its cost on the training data is asserted.
    """
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import fishlength

    # Fetch the raw dataset
    inputs, outputs = fishlength.load()

    # Scale the inputs with their own bounds
    in_min, in_max = pk.get_minmax(inputs)
    inputs = pk.normalize_minmax(inputs, in_min, in_max)

    # Expand the scaled inputs into polynomial features
    features = pk.polynomial(inputs)

    # Scale the outputs with their own bounds
    out_min, out_max = pk.get_minmax(outputs)
    targets = pk.normalize_minmax(outputs, out_min, out_max)

    # One model input per polynomial feature, a single output
    n_features = features.shape[1]
    fish_classifier = pk.LinearRegression(n_features, 1)

    # Fit the model
    adam = pk.Adam(learning_rate=0.02, decay_rate=0.99)
    fish_classifier.train(
        training_data=features,
        targets=targets,
        batch_size=22,
        epochs=200,
        optimizer=adam,
        testing_freq=1,
        decay_freq=10,
    )

    # Persist the trained model
    pk.save(fish_classifier, 'fish_classifier.pkl')

    # Plot performance
    fish_classifier.plot_performance()

    # Report the r2 score on the training data
    r2 = fish_classifier.r2score(features, targets)
    print('r2score:', r2)

    # NOTE(review): the threshold is exactly 0 - presumably cost() reports
    # a rounded value; confirm before tightening or loosening it.
    final_cost = fish_classifier.cost(features, targets)
    assert final_cost <= 0
def test_banknote():
    """Train a logistic regression model on the banknote dataset.

    The dataset is downloaded when 'banknote.pkl' is missing. Train and
    test inputs are normalized with the training bounds and expanded into
    polynomial features. The trained model is saved to
    'banknote_classifier.pkl', performance and a confusion matrix are
    plotted, accuracies are printed, and the test accuracy is asserted to
    be at least 99.
    """
    import os.path
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import banknote

    # Fetch the dataset only when it is not already on disk
    if not os.path.exists('banknote.pkl'):
        banknote.get()

    # Load the train/test split
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Both splits are scaled with the bounds of the training inputs
    train_min, train_max = pk.get_minmax(inputs_train)
    inputs_train = pk.normalize_minmax(inputs_train, train_min, train_max)
    inputs_test = pk.normalize_minmax(inputs_test, train_min, train_max)

    # Expand both splits into polynomial features
    train_features = pk.polynomial(inputs_train)
    test_features = pk.polynomial(inputs_test)

    # One model input per polynomial feature, a single output
    n_features = train_features.shape[1]
    banknote_classifier = pk.LogisticRegression(n_features, 1)

    # Fit the model, evaluating on the test split during training
    adam = pk.Adam(learning_rate=0.06, decay_rate=0.99)
    banknote_classifier.train(
        training_data=train_features,
        targets=outputs_train,
        batch_size=10,
        epochs=1500,
        optimizer=adam,
        testing_data=test_features,
        testing_targets=outputs_test,
        testing_freq=30,
        decay_freq=40,
    )

    # Persist the trained model
    pk.save(banknote_classifier, 'banknote_classifier.pkl')

    # Plot performance
    banknote_classifier.plot_performance()

    # Report accuracy on both splits
    train_accuracy = banknote_classifier.accuracy(train_features, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = banknote_classifier.accuracy(test_features, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix for the test split
    banknote_classifier.confusion_matrix(test_features, outputs_test)

    # The test accuracy must reach the required threshold
    final_accuracy = banknote_classifier.accuracy(test_features, outputs_test)
    assert final_accuracy >= 99
def test_iris():
    """Train a logistic regression model on the iris dataset.

    Train and test inputs are normalized with the training bounds. The
    trained model is saved to 'iris_classifier.pkl', accuracies are
    printed, performance and a confusion matrix are plotted, and the
    training accuracy is asserted to be at least 98.
    """
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import iris

    # Load the train/test split
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Both splits are scaled with the bounds of the training inputs
    train_min, train_max = pk.get_minmax(inputs_train)
    inputs_train = pk.normalize_minmax(inputs_train, train_min, train_max)
    inputs_test = pk.normalize_minmax(inputs_test, train_min, train_max)

    # Model with 4 inputs and 3 outputs
    iris_classifier = pk.LogisticRegression(4, 3)

    # Fit the model, evaluating on the test split during training
    adam = pk.Adam(learning_rate=0.4, decay_rate=0.99)
    iris_classifier.train(
        training_data=inputs_train,
        targets=outputs_train,
        batch_size=10,
        epochs=1500,
        optimizer=adam,
        testing_data=inputs_test,
        testing_targets=outputs_test,
        testing_freq=30,
        decay_freq=20,
    )

    # Persist the trained model
    pk.save(iris_classifier, 'iris_classifier.pkl')

    # Report accuracy on both splits
    train_accuracy = iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot performance
    iris_classifier.plot_performance()

    # Plot confusion matrix for the test split, labelled by class name
    class_names = ['Setosa', 'Versicolor', 'Virginica']
    iris_classifier.confusion_matrix(
        inputs_test, outputs_test, gnames=class_names
    )

    # The final check uses the accuracy on the training split
    final_accuracy = iris_classifier.accuracy(inputs_train, outputs_train)
    assert final_accuracy >= 98
def test_heart():
    """Train a logistic regression model on the heartdisease dataset.

    The dataset is downloaded when 'heartdisease.pkl' is missing. Selected
    input columns are min-max normalized and the categorical columns are
    converted to onehot values. The trained model is saved to
    'heart_classifier.pkl', performance and a confusion matrix are
    plotted, the accuracy is printed and asserted to be at least 87.
    """
    import os.path
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Fetch the dataset only when it is not already on disk
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load the dataset
    inputs, outputs = heartdisease.load()

    # Normalize only the listed columns
    in_min, in_max = pk.get_minmax(inputs)
    scaled_cols = [0, 3, 4, 7, 9]
    inputs = pk.normalize_minmax(inputs, in_min, in_max, cols=scaled_cols)

    # Change categorical values in the listed columns to onehot values
    categorical_cols = [1, 2, 5, 6, 8, 10, 11, 12]
    inputs = pk.onehot_cols(inputs, categorical_cols)

    # Model with 35 inputs and a single output
    heart_classifier = pk.LogisticRegression(35, 1)

    # Fit the model
    adam = pk.Adam(learning_rate=0.015, decay_rate=0.99)
    heart_classifier.train(
        training_data=inputs,
        targets=outputs,
        batch_size=10,
        epochs=1500,
        optimizer=adam,
        testing_freq=30,
        decay_freq=40,
    )

    # Persist the trained model
    pk.save(heart_classifier, 'heart_classifier.pkl')

    # Plot performance, then report accuracy
    heart_classifier.plot_performance()
    accuracy = heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    heart_classifier.confusion_matrix(inputs, outputs)

    # The accuracy must reach the required threshold
    final_accuracy = heart_classifier.accuracy(inputs, outputs)
    assert final_accuracy >= 87
def test_minmax():
    """Check `pk.get_minmax(eg_array)` against hard-coded expected arrays.

    The result is compared to the expected (min, max) pair with
    `np.allclose`.
    """
    expected_min = np.array([0.1, 0.3434, 1.3434, 1.2])
    expected_max = np.array([5.678, 6.2, 8.3, 8.345])

    actual = pk.get_minmax(eg_array)

    assert np.allclose(actual, (expected_min, expected_max))