def test_adult_tree():
    """Train a decision tree on the adult dataset and check its test accuracy.

    Downloads the dataset when ``adult.data.pkl`` is not present in the
    current working directory, trains a ``pk.DecisionTree``, saves it to
    ``tree_adult_classifier.pkl``, prints the train and test accuracy, plots
    the confusion matrix for the test split, and finally asserts that the
    test accuracy is at least 84.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import adult

    # Download the dataset
    if not os.path.exists('adult.data.pkl'):
        adult.get()

    # Load adult data set
    inputs_train, outputs_train, inputs_test, outputs_test = adult.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model. The list has 13 entries, matching the first constructor
    # argument. NOTE(review): 'continues' is presumably the exact spelling
    # pykitml expects -- do not "correct" it without checking the library.
    ftypes = [
        'continues', 'categorical', 'continues', 'categorical',
        'categorical', 'categorical', 'categorical', 'categorical',
        'categorical', 'continues', 'continues', 'continues', 'categorical',
    ]
    tree_adult_classifier = pk.DecisionTree(
        13, 2, max_depth=100, min_split=100, feature_type=ftypes
    )

    # Train
    tree_adult_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(tree_adult_classifier, 'tree_adult_classifier.pkl')

    # Print accuracy
    train_accuracy = tree_adult_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = tree_adult_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    tree_adult_classifier.confusion_matrix(
        inputs_test, outputs_test, gnames=['False', 'True']
    )

    # Assert accuracy, reusing the value printed above instead of evaluating
    # the whole test split a second time.
    assert test_accuracy >= 84
def test_banknote_forest():
    """Train a random forest on the banknote dataset and check test accuracy.

    Downloads the dataset when ``banknote.pkl`` is not present in the current
    working directory, trains a ``pk.RandomForest``, saves it to
    ``forest_banknote_classifier.pkl``, prints the train and test accuracy,
    plots the confusion matrix for the test split, and finally asserts that
    the test accuracy is at least 98.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import banknote

    # Download the dataset
    if not os.path.exists('banknote.pkl'):
        banknote.get()

    # Load banknote data set
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model; all four features use the same feature type.
    ftypes = ['continues'] * 4
    forest_banknote_classifier = pk.RandomForest(
        4, 2, max_depth=9, feature_type=ftypes
    )

    # Train
    forest_banknote_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(forest_banknote_classifier, 'forest_banknote_classifier.pkl')

    # Print accuracy
    train_accuracy = forest_banknote_classifier.accuracy(
        inputs_train, outputs_train
    )
    print('Train accuracy:', train_accuracy)
    test_accuracy = forest_banknote_classifier.accuracy(
        inputs_test, outputs_test
    )
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    forest_banknote_classifier.confusion_matrix(
        inputs_test, outputs_test, gnames=['False', 'True']
    )

    # Assert accuracy, reusing the value printed above instead of evaluating
    # the whole test split a second time.
    assert test_accuracy >= 98
def test_sonar_forest():
    """Train a random forest on the sonar dataset and report its accuracy.

    Downloads the dataset when ``sonar.pkl`` is not present in the current
    working directory, trains a ``pk.RandomForest``, saves it to
    ``forest_sonar_classifier.pkl``, prints the train and test accuracy, and
    plots the confusion matrix for the test split.

    NOTE(review): unlike the sibling tests, this one asserts no accuracy
    threshold, so it only fails if one of the calls raises.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import sonar

    # Download the dataset
    if not os.path.exists('sonar.pkl'):
        sonar.get()

    # Load the sonar dataset
    inputs_train, outputs_train, inputs_test, outputs_test = sonar.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model; all 60 features use the same feature type.
    forest_sonar_classifier = pk.RandomForest(
        60, 2, max_depth=9, feature_type=['continues'] * 60
    )

    # Train the model.
    # NOTE(review): num_feature_bag equals the feature count (60); presumably
    # this means every feature is available to each tree -- confirm against
    # the pykitml documentation.
    forest_sonar_classifier.train(
        inputs_train, outputs_train, num_feature_bag=60
    )

    # Save it
    pk.save(forest_sonar_classifier, 'forest_sonar_classifier.pkl')

    # Print accuracy
    train_accuracy = forest_sonar_classifier.accuracy(
        inputs_train, outputs_train
    )
    print('Train accuracy:', train_accuracy)
    test_accuracy = forest_sonar_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    forest_sonar_classifier.confusion_matrix(
        inputs_test, outputs_test, gnames=['False', 'True']
    )
def test_heart_tree():
    """Train a decision tree on the heart disease dataset and check accuracy.

    Downloads the dataset when ``heartdisease.pkl`` is not present in the
    current working directory, trains a ``pk.DecisionTree``, saves it to
    ``tree_heart_classifier.pkl``, prints the accuracy, plots the confusion
    matrix and the tree itself, and finally asserts that the accuracy is at
    least 94. The same data is used for training and for evaluation.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set
    inputs, outputs = heartdisease.load()
    outputs = pk.onehot(outputs)

    # Create model. The list has 13 entries, matching the first constructor
    # argument. NOTE(review): 'continues' is presumably the exact spelling
    # pykitml expects -- do not "correct" it without checking the library.
    ftypes = [
        'continues', 'categorical', 'categorical', 'continues', 'continues',
        'categorical', 'categorical', 'continues', 'categorical', 'continues',
        'categorical', 'categorical', 'categorical',
    ]
    tree_heart_classifier = pk.DecisionTree(
        13, 2, max_depth=7, feature_type=ftypes
    )

    # Train
    tree_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(tree_heart_classifier, 'tree_heart_classifier.pkl')

    # Print accuracy
    accuracy = tree_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    tree_heart_classifier.confusion_matrix(
        inputs, outputs, gnames=['False', 'True']
    )

    # Plot decision tree
    tree_heart_classifier.show_tree()

    # Assert accuracy, reusing the value printed above instead of evaluating
    # the data set a second time.
    assert accuracy >= 94
def test_heart_bayes():
    """Train a naive Bayes model on the heart disease dataset and check it.

    Downloads the dataset when ``heartdisease.pkl`` is not present in the
    current working directory, trains a ``pk.NaiveBayes``, saves it to
    ``bayes_heart_classifier.pkl``, prints the accuracy, plots the confusion
    matrix, and finally asserts that the accuracy is strictly greater than
    84. The same data is used for training and for evaluation.
    """
    import os

    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set
    inputs, outputs = heartdisease.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs = pk.onehot(outputs)

    # One distribution name per input; 13 entries, matching the first
    # constructor argument.
    distributions = [
        'gaussian', 'binomial', 'multinomial', 'gaussian', 'gaussian',
        'binomial', 'multinomial', 'gaussian', 'binomial', 'gaussian',
        'multinomial', 'multinomial', 'multinomial',
    ]

    # Create model
    bayes_heart_classifier = pk.NaiveBayes(13, 2, distributions)

    # Train
    bayes_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(bayes_heart_classifier, 'bayes_heart_classifier.pkl')

    # Print accuracy
    accuracy = bayes_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    bayes_heart_classifier.confusion_matrix(
        inputs, outputs, gnames=['False', 'True']
    )

    # Assert accuracy, reusing the value printed above instead of evaluating
    # the data set a second time.
    assert accuracy > 84