def plot_prediction(learner=None, data=None,
                    minx=0, maxx=5, step_size=0.01,  # for plotting
                    label="function"):
    """Plots the training data and the learner's predicted function.

    learner: predictor to plot; when None, a Linear_learner is created
        and trained on the data with a decreasing learning-rate schedule.
    data: data set to plot; when None, a simple regression set is loaded.
    minx, maxx, step_size: x-range and resolution of the plotted curve.
    label: legend label for the learned function.
    """
    plt.ion()
    plt.xlabel("x")
    plt.ylabel("y")
    if data is None:
        data = Data_from_file('data/simp_regr.csv', prob_test=0,
                              boolean_features=False, target_index=-1)
    if learner is None:
        learner = Linear_learner(data, squashed=False)
        # anneal the learning rate: a few coarse steps, then many finer ones
        learner.learning_rate = 0.001
        learner.learn(100)
        learner.learning_rate = 0.0001
        learner.learn(1000)
        learner.learning_rate = 0.00001
        learner.learn(10000)
        learner.display(1, "function learned is", learner.predictor_string(),
                        "error=", data.evaluate_dataset(data.train,
                                                        learner.predictor,
                                                        "sum-of-squares"))
    plt.plot([e[0] for e in data.train], [e[-1] for e in data.train],
             "bo", label="data")
    # FIX: compute the x-values once; the original evaluated
    # arange(minx, maxx, step_size) twice for the same plot call.
    x_values = list(arange(minx, maxx, step_size))
    plt.plot(x_values, [learner.predictor([x]) for x in x_values], label=label)
    plt.legend()
    plt.draw()
def plot_polynomials(data=None, learner_class=Linear_learner,
                     max_degree=5, minx=0, maxx=5,
                     num_iter=100000, learning_rate=0.0001,
                     step_size=0.01,  # for plotting
                     ):
    """Fits and plots polynomial predictors of increasing degree.

    For every degree below max_degree, the data is augmented with the
    power features x**1 .. x**degree, a learner_class learner is trained
    on the augmented data, and the learned curve is drawn over the data.
    """
    plt.ion()
    plt.xlabel("x")
    plt.ylabel("y")
    if data is None:
        data = Data_from_file('data/simp_regr.csv', prob_test=0,
                              boolean_features=False, target_index=-1)
    plt.plot([e[0] for e in data.train], [e[-1] for e in data.train],
             "ko", label="data")
    x_values = list(arange(minx, maxx, step_size))
    line_styles = ['-', '--', '-.', ':']
    colors = ['0.5', 'k', 'k', 'k', 'k']
    for degree in range(max_degree):
        # augment the data with the powers x**1 .. x**degree
        power_features = [power_feat(n) for n in range(1, degree + 1)]
        data_aug = Data_set_augmented(data, power_features,
                                      include_orig=False)
        learner = learner_class(data_aug, squashed=False)
        learner.learning_rate = learning_rate
        learner.learn(num_iter)
        learner.display(1, "For degree", degree,
                        "function learned is", learner.predictor_string(),
                        "error=", data.evaluate_dataset(data.train,
                                                        learner.predictor,
                                                        "sum-of-squares"))
        # cycle through the styles/colours so curves stay distinguishable
        plt.plot(x_values, [learner.predictor([x]) for x in x_values],
                 linestyle=line_styles[degree % len(line_styles)],
                 color=colors[degree % len(colors)],
                 label="degree=" + str(degree))
    plt.legend(loc='upper left')
    plt.draw()
def plot_steps(learner=None, data=None, criterion="sum-of-squares",
               step=1, num_steps=1000, log_scale=True, label=""):
    """Plots the training and test error of a learner as it learns.

    learner is the learner to train and evaluate (a Linear_learner is
        created when None)
    data is the data set used for training and testing (a default data
        set is loaded when None)
    criterion gives the evaluation criterion plotted on the y-axis
    step specifies how many learning steps are run between two
        consecutive points on the plot
    num_steps is the total number of learning steps
    log_scale uses a logarithmic x-axis when True
    """
    # FIX: the original docstring was truncated ("data is the ...") and
    # documented a nonexistent learner_class parameter.
    # NOTE(review): the label parameter is currently unused -- confirm
    # whether callers rely on it before removing.
    plt.ion()
    plt.xlabel("step")
    plt.ylabel("Average " + criterion + " error")
    if log_scale:
        plt.xscale('log')  # plt.semilogx()  # Makes a log scale
    else:
        plt.xscale('linear')
    if data is None:
        data = Data_from_file('data/holiday.csv', num_train=19,
                              target_index=-1)
        # data = Data_from_file('data/SPECT.csv', target_index=0)
        # data = Data_from_file('data/mail_reading.csv', target_index=-1)
        # data = Data_from_file('data/carbool.csv', target_index=-1)
        random.seed(None)  # reset seed
    if learner is None:
        learner = Linear_learner(data)
    train_errors = []
    test_errors = []
    for i in range(1, num_steps + 1, step):
        # evaluate before learning so the first point is the initial error
        test_errors.append(
            data.evaluate_dataset(data.test, learner.predictor, criterion))
        train_errors.append(
            data.evaluate_dataset(data.train, learner.predictor, criterion))
        learner.display(2, "Train error:", train_errors[-1],
                        "Test error:", test_errors[-1])
        learner.learn(num_iter=step)
    plt.plot(range(1, num_steps + 1, step), train_errors,
             ls='-', c='k', label="training errors")
    plt.plot(range(1, num_steps + 1, step), test_errors,
             ls='--', c='k', label="test errors")
    plt.legend()
    plt.pause(0.001)
    plt.show(block=True)
    learner.display(1, "Train error:", train_errors[-1],
                    "Test error:", test_errors[-1])
def test(**args):
    """Trains a linear learner on the SPECT data and reports its errors.

    Keyword arguments are passed through to the Linear_learner
    constructor. The average test-set error is printed for every
    evaluation criterion the data set supports.
    """
    data = Data_from_file('data/SPECT.csv', target_index=0)
    # data = Data_from_file('data/mail_reading.csv', target_index=-1)
    # data = Data_from_file('data/carbool.csv', target_index=-1)
    learner = Linear_learner(data, **args)
    learner.learn()
    print("function learned is", learner.predictor_string())
    for criterion in Data_set.evaluation_criteria:
        error = data.evaluate_dataset(data.test, learner.predictor,
                                      criterion)
        print(" Average", criterion, "error is", error)
def testMain():
    """Builds a small two-layer sigmoid network for the mail-reading
    data, trains it while plotting train/test errors, and prints the
    network's prediction for every training example, with timing.
    """
    data = Data_from_file('data/mail_reading.csv', target_index=-1)
    #data = Data_from_file('data/mail_reading_consis.csv', target_index=-1)
    #data = Data_from_file('data/SPECT.csv', prob_test=0.5, target_index=0)
    #data = Data_from_file('data/holiday.csv', target_index=-1)  #, num_train=19)
    nn1 = NN(data)
    # layers must be added one at a time: each new layer is sized from
    # the network's current output width
    nn1.add_layer(Linear_complete_layer(nn1, 3))
    nn1.add_layer(Sigmoid_layer(nn1))  # comment this or the next
    # nn1.add_layer(ReLU_layer(nn1))
    nn1.add_layer(Linear_complete_layer(nn1, 1))
    nn1.add_layer(Sigmoid_layer(nn1))
    nn1.learning_rate = 0.1
    #nn1.learn(100)
    from learnLinear import plot_steps
    import time
    t_start = time.perf_counter()
    plot_steps(learner=nn1, data=data, num_steps=10000)
    for example in data.train:
        print(example, nn1.predictor(example))
    t_end = time.perf_counter()
    print("Time:", t_end - t_start)
train_errors.append( sum(self.distance(self.class_of_eg(eg),eg) for eg in self.dataset.train) /len(self.dataset.train)) if self.dataset.test: test_errors.append( sum(self.distance(self.class_of_eg(eg),eg) for eg in self.dataset.test) /len(self.dataset.test)) plt.plot(range(1,maxstep+1),train_errors, label=str(self.num_classes)+" classes. Training set") if self.dataset.test: plt.plot(range(1,maxstep+1),test_errors, label=str(self.num_classes)+" classes. Test set") plt.legend() plt.draw() %data = Data_from_file('data/emdata1.csv', num_train=10, target_index=2000) % trivial example data = Data_from_file('data/emdata2.csv', num_train=10, target_index=2000) %data = Data_from_file('data/emdata0.csv', num_train=14, target_index=2000) % example from textbook kml = K_means_learner(data,2) num_iter=4 print("Class assignment after",num_iter,"iterations:") kml.learn(num_iter); kml.show_classes() # Plot the error # km2=K_means_learner(data,2); km2.plot_error(20) # 2 classes # km3=K_means_learner(data,3); km3.plot_error(20) # 3 classes # km13=K_means_learner(data,13); km13.plot_error(20) # 13 classes # data = Data_from_file('data/carbool.csv', target_index=2000,boolean_features=True) # kml = K_means_learner(data,3) # kml.learn(20); kml.show_classes()
# Testing from learnDT import DT_learner from learnProblem import Data_set, Data_from_file def sp_DT_learner(min_prop=0.9): def make_learner(dataset): mne = len(dataset.train) * min_prop return DT_learner(dataset, min_number_examples=mne) return make_learner data = Data_from_file('data/carbool.csv', target_index=-1) #data = Data_from_file('data/SPECT.csv', target_index=0) #data = Data_from_file('data/mail_reading.csv', target_index=-1) #data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1) learner9 = Boosting_learner(data, sp_DT_learner(0.9)) #learner7 = Boosting_learner(data, sp_DT_learner(0.7)) #learner5 = Boosting_learner(data, sp_DT_learner(0.5)) predictor9 = learner9.learn(10) for i in learner9.offsets: print(i.__doc__) import matplotlib.pyplot as plt def plot_boosting(data, steps=10, thresholds=[0.5, 0.1, 0.01, 0.001],
# NOTE(review): this chunk starts in the middle of an expression -- the
# tail of a sum(...) over examples inside an error function whose
# beginning is outside this view; the leading indentation is a best
# guess.
                     error_example(predictor(example),
                                   dataset.target(example), to_optimize)
                     for example in data_subset)
    return error

from learnProblem import Data_set, Data_from_file

def test(data):
    """Prints errors and the trees for various evaluation criteria and ways
    to select leaves.
    """
    for crit in Data_set.evaluation_criteria:
        # NOTE(review): 'selections' is not defined in this chunk; it is
        # presumably a module-level collection of leaf-selection methods
        # (the sibling test() uses ("mean", "median")) -- verify against
        # the full file.
        for leaf in selections:
            tree = DT_learner(data, to_optimize=crit,
                              leaf_selection=leaf).learn()
            print("For", crit, "using", leaf, "at leaves, tree built is:",
                  tree.__doc__)
            if data.test:
                for ecrit in Data_set.evaluation_criteria:
                    test_error = data.evaluate_dataset(data.test, tree, ecrit)
                    print(" Average error for", ecrit, "using", leaf,
                          "at leaves is", test_error)

if __name__ == "__main__":
    #print("carbool.csv"); test(data = Data_from_file('data/carbool.csv', target_index=-1))
    # print("SPECT.csv"); test(data = Data_from_file('data/SPECT.csv', target_index=0))
    print("mail_reading.csv")
    test(data=Data_from_file('data/mail_reading.csv', target_index=-1))
    # print("holiday.csv"); test(data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1))
def plot_fig_7_15():
    """Plots the error curve for the SPECT data set.

    Note: different runs produce different plots.
    """
    data = Data_from_file('data/SPECT.csv', target_index=0)
    # data = Data_from_file('data/carbool.csv', target_index=-1)
    plot_error(data)
def prod(L):
    """returns the product of the elements of L"""
    result = 1
    for element in L:
        result *= element
    return result

def random_dist(k):
    """generate k random numbers that sum to 1"""
    raw = [random.random() for _ in range(k)]
    total = sum(raw)
    return [v / total for v in raw]

data = Data_from_file('data/emdata2.csv', num_train=10, target_index=2000)
eml = EM_learner(data, 2)
num_iter = 2
print("Class assignment after", num_iter, "iterations:")
eml.learn(num_iter)
eml.show_class(0)

# Plot the error
# em2=EM_learner(data,2); em2.plot_error(40)    # 2 classes
# em3=EM_learner(data,3); em3.plot_error(40)    # 3 classes
# em13=EM_learner(data,13); em13.plot_error(40) # 13 classes

# data = Data_from_file('data/carbool.csv', target_index=2000,boolean_features=False)
# [f.frange for f in data.input_features]
# eml = EM_learner(data,3)
# eml.learn(20); eml.show_class(0)
error_example(predictor(example), dataset.target(example), to_optimize) for example in data_subset) return error from learnProblem import Data_set, Data_from_file def test(data): """Prints errors and the trees for various evaluation criteria and ways to select leaves. """ for crit in Data_set.evaluation_criteria: for leaf in ("mean", "median"): tree = DT_learner(data, to_optimize=crit, leaf_selection=leaf).learn() print("For", crit, "using", leaf, "at leaves, tree built is:", tree.__doc__) if data.test: for ecrit in Data_set.evaluation_criteria: test_error = data.evaluate_dataset(data.test, tree, ecrit) print(" Average error for", ecrit, "using", leaf, "at leaves is", test_error) if __name__ == "__main__": # print("carbool.csv"); test(data = Data_from_file('data/carbool.csv', target_index=-1)) print("SPECT.csv") test(data=Data_from_file('data/pima.txt', target_index=0)) # print("mail_reading.csv"); test(data = Data_from_file('data/mail_reading.csv', target_index=-1)) # print("holiday.csv"); test(data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1))
# Artificial Intelligence: Foundations of Computational Agents
# http://artint.info
# Copyright David L Poole and Alan K Mackworth 2017.
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# See: http://creativecommons.org/licenses/by-nc-sa/4.0/deed.en

from learnDT import DT_learner
from learnProblem import Data_set, Data_from_file

def test(data):
    """Prints errors and the trees for various evaluation criteria and ways
    to select leaves.
    """
    for crit in Data_set.evaluation_criteria:
        for leaf in ("mean", "median"):
            tree = DT_learner(data, to_optimize=crit,
                              leaf_selection=leaf).learn()
            print("For", crit, "using", leaf, "at leaves, tree built is:",
                  tree.__doc__)
            if data.test:
                for ecrit in Data_set.evaluation_criteria:
                    test_error = data.evaluate_dataset(data.test, tree, ecrit)
                    print(" Average error for", ecrit, "using", leaf,
                          "at leaves is", test_error)

if __name__ == "__main__":
    # FIX: every other data set in this file is loaded from the 'data/'
    # directory; 'pima.txt' alone lacked the prefix and would raise
    # FileNotFoundError when run from the project root.
    test(data=Data_from_file('data/pima.txt', target_index=8))
def characteristic_error(target, prediction):
    """Returns the per-class error of prediction against the class index
    target: 1 - prediction[i] for the target class and -prediction[i]
    for every other class (i.e. the difference between a one-hot
    encoding of target and the prediction).
    """
    return [
        1 - prediction[i] if target == i else -prediction[i]
        for i in range(len(prediction))
    ]

def sum_squares_error(observed, predicted):
    """Returns the errors for each of the target features
    (element-wise observed minus predicted).
    """
    return [obsd - pred for obsd, pred in zip(observed, predicted)]

data = Data_from_file('data/training.txt', target_index=-1)
#data = Data_from_file('data/mail_reading_consis.csv', target_index=-1)
#data = Data_from_file('data/SPECT.csv', prob_test=0.5, target_index=0)
# data = Data_from_file('data/holiday.csv', target_index=-1)  #, num_train=19)
nn1 = NN(data)
# layers are added one at a time; each layer is sized from the
# network's current output width
nn1.add_layer(Linear_complete_layer(nn1, 50))
nn1.add_layer(Sigmoid_layer(nn1))  # comment this or the next
# nn1.add_layer(ReLU_layer(nn1))
nn1.add_layer(Linear_complete_layer(nn1, 10))
nn1.add_layer(Sigmoid_layer(nn1))
nn1.learning_rate = 0.1
# nn1.learn(100)
start_time = time.perf_counter()
# NOTE(review): this call is truncated here; its remaining arguments
# continue outside this chunk.
plot_steps(learner=nn1, data=data,