def cross_validation(method, k=5):
    """Run k-fold cross validation for one classifier type and print the results.

    Works on the module-level ``data`` and ``labl`` arrays. They are cut into
    ``k`` contiguous folds of ``len(labl) // k`` samples each; when the sample
    count is not a multiple of ``k`` the trailing remainder is not part of any
    fold. Each fold is held out once while a classifier is trained on the
    other ``k - 1`` folds. Per-fold training and validation accuracy are
    passed to ``timer.end`` and the means over all folds are printed.

    Args:
        method: ``"lda"`` or ``"qda"``; picks ``LDAClassifier`` or
            ``QDAClassifier``.
        k: Number of folds.

    Raises:
        ValueError: If ``method`` is neither ``"lda"`` nor ``"qda"``.
            (``ValueError`` is an ``Exception`` subclass, so handlers written
            for the previous bare ``Exception`` still catch it.)
    """
    if method == "lda":
        Classifier = LDAClassifier
    elif method == "qda":
        Classifier = QDAClassifier
    else:
        raise ValueError("lda or qda only")

    timer.start("folding data into", k, "copies")
    n = len(labl)
    # Floor division keeps the fold size an int on Python 3 as well as Python 2.
    m = n // k
    # k * m <= n always holds, so the slice ends never need clamping to n.
    data_slice = [data[i * m:(i + 1) * m] for i in range(k)]
    labl_slice = [labl[i * m:(i + 1) * m] for i in range(k)]
    timer.end("done")

    train_rate = []
    valid_rate = []
    for j in range(k):
        timer.start("validation iteration #", j)
        # Train on every fold except the held-out fold j.
        training_data = np.concatenate(
            [data_slice[i] for i in range(k) if i != j])
        training_labl = np.concatenate(
            [labl_slice[i] for i in range(k) if i != j])
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(".... data formating done")
        # Use the class selected from `method`; this used to be hard-coded to
        # LDAClassifier, so "qda" runs silently evaluated LDA instead.
        # NOTE(review): assumes QDAClassifier takes the same (data, labels)
        # constructor arguments as LDAClassifier -- confirm in classifier.py.
        c = Classifier(training_data, training_labl)
        print(".... classifier training done")
        train_rate.append(
            c.score(c.classify_all(training_data), training_labl))
        print(".... training accuracy computation done")
        valid_rate.append(
            c.score(c.classify_all(data_slice[j]), labl_slice[j]))
        print(".... validation accuracy computation done")
        timer.end("done; training accuracy =", train_rate[j],
                  "; validation accuracy =", valid_rate[j])

    print("%s fold cross validation for %s on dataset %s complete"
          % (k, method, which))
    print(".... overall training accuracy = %s" % (np.mean(train_rate),))
    print(".... overall validation accuracy = %s" % (np.mean(valid_rate),))
"""
Problem: How many distinct terms are in the sequence generated by a^b for
2 ≤ a ≤ 100 and 2 ≤ b ≤ 100?

Performance time: ~0.0059s
"""

from timer import timer

timer.start()

# A set keeps one copy of each value, so its size is the number of distinct
# powers a**b over the whole grid of bases and exponents.
bases = range(2, 101)
exponents = range(2, 101)
distinct_powers = {base ** exponent for base in bases for exponent in exponents}
print(len(distinct_powers))

timer.stop()
"""
Problem: Find the sum of digits in the numerator of the 100th convergent of
the continued fraction for e.

Performance time: ~0.0012s
"""

from fractions import Fraction

from timer import timer
from utils import sum_of_digits

timer.start()

# First 100 partial quotients of e = [2; 1, 2, 1, 1, 4, 1, 1, 6, ...]:
# the leading term is 2, every third term after that (1-based positions
# 3, 6, 9, ...) is 2 * position / 3, and all remaining terms are 1.
a = [2]
a.extend(2 * (n // 3) if n % 3 == 0 else 1 for n in range(2, 101))


def convergent_e(limit=-1, index=0):
    """Evaluate the continued fraction a[index] + 1/(a[index+1] + ... + 1/a[limit]).

    ``limit == 0`` is a special case that returns ``a[0]`` unchanged.
    Otherwise the recursion walks forward from ``index`` and bottoms out when
    ``index == limit - 1``, where the last two terms are combined.

    Called with the defaults (``limit=-1``, ``index=0``) the base case is
    never reached and the lookup eventually runs past the end of ``a``
    (IndexError), so callers should pass ``limit`` explicitly.
    """
    if limit == 0:
        return a[0]

    # Read the current term before recursing so an out-of-range index fails
    # here, at the lookup, rather than deeper in the call chain.
    head = a[index]
    if index == limit - 1:
        tail = a[index + 1]
    else:
        tail = convergent_e(limit, index + 1)
    return head + Fraction(1, tail)


# limit=99 consumes a[0] .. a[99], i.e. all 100 partial quotients.
hundredth_convergent = Fraction(convergent_e(99))
print(sum_of_digits(hundredth_convergent.numerator))

timer.stop()
import numpy as np
from os import path
from scipy.io import loadmat
from timer import timer
from classifier import LDAClassifier, QDAClassifier

# Dataset selection: the last assignment to `which` wins, so swap the order of
# the two lines below (or delete one) to switch between spam and mnist.
""" TODO: choose either mnist or spam >>HERE<< """
which = "spam.mat"
which = "mnist.mat"
""" TODO: choose either mnist or spam >>HERE<< """

# Load the chosen .mat file from the "data" directory next to this script.
timer.start("reading", which, "data from matlab file")
raw = loadmat(path.join(path.dirname(__file__), "data", which))
raw_data = raw['data']
raw_labl = raw['label'][0]
timer.end("done")

# Shuffle samples and labels with the same fixed-seed permutation so every run
# sees the same ordering.
timer.start("permuting data randomly")
np.random.seed(0)
ordering = np.random.permutation(len(raw_data))
data = np.empty(raw_data.shape, dtype=raw_data.dtype)
labl = np.empty(raw_labl.shape, dtype=raw_labl.dtype)
# Index-array assignment sends original row `old` to position ordering[old] in
# one step, replacing the former per-row Python loop with the same result.
# NOTE(review): assumes raw_labl holds exactly one label per row of raw_data.
data[ordering] = raw_data
labl[ordering] = raw_labl
del raw, raw_data, raw_labl, ordering
timer.end("done")


def cross_validation(method, k=5):
    if method == "lda":
        Classifier = LDAClassifier