def load_mnist_data(s0, s1, ratio, usage):
    '''
    Given digits s0, s1, load mnist data into training and validation sets.

    The two pre-loaded MNIST splits are pooled, filtered down to the two
    requested digits, randomly sub-sampled, relabelled to 0.0 / 1.0 and then
    partitioned.

    :param s0: int[0, 9] First digit to classify
    :param s1: int[0, 9] Second digit to classify
    :param ratio: float(0, 1) Ratio to put into the training set
    :param usage: float(0, 1) How much of the total MNIST data to use
    :return: tuple (mnist_data, indices_lp) where mnist_data is a
        PartitionData partitioned with ``ratio`` and indices_lp maps the
        binary label (0.0 / 1.0) back to the original digit value
    :raises ValueError: if no sample is left after filtering / sub-sampling
    '''
    # NOTE(review): Xt, Xv, Yt, Yv are not defined in this function; they are
    # presumably the module-level MNIST arrays -- confirm against the loader.
    X = np.vstack((Xt, Xv))
    Y = np.hstack((Yt, Yv))

    # Keep only the samples showing one of the two requested digits.
    wanted = (Y == s0) | (Y == s1)
    X = X[wanted].astype(np.float32)
    Y = Y[wanted].astype(np.float32)

    # Randomly select round(usage * n) of the remaining samples.
    n_samples = np.shape(X)[0]
    M = int(round(n_samples * usage))
    # Plain ``bool`` instead of ``np.bool``: the alias was removed in NumPy 1.24.
    use = np.zeros(n_samples, dtype=bool)
    use[:M] = True
    np.random.shuffle(use)

    X = X[use, :]
    Y = Y[use]

    unique = set(Y)
    if not unique:
        raise ValueError(
            "no MNIST samples selected for digits %r and %r with usage=%r"
            % (s0, s1, usage))
    low, high = min(unique), max(unique)

    # Relabel: the smaller digit becomes 0.0, the larger digit becomes 1.0.
    Y[Y == low] = 0.0
    Y[Y == high] = 1.0

    md = LabelData()
    md.add_data(X, Y)
    mnist_data = PartitionData(md)
    mnist_data.partition(ratio)

    indices_lp = {1.0: high, 0.0: low}
    return mnist_data, indices_lp
def classify_test(s0, s1, usage, ratio, k):
    '''
    Train kernel ridge regression on two MNIST digits and report its
    validation error, training risk and the time spent in each phase.

    :param s0: First digit to classify
    :param s1: Second digit to classify
    :param usage: float(0, 1) How much of the total MNIST data to use
    :param ratio: float(0, 1) Ratio to put into the training set
    :param k: kernel function
    :return: test results, a dict with the keys "error", "risk",
        "training time", "validation time", "risk time", "training size"
        and "validation size"
    '''
    mnist, _ = load_mnist_data(s0, s1, ratio, usage)
    ld = LabelData()
    ld.add_data(mnist.training[0], mnist.training[1])

    # Training time is the time taken to construct the regressor.
    t0 = time.time()
    kregr = KernelRidgeRegression(ld, k=k, l=.0001)
    t1 = time.time()
    ttotal = t1 - t0

    # Predict on the validation set and threshold at 0.5. The labels produced
    # by load_mnist_data are 0.0 / 1.0, so the negative class must be 0 (not
    # -1) for the comparison inside risk() to be meaningful. A score of
    # exactly 0.5 is left untouched.
    t0 = time.time()
    y_v = kregr(mnist.validation[0])
    y_v[y_v > 0.5] = 1
    y_v[y_v < 0.5] = 0
    t1 = time.time()
    vtotal = t1 - t0

    # Same procedure on the training set to obtain the empirical risk.
    t0 = time.time()
    y_t = kregr(mnist.training[0])
    y_t[y_t > 0.5] = 1
    y_t[y_t < 0.5] = 0
    t1 = time.time()
    rtotal = t1 - t0

    error = risk(mnist.validation[1], y_v)
    erisk = risk(mnist.training[1], y_t)

    return {
        "error": error,
        "risk": erisk,
        "training time": ttotal,
        "validation time": vtotal,
        "risk time": rtotal,
        "training size": mnist.training[1].shape[0],
        "validation size": mnist.validation[1].shape[0]
    }
def classify_test(s0, s1, usage, ratio, k):
    '''
    Fit kernel ridge regression on two MNIST digits and measure it.

    Predictions are computed sample by sample with ``krr`` using the
    coefficients of the fitted regressor, then thresholded at 0.5 into the
    labels 0 / 1 (a score of exactly 0.5 is left as is).

    :param s0: First digit to classify
    :param s1: Second digit to classify
    :param usage: float(0, 1) How much of the total MNIST data to use
    :param ratio: float(0, 1) Ratio to put into the training set
    :param k: kernel function
    :return: dict with the keys "error", "risk", "training time",
        "validation time", "risk time", "training size", "validation size"
    '''
    mnist, names = load_mnist_data(s0, s1, ratio, usage)
    train_x, train_y = mnist.training[0], mnist.training[1]
    valid_x, valid_y = mnist.validation[0], mnist.validation[1]

    ld = LabelData()
    ld.add_data(train_x, train_y)

    # Training time: construction of the regressor.
    started = time.time()
    kregr = KernelRidgeRegression(ld, k=k, l=.0001)
    ttotal = time.time() - started

    def timed_labels(samples):
        # Predict every sample, threshold in place, and time both steps.
        began = time.time()
        scores = np.array(
            [krr(ld, kregr._alpha, k, sample) for sample in samples])
        scores[scores > 0.5] = 1
        scores[scores < 0.5] = 0
        return scores, time.time() - began

    y_v, vtotal = timed_labels(valid_x)
    y_t, rtotal = timed_labels(train_x)

    results = {}
    results["error"] = risk(valid_y, y_v)
    results["risk"] = risk(train_y, y_t)
    results["training time"] = ttotal
    results["validation time"] = vtotal
    results["risk time"] = rtotal
    results["training size"] = train_y.shape[0]
    results["validation size"] = valid_y.shape[0]
    return results
def k(self): return self._k @k.setter def k(self, func): if len(signature(func).parameters) != 2: raise Exception("kernel needs to have two arguments k(x, xp)!") else: self._k = func self.train() def train(self): self._alpha = alpha(self._data, self._k, self._l) def __call__(self, x): return np.array( [krr(self._data, self._alpha, self._k, xi) for xi in x]) if __name__ == '__main__': print(kernel_mat(np.dot, [1, 2, 3])) data = np.array([[2], [1], [7], [9]]) labs = np.array([0.4, 1.2, 3.4, -0.4]) ld = LabelData() ld.add_data(data, labs) a = alpha(ld, np.dot, 0.01) kregr = KernelRidgeRegression(ld) print(kregr([3]))