def fit(self, X, y):
    if y.ndim == 1:
        y = y[:, None]

    self.layer_sizes = [X.shape[1]] + self.hidden_layer_sizes + [y.shape[1]]
    self.classification = y.shape[1] > 1  # assume it's classification iff y has more than 1 column

    # random init
    scale = 0.01
    weights = list()
    for i in range(len(self.layer_sizes) - 1):
        W = scale * np.random.randn(self.layer_sizes[i + 1], self.layer_sizes[i])
        b = scale * np.random.randn(1, self.layer_sizes[i + 1])
        weights.append((W, b))
    weights_flat = flatten_weights(weights)

    # utils.check_gradient(self, X, y, len(weights_flat), epsilon=1e-6)

    # 10 and 500 are passed positionally to findMin.SGD; judging from the
    # other variants below, they appear to be the epoch count and minibatch size.
    weights_flat_new, f = findMin.SGD(self.funObj, weights_flat, 10, 500, X, y, verbose=True)
    self.weights = unflatten_weights(weights_flat_new, self.layer_sizes)
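This fit leans on flatten_weights/unflatten_weights from the surrounding project. Those helpers are not shown here, so the following is a minimal sketch, assuming (from the call sites) that they simply pack each (W, b) pair into one flat parameter vector and back.

import numpy as np

# Hedged sketch of the flatten/unflatten helpers assumed above; inferred from
# how they are called, not taken from the actual utility module.
def flatten_weights(weights):
    # Pack every (W, b) pair into a single flat parameter vector.
    return np.concatenate([w.flatten() for pair in weights for w in pair])

def unflatten_weights(weights_flat, layer_sizes):
    # Inverse of flatten_weights: slice the flat vector back into (W, b) pairs.
    weights = []
    counter = 0
    for i in range(len(layer_sizes) - 1):
        W_size = layer_sizes[i + 1] * layer_sizes[i]
        b_size = layer_sizes[i + 1]
        W = weights_flat[counter:counter + W_size].reshape((layer_sizes[i + 1], layer_sizes[i]))
        counter += W_size
        b = weights_flat[counter:counter + b_size][None]
        counter += b_size
        weights.append((W, b))
    return weights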
def fit(self, X, y, epoch, minibatch, alpha):
    n, d = X.shape
    self.n_classes = np.unique(y).size
    self.w = np.zeros(d * self.n_classes)
    error = 1  # validation error never exceeds 1, so the first epoch always updates this

    # SGD with early stopping: hold out a third of the (reshuffled) data each
    # epoch as a validation set and stop once the error stops improving.
    for e in range(epoch):
        X, y = shuffle(X, y)
        Xtrain = X[:n // 3 * 2]
        ytrain = y[:n // 3 * 2]
        Xvalid = X[n // 3 * 2:]
        yvalid = y[n // 3 * 2:]
        # iterate over the training split only, not all n rows
        for i in range(0, Xtrain.shape[0], minibatch):
            self.w, f = findMin.SGD(self.funObj, self.w, alpha,
                                    Xtrain[i:i + minibatch, :],
                                    ytrain[i:i + minibatch])

        # check validation error
        self.w = np.reshape(self.w, (d, self.n_classes))
        yhat = np.argmax(Xvalid @ self.w, axis=1)
        error_new = np.mean(yvalid != yhat)
        self.w = np.reshape(self.w, d * self.n_classes)
        if error_new < error:
            error = error_new
        else:
            # stop when validation error doesn't improve
            break
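The early-stopping loop above calls findMin.SGD once per minibatch and expects back an updated weight vector and a loss value. A minimal sketch of that step, assuming the positional signature (funObj, w, alpha, X, y) inferred from the call site rather than the actual findMin module:

def SGD(funObj, w, alpha, X, y):
    # One stochastic gradient step on the given minibatch; funObj is assumed
    # to return the loss f and gradient g at w.
    f, g = funObj(w, X, y)
    w = w - alpha * g
    return w, f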
def fitWithSGD(self, X, y, minibatch, epoch, alpha):
    if y.ndim == 1:
        y = y[:, None]
    n, d = X.shape

    self.layer_sizes = [X.shape[1]] + self.hidden_layer_sizes + [y.shape[1]]
    self.classification = y.shape[1] > 1  # assume it's classification iff y has more than 1 column

    # random init
    scale = 0.01
    weights = list()
    for i in range(len(self.layer_sizes) - 1):
        W = scale * np.random.randn(self.layer_sizes[i + 1], self.layer_sizes[i])
        b = scale * np.random.randn(1, self.layer_sizes[i + 1])
        weights.append((W, b))
    weights_flat = flatten_weights(weights)

    # plain minibatch SGD: reshuffle each epoch, then take one step per batch
    for e in range(epoch):
        X, y = shuffle(X, y)
        for i in range(0, n, minibatch):
            weights_flat, f = findMin.SGD(self.funObj, weights_flat, alpha,
                                          X[i:i + minibatch, :],
                                          y[i:i + minibatch, :])
    self.weights = unflatten_weights(weights_flat, self.layer_sizes)
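For reference, a hypothetical call to fitWithSGD; the NeuralNet class name, hidden_layer_sizes argument, and data shapes are illustrative assumptions. Note the one-hot targets, which make y.shape[1] > 1 and so flip the classification flag:

import numpy as np

n, d, k = 1000, 20, 5
X = np.random.randn(n, d)
labels = np.random.randint(k, size=n)
Y = np.zeros((n, k))
Y[np.arange(n), labels] = 1  # one-hot encode the class labels

model = NeuralNet(hidden_layer_sizes=[64])  # hypothetical class wrapping fitWithSGD
model.fitWithSGD(X, Y, minibatch=500, epoch=10, alpha=0.001)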
def fit(self, X, y):
    if y.ndim == 1:
        y = y[:, None]
    n, d = X.shape

    self.layer_sizes = [X.shape[1]] + self.hidden_layer_sizes + [y.shape[1]]
    self.classification = y.shape[1] > 1  # assume it's classification iff y has more than 1 column

    # random init
    scale = 0.01
    weights = list()
    for i in range(len(self.layer_sizes) - 1):
        W = scale * np.random.randn(self.layer_sizes[i + 1], self.layer_sizes[i])
        b = scale * np.random.randn(1, self.layer_sizes[i + 1])
        weights.append((W, b))
    weights_flat = flatten_weights(weights)

    # hard-coded hyperparameters for this variant; note alpha is passed as a
    # keyword here, unlike the positional call in fitWithSGD
    alpha = 0.001
    batch_size = 500
    epochs = 10
    for epoch in range(epochs):
        X, y = shuffle(X, y)
        for i in range(0, n, batch_size):
            weights_flat, f = findMin.SGD(self.funObj, weights_flat,
                                          X[i:i + batch_size, :],
                                          y[i:i + batch_size, :],
                                          verbose=True, alpha=alpha)

    # utils.check_gradient(self, X, y, len(weights_flat), epsilon=1e-6)
    # weights_flat_new, f = findMin.findMin(self.funObj, weights_flat, self.max_iter, X, y, verbose=True)
    self.weights = unflatten_weights(weights_flat, self.layer_sizes)
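All of the variants above reshuffle the data each epoch via shuffle(X, y), which matches the signature of sklearn.utils.shuffle. A dependency-free NumPy stand-in (an assumption about the helper's behavior, not the project's actual import) would be:

import numpy as np

def shuffle(X, y):
    # Apply one random permutation to both arrays so rows stay paired.
    perm = np.random.permutation(X.shape[0])
    return X[perm], y[perm]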