# Objective for scipy.optimize.minimize: unpack the flat parameter vector,
# evaluate the loss and gradient, and flatten the gradient back out.
def J(theta):
    weights = pack_struct(theta, self.layer_units)
    loss, grad = neural_net_loss(weights, X, y, reg)
    grad = flatten_struct(grad)
    return loss, grad
def J(theta):
    weights = pack_struct(theta, self.layer_units)
    loss, grad = mlp_loss(weights, X, y, reg)
    grad = flatten_struct(grad)
    return loss, grad
def J(theta):
    weights = pack_struct(theta, self.layer_units)
    loss, grad = sparse_autoencoder_loss(weights, X, reg, beta=beta,
                                         sparsity_param=sparsity_param)
    grad = flatten_struct(grad)
    return loss, grad
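# The closures above all follow the same contract: J(theta) returns
# (loss, flat_gradient). Before handing such a closure to the optimizer, it is
# cheap to sanity-check the analytic gradient against finite differences. A
# minimal, self-contained sketch using a toy quadratic in place of the real
# loss; a real check would wrap one of the J closures above the same way.
import numpy as np
from scipy.optimize import check_grad


def J(theta):
    # Toy stand-in with the same (loss, grad) contract.
    loss = 0.5 * np.dot(theta, theta)
    grad = theta
    return loss, grad


theta0 = np.random.randn(10)
# check_grad wants separate loss/grad callables, so split the tuple.
err = check_grad(lambda t: J(t)[0], lambda t: J(t)[1], theta0)
print("gradient check error:", err)  # should be around 1e-7 or smaller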
import numpy as np
from numpy.testing import assert_allclose


def test_pack_struct():
    layer_units = (4, 8, 4)
    n_layers = len(layer_units)
    # Total parameter count: one W matrix and one b vector per layer pair.
    num = 0
    for i in range(n_layers - 1):
        num += layer_units[i + 1] * layer_units[i] + layer_units[i + 1]
    weights = np.random.randn(num)
    actual = flatten_struct(pack_struct(weights, layer_units))
    desired = weights
    assert_allclose(actual, desired, atol=1e-8)
def test_flatten_struct():
    layer_units = (4, 8, 4)
    n_layers = len(layer_units)
    weights = init_weights(layer_units)
    actual = flatten_struct(weights)
    desired = []
    for i in range(n_layers - 1):
        desired.append(weights[i]['W'].flatten())
        desired.append(weights[i]['b'].flatten())
    desired = np.concatenate(desired)
    assert_allclose(actual, desired, atol=1e-8)
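# The two tests above pin down the round-trip contract between flatten_struct
# and pack_struct. For reference, a minimal sketch of helpers satisfying that
# contract; the (fan_out, fan_in) orientation of W is an assumption, since the
# tests only exercise the round trip, not the shape.
import numpy as np


def flatten_struct(weights):
    # Concatenate each layer's W and b into one flat parameter vector.
    parts = []
    for layer in weights:
        parts.append(layer['W'].ravel())
        parts.append(layer['b'].ravel())
    return np.concatenate(parts)


def pack_struct(theta, layer_units):
    # Inverse of flatten_struct: slice theta back into per-layer dicts.
    weights = []
    offset = 0
    for i in range(len(layer_units) - 1):
        fan_in, fan_out = layer_units[i], layer_units[i + 1]
        W = theta[offset:offset + fan_out * fan_in].reshape(fan_out, fan_in)
        offset += fan_out * fan_in
        b = theta[offset:offset + fan_out]
        offset += fan_out
        weights.append({'W': W, 'b': b})
    return weights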
def fit(self, X, y, X_val, y_val, reg=0.0, learning_rate=1e-2,
        optimizer='L-BFGS-B', max_iters=100, sample_batches=True,
        n_epochs=30, batch_size=128, verbose=False):
    # learning_rate, sample_batches, n_epochs and batch_size are unused by
    # the full-batch L-BFGS-B solver below; they are kept for API parity.
    best_val_acc = 0.0
    best_weights = {}
    if self.weights is None:
        # lazily initialize weights
        self.weights = self.init_weights()

    # Solve with L-BFGS-B
    options = {'maxiter': max_iters, 'disp': verbose}

    def J(theta):
        weights = pack_struct(theta, self.layer_units)
        loss, grad = neural_net_loss(weights, X, y, reg)
        grad = flatten_struct(grad)
        return loss, grad

    # Callback to track loss and accuracies on the training / validation sets
    iter_feval = 0
    loss_history = []
    train_acc_history = []
    val_acc_history = []

    def progress(x):
        nonlocal iter_feval, best_weights, best_val_acc
        iter_feval += 1

        # Loss history
        weights = pack_struct(x, self.layer_units)
        loss, grad = neural_net_loss(weights, X, y, reg)
        loss_history.append(loss)

        # Training accuracy
        y_pred_train = neural_net_predict(weights, X)
        train_acc = np.mean(y_pred_train == y)
        train_acc_history.append(train_acc)

        # Validation accuracy
        y_pred_val = neural_net_predict(weights, X_val)
        val_acc = np.mean(y_pred_val == y_val)
        val_acc_history.append(val_acc)

        # Keep track of the best weights based on validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            n_weights = len(weights)
            best_weights = [{} for _ in range(n_weights)]
            for i in range(n_weights):
                for p in weights[i]:
                    best_weights[i][p] = weights[i][p].copy()

        # Integer division keeps the modulo check below well-defined
        n_iters_verbose = max(1, max_iters // 20)
        if iter_feval % n_iters_verbose == 0:
            print("iter: {:4d}, loss: {:8f}, train_acc: {:4f}, val_acc: {:4f}"
                  .format(iter_feval, loss, train_acc, val_acc))

    # Minimize the loss function
    init_theta = flatten_struct(self.weights)
    results = scipy.optimize.minimize(J, init_theta, method=optimizer,
                                      jac=True, callback=progress,
                                      options=options)

    # Save the best weights seen during optimization
    self.weights = best_weights
    return self.weights, loss_history, train_acc_history, val_acc_history
def flatten_struct(self, data):
    # Thin wrapper delegating to the module-level flatten_struct helper.
    return flatten_struct(data)
def fit(self, X, reg=3e-3, beta=3, sparsity_param=1e-1, learning_rate=1e-2,
        optimizer='L-BFGS-B', max_iters=100, verbose=False):
    # learning_rate is unused by the full-batch L-BFGS-B solver below.
    best_loss = 1e12
    best_weights = {}
    if self.weights is None:
        # lazily initialize weights
        self.weights = self.init_weights()

    # Solve with L-BFGS-B
    options = {'maxiter': max_iters, 'disp': verbose}

    def J(theta):
        weights = pack_struct(theta, self.layer_units)
        loss, grad = sparse_autoencoder_loss(weights, X, reg, beta=beta,
                                             sparsity_param=sparsity_param)
        grad = flatten_struct(grad)
        return loss, grad

    # Callback to track the training loss
    iter_feval = 0
    loss_history = []

    def progress(x):
        nonlocal iter_feval, best_weights, best_loss
        iter_feval += 1

        # Loss history
        weights = pack_struct(x, self.layer_units)
        loss, grad = sparse_autoencoder_loss(weights, X, reg, beta=beta,
                                             sparsity_param=sparsity_param)
        loss_history.append(loss)

        # Keep track of the best weights based on loss
        if loss < best_loss:
            best_loss = loss
            n_weights = len(weights)
            best_weights = [{} for _ in range(n_weights)]
            for i in range(n_weights):
                for p in weights[i]:
                    best_weights[i][p] = weights[i][p].copy()

        # Integer division keeps the modulo check below well-defined
        n_iters_verbose = max(1, max_iters // 20)
        if iter_feval % n_iters_verbose == 0:
            print("iter: {:4d}, loss: {:8f}".format(iter_feval, loss))

    # Minimize the loss function
    init_theta = flatten_struct(self.weights)
    results = scipy.optimize.minimize(J, init_theta, method=optimizer,
                                      jac=True, callback=progress,
                                      options=options)

    # Save the best weights seen during optimization
    self.weights = best_weights
    return self.weights, loss_history
def fit(self, X, y, X_val, y_val, reg=0.0, learning_rate=1e-2,
        optimizer='L-BFGS-B', max_iters=100, verbose=False):
    # learning_rate is unused by the full-batch L-BFGS-B solver below.
    best_val_acc = 0.0
    best_weights = {}
    if self.weights is None:
        # lazily initialize weights
        self.weights = self.init_weights()

    # Solve with L-BFGS-B
    options = {'maxiter': max_iters, 'disp': verbose}

    def J(theta):
        weights = pack_struct(theta, self.layer_units)
        loss, grad = mlp_loss(weights, X, y, reg)
        grad = flatten_struct(grad)
        return loss, grad

    # Callback to track loss and accuracies on the training / validation sets
    iter_feval = 0
    loss_history = []
    train_acc_history = []
    val_acc_history = []

    def progress(x):
        nonlocal iter_feval, best_weights, best_val_acc
        iter_feval += 1

        # Loss history
        weights = pack_struct(x, self.layer_units)
        loss, grad = mlp_loss(weights, X, y, reg)
        loss_history.append(loss)

        # Training accuracy
        y_pred_train = mlp_predict(weights, X)
        train_acc = np.mean(y_pred_train == y)
        train_acc_history.append(train_acc)

        # Validation accuracy
        y_pred_val = mlp_predict(weights, X_val)
        val_acc = np.mean(y_pred_val == y_val)
        val_acc_history.append(val_acc)

        # Keep track of the best weights based on validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            n_weights = len(weights)
            best_weights = [{} for _ in range(n_weights)]
            for i in range(n_weights):
                for p in weights[i]:
                    best_weights[i][p] = weights[i][p].copy()

        # Integer division keeps the modulo check below well-defined
        n_iters_verbose = max(1, max_iters // 20)
        if iter_feval % n_iters_verbose == 0:
            print("iter: {:4d}, loss: {:8f}, train_acc: {:4f}, val_acc: {:4f}"
                  .format(iter_feval, loss, train_acc, val_acc))

    # Minimize the loss function
    init_theta = flatten_struct(self.weights)
    results = scipy.optimize.minimize(J, init_theta, method=optimizer,
                                      jac=True, callback=progress,
                                      options=options)

    # Save the best weights seen during optimization
    self.weights = best_weights
    return self.weights, loss_history, train_acc_history, val_acc_history
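# All three fit methods above share the same skeleton: a (loss, grad) closure
# handed to scipy.optimize.minimize with jac=True, plus a callback that logs a
# history and keeps the best iterate via nonlocal state. A self-contained
# miniature of that loop, with a toy quadratic standing in for the real loss:
import numpy as np
import scipy.optimize


def run_demo():
    def J(theta):
        # Toy objective with the same (loss, grad) contract as fit() uses.
        loss = np.sum((theta - 1.0) ** 2)
        grad = 2.0 * (theta - 1.0)
        return loss, grad

    iter_feval = 0
    best_loss = np.inf
    best_theta = None
    loss_history = []

    def progress(x):
        nonlocal iter_feval, best_loss, best_theta
        iter_feval += 1
        loss, _ = J(x)
        loss_history.append(loss)
        # Keep the best iterate, mirroring the best_weights bookkeeping above.
        if loss < best_loss:
            best_loss = loss
            best_theta = x.copy()

    init_theta = np.zeros(5)
    scipy.optimize.minimize(J, init_theta, method='L-BFGS-B', jac=True,
                            callback=progress, options={'maxiter': 50})
    return best_theta, loss_history


theta, history = run_demo()
print("best theta:", theta)        # converges to all ones
print("final loss:", history[-1])  # approximately zero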