def step(self, net: NeuralNet) -> None:
    for param, grad in net.params_and_grads():
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        old_param = copy.deepcopy(param)
        param -= self.lr * grad
        count = 0
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        temp_lr = self.lr
        # Backtracking line search: halve the learning rate until the
        # sufficient-decrease condition
        #     loss <= loss_old - alpha * ||param - old_param||^2
        # is satisfied.
        while not loss <= loss_old - self.alpha * np.linalg.norm(param - old_param) ** 2:
            print(f'lr: {temp_lr}')
            temp_lr = temp_lr / 2.0
            # Assign in place so the network's parameter array is actually updated.
            param[...] = old_param - temp_lr * grad
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            # print(f'\nloss: {loss}\ntarget loss: {loss_old - self.alpha * np.linalg.norm(param - old_param)**2}')
            if temp_lr < 1e-10:
                print('Step size too small')
                break
            count = count + 1
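# Illustration (not part of the original code): a minimal, self-contained sketch
# of the same backtracking rule on a toy quadratic, to make the acceptance test
# explicit. The names backtracking_step, f, grad_f and x0 are hypothetical and
# used only for this example.
import numpy as np

def backtracking_step(f, grad_f, x0, lr=1.0, alpha=1e-4, min_lr=1e-10):
    g = grad_f(x0)
    t = lr
    x_new = x0 - t * g
    # Accept once f(x_new) <= f(x0) - alpha * ||x_new - x0||^2; halve t otherwise.
    while not f(x_new) <= f(x0) - alpha * np.linalg.norm(x_new - x0) ** 2:
        t /= 2.0
        x_new = x0 - t * g
        if t < min_lr:
            break
    return x_new

# One accepted step on f(x) = ||x||^2 starting from [3, -4]:
x1 = backtracking_step(lambda x: float(x @ x), lambda x: 2 * x,
                       np.array([3.0, -4.0]))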
def step(self, net: NeuralNet) -> None:
    for param, grad, jac in net.params_and_grads_v3():
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        # lamb = min(max(np.linalg.norm(grad.flatten()), 1e-5), 1e5)
        lamb = 1e4
        JTJ = jac.T @ jac
        sh = grad.shape
        # Damped Gauss-Newton (Levenberg-Marquardt) step: solve (J^T J + lamb*I) d = -grad.
        d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
        d = d.reshape(sh)
        # inner_p = np.inner(-grad.flatten(), d.flatten())
        # print('inner product: ', inner_p)
        old_param = copy.deepcopy(param)
        param += d
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        loop_count = 0
        # Increase the damping until the sufficient-decrease condition holds.
        while not loss <= loss_old - self.alpha * np.linalg.norm(param - old_param) ** 2:
            lamb = 2 * lamb
            d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
            d = d.reshape(sh)
            # Retry from the saved parameters with the more strongly damped step.
            param[...] = old_param + d
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            # inner_p = np.inner(-grad.flatten(), d.flatten())
            # print('inner product: ', inner_p)
            loop_count = loop_count + 1
            # print(f'loop: {loop_count}')
            if lamb > 1e20:
                # print('giving up: damping too large')
                break
        net.n_eval = net.n_eval + loop_count + 1
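# Illustration (not in the original): the step above solves the damped normal
# equations (J^T J + lamb * I) d = -grad, a Levenberg-Marquardt step that
# interpolates between Gauss-Newton (small lamb) and a short gradient-like step
# (large lamb). Toy least-squares sketch; lm_step, A, b and r are hypothetical
# names used only here.
import numpy as np

def lm_step(jac, grad, lamb):
    """Solve (J^T J + lamb * I) d = -grad for the damped step d."""
    JTJ = jac.T @ jac
    return np.linalg.solve(JTJ + lamb * np.eye(JTJ.shape[0]), -grad)

# Residuals r(x) = A x - b, so the Jacobian is A and the gradient of 0.5*||r||^2 is J^T r.
A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 3.0])
x = np.zeros(2)
r = A @ x - b
d = lm_step(A, A.T @ r, lamb=1e-2)  # doubling lamb shrinks d toward the -grad direction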
def intermediate_step(self, net: NeuralNet, m, param) -> None:
    # for param, grad in net.params_and_grads():
    # Shift the parameters by gamma * m in place (look-ahead point), then
    # recompute the gradients there.
    np.add(param, self.gamma * m, out=param)
    predicted = net.forward(net.curr_batch.inputs)
    grad = net.loss_f.grad(predicted, net.curr_batch.targets)
    net.backward(grad)
def test(net: NeuralNet,
         inputs: Tensor,
         targets: Tensor,
         labels: List,
         input_decoder: Callable) -> None:
    correct = 0
    for i in range(len(inputs)):
        predicted = net.forward(inputs[i])
        predicted_idx = np.argmax(predicted)
        actual_idx = np.argmax(targets[i])
        print(input_decoder(inputs[i]), inputs[i],
              labels[predicted_idx], labels[actual_idx])
        if predicted_idx == actual_idx:
            correct += 1
    # Accuracy over all test examples.
    print(correct / len(inputs))
def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD()) -> None:
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net.forward(batch.inputs)
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD(0.001),
          eps: float = -1) -> Tuple[List[float], List[int]]:
    loss_list = []
    eval_list = []
    net.n_eval = 0
    for epoch in range(num_epochs):
        n_iter = 0
        epoch_loss = 0.0
        print(f'================ EPOCH NUMBER {epoch + 1} ================')
        for batch in iterator(inputs, targets):
            # print(f'batch: \n{batch}')
            net.n_iter = n_iter
            net.curr_batch = batch
            net.loss_f = loss
            predicted = net.forward(batch.inputs)
            curr_loss = loss.loss(predicted, batch.targets)
            epoch_loss += curr_loss
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
            n_iter = n_iter + 1
            eval_list.append(net.n_eval)
        print(epoch, epoch_loss)
        loss_list.append(epoch_loss)
        if eps > 0 and epoch_loss < eps:
            print('Target precision reached')
            break
    return loss_list, eval_list
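# Usage sketch (not in the original; net, inputs and targets as built in the
# examples below, the numeric settings are placeholders): this version of train
# returns two lists, the per-epoch loss and the cumulative function-evaluation
# counts, so callers should unpack both.
loss_list, eval_list = train(net, inputs, targets,
                             num_epochs=200,
                             iterator=BatchIterator(32),
                             loss=MSE(),
                             optimizer=SGD(0.001),
                             eps=1e-3)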
# for x, y in zip(inputs, targets):
#     predicted = net.forward(x)
#     print(x, predicted, y)

# print(f'Levenberg Marquardt with line search\nloss = {loss_list[len(loss_list) - 1]:.2f}')

ex = np.linspace(0, 20, 200)
ey = []
test_loss = []
for val in ex:
    predicted = net.forward([val])
    ey.append(predicted)

plt.title("Squared error vs. time")
plt.xlabel("number of iterations")
plt.ylabel("squared error")
plt.scatter(list(range(0, n_epochs)), loss_list)
# plt.savefig(f'Figuras/Square/EQ.png', format='png')
plt.show()

plt.axis([0, 20, 0, 300])
aux = np.arange(21)
plt.scatter(aux, aux ** 2, s=30, c="red")
plt.plot(ex, ey,
         label=f'Levenberg Marquardt with line search\nloss = {loss_list[len(loss_list) - 1]:.02f}')
plt.legend()
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]


def binary_encode(x: int) -> List[int]:
    """
    10 digit binary encoding of x
    """
    return [x >> i & 1 for i in range(10)]


inputs = np.array([binary_encode(x) for x in range(101, 1024)])
targets = np.array([fizz_buzz_encode(x) for x in range(101, 1024)])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net, inputs, targets, num_epochs=5000, optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    print(x, labels[predicted_idx], labels[actual_idx])
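# Sanity check (not part of the original code): binary_encode emits the ten
# low-order bits of x, least significant bit first.
assert binary_encode(6) == [0, 1, 1, 0, 0, 0, 0, 0, 0, 0]
assert binary_encode(1023) == [1] * 10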
    Tanh(),
    Linear(input_size=35, output_size=1),
    Sigmoid()
])

n_epochs = 200
loss_list = train(net, inputs, targets,
                  optimizer=Adam(lr=1e-2, gamma1=0.3, gamma2=0.4),
                  iterator=BatchIterator(128),
                  num_epochs=n_epochs)

y_pred = []
for x in X_test[0:1000]:
    y_pred.append(net.forward(x))
y_pred = np.array(y_pred)

aux = X_test[0:1000]
indices_1 = np.where(aux == 0)
print('frauds:', indices_1[0])

plt.title("Squared error vs. time")
plt.xlabel("number of iterations")
plt.ylabel("squared error")
plt.scatter(list(range(0, n_epochs)), loss_list)
# plt.show()

precision, recall, _ = precision_recall_curve(y_test[0:1000], y_pred)
auc_val = auc(recall, precision)
""" The canonical example of a function that can't be learned with a simple linear model is XOR """ import numpy as np from joelnet.train import train from joelnet.nn import NeuralNet from joelnet.layers import Linear, Tanh inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]]) targets = np.array([[1, 0], [0, 1], [0, 1], [1, 0]]) net = NeuralNet([ Linear(input_size=2, output_size=2), Tanh(), Linear(input_size=2, output_size=2) ]) train(net, inputs, targets) for x, y in zip(inputs, targets): predicted = net.forward(x) print(x, predicted, y)
    Linear(input_size=1, output_size=2),
    reLu(),
    Linear(input_size=2, output_size=1)
])

n_epochs = 1
# loss_list = train(net, inputs, targets, optimizer=Adam(lr=1e-2, gamma1=0.3, gamma2=0.3),
#                   iterator=BatchIterator(batch_size=5), num_epochs=1000)
loss_list = train(net, inputs, targets,
                  loss=MSE(),
                  optimizer=SGD(lr=1e-3),
                  iterator=BatchIterator(batch_size=5),
                  num_epochs=n_epochs)

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)

plt.show()
plt.title("Squared error vs. time")
plt.xlabel("number of iterations")
plt.ylabel("squared error")
plt.scatter(list(range(0, n_epochs)), loss_list)
plt.show()

ex = np.linspace(0, 100, 100)
ey = [net.forward([val]) for val in ex]
plt.axis([0, 10, 0, 30])
plt.scatter([1, 2, 3, 4, 5], [1, 4, 9, 16, 25], s=30, c="red")
plt.plot(ex, ey)
plt.show()
NUM_EPOCHS = 10000

inputs = np.array([
    binary_encode(x, NUM_ENCODE_BITS)
    for x in range(101, 1024)
])
targets = np.array([
    fizz_buzz_encode(x)
    for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size=NUM_ENCODE_BITS, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net=net,
      inputs=inputs,
      targets=targets,
      num_epochs=NUM_EPOCHS,
      optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(inputs=binary_encode(x, NUM_ENCODE_BITS))
    predicted_idx = np.argmax(predicted)  # largest value is predicted class
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), 'fizz', 'buzz', 'fizzbuzz']
    print(x, labels[predicted_idx], labels[actual_idx])
def step(self, net: NeuralNet) -> None:
    count = 0
    for param, grad, jac in net.params_and_grads_v3():
        if count == 0:
            print('Taking the step for w1 and w2')
        else:
            print('Taking the step for w3 and w4')
        count = count + 1

        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        # lamb = min(max(np.linalg.norm(gf), 1e-5), 1e5)
        # print(f'GRADIENT: {gf}')
        # print(f'GRAD NORM: {np.linalg.norm(gf)}')
        lamb = self.lamb
        print('grad: ', grad)
        print('jac: ', jac)
        JTJ = jac.T @ jac
        print('jtj: ', JTJ)
        sh = grad.shape
        # Damped Gauss-Newton (Levenberg-Marquardt) step: solve (J^T J + lamb*I) d = -grad.
        d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
        d = d.reshape(sh)
        # inner_p = np.inner(-grad.flatten(), d.flatten())
        # print('inner product: ', inner_p)
        old_param = copy.deepcopy(param)
        param += d
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        print('param :', param)
        # time.sleep(30)
        lixo = input('paused; press Enter to continue')  # manual pause for debugging
        loop_count = 0
        print(f'error: {loss} / {loss_old - self.alpha * np.linalg.norm(param - old_param)**2}')
        # Increase the damping until the sufficient-decrease condition holds.
        while not loss <= loss_old - self.alpha * np.linalg.norm(param - old_param) ** 2:
            # print(f'f: {loss_old}')
            # print(f'x: {param}\nx^k: {old_param}')
            # print(f'x-xk: {param - old_param}')
            # print('||x-xk||^2: ', np.linalg.norm(param - old_param)**2)
            if loop_count == 0:
                print('entered the line search loop')
            lamb = 2 * lamb
            # print(f'error: {loss} / {loss_old - self.alpha * np.linalg.norm(param - old_param)**2}')
            d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
            d = d.reshape(sh)
            # Retry from the saved parameters with the more strongly damped step.
            param[...] = old_param + d
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            # inner_p = np.inner(-grad.flatten(), d.flatten())
            # print('inner product: ', inner_p)
            loop_count = loop_count + 1
            if lamb > 1e10:
                print('LAMBDA TOO LARGE')
                break
        if loop_count > 0:
            print(f'left the loop after {loop_count} iterations')
        else:
            print('did not enter the loop')
        net.n_eval = net.n_eval + loop_count + 1