def fit(self, X, Y, learning_rate=10e-7, reg=0, epochs=120000, show_fig=False):
    """Train the model weights with full-batch gradient descent.

    The data is shuffled, the last 1000 samples are held out as a
    validation set and the remainder is used for training.  Sets
    ``self.W`` and ``self.b``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]  # validation set
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W = np.random.randn(D) / np.sqrt(D)
    self.b = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY = self.forward(X)
        pY_Y = pY - Y

        self.W -= learning_rate * (X.T.dot(pY_Y) + reg * self.W)
        self.b -= learning_rate * (pY_Y.sum() + reg * self.b)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print('i:', i, 'cost:', c, 'error:', e)
            if e < best_validation_error:
                best_validation_error = e
    print(best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=10e-7, reg=0.0, epochs=120000, show_fig=False):
    """Train the model weights with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W`` and ``self.b``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient
            (the default is zero, i.e. no penalty).
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W = np.random.randn(D) / np.sqrt(D)
    self.b = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY = self.forward(X)
        pY_Y = pY - Y

        # gradient descent step
        self.W -= learning_rate * (X.T.dot(pY_Y) + reg * self.W)
        self.b -= learning_rate * (pY_Y.sum() + reg * self.b)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit_2class(self, X, Y, learning_rate=5*10e-7,
               reg=1.0, epoch=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    The train/test split comes from ``self.prepare_data``.  This method does
    not initialise ``self.W1``, ``self.b1``, ``self.W2`` or ``self.b2``;
    presumably ``prepare_data`` does (verify against that helper).

    Args:
        X: Feature array passed to ``self.prepare_data``.
        Y: Targets passed to ``self.prepare_data``.
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epoch: Number of gradient-descent iterations.
        show_fig: Forwarded to ``self.show_fig_cost`` together with the
            recorded costs.
    """
    Xtest, Ytest, Xtrain, Ytrain = self.prepare_data(X, Y)

    costs = []
    best_validation_error = 1
    for i in range(epoch):
        # forward prop
        pY, Z = self.forward(Xtrain)

        # back prop
        pY_Y = pY - Ytrain
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z * Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).  For ReLU it would be (Z > 0).
        dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (Xtrain.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg * self.b1)

        # Held-out metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYtest, _ = self.forward(Xtest)
            c = sigmoid_cost(Ytest, pYtest)
            costs.append(c)
            e = error_rate(Ytest, np.round(pYtest))
            print("i: ", i, "cost: ", c, "error: ", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best validation error:", best_validation_error)

    self.show_fig_cost(costs, show_fig)
def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False):
    """Fit ``self.W`` and ``self.b`` by full-batch gradient descent.

    The shuffled data is split into a training part and a 1000-sample
    validation tail.  Every 20th iteration the validation cost and error
    rate are printed and the cost is recorded.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    train_X, valid_X = X[:-1000], X[-1000:]
    train_Y, valid_Y = Y[:-1000], Y[-1000:]

    _, n_features = train_X.shape
    self.W = np.random.randn(n_features) / np.sqrt(n_features)
    self.b = 0

    history = []
    lowest_error = 1
    for step in range(epochs):
        # Prediction residual on the training set.
        residual = self.forward(train_X) - train_Y

        self.W -= learning_rate * (train_X.T.dot(residual) + reg * self.W)
        self.b -= learning_rate * (residual.sum() + reg * self.b)

        if step % 20 != 0:
            continue

        valid_pred = self.forward(valid_X)
        valid_cost = sigmoid_cost(valid_Y, valid_pred)
        history.append(valid_cost)
        valid_error = error_rate(valid_Y, np.round(valid_pred))
        print("i:", step, "cost:", valid_cost, "error:", valid_error)
        if valid_error < lowest_error:
            lowest_error = valid_error

    print("best_validation_error:", lowest_error)

    if show_fig:
        plt.plot(history)
        plt.show()
def fit(self, X, Y, learning_rate=10e-2, reg=10e-15, epochs=120000, show_fig=False):
    """Fit ``self.w`` and ``self.b`` by full-batch gradient descent.

    The shuffled data is split into a training part and a 1000-sample
    validation tail.  Every 20th iteration the validation cost and error
    rate (from the ``util`` helpers) are printed and the cost is recorded.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``util.sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    train_X, valid_X = X[:-1000], X[-1000:]
    train_Y, valid_Y = Y[:-1000], Y[-1000:]

    _, n_features = train_X.shape
    self.w = np.random.randn(n_features) / np.sqrt(n_features)
    self.b = 0

    history = []
    lowest_error = 1
    for step in range(epochs):
        # Prediction residual on the training set.
        residual = self.forward(train_X) - train_Y

        self.w -= learning_rate * (train_X.T.dot(residual) + (reg * self.w))
        self.b -= learning_rate * (residual.sum() + (reg * self.b))

        if step % 20 != 0:
            continue

        valid_pred = self.forward(valid_X)
        valid_cost = util.sigmoid_cost(valid_Y, valid_pred)
        history.append(valid_cost)
        err = util.error_rate(valid_Y, valid_pred.round())
        print("i:", step, "cost:", valid_cost, "error:", err)
        if err < lowest_error:
            lowest_error = err

    print("best validation error:", lowest_error)

    if show_fig:
        plt.plot(history)
        plt.show()
def fit(self, X, Y, learning_rate=5e-7, regularisation=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    ``Y`` is reshaped to a column vector, the data is shuffled and the last
    1000 samples are held out for validation.  Sets ``self.W1``,
    ``self.b1``, ``self.W2`` and ``self.b2`` via ``init_weight_and_bias``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        regularisation: Coefficient of the penalty term added to each
            gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Y = np.reshape(Y, (len(Y), 1))  # column vector, matching the single output unit
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1, self.b1 = init_weight_and_bias(D, self.M)
    self.W2, self.b2 = init_weight_and_bias(self.M, 1)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z*Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).  For ReLU it would be (Z > 0).
        dZ = pY_Y.dot(self.W2.T) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + regularisation * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + regularisation * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + regularisation * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i : "+str(i)+"; Cost : "+str(c)+"; Error : "+str(e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation error : "+str(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5 * 10e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 rows become the validation set and the
    remaining rows are used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: Feature rows; the number of columns is taken from ``len(X[0])``.
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)

    # Split into validation (last 1000 rows) and training (everything else).
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    D = len(X[0])

    # Randomly initialise the weights; biases start at zero.
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    cost = []
    best_validation_error = 1
    for t in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (Z > 0) is the ReLU derivative; assumes forward() uses ReLU
        # (TODO confirm).
        dZ = np.outer(pY_Y, self.W2) * (Z > 0)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if t % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            cost.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("t:", t, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(cost)
        plt.show()
def fit(self, X, Y, learning_rate=5e-6, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z * Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).  For ReLU it would be (Z > 0).
        dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5 * 10e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    # Zero-mean Gaussian like W1; a uniform [0, 1) draw would start every
    # output weight positive.
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward prop: pY is the predicted output, Z the hidden layer value
        pY, Z = self.forward(X)

        # gradient descent
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (Z > 0) is the ReLU derivative; assumes forward() uses ReLU
        # (TODO confirm).
        dZ = np.outer(pY_Y, self.W2) * (Z > 0)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i: ", i, "cost:", c, "error", e)
            if e < best_validation_error:
                best_validation_error = e
    print('best_validation_error: ', best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5 * 10e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.  The hidden-layer derivative is selected
    by ``self.activation_func`` (``'relu'`` or ``'tanh'``).

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.

    Raises:
        ValueError: If ``self.activation_func`` is neither ``'relu'`` nor
            ``'tanh'`` when a training iteration runs.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY, Z = self.forward(X)
        pY_Y = pY - Y

        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        if self.activation_func == 'relu':
            dZ = np.outer(pY_Y, self.W2) * (Z > 0)  # derivative of relu
        elif self.activation_func == 'tanh':
            dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)  # derivative of tanh
        else:
            # Fail with a clear message instead of an unbound-local error.
            raise ValueError(
                "unsupported activation_func: %r" % (self.activation_func,)
            )

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            cost = sigmoid_cost(Yvalid, pYvalid)
            costs.append(cost)
            error = error_rate(Yvalid, np.round(pYvalid))
            print("i:", i, "cost:", cost, "error:", error)
            if error < best_validation_error:
                # Record the improvement (previously a self-assignment).
                best_validation_error = error
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5e-6, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 rows become the validation set and the
    rest is used for training.  Sets ``self.W1``, ``self.b1``, ``self.W2``
    and ``self.b2``.

    Args:
        X: 2-D feature array (it is sliced with two indices).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    validX = X[-1000:, :]
    validY = Y[-1000:]
    trainX = X[:-1000, :]
    trainY = Y[:-1000]

    _, D = trainX.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b1 = np.zeros(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY, Z = self.forward(trainX)
        pY_Y = pY - trainY

        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z * Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).
        dz = np.outer(pY_Y, self.W2) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (trainX.T.dot(dz) + reg * self.W1)
        self.b1 -= learning_rate * (np.sum(dz, axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pY_valid, _ = self.forward(validX)
            c = sigmoid_cost(validY, pY_valid)
            costs.append(c)
            e = error_rate(validY, np.round(pY_valid))
            print("i: ", i, " cost: ", c, " error: ", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error: ", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5 * 10e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY, Z = self.forward(X)
        pY_Y = pY - Y

        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z * Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).  For ReLU it would be (Z > 0).
        dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalida, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalida)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalida))
            print("i:", i, "cost:", c, "error_rate:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best validation error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X_train, Y_train, X_val, Y_val, learning_rate=1e-6, lambda_=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    Training and validation sets are supplied by the caller.  Sets
    ``self.W1``, ``self.b1``, ``self.W2`` and ``self.b2``.

    Args:
        X_train: 2-D training feature array (``X_train.shape`` is unpacked
            into two values).
        Y_train: Training targets aligned with the rows of ``X_train``;
            presumably 0/1 labels (TODO confirm against ``sigmoid_cost``).
        X_val: Validation features passed to ``self.forward``.
        Y_val: Validation targets.
        learning_rate: Step size applied to every update.
        lambda_: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    _, D = X_train.shape
    self.W1 = np.random.randn(D, self.M) * np.sqrt(1 / D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) * np.sqrt(1 / self.M)
    self.b2 = 0

    costs = []
    best_val_error = 1
    for i in range(epochs):
        # Forward propagation
        Y_train_pred, Z = self.forward(X_train)

        # Gradient descent step
        delta2 = Y_train_pred - Y_train
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.  W2 is
        # 1-D, so the per-sample/per-unit error is an outer product.
        # (Z > 0) is the ReLU derivative; assumes forward() uses ReLU
        # (TODO confirm).
        delta1 = np.outer(delta2, self.W2) * (Z > 0)

        self.W2 -= learning_rate * (Z.T.dot(delta2) + lambda_ * self.W2)
        self.b2 -= learning_rate * (delta2.sum(axis=0) + lambda_ * self.b2)
        self.W1 -= learning_rate * (X_train.T.dot(delta1) + lambda_ * self.W1)
        self.b1 -= learning_rate * (delta1.sum(axis=0) + lambda_ * self.b1)

        # Validation metrics are only evaluated every 50th iteration.
        if i % 50 == 0:
            Y_val_pred, _ = self.forward(X_val)
            c = sigmoid_cost(Y_val, Y_val_pred)
            costs.append(c)
            e = error_rate(Y_val, np.round(Y_val_pred))
            print("Epoch:", i, "Cost:", c, "Error rate:", e)
            if e < best_val_error:
                best_val_error = e
    print("Best validation error:", best_val_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, T, learning_rate=5*10e-7, reg=1.0, epoch=10000, fig_show=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (the training slice's ``shape`` is unpacked
            into two values).
        T: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epoch: Number of gradient-descent iterations.
        fig_show: If true, plot the recorded validation costs at the end.
    """
    X, T = shuffle(X, T)
    X_train = X[:-1000]
    T_train = T[:-1000]
    X_valid = X[-1000:]
    T_valid = T[-1000:]

    _, D = X_train.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    best_validation_error = 1
    costs = []
    for n in range(epoch):
        # forward propagation
        Y, Z = self.forwardprop(X_train)

        # back-propagation and gradient descent
        Y_T = Y - T_train
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z*Z) is the tanh derivative; assumes forwardprop() uses tanh
        # (TODO confirm).
        dZ = np.outer(Y_T, self.W2) * (1 - Z * Z)

        self.W2 -= learning_rate * (Z.T.dot(Y_T) + reg * self.W2)
        self.b2 -= learning_rate * (Y_T.sum(axis=0) + reg * self.b2)
        self.W1 -= learning_rate * (X_train.T.dot(dZ) + reg * self.W1)
        # Sum over samples only (axis=0): b1 has one entry per hidden unit,
        # so each unit needs its own gradient rather than the grand total.
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if n % 20 == 0:
            Y_valid, _ = self.forwardprop(X_valid)
            cost = sigmoid_cost(T_valid, Y_valid)
            costs.append(cost)
            er = error_rate(T_valid, np.round(Y_valid))
            print(n, 'cost:', cost, 'error rate', er)
            if er < best_validation_error:
                best_validation_error = er
    print('Best validation error:', best_validation_error)

    if fig_show:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5*10e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (1 - Z*Z) is the tanh derivative; assumes forward() uses tanh
        # (TODO confirm).  For ReLU it would be (Z > 0).
        dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z)

        self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2)
        self.b2 -= learning_rate*(pY_Y.sum() + reg*self.b2)
        self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)
        self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=10e-7, reg=0, epochs=120000, show_fig=False):
    """Fit ``self.w`` and ``self.b`` by full-batch gradient descent.

    The shuffled data is split into a training part and a 1000-sample
    validation tail.  Weight initialisation and rounding go through the
    ``cp`` module.  Every 20th iteration the validation cost and error rate
    are printed and the cost is recorded.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    train_X, valid_X = X[:-1000], X[-1000:]
    train_Y, valid_Y = Y[:-1000], Y[-1000:]

    _, n_features = train_X.shape
    self.w = cp.random.randn(n_features) / cp.sqrt(n_features)
    self.b = 0

    history = []
    lowest_error = 1
    for step in range(epochs):
        # Prediction residual on the training set.
        residual = self.forward(train_X) - train_Y

        # gradient descent step
        self.w -= learning_rate * (train_X.T.dot(residual) + reg * self.w)
        self.b -= learning_rate * (residual.sum() + reg * self.b)

        if step % 20 != 0:
            continue

        valid_pred = self.forward(valid_X)
        valid_cost = sigmoid_cost(valid_Y, valid_pred)
        history.append(valid_cost)
        # Rounding the predictions amounts to a 0.5 classification threshold.
        valid_error = error_rate(valid_Y, cp.around(valid_pred))
        print("i: ", step, " cost: ", valid_cost, " error: ", valid_error)
        if valid_error < lowest_error:
            lowest_error = valid_error

    print("best validation error: ", lowest_error)

    if show_fig:
        plt.plot(history)
        plt.show()
def fit(self, X_train, Y_train, X_val, Y_val, learning_rate=1e-6, lambda_=0.1, epochs=10000, show_fig=False):
    """Fit ``self.W`` and ``self.b`` by full-batch gradient descent.

    Training and validation sets are supplied by the caller.  Every 50th
    iteration the validation cost and error rate are printed and the cost
    is recorded.

    Args:
        X_train: 2-D training feature array (``X_train.shape`` is unpacked
            into two values).
        Y_train: Training targets aligned with the rows of ``X_train``;
            presumably 0/1 labels (TODO confirm against ``sigmoid_cost``).
        X_val: Validation features passed to ``self.forward``.
        Y_val: Validation targets.
        learning_rate: Step size applied to every update.
        lambda_: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    _, n_features = X_train.shape
    self.W = np.random.randn(n_features) * np.sqrt(1 / n_features)
    self.b = 0

    history = []
    lowest_error = 1
    for step in range(epochs):
        # Prediction residual on the training set.
        residual = self.forward(X_train) - Y_train

        # Gradient descent
        self.W -= learning_rate * (X_train.T.dot(residual) + lambda_ * self.W)
        self.b -= learning_rate * (residual.sum() + lambda_ * self.b)

        if step % 50 != 0:
            continue

        val_pred = self.forward(X_val)
        val_cost = sigmoid_cost(Y_val, val_pred)
        history.append(val_cost)
        val_error = error_rate(Y_val, np.round(val_pred))
        print("Epoch:", step, "Cost:", val_cost, "Error rate", val_error)
        if val_error < lowest_error:
            lowest_error = val_error

    print("Best validation error", lowest_error)

    if show_fig:
        plt.plot(history)
        plt.show()
def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False):
    """Train the one-hidden-layer network with full-batch gradient descent.

    After shuffling, the last 1000 samples become the validation set and
    the rest is used for training.  Sets ``self.W1``, ``self.b1``,
    ``self.W2`` and ``self.b2``.

    Args:
        X: 2-D feature array (``X.shape`` is unpacked into two values).
        Y: Targets aligned with the rows of ``X``; presumably 0/1 labels
            (TODO confirm against ``sigmoid_cost``).
        learning_rate: Step size applied to every update.
        reg: Coefficient of the penalty term added to each gradient.
        epochs: Number of gradient-descent iterations.
        show_fig: If true, plot the recorded validation costs at the end.
    """
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    _, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward prop
        pY, Z = self.forward(X)

        # gradient descent
        pY_Y = pY - Y
        # Compute the hidden-layer error before W2 is modified so that every
        # gradient is taken at the weights used in the forward pass.
        # (Z > 0) is the ReLU derivative; assumes forward() uses ReLU
        # (TODO confirm).
        dZ = np.outer(pY_Y, self.W2) * (Z > 0)

        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg*self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + reg*self.b2)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg*self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + reg*self.b1)

        # Validation metrics are only evaluated every 20th iteration.
        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i: {} cost: {} error : {}".format(i, c, e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation Error: {}".format(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()