def train(self, X, y, learning_rate=1e-3, reg=1e-4, decay_rate=1.00,
          opt='sgd', n_iters=1000, batch_size=200, verbose=True):
    lr = learning_rate
    self.reg = reg
    for i in range(n_iters):
        # sample a minibatch
        ids = np.random.choice(X.shape[0], batch_size)

        # forward pass: dense -> relu -> dense -> sigmoid -> binary cross-entropy
        layer1, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
        layer2, l2cache = layers.non_linearity_forward(layer1, hiddenLayer='relu')
        layer3, l3cache = layers.dense_forward(layer2, self.W2, self.b2)
        layer4, l4cache = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
        loss, l5cache = layers.binary_cross_entropy_loss_forward(layer4, y[ids])

        # adding regularization loss (L2 penalty on the hidden activations,
        # scaled by batch size to match the gradient applied below)
        loss += 0.5 * self.reg * np.sum(layer2 * layer2) / batch_size

        # backward pass
        dlayer5 = 1.0
        dlayer4 = layers.binary_cross_entropy_loss_backward(dlayer5, l5cache)
        dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, hiddenLayer='sigmoid')
        dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
        dlayer2 += (self.reg * layer2) / batch_size
        dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, hiddenLayer='relu')
        _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

        if i % 500 == 0:
            lr *= decay_rate
            if verbose:
                print "Iteration %d, loss = %g" % (i, loss)

        # parameter updates
        self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
        self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
        self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
        self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)

        self.loss_history.append(loss)
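# The training loop above leans on the `layers` module for the affine (dense)
# layers. The real implementations are not shown here; the sketch below is an
# assumption inferred from how dense_forward/dense_backward are called (inputs
# of shape (N, D), weights of shape (D, H), biases of shape (H,), and a cache
# that carries the inputs to the backward pass).

import numpy as np

def dense_forward(x, W, b):
    # affine transform: out = x.W + b; keep the inputs around for backprop
    out = np.dot(x, W) + b
    cache = (x, W, b)
    return out, cache

def dense_backward(dout, cache):
    # backprop through the affine transform, returning (dx, dW, db)
    x, W, b = cache
    dx = np.dot(dout, W.T)
    dW = np.dot(x.T, dout)
    db = np.sum(dout, axis=0)
    return dx, dW, db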
def getloss(self, X, y):
    layer1, _ = layers.dense_forward(X, self.W1, self.b1)
    layer2, _ = layers.non_linearity_forward(layer1, hiddenLayer='relu')
    layer3, _ = layers.dense_forward(layer2, self.W2, self.b2)
    layer4, _ = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
    loss, _ = layers.binary_cross_entropy_loss_forward(layer4, y)
    loss += 0.5 * self.reg * (np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2))
    return loss
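# Both getloss and train call binary_cross_entropy_loss_forward/_backward from
# the `layers` module. The sketch below is an assumption consistent with those
# call sites, not the actual implementation; it expects the sigmoid outputs p
# and the labels y to have the same shape.

import numpy as np

def binary_cross_entropy_loss_forward(p, y):
    # mean binary cross-entropy; clip the probabilities to avoid log(0)
    eps = 1e-12
    p = np.clip(p, eps, 1.0 - eps)
    loss = -np.mean(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))
    cache = (p, y)
    return loss, cache

def binary_cross_entropy_loss_backward(dloss, cache):
    # gradient of the mean BCE loss with respect to the predicted probabilities
    p, y = cache
    return dloss * (p - y) / (p * (1.0 - p)) / p.size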
def predict(self, X):
    # return the index of the highest-scoring class for each row after a forward pass
    l1out, _ = layers.dense_forward(X, self.W1, self.b1)
    l2out, _ = layers.non_linearity_forward(l1out, self.hiddenLayer)
    l3out, _ = layers.dense_forward(l2out, self.W2, self.b2)
    l4out, _ = layers.non_linearity_forward(l3out, self.hiddenLayer)
    l5out, _ = layers.dense_forward(l4out, self.W3, self.b3)
    return np.argmax(l5out, axis=1)
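# Every network here routes its activations through
# layers.non_linearity_forward/_backward, selected by a string. The dispatch
# sketched below is an assumption based on the 'relu'/'sigmoid'/'tanh' values
# used throughout; the actual `layers` module may differ in detail.

import numpy as np

def non_linearity_forward(x, hiddenLayer='relu'):
    # element-wise non-linearity; cache the input and output for backprop
    if hiddenLayer == 'relu':
        out = np.maximum(0, x)
    elif hiddenLayer == 'sigmoid':
        out = 1.0 / (1.0 + np.exp(-x))
    elif hiddenLayer == 'tanh':
        out = np.tanh(x)
    else:
        raise ValueError('unknown non-linearity: %s' % hiddenLayer)
    return out, (x, out)

def non_linearity_backward(dout, cache, hiddenLayer='relu'):
    # backprop through the element-wise non-linearity
    x, out = cache
    if hiddenLayer == 'relu':
        return dout * (x > 0)
    elif hiddenLayer == 'sigmoid':
        return dout * out * (1.0 - out)
    return dout * (1.0 - out * out)   # tanh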
def predict(self, X):
    W1, b1 = self.weights['W1'], self.weights['b1']
    W2, b2 = self.weights['W2'], self.weights['b2']
    W3, b3 = self.weights['W3'], self.weights['b3']
    # return the index of the highest-scoring class for each row after a forward pass
    l1out, _ = layers.dense_forward(X, W1, b1)
    l2out, _ = layers.non_linearity_forward(l1out, self.non_linearity)
    l3out, _ = layers.dense_forward(l2out, W2, b2)
    l4out, _ = layers.non_linearity_forward(l3out, self.non_linearity)
    l5out, _ = layers.dense_forward(l4out, W3, b3)
    return np.argmax(l5out, axis=1)
def train(self, X, y, X_val=None, y_val=None, learning_rate=1e-2, reg=1e-4,
          decay_rate=0.95, opt='sgd', n_iters=5000, batch_size=200, verbose=1):
    lr = learning_rate
    for i in xrange(n_iters):
        # sample a minibatch
        ids = np.random.choice(X.shape[0], batch_size)

        # forward pass: dense layer1 -> non-linearity layer2 -> dense layer3
        # -> non-linearity layer4 -> dense layer5 -> softmax layer
        l1out, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
        l2out, l2cache = layers.non_linearity_forward(l1out, self.hiddenLayer)
        l3out, l3cache = layers.dense_forward(l2out, self.W2, self.b2)
        l4out, l4cache = layers.non_linearity_forward(l3out, self.hiddenLayer)
        l5out, l5cache = layers.dense_forward(l4out, self.W3, self.b3)
        loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])

        # adding L2 weight regularization to the loss
        loss = loss + 0.5 * reg * (np.sum(self.W1**2) + np.sum(self.W2**2) + np.sum(self.W3**2))
        self.loss_history.append(loss)

        if i % 500 == 0:
            lr *= decay_rate
            if verbose:
                print "Iteration %d, loss = %f" % (i, loss)
                if X_val is not None and y_val is not None:
                    print "Validation Accuracy :%f" % (self.accuracy(X_val, y_val))

        # backward pass
        dlayer6 = 1.0
        dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
        dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
        dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, self.hiddenLayer)
        dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
        dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, self.hiddenLayer)
        _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

        # track mean absolute gradients flowing into each hidden layer
        self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
        self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

        # gradients due to regularization
        dW1 += reg * self.W1
        dW2 += reg * self.W2
        dW3 += reg * self.W3

        # parameter updates
        self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
        self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
        self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
        self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)
        self.params, self.W3 = optimizers.optimize(self.params, self.W3, dW3, lr=lr, name='W3', opt=opt)
        self.params, self.b3 = optimizers.optimize(self.params, self.b3, db3, lr=lr, name='b3', opt=opt)
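# The softmax head used above comes from layers.softmax_loss_forward/_backward.
# The sketch below is an assumption matching the call sites (scores of shape
# (N, C), integer labels y of shape (N,), loss averaged over the minibatch).

import numpy as np

def softmax_loss_forward(scores, y):
    # numerically stable softmax followed by mean cross-entropy
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
    N = scores.shape[0]
    loss = -np.mean(np.log(probs[np.arange(N), y]))
    return loss, (probs, y)

def softmax_loss_backward(dloss, cache):
    # gradient of the averaged cross-entropy with respect to the input scores
    probs, y = cache
    N = probs.shape[0]
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1.0
    return dloss * dscores / N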
def rnn_step_forward(x, prev_h, Wx, Wh, b, non_linearity='tanh'):
    """
    Run the forward pass for a single timestep of a vanilla RNN that uses
    the specified activation function. The input data has dimension D, the
    hidden state has dimension H, and we use a minibatch size of N.

    :param x: Input data for this timestep, of shape (N, D)
    :param prev_h: Hidden state from previous timestep, of shape (N, H)
    :param Wx: Weight matrix for input-to-hidden connections, of shape (D, H)
    :param Wh: Weight matrix for hidden-to-hidden connections, of shape (H, H)
    :param b: Biases of shape (H,)
    :param non_linearity: relu/sigmoid or tanh non-linearity to be used
    :return:
        next_h: Next hidden state, of shape (N, H)
        cache: Tuple of values needed for the backward pass
    """
    # affine combination of the input and the previous hidden state
    tmp = np.dot(x, Wx) + np.dot(prev_h, Wh) + b
    next_h, _ = layers.non_linearity_forward(tmp, hiddenLayer=non_linearity)
    cache = (x, prev_h, Wx, Wh, b, non_linearity)
    return next_h, cache
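# rnn_step_forward processes a single timestep. A full forward pass would
# simply unroll it over the sequence; rnn_forward below is a hypothetical
# helper (not part of the code above) sketching that loop for inputs of shape
# (N, T, D) and an initial hidden state h0 of shape (N, H), assuming
# rnn_step_forward and the `layers` module are importable.

import numpy as np

def rnn_forward(x, h0, Wx, Wh, b, non_linearity='tanh'):
    # run the RNN step over all T timesteps, collecting hidden states and caches
    N, T, D = x.shape
    H = h0.shape[1]
    h = np.zeros((N, T, H))
    caches = []
    prev_h = h0
    for t in range(T):
        prev_h, cache = rnn_step_forward(x[:, t, :], prev_h, Wx, Wh, b, non_linearity)
        h[:, t, :] = prev_h
        caches.append(cache)
    return h, caches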
def train(self, X, y, X_val=None, y_val=None, learning_rate=1e-2, reg=1e-4,
          decay_rate=0.95, opt='sgd', n_iters=5000, batch_size=200, verbose=1):
    lr = learning_rate
    for i in xrange(n_iters):
        W1, b1 = self.weights['W1'], self.weights['b1']
        W2, b2 = self.weights['W2'], self.weights['b2']
        W3, b3 = self.weights['W3'], self.weights['b3']

        # sample a minibatch
        ids = np.random.choice(X.shape[0], batch_size)

        # forward pass: dense layer1 -> non-linearity layer2 -> dense layer3
        # -> non-linearity layer4 -> dense layer5 -> softmax layer
        l1out, l1cache = layers.dense_forward(X[ids], W1, b1)
        l2out, l2cache = layers.non_linearity_forward(l1out, self.non_linearity)
        l3out, l3cache = layers.dense_forward(l2out, W2, b2)
        l4out, l4cache = layers.non_linearity_forward(l3out, self.non_linearity)
        l5out, l5cache = layers.dense_forward(l4out, W3, b3)
        loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])

        # adding L2 weight regularization to the loss
        loss = loss + 0.5 * reg * (np.sum(W1**2) + np.sum(W2**2) + np.sum(W3**2))
        self.loss_history.append(loss)

        if i % 500 == 0:
            lr *= decay_rate
            if verbose:
                print "Iteration %d, loss = %f" % (i, loss)
                if X_val is not None and y_val is not None:
                    print "Validation Accuracy :%f" % (self.accuracy(X_val, y_val))

        # backward pass
        dlayer6 = 1.0
        dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
        dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
        dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, self.non_linearity)
        dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
        dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, self.non_linearity)
        _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

        # track mean absolute gradients flowing into each hidden layer
        self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
        self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

        # gradients due to regularization
        dW1 += reg * W1
        dW2 += reg * W2
        dW3 += reg * W3

        # parameter updates
        self.params, W1 = optimizers.optimize(self.params, W1, dW1, lr=lr, name='W1', opt=opt)
        self.params, b1 = optimizers.optimize(self.params, b1, db1, lr=lr, name='b1', opt=opt)
        self.params, W2 = optimizers.optimize(self.params, W2, dW2, lr=lr, name='W2', opt=opt)
        self.params, b2 = optimizers.optimize(self.params, b2, db2, lr=lr, name='b2', opt=opt)
        self.params, W3 = optimizers.optimize(self.params, W3, dW3, lr=lr, name='W3', opt=opt)
        self.params, b3 = optimizers.optimize(self.params, b3, db3, lr=lr, name='b3', opt=opt)

        # write the updated parameters back
        self.weights['W1'], self.weights['b1'] = W1, b1
        self.weights['W2'], self.weights['b2'] = W2, b2
        self.weights['W3'], self.weights['b3'] = W3, b3
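# All the parameter updates above go through optimizers.optimize, which takes
# the running optimizer-state dict, a parameter, its gradient, and a name to
# key the state by, and returns the updated state and parameter. The sketch
# below is an assumption covering only plain SGD and SGD with momentum; the
# real `optimizers` module may support more (e.g. rmsprop or adam) and use
# different hyper-parameters.

import numpy as np

def optimize(params, w, dw, lr=1e-3, name='W', opt='sgd'):
    # update parameter w given its gradient dw; per-parameter state is kept
    # in params under keys derived from `name`
    if opt == 'sgd':
        w = w - lr * dw
    elif opt == 'momentum':
        v = params.get(name + '_v', np.zeros_like(w))
        v = 0.9 * v - lr * dw
        params[name + '_v'] = v
        w = w + v
    else:
        raise ValueError('unknown optimizer: %s' % opt)
    return params, w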
def predict(self, X):
    # forward pass; returns the sigmoid output (probability of the positive class) for each row
    l1, _ = layers.dense_forward(X, self.W1, self.b1)
    l2, _ = layers.non_linearity_forward(l1, hiddenLayer='relu')
    l3, _ = layers.dense_forward(l2, self.W2, self.b2)
    l4, _ = layers.non_linearity_forward(l3, hiddenLayer='sigmoid')
    return l4
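# The predict above returns raw sigmoid probabilities rather than class labels.
# To get hard 0/1 labels one would typically threshold at 0.5; `probs` below
# stands in for the array predict returns (numpy assumed imported as np, as
# elsewhere in this code).

probs = np.array([[0.1], [0.7], [0.5]])
labels = (probs >= 0.5).astype(int)   # -> [[0], [1], [1]]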