def back_propagate(self,X,Y):
    # compute gradient of loss with respect to A
    grad_A_L = functions_loss.loss_der(self.loss,self.get_A(0),Y)
    # compute gradient of loss with respect to Z
    grad_Z_L = functions_activation.activation_der(self.info[0]["activation"],self.get_A(0),grad_A_L)
    # compute grad_W L and grad_b L
    self.info[0]["param_der"]["b"] = np.sum(grad_Z_L,axis=1,keepdims=True)
    self.info[0]["param_der"]["W"] = np.dot(grad_Z_L,X.T)
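For reference, these are the chain-rule identities the single-layer version evaluates. The forward-pass notation here (Z = WX + b, A = f(Z), with each column of X a training sample) is an assumption about the rest of the class, not something taken from this snippet:

\[
\frac{\partial L}{\partial Z} = \frac{\partial L}{\partial A} \odot f'(Z), \qquad
\frac{\partial L}{\partial W} = \frac{\partial L}{\partial Z}\, X^{\top}, \qquad
\frac{\partial L}{\partial b} = \sum_{m} \left[\frac{\partial L}{\partial Z}\right]_{:,m}
\]

The sum over the sample index m is exactly what np.sum(grad_Z_L,axis=1,keepdims=True) computes for the bias gradient.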
def back_propagate(self,X,Y):
    # compute gradient of the loss with respect to the output activations
    grad_A_L = functions_loss.loss_der(self.loss,self.get_A(self.nlayer-1),Y)
    for layer in range(self.nlayer-1,-1,-1):
        # multiply by the derivative of the activation to get the gradient with respect to Z
        grad_Z_L = functions_activation.activation_der(self.info[layer]["activation"],self.get_A(layer),grad_A_L)
        # compute grad_W L and grad_b L; the weight gradient includes the L2-regularization term 2*lambda*W
        self.info[layer]["param_der"]["b"] = np.sum(grad_Z_L,axis=1,keepdims=True)
        if layer > 0:
            self.info[layer]["param_der"]["W"] = np.dot(grad_Z_L,self.get_A(layer-1).T)+2*self.info[layer]["lambda"]*self.get_param(layer,"param","W")
            # propagate the gradient back to the previous layer's activations
            grad_A_L = np.dot(self.get_param(layer,"param","W").T,grad_Z_L)
        else:
            # the first layer takes the input X instead of a previous layer's activations
            self.info[layer]["param_der"]["W"] = np.dot(grad_Z_L,X.T)+2*self.info[layer]["lambda"]*self.get_param(layer,"param","W")
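Once back_propagate has filled in the "param_der" dictionaries, the gradients can drive an optimizer. Below is a minimal gradient-descent sketch, not part of the original class: the helper name gradient_step, the learning rate alpha, and the assumption that the parameters themselves live in info[layer]["param"] (mirroring the "param_der" layout and the get_param(layer,"param","W") accessor) are all hypothetical.

def gradient_step(net, alpha=0.01):
    # Hypothetical helper: walk every layer and move each parameter against
    # its stored gradient. Assumes parameters sit in net.info[layer]["param"],
    # mirroring the "param_der" dictionaries that back_propagate fills in.
    for layer in range(net.nlayer):
        for key in ("W", "b"):
            net.info[layer]["param"][key] -= alpha * net.info[layer]["param_der"][key]

In practice the class presumably exposes a setter alongside get_param; writing into info directly here just keeps the sketch short.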