Example #1
  def backward(self, model, targets, outputs):
    self.model = model
    nn_architecture = self.model.nn_architecture
    parameters = self.model.parameters
    memory = self.model.memory

    # Gradient of the MSE loss w.r.t. the outputs, plus the accumulated weight-decay term
    dA_prev = 2 * (outputs - targets) / outputs.shape[1] + self.wd * self.grads_values['w_sum']
    # Start a fresh accumulation of sum(W**2) for the next call's weight-decay term
    self.grads_values['w_sum'] = 0.0

    # Walk the layers from last to first, propagating dA backwards
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        dA_curr = dA_prev

        # Accumulate the sum of squared weights used by next call's weight-decay term
        self.grads_values['w_sum'] += np.sum(parameters[f"W{layer_idx_curr}"]**2)

        # RMSProp-style running averages of squared gradients, refreshed from the
        # gradients stored on the previous call (the current ones are computed below)
        if isinstance(self.grads_values.get(f"dW{layer_idx_curr}"), np.ndarray):
          self.grads_values[f"dW_square{layer_idx_curr}"] = self.momentum * self.grads_values[f"dW_square{layer_idx_curr}"] + (1.0 - self.momentum) * self.grads_values[f"dW{layer_idx_curr}"]**2
        else:
          self.grads_values[f"dW_square{layer_idx_curr}"] = 1.0

        if isinstance(self.grads_values.get(f"db{layer_idx_curr}"), np.ndarray):
          self.grads_values[f"db_square{layer_idx_curr}"] = self.momentum * self.grads_values[f"db_square{layer_idx_curr}"] + (1.0 - self.momentum) * self.grads_values[f"db{layer_idx_curr}"]**2
        else:
          self.grads_values[f"db_square{layer_idx_curr}"] = 1.0

        A_prev = memory[f"A{layer_idx_prev}"]
        Z_curr = memory[f"Z{layer_idx_curr}"]
        W_curr = parameters[f"W{layer_idx_curr}"]
        b_curr = parameters[f"b{layer_idx_curr}"]

        # Standard backprop through the layer: activation, then weights and bias
        m = A_prev.shape[1]
        backward_activation_func = get_activation_function(layer['activation'], backward=True)
        dZ_curr = backward_activation_func(dA_curr, Z_curr)
        dW_curr = (1. / m) * np.matmul(dZ_curr, A_prev.T)
        db_curr = (1. / m) * np.sum(dZ_curr, axis=1, keepdims=True)
        dA_prev = np.matmul(W_curr.T, dZ_curr)

        self.grads_values[f"dW{layer_idx_curr}"] = dW_curr
        self.grads_values[f"db{layer_idx_curr}"] = db_curr

    # Parameter update: scale each gradient by its adaptive per-parameter learning rate
    for idx, layer in enumerate(nn_architecture):
      layer_idx = idx + 1
      dW_step = self.lr / np.sqrt(self.grads_values[f"dW_square{layer_idx}"] + 1e-8) * self.grads_values[f"dW{layer_idx}"]
      db_step = self.lr / np.sqrt(self.grads_values[f"db_square{layer_idx}"] + 1e-8) * self.grads_values[f"db{layer_idx}"]

      parameters[f"W{layer_idx}"] -= dW_step
      parameters[f"b{layer_idx}"] -= db_step
    
    self.model.parameters = parameters

    return self.model
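
The update loop above implements an RMSProp-style rule: keep a running average of squared gradients per parameter and divide the step by its root. A minimal standalone sketch of that recurrence on plain NumPy arrays (the shapes, the 0.9 decay, and the name rmsprop_step are illustrative assumptions, not taken from the example):

import numpy as np

def rmsprop_step(param, grad, square_avg, lr=0.01, momentum=0.9, eps=1e-8):
    # Running average of squared gradients: E[g^2] <- rho * E[g^2] + (1 - rho) * g^2
    square_avg = momentum * square_avg + (1.0 - momentum) * grad**2
    # Per-parameter step scaled by the root of that average
    param = param - lr / np.sqrt(square_avg + eps) * grad
    return param, square_avg

# Illustrative usage on a 2x3 weight matrix
W = np.zeros((2, 3))
vW = np.ones_like(W)   # the example above also initialises the accumulator to 1.0
dW = np.full((2, 3), 0.5)
W, vW = rmsprop_step(W, dW, vW)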
Example #2
    def forward(self, xb):
        # Cache the intermediate activations and pre-activations for the backward pass
        self.memory = dict()
        A_curr = xb
        for idx, layer in enumerate(self.nn_architecture):
            layer_idx = idx + 1
            A_prev = A_curr

            W_curr = self.parameters[f"W{layer_idx}"]
            b_curr = self.parameters[f"b{layer_idx}"]

            # Affine transform followed by the layer's activation
            Z_curr = np.matmul(W_curr, A_prev) + b_curr
            active_function = get_activation_function(layer['activation'])
            A_curr = active_function(Z_curr)

            # A{idx} is the input to layer idx+1; Z{idx+1} is that layer's pre-activation
            self.memory[f"A{idx}"] = A_prev
            self.memory[f"Z{layer_idx}"] = Z_curr

        return A_curr
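
Both the forward pass above and the backward passes rely on a get_activation_function helper that is not shown. A minimal sketch of what such a helper could look like, assuming only 'relu' and 'sigmoid' activations and the (dA, Z) -> dZ convention used at the call sites (the actual helper in the original code may differ):

import numpy as np

def get_activation_function(name, backward=False):
    # Forward activations: Z -> A
    def sigmoid(Z):
        return 1.0 / (1.0 + np.exp(-Z))

    def relu(Z):
        return np.maximum(0.0, Z)

    # Backward activations: (dA, Z) -> dZ
    def sigmoid_backward(dA, Z):
        s = sigmoid(Z)
        return dA * s * (1.0 - s)

    def relu_backward(dA, Z):
        dZ = np.array(dA, copy=True)
        dZ[Z <= 0] = 0.0
        return dZ

    forward_funcs = {"sigmoid": sigmoid, "relu": relu}
    backward_funcs = {"sigmoid": sigmoid_backward, "relu": relu_backward}
    table = backward_funcs if backward else forward_funcs
    if name not in table:
        raise ValueError(f"Unsupported activation: {name}")
    return table[name]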
Example #3
  def backward(self, model, targets, outputs):
    self.model = model
    nn_architecture = self.model.nn_architecture
    parameters = self.model.parameters
    memory = self.model.memory

    # Gradient of the MSE loss w.r.t. the outputs, plus the accumulated weight-decay term
    dA_prev = 2 * (outputs - targets) / outputs.shape[1] + self.wd * self.grads_values['w_sum']
    # Start a fresh accumulation of sum(W**2) for the next call's weight-decay term
    self.grads_values['w_sum'] = 0.0

    # Walk the layers from last to first, propagating dA backwards
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        dA_curr = dA_prev

        # Accumulate the sum of squared weights used by next call's weight-decay term
        self.grads_values['w_sum'] += np.sum(parameters[f"W{layer_idx_curr}"]**2)

        # Adam-style first (momentum) and second (uncentred variance) moment estimates, refreshed
        # from the gradients stored on the previous call (the current ones are computed below)
        if isinstance(self.grads_values.get(f"dW{layer_idx_curr}"), np.ndarray):
          self.grads_values[f"dW_sum{layer_idx_curr}"] = self.beta1 * self.grads_values[f"dW_sum{layer_idx_curr}"] + (1.0 - self.beta1) * self.grads_values[f"dW{layer_idx_curr}"]
          self.grads_values[f"dW_square{layer_idx_curr}"] = self.beta2 * self.grads_values[f"dW_square{layer_idx_curr}"] + (1.0 - self.beta2) * self.grads_values[f"dW{layer_idx_curr}"]**2
        else:
          self.grads_values[f"dW_sum{layer_idx_curr}"] = 0.0
          self.grads_values[f"dW_square{layer_idx_curr}"] = 1.0

        if isinstance(self.grads_values.get(f"db{layer_idx_curr}"), np.ndarray):
          self.grads_values[f"db_sum{layer_idx_curr}"] = self.beta1 * self.grads_values[f"db_sum{layer_idx_curr}"] + (1.0 - self.beta1) * self.grads_values[f"db{layer_idx_curr}"]
          self.grads_values[f"db_square{layer_idx_curr}"] = self.beta2 * self.grads_values[f"db_square{layer_idx_curr}"] + (1.0 - self.beta2) * self.grads_values[f"db{layer_idx_curr}"]**2
        else:
          self.grads_values[f"db_sum{layer_idx_curr}"] = 0.0
          self.grads_values[f"db_square{layer_idx_curr}"] = 1.0

        A_prev = memory[f"A{layer_idx_prev}"]
        Z_curr = memory[f"Z{layer_idx_curr}"]
        W_curr = parameters[f"W{layer_idx_curr}"]
        b_curr = parameters[f"b{layer_idx_curr}"]

        # Standard backprop through the layer: activation, then weights and bias
        m = A_prev.shape[1]
        backward_activation_func = get_activation_function(layer['activation'], backward=True)
        dZ_curr = backward_activation_func(dA_curr, Z_curr)
        dW_curr = (1. / m) * np.matmul(dZ_curr, A_prev.T)
        db_curr = (1. / m) * np.sum(dZ_curr, axis=1, keepdims=True)
        dA_prev = np.matmul(W_curr.T, dZ_curr)

        # Store the raw gradients; the moment estimates above are refreshed from them on the next call
        self.grads_values[f"dW{layer_idx_curr}"] = dW_curr
        self.grads_values[f"db{layer_idx_curr}"] = db_curr

    # Track beta1^t and beta2^t (one update per backward call) for bias correction
    if self.grads_values.get("beta1"):
      self.grads_values["beta1"] *= self.beta1
      self.grads_values["beta2"] *= self.beta2
    else:
      self.grads_values["beta1"] = self.beta1
      self.grads_values["beta2"] = self.beta2

    for idx, layer in enumerate(nn_architecture):
      layer_idx = idx + 1
      # Bias-corrected second-moment estimates
      vtw_curr = self.grads_values[f"dW_square{layer_idx}"] / (1.0 - self.grads_values['beta2'])
      vtb_curr = self.grads_values[f"db_square{layer_idx}"] / (1.0 - self.grads_values['beta2'])

      # Bias-corrected first-moment estimates
      mtw_curr = self.grads_values[f"dW_sum{layer_idx}"] / (1.0 - self.grads_values['beta1'])
      mtb_curr = self.grads_values[f"db_sum{layer_idx}"] / (1.0 - self.grads_values['beta1'])

      # Nadam-style step: look-ahead momentum plus the bias-corrected current gradient
      parameters[f"W{layer_idx}"] -= self.lr / (np.sqrt(vtw_curr) + 1e-8) * (self.beta1 * mtw_curr +
                                       (1.0 - self.beta1) * self.grads_values[f"dW{layer_idx}"] / (1.0 - self.grads_values['beta1']))

      parameters[f"b{layer_idx}"] -= self.lr / (np.sqrt(vtb_curr) + 1e-8) * (self.beta1 * mtb_curr +
                                       (1.0 - self.beta1) * self.grads_values[f"db{layer_idx}"] / (1.0 - self.grads_values['beta1']))

    
    self.model.parameters = parameters

    return self.model
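
How these forward and backward methods fit together is not shown in the examples. A hedged sketch of one possible training step, assuming a model object that exposes nn_architecture, parameters, memory and the forward method above, and an optimizer object that exposes the backward method (every name other than forward and backward is a guess):

import numpy as np

# Hypothetical architecture description matching the layer['activation'] lookups above
nn_architecture = [
    {"input_dim": 4, "output_dim": 8, "activation": "relu"},
    {"input_dim": 8, "output_dim": 1, "activation": "sigmoid"},
]

def train_step(model, optimizer, xb, yb):
    # xb and yb are column-major (features x batch) arrays, matching the axis=1
    # conventions used throughout the examples
    outputs = model.forward(xb)
    loss = np.mean((outputs - yb) ** 2)              # MSE, consistent with the 2*(outputs - targets) gradient
    model = optimizer.backward(model, yb, outputs)   # updates model.parameters
    return model, loss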