def bprop(self, target):
    """
    Computes the loss derivatives with respect to all parameters,
    scaled by the current (decayed) learning rate.

    Assumes that ``self.fprop(input)`` was called first. All derivatives
    are written in place into their corresponding object attributes
    (i.e. ``self.d*``).

    :param target: index of the correct class for the current input.
    :raises ValueError: if ``self.activation_function`` is not one of
        ``'sigmoid'``, ``'tanh'`` or ``'reclin'``.
    """
    # Resolve the activation-derivative routine once, instead of
    # re-testing the string inside every loop iteration.
    if self.activation_function == 'sigmoid':
        dact = mlnonlin.dsigmoid
    elif self.activation_function == 'tanh':
        dact = mlnonlin.dtanh
    elif self.activation_function == 'reclin':
        dact = mlnonlin.dreclin
    else:
        raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    # Effective step size with 1/t-style decay (computed once, reused below).
    lr = self.learning_rate / (1. + self.decrease_constant * self.n_updates)

    # Gradient of the NLL at the softmax pre-activation: p - onehot(target),
    # pre-multiplied by the learning rate.
    self.doutput_act[:] = self.output
    self.doutput_act[target] -= 1
    self.doutput_act *= lr
    self.dd[:] = self.doutput_act

    for k in range(self.n_k_means):
        c = self.cluster_indices[k]
        # Parameter slot for cluster c of the k-th clustering.
        idx = c + k * self.n_clusters
        mllin.outer(self.doutput_act, self.layers[k], self.dVs[idx])
        mllin.product_matrix_vector(self.Vs[idx].T, self.doutput_act, self.dlayers[k])
        dact(self.layers[k], self.dlayers[k], self.dlayer_acts[k])
        self.dcs[idx][:] = self.dlayer_acts[k]
        mllin.outer(self.dlayer_acts[k], self.input, self.dWs[idx])

    if self.autoencoder_regularization != 0:
        # Autoencoder reconstruction term: gradient of the squared error,
        # 2 * (reconstruction - input), weighted by the regularizer and lr.
        self.dae_doutput_act[:] = self.dae_output
        self.dae_doutput_act[:] -= self.input
        self.dae_doutput_act *= 2 * self.autoencoder_regularization * lr
        self.dae_dd[:] = self.dae_doutput_act
        for k in range(self.n_k_means):
            c = self.cluster_indices[k]
            idx = c + k * self.n_clusters
            # Tied weights: the decoder uses Ws[idx].T, so its gradient is
            # accumulated into dWs[idx] transposed.
            mllin.outer(self.dae_doutput_act, self.dae_layers[k], self.dae_dWsT[idx])
            self.dWs[idx] += self.dae_dWsT[idx].T
            mllin.product_matrix_vector(self.Ws[idx], self.dae_doutput_act, self.dae_dlayers[k])
            dact(self.dae_layers[k], self.dae_dlayers[k], self.dae_dlayer_acts[k])
            self.dcs[idx] += self.dae_dlayer_acts[k]
            mllin.outer(self.dae_dlayer_acts[k], self.dae_input, self.dae_dWs[idx])
            self.dWs[idx] += self.dae_dWs[idx]
def update_learner(self, example):
    """
    Performs one stochastic gradient step on a single training example.

    ``example`` is a pair ``(input, target)`` where ``input`` fills the
    first layer and ``target`` is the index of the correct class. Runs a
    forward pass, a backward pass, then updates ``self.U``, ``self.d``,
    ``self.Ws`` and ``self.cs`` in place and increments ``self.n_updates``.

    :raises ValueError: if ``self.activation_function`` is not one of
        ``'sigmoid'``, ``'tanh'`` or ``'reclin'``.
    """
    # Resolve the activation and its derivative once, instead of
    # re-testing the string inside every layer-loop iteration.
    if self.activation_function == 'sigmoid':
        act, dact = mlnonlin.sigmoid, mlnonlin.dsigmoid
    elif self.activation_function == 'tanh':
        act, dact = mlnonlin.tanh, mlnonlin.dtanh
    elif self.activation_function == 'reclin':
        act, dact = mlnonlin.reclin, mlnonlin.dreclin
    else:
        raise ValueError('activation_function must be either \'sigmoid\', \'tanh\' or \'reclin\'')

    # fprop
    self.layers[0][:] = example[0]
    for h in range(self.n_hidden_layers):
        mllin.product_matrix_vector(self.Ws[h], self.layers[h], self.layer_acts[h + 1])
        self.layer_acts[h + 1] += self.cs[h]
        act(self.layer_acts[h + 1], self.layers[h + 1])
    mllin.product_matrix_vector(self.U, self.layers[-1], self.output_act)
    self.output_act += self.d
    mlnonlin.softmax(self.output_act, self.output)

    # bprop: NLL gradient at the softmax pre-activation is
    # p - onehot(target), pre-scaled by the decayed learning rate.
    self.doutput_act[:] = self.output
    self.doutput_act[example[1]] -= 1
    self.doutput_act *= self.learning_rate / (1. + self.decrease_constant * self.n_updates)
    self.dd[:] = self.doutput_act
    mllin.outer(self.doutput_act, self.layers[-1], self.dU)
    mllin.product_matrix_vector(self.U.T, self.doutput_act, self.dlayers[-1])
    dact(self.layers[-1], self.dlayers[-1], self.dlayer_acts[-1])
    for h in range(self.n_hidden_layers - 1, -1, -1):
        self.dcs[h][:] = self.dlayer_acts[h + 1]
        mllin.outer(self.dlayer_acts[h + 1], self.layers[h], self.dWs[h])
        mllin.product_matrix_vector(self.Ws[h].T, self.dlayer_acts[h + 1], self.dlayers[h])
        dact(self.layers[h], self.dlayers[h], self.dlayer_acts[h])

    # update (the gradients already include the learning rate)
    self.U -= self.dU
    self.d -= self.dd
    for h in range(self.n_hidden_layers - 1, -1, -1):
        self.Ws[h] -= self.dWs[h]
        self.cs[h] -= self.dcs[h]
    self.n_updates += 1
def apply_dactivation(self, output, doutput, dinput):
    """
    Applies the derivative of the activation function.

    Given a layer ``output`` and the gradient ``doutput`` with respect to
    that output, writes the gradient with respect to the layer
    pre-activation into ``dinput`` (in place).

    :raises ValueError: if ``self.activation_function`` is not one of
        ``'sigmoid'``, ``'tanh'``, ``'reclin'`` or ``'softmax'``.
    """
    if self.activation_function == "sigmoid":
        mlnonlin.dsigmoid(output, doutput, dinput)
    elif self.activation_function == "tanh":
        mlnonlin.dtanh(output, doutput, dinput)
    elif self.activation_function == "reclin":
        mlnonlin.dreclin(output, doutput, dinput)
    elif self.activation_function == "softmax":
        # Row-wise softmax Jacobian-vector product:
        # dinput_i = p_i * (doutput_i - sum_j doutput_j * p_j)
        dinput[:] = output * (doutput - (doutput * output).sum(axis=1).reshape((-1, 1)))
    else:
        # The original message omitted 'softmax' even though it is handled above.
        raise ValueError("activation_function must be either 'sigmoid', 'tanh', 'reclin' or 'softmax'")
def apply_dactivation(self, output, doutput, dinput):
    """
    Applies the derivative of the activation function.

    Maps the gradient ``doutput`` (w.r.t. the layer ``output``) back to a
    gradient w.r.t. the layer pre-activation, stored in place in ``dinput``.

    :raises ValueError: if ``self.activation_function`` is not one of
        ``'sigmoid'``, ``'tanh'``, ``'reclin'`` or ``'softmax'``.
    """
    if self.activation_function == 'sigmoid':
        mlnonlin.dsigmoid(output, doutput, dinput)
    elif self.activation_function == 'tanh':
        mlnonlin.dtanh(output, doutput, dinput)
    elif self.activation_function == 'reclin':
        mlnonlin.dreclin(output, doutput, dinput)
    elif self.activation_function == 'softmax':
        # Row-wise softmax Jacobian-vector product:
        # dinput_i = p_i * (doutput_i - sum_j doutput_j * p_j)
        dinput[:] = output * (doutput - (doutput * output).sum(axis=1).reshape((-1, 1)))
    else:
        # The original message omitted 'softmax' even though it is handled above.
        raise ValueError('activation_function must be either \'sigmoid\', \'tanh\', \'reclin\' or \'softmax\'')