def reset_parameters(self):
    """Re-initialise the parameters of every attention head.

    For each head index in ``range(self.attn_heads)`` the matching entries
    of ``self.kernels`` and ``self.att_kernels`` are passed to
    ``glorot_uniform``. When ``self.use_bias`` is truthy, the matching entry
    of ``self.biases`` is passed to ``zeros``.

    NOTE(review): ``glorot_uniform`` and ``zeros`` are defined outside this
    block; presumably they initialise their argument in place -- confirm.
    """
    for head_idx in range(self.attn_heads):
        kernel = self.kernels[head_idx]
        glorot_uniform(kernel)

        att_kernel = self.att_kernels[head_idx]
        glorot_uniform(att_kernel)

        # The bias list is only touched when biases are enabled.
        if not self.use_bias:
            continue
        zeros(self.biases[head_idx])
def _initialize(self):
    """Initialise the weights and their velocities, then detach them.

    First pass: each weight in ``self.weights`` is passed to
    ``glorot_uniform`` and the paired entry of ``self.w_velocities`` is
    passed to ``zeros`` (pairs are formed with ``zip``).

    Second pass: every weight and every velocity is replaced in its
    container by the result of its own ``.detach()`` call, and
    ``requires_grad`` is set to ``True`` on the stored weight only.

    NOTE(review): ``glorot_uniform`` and ``zeros`` are defined outside this
    block; presumably they initialise their argument in place -- confirm.
    """
    for weight, velocity in zip(self.weights, self.w_velocities):
        glorot_uniform(weight)
        zeros(velocity)

    for pos, weight in enumerate(self.weights):
        self.weights[pos] = weight.detach()
        self.w_velocities[pos] = self.w_velocities[pos].detach()
        # Re-read the weight from the container so the flag is set on the
        # object that is actually stored there.
        self.weights[pos].requires_grad = True
def reset_parameters(self):
    """Re-initialise the sub-layers and this module's own parameters.

    In order:

    1. ``reset_parameters()`` is called on every entry of ``self.layers``.
    2. Every entry of ``self.scores`` is passed to ``glorot_uniform``.
    3. The ``.data`` of every entry of ``self.bias`` is filled with 0.
    4. Every entry of ``self.D_k`` is passed to ``glorot_uniform``.
    5. The ``.data`` of every entry of ``self.D_bias`` is filled with 0.

    NOTE(review): ``glorot_uniform`` is defined outside this block;
    presumably it initialises its argument in place -- confirm.
    """
    for sub_layer in self.layers:
        sub_layer.reset_parameters()

    for score in self.scores:
        glorot_uniform(score)

    # NOTE(review): an earlier comment here said the bias should be filled
    # with a positive value so the score starts closer to 1, but the fill
    # value actually used is 0. Confirm which of the two is intended.
    for score_bias in self.bias:
        score_bias.data.fill_(0.0)

    for d_kernel in self.D_k:
        glorot_uniform(d_kernel)

    for d_bias in self.D_bias:
        d_bias.data.fill_(0.0)
def reset_parameters(self):
    """Re-initialise the parameters of every attention head.

    For each head index in ``range(self.attn_heads)`` the matching entries
    of ``self.kernels``, ``self.attn_kernel_self`` and
    ``self.attn_kernel_neighs`` are passed, in that order, to
    ``glorot_uniform``. When ``self.use_bias`` is truthy, the matching entry
    of ``self.biases`` is passed to ``zeros``.

    NOTE(review): ``glorot_uniform`` and ``zeros`` are defined outside this
    block; presumably they initialise their argument in place -- confirm.
    """
    for head_idx in range(self.attn_heads):
        # All three parameters are looked up before any of them is
        # initialised.
        head_params = (
            self.kernels[head_idx],
            self.attn_kernel_self[head_idx],
            self.attn_kernel_neighs[head_idx],
        )
        for param in head_params:
            glorot_uniform(param)

        # The bias list is only touched when biases are enabled.
        if not self.use_bias:
            continue
        zeros(self.biases[head_idx])