def reset_parameters(self, param_init):
    """Initialize parameters.

    Args:
        param_init (str): initialization scheme; only 'xavier_uniform' is
            handled here, and it is ignored when ``self.memory_transformer``
            is set (TransformerXL-style init takes precedence).

    """
    if self.memory_transformer:
        logger.info('===== Initialize %s with normal distribution =====' % self.__class__.__name__)
        # TransformerXL-style normal init; convolution parameters are skipped
        # and keep their own initialization.
        for name, param in self.named_parameters():
            if 'conv' not in name:
                init_like_transformer_xl(name, param, std=0.02)
        return

    if param_init == 'xavier_uniform':
        logger.info(
            '===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__)
        if self.conv is None:
            # Linear input projection used when there is no conv frontend.
            nn.init.xavier_uniform_(self.embed.weight)
            nn.init.constant_(self.embed.bias, 0.)
        # Optional bridge projections (main output plus the two sub-task
        # outputs); each follows the same xavier-weight / zero-bias scheme.
        for bridge in (self.bridge, self.bridge_sub1, self.bridge_sub2):
            if bridge is not None:
                nn.init.xavier_uniform_(bridge.weight)
                nn.init.constant_(bridge.bias, 0.)
def reset_parameters(self, param_init):
    """Initialize parameters.

    Args:
        param_init (str): initialization scheme; only 'xavier_uniform' is
            handled here, and it is ignored when ``self.memory_transformer``
            is set (TransformerXL-style init takes precedence).

    """
    if self.memory_transformer:
        logger.info('===== Initialize %s with normal distribution =====' % self.__class__.__name__)
        # TransformerXL-style normal init; convolution parameters are skipped.
        for name, param in self.named_parameters():
            if 'conv' not in name:
                init_like_transformer_xl(name, param, std=0.02)
        return

    if param_init == 'xavier_uniform':
        logger.info('===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__)
        # see https://github.com/pytorch/fairseq/blob/master/fairseq/models/transformer.py
        # Embedding: scaled normal init, with the padding row zeroed out.
        nn.init.normal_(self.embed.weight, mean=0., std=self.d_model ** -0.5)
        nn.init.constant_(self.embed.weight[self.pad], 0.)
        # Output projection: xavier weights, zero bias.
        nn.init.xavier_uniform_(self.output.weight)
        nn.init.constant_(self.output.bias, 0.)
def reset_parameters(self):
    """Initialize all parameters with a normal distribution (TransformerXL style)."""
    logger.info('===== Initialize %s with normal distribution =====' % self.__class__.__name__)
    for name, param in self.named_parameters():
        init_like_transformer_xl(name, param, std=0.02)