Example #1
    def reset_parameters(self, param_init):
        """Initialize parameters."""
        if self.memory_transformer:
            logger.info('===== Initialize %s with normal distribution =====' %
                        self.__class__.__name__)
            for n, p in self.named_parameters():
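                # skip the convolutional front-end; its parameters are not re-initialized here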
                if 'conv' in n:
                    continue
                init_like_transformer_xl(n, p, std=0.02)

        elif param_init == 'xavier_uniform':
            logger.info(
                '===== Initialize %s with Xavier uniform distribution =====' %
                self.__class__.__name__)
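            # embedding (initialized only when there is no convolutional front-end)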
            if self.conv is None:
                nn.init.xavier_uniform_(self.embed.weight)
                nn.init.constant_(self.embed.bias, 0.)
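            # optional bridge projections (bridge, bridge_sub1, bridge_sub2)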
            if self.bridge is not None:
                nn.init.xavier_uniform_(self.bridge.weight)
                nn.init.constant_(self.bridge.bias, 0.)
            if self.bridge_sub1 is not None:
                nn.init.xavier_uniform_(self.bridge_sub1.weight)
                nn.init.constant_(self.bridge_sub1.bias, 0.)
            if self.bridge_sub2 is not None:
                nn.init.xavier_uniform_(self.bridge_sub2.weight)
                nn.init.constant_(self.bridge_sub2.bias, 0.)
Example #2
    def reset_parameters(self, param_init):
        """Initialize parameters."""
        if self.memory_transformer:
            logger.info('===== Initialize %s with normal distribution =====' % self.__class__.__name__)
            for n, p in self.named_parameters():
                if 'conv' in n:
                    continue
                init_like_transformer_xl(n, p, std=0.02)

        elif param_init == 'xavier_uniform':
            logger.info('===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__)
            # see https://github.com/pytorch/fairseq/blob/master/fairseq/models/transformer.py
            # embedding
            nn.init.normal_(self.embed.weight, mean=0., std=self.d_model**-0.5)
            nn.init.constant_(self.embed.weight[self.pad], 0.)
            # output layer
            nn.init.xavier_uniform_(self.output.weight)
            # nn.init.normal_(self.output.weight, mean=0., std=self.d_model**-0.5)
            nn.init.constant_(self.output.bias, 0.)
Example #3
    def reset_parameters(self):
        """Initialize parameters with normal distribution."""
        logger.info('===== Initialize %s with normal distribution =====' % self.__class__.__name__)
        for n, p in self.named_parameters():
            init_like_transformer_xl(n, p, std=0.02)
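All three examples call init_like_transformer_xl, which is not shown on this page. Below is a minimal sketch of what such a Transformer-XL-style per-parameter initializer could look like, assuming layer-norm weights are centered around 1, biases are zeroed, and weight matrices are drawn from a zero-mean normal distribution with the given std; the actual helper in the source repository may differ.

import torch.nn as nn

def init_like_transformer_xl(n, p, std=0.02):
    """Initialize one parameter in the style of Transformer-XL (sketch).

    n: parameter name from named_parameters()
    p: the parameter tensor
    std: standard deviation for normally distributed weights
    """
    if 'norm' in n and 'weight' in n:
        # layer-normalization scale: centered around 1 (assumption)
        nn.init.normal_(p, mean=1.0, std=std)
    elif p.dim() == 1:
        # biases and other 1-D parameters are zeroed
        nn.init.constant_(p, 0.)
    else:
        # weight matrices: zero-mean normal with the given std
        nn.init.normal_(p, mean=0.0, std=std)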