def forward(self, features, **kwargs):
    """Project hidden features back onto the vocabulary.

    Applies the dense transform, GELU activation and layer norm, then the
    decoder projection with an additive bias term.
    """
    hidden = self.dense(features)
    hidden = self.layer_norm(gelu(hidden))
    # Project back to size of vocabulary with bias.
    return self.decoder(hidden) + self.bias
def forward(self, sequence_output):
    """Score each sequence position with its own decoder.

    Runs dense → GELU → layer-norm, then applies a per-position decoder
    module ``self.op[i]`` to position ``i`` and concatenates the results
    along the sequence dimension.

    Returns ``None`` for a zero-length sequence dimension (matching the
    original accumulator-based behavior); otherwise a tensor of
    per-position prediction scores.
    """
    x = self.dense(sequence_output)
    x = gelu(x)
    hidden_states = self.layer_norm(x)
    # One decoder per position: slice out position i (keeping the seq dim
    # via unsqueeze) and decode it with the matching submodule.
    per_position = [
        self.op[i](hidden_states[:, i, :].unsqueeze(1))
        for i in range(hidden_states.size(1))
    ]
    if not per_position:
        # Original returned the untouched None accumulator in this case.
        return None
    # Single cat is O(n); the original re-concatenated the accumulator on
    # every iteration, which is quadratic in sequence length.
    return torch.cat(per_position, dim=1)
def forward(self, sequence_output):
    """Compute vocabulary prediction scores for a sequence.

    Transforms the encoder output with a dense layer, GELU activation and
    layer norm, then projects to the vocabulary via the shared decoder
    weights plus a learned bias.
    """
    transformed = self.dense(sequence_output)
    normalized = self.layer_norm(gelu(transformed))
    # Decoder projection plus bias yields the vocabulary logits.
    return self.decoder(normalized) + self.bias