class WS353(Metric):
    def __init__(self, sim_file_path: str) -> None:
        self._sim_data = []
        self._sim_gold = []
        self._data_reader = KoWikiReader()
        self._pearson = PearsonCorrelation()

        with open(sim_file_path, 'r', encoding='utf-8') as f:
            f.readline()
            for line in f:
                w1, w2, score = line.strip().split('\t')
                self._sim_data.append((w1, w2))
                self._sim_gold.append(float(score))
        self._sim_gold = torch.tensor(self._sim_gold)

    @overrides
    def __call__(self,
                 vocab: Vocabulary,
                 embedder: SyllableEmbedder,
                 cuda_device: torch.device,
                 print_mode: bool = False) -> None:
        preds = []
        for i in range(len(self._sim_data)):
            w1, w2 = self._sim_data[i]
            w1 = self._data_reader.text_to_instance(source=Token(w1))['source']
            w2 = self._data_reader.text_to_instance(source=Token(w2))['source']
            w1.index(vocab)
            w2.index(vocab)
            w1 = w1.as_tensor(w1.get_padding_lengths())['syllables'].to(cuda_device)
            w2 = w2.as_tensor(w2.get_padding_lengths())['syllables'].to(cuda_device)
            e1, e2 = embedder(w1), embedder(w2)
            preds.append(F.cosine_similarity(e1, e2))

        self._pearson(torch.tensor(preds), self._sim_gold)

        if print_mode:
            print('w1\tw2\tgold\tpred')
            for ((w1, w2), gold, pred) in zip(self._sim_data, self._sim_gold, preds):
                print(f'{w1}\t{w2}\t{gold.item():.2f}\t{pred.item():.2f}')
            print(f'pscore: {self.get_metric():.3f}')

    @overrides
    def get_metric(self, reset: bool = False):
        score = self._pearson.get_metric(reset)
        if reset:
            self.reset()
        return score

    @overrides
    def reset(self):
        self._pearson.reset()
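# A minimal sketch of how this metric might be invoked at evaluation time. It assumes a trained
# SyllableEmbedder and its Vocabulary are already available; the file path, device, and variable
# names below are illustrative assumptions, not part of the original snippet.
ws353 = WS353(sim_file_path="data/wordsim353.tsv")  # assumed location of the similarity file
ws353(vocab, embedder, cuda_device=torch.device("cuda:0"), print_mode=True)
pearson_score = ws353.get_metric(reset=True)
print(f"WS353 Pearson correlation: {pearson_score:.3f}")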
def test_pearson_correlation_unmasked_computation(self):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
    labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")

    predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
    predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
    labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
    labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)

    # In most cases the data looks like predictions_1, where the rows of a batch differ.
    # In a few cases, as with predictions_2, every row of the batch is exactly the same.
    predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

    stride = 10
    for predictions, labels in predictions_labels:
        pearson_correlation.reset()
        for i in range(batch_size // stride):
            timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
            timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
            expected_pearson_correlation = pearson_corrcoef(
                predictions[:stride * (i + 1), :].reshape(-1),
                labels[:stride * (i + 1), :].reshape(-1))
            pearson_correlation(timestep_predictions, timestep_labels)
            assert_allclose(expected_pearson_correlation,
                            pearson_correlation.get_metric(),
                            rtol=1e-5)
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels))
        assert_allclose(pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1)),
                        pearson_correlation.get_metric(),
                        rtol=1e-5)
def test_pearson_correlation_masked_computation(self, device: str):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions_1 = torch.randn(batch_size, num_labels, device=device)
    labels_1 = 0.5 * predictions_1 + torch.randn(batch_size, num_labels, device=device)

    predictions_2 = torch.randn(1, device=device).expand(num_labels)
    predictions_2 = predictions_2.unsqueeze(0).expand(batch_size, -1)
    labels_2 = torch.randn(1, device=device).expand(num_labels)
    labels_2 = 0.5 * predictions_2 + labels_2.unsqueeze(0).expand(batch_size, -1)

    predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

    # Random binary mask
    mask = torch.randint(0, 2, size=(batch_size, num_labels), device=device).bool()
    stride = 10

    for predictions, labels in predictions_labels:
        pearson_correlation.reset()
        for i in range(batch_size // stride):
            timestep_predictions = predictions[stride * i:stride * (i + 1), :]
            timestep_labels = labels[stride * i:stride * (i + 1), :]
            timestep_mask = mask[stride * i:stride * (i + 1), :]
            expected_pearson_correlation = pearson_corrcoef(
                predictions[:stride * (i + 1), :].view(-1).cpu().numpy(),
                labels[:stride * (i + 1), :].view(-1).cpu().numpy(),
                fweights=mask[:stride * (i + 1), :].view(-1).cpu().numpy(),
            )
            pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
            assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric())
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(predictions, labels, mask)
        expected_pearson_correlation = pearson_corrcoef(
            predictions.view(-1).cpu().numpy(),
            labels.view(-1).cpu().numpy(),
            fweights=mask.view(-1).cpu().numpy(),
        )
        assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric())
def test_pearson_correlation_unmasked_computation(self):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions = np.random.randn(batch_size, num_labels).astype("float32")
    labels = 0.5 * predictions + np.random.randn(batch_size, num_labels).astype("float32")

    stride = 10
    for i in range(batch_size // stride):
        timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
        timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
        expected_pearson_correlation = np.corrcoef(
            predictions[:stride * (i + 1), :].reshape(-1),
            labels[:stride * (i + 1), :].reshape(-1))[0, 1]
        pearson_correlation(timestep_predictions, timestep_labels)
        assert_allclose(expected_pearson_correlation,
                        pearson_correlation.get_metric(),
                        rtol=1e-5)

    # Test reset
    pearson_correlation.reset()
    pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels))
    assert_allclose(np.corrcoef(predictions.reshape(-1), labels.reshape(-1))[0, 1],
                    pearson_correlation.get_metric(),
                    rtol=1e-5)
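# The tests above exercise the metric exactly the way a model does: call it once per batch, then
# read the accumulated value with get_metric(). A minimal standalone sketch of that call pattern
# (no mask), using only calls that appear in the snippets above; the tensor values are illustrative.
import torch
from allennlp.training.metrics import PearsonCorrelation

metric = PearsonCorrelation()
predictions = torch.randn(8, 4)
gold = 0.5 * predictions + 0.1 * torch.randn(8, 4)
metric(predictions, gold)              # accumulate one batch
metric(predictions + 0.01, gold)       # statistics accumulate across calls
print(metric.get_metric(reset=True))   # Pearson r over everything seen since the last reset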
class RuseModel(Model):
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        hidden_dim = 128
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(in_features=encoder.get_output_dim() * 4, out_features=hidden_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=hidden_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=1))
        self.covar = Covariance()
        self.pearson = PearsonCorrelation()

    def forward(self,
                mt_sent: Dict[str, torch.Tensor],
                ref_sent: Dict[str, torch.Tensor],
                human_score: np.ndarray,
                origin: str) -> Dict[str, torch.Tensor]:
        mt_mask = get_text_field_mask(mt_sent)
        ref_mask = get_text_field_mask(ref_sent)

        mt_embeddings = self.word_embeddings(mt_sent)
        ref_embeddings = self.word_embeddings(ref_sent)

        mt_encoder_out = self.encoder(mt_embeddings, mt_mask)
        ref_encoder_out = self.encoder(ref_embeddings, ref_mask)

        input = torch.cat((mt_encoder_out,
                           ref_encoder_out,
                           torch.mul(mt_encoder_out, ref_encoder_out),
                           torch.abs(mt_encoder_out - ref_encoder_out)), 1)
        reg = self.mlp(input)
        output = {"reg": reg}

        if human_score is not None:
            # run metric calculation
            self.covar(reg, human_score)
            self.pearson(reg, human_score)

            # calculate mean squared error
            delta = reg - human_score
            output["loss"] = torch.mul(delta, delta).sum()

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {
            "covar": self.covar.get_metric(reset),
            "pearson": self.pearson.get_metric(reset)
        }
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             seq2vec_encoder: Seq2VecEncoder,
             seq2seq_encoder: Seq2SeqEncoder = None,
             dropout: float = None,
             scale: float = 1,
             label_namespace: str = "labels",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder

    if seq2seq_encoder:
        self._seq2seq_encoder = seq2seq_encoder
    else:
        self._seq2seq_encoder = None

    self._seq2vec_encoder = seq2vec_encoder
    self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None

    self._label_namespace = label_namespace
    self._num_labels = 1  # because we're running a regression task
    self._scale = scale
    self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels)
    self._metric = PearsonCorrelation()
    self._loss = torch.nn.MSELoss()
    initializer(self)
def test_pearson_correlation_unmasked_computation(self, device: str):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions_1 = torch.randn(batch_size, num_labels, device=device)
    labels_1 = 0.5 * predictions_1 + torch.randn(batch_size, num_labels, device=device)

    predictions_2 = torch.randn(1, device=device).expand(num_labels)
    predictions_2 = predictions_2.unsqueeze(0).expand(batch_size, -1)
    labels_2 = torch.randn(1, device=device).expand(num_labels)
    labels_2 = 0.5 * predictions_2 + labels_2.unsqueeze(0).expand(batch_size, -1)

    # In most cases the data looks like predictions_1, where the rows of a batch differ.
    # In a few cases, as with predictions_2, every row of the batch is exactly the same.
    predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

    stride = 10
    for predictions, labels in predictions_labels:
        pearson_correlation.reset()
        for i in range(batch_size // stride):
            timestep_predictions = predictions[stride * i:stride * (i + 1), :]
            timestep_labels = labels[stride * i:stride * (i + 1), :]
            expected_pearson_correlation = pearson_corrcoef(
                predictions[:stride * (i + 1), :].view(-1).cpu().numpy(),
                labels[:stride * (i + 1), :].view(-1).cpu().numpy(),
            )
            pearson_correlation(timestep_predictions, timestep_labels)
            assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric())
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(predictions, labels)
        assert_allclose(
            pearson_corrcoef(predictions.view(-1).cpu().numpy(),
                             labels.view(-1).cpu().numpy()),
            pearson_correlation.get_metric(),
        )
def test_pearson_correlation_masked_computation(self):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
    labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")

    predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
    predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
    labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
    labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)

    predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

    # Random binary mask
    mask = np.random.randint(0, 2, size=(batch_size, num_labels)).astype("float32")
    stride = 10

    for predictions, labels in predictions_labels:
        pearson_correlation.reset()
        for i in range(batch_size // stride):
            timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
            timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
            timestep_mask = torch.FloatTensor(mask[stride * i:stride * (i + 1), :])
            expected_pearson_correlation = pearson_corrcoef(
                predictions[:stride * (i + 1), :].reshape(-1),
                labels[:stride * (i + 1), :].reshape(-1),
                fweights=mask[:stride * (i + 1), :].reshape(-1))
            pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
            assert_allclose(expected_pearson_correlation,
                            pearson_correlation.get_metric(),
                            rtol=1e-5)
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(torch.FloatTensor(predictions),
                            torch.FloatTensor(labels),
                            torch.FloatTensor(mask))
        expected_pearson_correlation = pearson_corrcoef(predictions.reshape(-1),
                                                        labels.reshape(-1),
                                                        fweights=mask.reshape(-1))
        assert_allclose(expected_pearson_correlation,
                        pearson_correlation.get_metric(),
                        rtol=1e-5)
def test_distributed_pearson(self):
    batch_size = 10
    num_labels = 10
    predictions = torch.randn(batch_size, num_labels)
    labels = 0.5 * predictions + torch.randn(batch_size, num_labels)

    expected_pearson_correlation = pearson_corrcoef(
        predictions.view(-1).cpu().numpy(),
        labels.view(-1).cpu().numpy(),
    )
    predictions = [predictions[:5], predictions[5:]]
    labels = [labels[:5], labels[5:]]
    metric_kwargs = {"predictions": predictions, "gold_labels": labels}
    run_distributed_test(
        [-1, -1],
        global_distributed_metric,
        PearsonCorrelation(),
        metric_kwargs,
        expected_pearson_correlation,
        exact=(0.0001, 1e-01),
    )
def test_pearson_correlation_masked_computation(self):
    pearson_correlation = PearsonCorrelation()
    batch_size = 100
    num_labels = 10
    predictions = np.random.randn(batch_size, num_labels).astype("float32")
    labels = 0.5 * predictions + np.random.randn(batch_size, num_labels).astype("float32")

    # Random binary mask
    mask = np.random.randint(0, 2, size=(batch_size, num_labels)).astype("float32")
    stride = 10

    for i in range(batch_size // stride):
        timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
        timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
        timestep_mask = torch.FloatTensor(mask[stride * i:stride * (i + 1), :])
        covariance_matrices = np.cov(predictions[:stride * (i + 1), :].reshape(-1),
                                     labels[:stride * (i + 1), :].reshape(-1),
                                     fweights=mask[:stride * (i + 1), :].reshape(-1))
        expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(
            covariance_matrices[0, 0] * covariance_matrices[1, 1])
        pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
        assert_allclose(expected_pearson_correlation,
                        pearson_correlation.get_metric(),
                        rtol=1e-5)

    # Test reset
    pearson_correlation.reset()
    pearson_correlation(torch.FloatTensor(predictions),
                        torch.FloatTensor(labels),
                        torch.FloatTensor(mask))
    covariance_matrices = np.cov(predictions.reshape(-1),
                                 labels.reshape(-1),
                                 fweights=mask.reshape(-1))
    expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(
        covariance_matrices[0, 0] * covariance_matrices[1, 1])
    assert_allclose(expected_pearson_correlation,
                    pearson_correlation.get_metric(),
                    rtol=1e-5)
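# Several tests above call a pearson_corrcoef helper that is not shown in these snippets. A
# plausible definition, consistent with how the test above derives the expected value from np.cov
# (including the optional fweights mask), might look like the following; the helper's name and
# keyword handling are assumptions on my part, not the library's definition.
import numpy as np

def pearson_corrcoef(x: np.ndarray, y: np.ndarray, fweights: np.ndarray = None) -> float:
    # Mirrors the np.cov-based computation used in the masked test above.
    covariance_matrix = np.cov(x, y, fweights=fweights)
    return covariance_matrix[0, 1] / np.sqrt(covariance_matrix[0, 0] * covariance_matrix[1, 1])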
class AttMT(Model):
    """
    This ``Model`` implements the baseline model with attention.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``mtref`` and ``mtsys`` ``TextFields`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        Used to encode the mtref and mtsys.
    similarity_function : ``SimilarityFunction``
        This is the similarity function used when computing the similarity matrix between encoded
        words in the mtref and words in the mtsys.
    output_feedforward : ``FeedForward``
        Used to prepare the concatenated mtref and mtsys for prediction.
    output_logit : ``FeedForward``
        Legacy input that does nothing.
    dropout : ``float``, optional (default=0.5)
        Dropout percentage to use.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 aggr_type: str = "both",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit
        self._num_labels = 1

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        # check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
        #                        "encoder output dim", "projection feedforward input")
        # check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
        #                        "proj feedforward output dim", "inference lstm input dim")
        self._aggr_type = aggr_type
        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)

    def forward(self,  # type: ignore
                ref: Dict[str, torch.LongTensor],
                mt: Dict[str, torch.LongTensor],
                score: torch.IntTensor = None,  # pylint:disable=unused-argument
                ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        ref : Dict[str, torch.LongTensor]
            From a ``TextField``
        mt : Dict[str, torch.LongTensor]
            From a ``TextField``
        score : torch.IntTensor, optional (default = None)
            From a ``NumericField``

        Returns
        -------
        An output dictionary consisting of:
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        embedded_mtref = self._text_field_embedder(ref)
        embedded_mtsys = self._text_field_embedder(mt)
        mtref_mask = get_text_field_mask(ref).float()
        mtsys_mask = get_text_field_mask(mt).float()

        # apply dropout for LSTM
        if self.rnn_input_dropout:
            embedded_mtref = self.rnn_input_dropout(embedded_mtref)
            embedded_mtsys = self.rnn_input_dropout(embedded_mtsys)

        # encode mtref and mtsys
        # Shape: (batch_size, mtref/sys_length, modeldim*bi? = 600)
        encoded_mtref = self._encoder(embedded_mtref, mtref_mask)
        encoded_mtsys = self._encoder(embedded_mtsys, mtsys_mask)

        # Shape: (batch_size, mtref_length, mtsys_length)
        similarity_matrix = self._matrix_attention(encoded_mtref, encoded_mtsys)

        # Shape: (batch_size, mtref_length, mtsys_length)
        p2h_attention = masked_softmax(similarity_matrix, mtsys_mask)
        # Shape: (batch_size, mtref_length, modeldim*2)
        attended_mtref = weighted_sum(encoded_mtsys, p2h_attention)

        # Shape: (batch_size, mtsys_length, mtref_length)
        h2p_attention = masked_softmax(similarity_matrix.transpose(1, 2).contiguous(), mtref_mask)
        # Shape: (batch_size, mtsys_length, modeldim*2)
        attended_mtsys = weighted_sum(encoded_mtref, h2p_attention)

        # The pooling layer -- max and avg pooling.
        # (batch_size, model_dim * 2 = 600)
        v_a_max, _ = replace_masked_values(attended_mtref, mtref_mask.unsqueeze(-1), -1e7).max(dim=1)
        # (batch_size, model_dim * 2 = 600)
        v_b_max, _ = replace_masked_values(attended_mtsys, mtsys_mask.unsqueeze(-1), -1e7).max(dim=1)

        v_a_avg = torch.sum(attended_mtref * mtref_mask.unsqueeze(-1), dim=1) / torch.sum(mtref_mask, 1, keepdim=True)
        v_b_avg = torch.sum(attended_mtsys * mtsys_mask.unsqueeze(-1), dim=1) / torch.sum(mtsys_mask, 1, keepdim=True)

        if self._aggr_type == 'both':
            # Now concat
            # (batch_size, model_dim * 2 * 2 * 4)
            v_all = torch.cat([
                v_a_avg, v_b_avg, v_a_avg - v_b_avg, v_a_avg * v_b_avg,
                v_a_max, v_b_max, v_a_max - v_b_max, v_a_max * v_b_max
            ], dim=1)
        elif self._aggr_type == 'max':
            # (batch_size, model_dim * 2 * 4)
            v_all = torch.cat([v_a_max, v_b_max, v_a_max - v_b_max, v_a_max * v_b_max], dim=1)
        elif self._aggr_type == 'avg':
            v_all = torch.cat([v_a_avg, v_b_avg, v_a_avg - v_b_avg, v_a_avg * v_b_avg], dim=1)

        # the final MLP -- apply dropout to input, and MLP applies to output & hidden
        if self.dropout:
            v_all = self.dropout(v_all)

        pred = self._output_feedforward(v_all)
        output_dict = {'pred': pred}

        if score is not None:
            loss = self._loss(pred, score)
            self._metric(pred, score)
            output_dict["loss"] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {'pearson': self._metric.get_metric(reset)}
def __init__(self,
             vocab: Vocabulary,
             token_representation_dim: int,
             encoder: Optional[Seq2SeqEncoder] = None,
             decoder: Optional[Union[FeedForward, str]] = None,
             contextualizer: Optional[Contextualizer] = None,
             pretrained_file: Optional[str] = None,
             transfer_contextualizer_from_pretrained_file: bool = False,
             transfer_encoder_from_pretrained_file: bool = False,
             freeze_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SelectiveRegressor, self).__init__(vocab, regularizer)

    self._token_representation_dim = token_representation_dim
    self._contextualizer = contextualizer
    if encoder is None:
        encoder = PassThroughEncoder(input_dim=self._token_representation_dim)
    self._encoder = encoder

    # Load the contextualizer and encoder weights from the
    # pretrained_file if applicable
    if pretrained_file:
        archive = None
        if self._contextualizer and transfer_contextualizer_from_pretrained_file:
            logger.info("Attempting to load contextualizer weights from "
                        "pretrained_file at {}".format(pretrained_file))
            archive = load_archive(cached_path(pretrained_file))
            contextualizer_state = archive.model._contextualizer.state_dict()
            contextualizer_layer_num = self._contextualizer._layer_num
            self._contextualizer.load_state_dict(contextualizer_state)
            if contextualizer_layer_num is not None:
                logger.info("Setting layer num to {}".format(contextualizer_layer_num))
                self._contextualizer.set_layer_num(contextualizer_layer_num)
            else:
                self._contextualizer.reset_layer_num()
            logger.info("Successfully loaded contextualizer weights!")
        if transfer_encoder_from_pretrained_file:
            logger.info("Attempting to load encoder weights from "
                        "pretrained_file at {}".format(pretrained_file))
            if archive is None:
                archive = load_archive(cached_path(pretrained_file))
            encoder_state = archive.model._encoder.state_dict()
            self._encoder.load_state_dict(encoder_state)
            logger.info("Successfully loaded encoder weights!")

    self._freeze_encoder = freeze_encoder
    for parameter in self._encoder.parameters():
        # If freeze is true, requires_grad should be false and vice versa.
        parameter.requires_grad_(not self._freeze_encoder)

    if decoder is None or decoder == "linear":
        # Create the default decoder (logistic regression) if it is not provided.
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 1,
            "hidden_dims": 1,
            "activations": "linear"
        }))
        logger.info("No decoder provided to model, using default "
                    "decoder: {}".format(decoder))
    elif decoder == "mlp":
        # Create the MLP decoder
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 2,
            "hidden_dims": [1024, 1],
            "activations": ["relu", "linear"]
        }))
        logger.info("Using MLP decoder: {}".format(decoder))
    self._decoder = decoder

    check_dimensions_match(self._token_representation_dim, self._encoder.get_input_dim(),
                           "token representation dim", "encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim(), self._decoder.get_input_dim(),
                           "encoder output dim", "decoder input dim")
    check_dimensions_match(self._decoder.get_output_dim(), 1,
                           "decoder output dim", "1, since we're predicting a real value")

    # SmoothL1Loss as described in "Neural Models of Factuality" (NAACL 2018)
    self.loss = torch.nn.SmoothL1Loss(reduction="none")
    self.metrics = {
        "mae": MeanAbsoluteError(),
        "pearson_r": PearsonCorrelation()
    }

    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    logger.info("Applying initializer...")
    initializer(self)
class BERTMoji(Model):
    """
    This ``Model`` performs text classification for an academic paper.  We assume we're given a
    title and an abstract, and we predict some output label.

    The basic model structure: we'll embed the title and the abstract, and encode each of them with
    separate Seq2VecEncoders, getting a single vector representing the content of each.  We'll then
    concatenate those two vectors, and pass the result through a feedforward network, the output of
    which we'll use as our scores for each label.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the ``tokens`` ``TextField`` we get as input to the model.
    encoder : ``Seq2VecEncoder``
        The encoder that we will use to convert the text to a vector.
    classifier_feedforward : ``FeedForward``
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        # raise ValueError(self.vocab.get_vocab_size("tokens"))
        # raise ValueError(text_field_embedder.get_output_dim())
        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            encoder.get_input_dim()))
        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward
        self.metrics = {
            "multilabel-f1": MultiLabelF1Measure(),
            'accuracy': BooleanAccuracy()
        }
        self.pearson_r = PearsonCorrelation()
        self.loss = nn.MultiLabelSoftMarginLoss()  # BCEWithLogitsLoss()
        self._threshold = 0.5
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, Variable], required
            The output of ``TextField.as_array()``.
        label : Variable, optional (default = None)
            A variable representing the label for each instance in the batch.

        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a distribution over the
            label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        # print(tokens)
        embedded = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        encoded = self.encoder(embedded, mask)

        logits = self.classifier_feedforward(encoded)
        output_dict = {'logits': torch.sigmoid(logits)}
        if label is None:
            # inference
            decoded = self.decode(output_dict)
            output_dict['decoded'] = decoded
        else:
            loss = self.loss(logits, label.float())
            loss = loss + (1 - rsq_loss(logits, label.float()))
            self.pearson_r(logits, label.float())
            preds = (logits > self._threshold).long()
            for metric in self.metrics.values():
                metric(preds, label)
            output_dict["loss"] = loss

        return output_dict

    # @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the class probabilities, converts indices to string labels, and
        adds a ``"label"`` key to the dictionary with the result.
        """
        def get_scores(row):
            scores = ((self.vocab.get_token_from_index(i, namespace="labels"), s)
                      for (i, s) in enumerate(row))
            return sorted(scores, key=lambda x: x[1], reverse=True)

        class_probabilities = output_dict['logits']
        predictions = class_probabilities.cpu().data.numpy()
        output_dict['scores'] = list(map(get_scores, predictions))
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        def unpack(m):
            if isinstance(m, tuple):
                return m[-1]
            return m

        metrics = {metric_name: unpack(metric.get_metric(reset))
                   for metric_name, metric in self.metrics.items()}
        metrics['pearson_r'] = self.pearson_r.get_metric(reset)
        return metrics
class ESIM(Model):
    """
    This ``Model`` implements the ESIM sequence model described in `"Enhanced LSTM for Natural
    Language Inference"
    <https://www.semanticscholar.org/paper/Enhanced-LSTM-for-Natural-Language-Inference-Chen-Zhu/83e7654d545fbbaaf2328df365a781fb67b841b4>`_
    by Chen et al., 2017.

    This code was taken from the AllenNLP repo, and modified for predicting (continuous) scores
    for MT system outputs.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``mtref`` and ``mtsys`` ``TextFields`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        Used to encode the mtref and mtsys.
    similarity_function : ``SimilarityFunction``
        This is the similarity function used when computing the similarity matrix between encoded
        words in the mtref and words in the mtsys.
    projection_feedforward : ``FeedForward``
        The feedforward network used to project down the encoded and enhanced mtref and mtsys.
    inference_encoder : ``Seq2SeqEncoder``
        Used to encode the projected mtref and mtsys for prediction.
    output_feedforward : ``FeedForward``
        Used to prepare the concatenated mtref and mtsys for prediction.
    output_logit : ``FeedForward``
        Legacy input that does nothing.
    dropout : ``float``, optional (default=0.5)
        Dropout percentage to use.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._num_labels = 1

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                               "encoder output dim", "projection feedforward input")
        check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                               "proj feedforward output dim", "inference lstm input dim")

        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)

    def forward(self,  # type: ignore
                ref: Dict[str, torch.LongTensor],
                mt: Dict[str, torch.LongTensor],
                score: torch.IntTensor = None  # pylint:disable=unused-argument
                ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        ref : Dict[str, torch.LongTensor]
            From a ``TextField``
        mt : Dict[str, torch.LongTensor]
            From a ``TextField``
        score : torch.IntTensor, optional (default = None)
            From a ``NumericField``

        Returns
        -------
        An output dictionary consisting of:
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        # print(worker)
        embedded_mtref = self._text_field_embedder(ref)
        embedded_mtsys = self._text_field_embedder(mt)
        mtref_mask = get_text_field_mask(ref).float()
        mtsys_mask = get_text_field_mask(mt).float()

        # apply dropout for LSTM
        if self.rnn_input_dropout:
            embedded_mtref = self.rnn_input_dropout(embedded_mtref)
            embedded_mtsys = self.rnn_input_dropout(embedded_mtsys)

        # encode mtref and mtsys
        # Shape: (batch_size, mtref/sys_length, modeldim*2 = 600)
        encoded_mtref = self._encoder(embedded_mtref, mtref_mask)
        encoded_mtsys = self._encoder(embedded_mtsys, mtsys_mask)

        # Shape: (batch_size, mtref_length, mtsys_length)
        similarity_matrix = self._matrix_attention(encoded_mtref, encoded_mtsys)

        # Shape: (batch_size, mtref_length, mtsys_length)
        p2h_attention = masked_softmax(similarity_matrix, mtsys_mask)
        # Shape: (batch_size, mtref_length, embedding_dim)
        attended_mtsys = weighted_sum(encoded_mtsys, p2h_attention)

        # Shape: (batch_size, mtsys_length, mtref_length)
        h2p_attention = masked_softmax(similarity_matrix.transpose(1, 2).contiguous(), mtref_mask)
        # Shape: (batch_size, mtsys_length, embedding_dim)
        attended_mtref = weighted_sum(encoded_mtref, h2p_attention)

        # the "enhancement" layer
        # Shape: (batch_size, mtref/sys_length, modeldim * 2 * 4 = 2400)
        mtref_enhanced = torch.cat([
            encoded_mtref, attended_mtsys,
            encoded_mtref - attended_mtsys,
            encoded_mtref * attended_mtsys
        ], dim=-1)
        mtsys_enhanced = torch.cat([
            encoded_mtsys, attended_mtref,
            encoded_mtsys - attended_mtref,
            encoded_mtsys * attended_mtref
        ], dim=-1)

        # The projection layer down to the model dimension.  Dropout is not applied before
        # projection.
        # Shape: (batch_size, mtref/sys_length, modeldim = 300)
        projected_enhanced_mtref = self._projection_feedforward(mtref_enhanced)
        projected_enhanced_mtsys = self._projection_feedforward(mtsys_enhanced)

        # Run the inference layer
        if self.rnn_input_dropout:
            projected_enhanced_mtref = self.rnn_input_dropout(projected_enhanced_mtref)
            projected_enhanced_mtsys = self.rnn_input_dropout(projected_enhanced_mtsys)
        # Shape: (batch_size, mtref/sys_length, modeldim*2 = 600)
        v_ai = self._inference_encoder(projected_enhanced_mtref, mtref_mask)
        v_bi = self._inference_encoder(projected_enhanced_mtsys, mtsys_mask)

        # The pooling layer -- max and avg pooling.
        # (batch_size, model_dim * 2 = 600)
        v_a_max, _ = replace_masked_values(v_ai, mtref_mask.unsqueeze(-1), -1e7).max(dim=1)
        # (batch_size, model_dim * 2 = 600)
        v_b_max, _ = replace_masked_values(v_bi, mtsys_mask.unsqueeze(-1), -1e7).max(dim=1)

        # (batch_size, model_dim * 2 = 600)
        v_a_avg = torch.sum(v_ai * mtref_mask.unsqueeze(-1), dim=1) / torch.sum(mtref_mask, 1, keepdim=True)
        # (batch_size, model_dim * 2 = 600)
        v_b_avg = torch.sum(v_bi * mtsys_mask.unsqueeze(-1), dim=1) / torch.sum(mtsys_mask, 1, keepdim=True)

        # Now concat
        # (batch_size, model_dim * 2 * 4)
        v_all = torch.cat([v_a_avg, v_a_max, v_b_avg, v_b_max], dim=1)

        # the final MLP -- apply dropout to input, and MLP applies to output & hidden
        if self.dropout:
            v_all = self.dropout(v_all)

        pred = self._output_feedforward(v_all)
        output_dict = {'pred': pred}

        if score is not None:
            loss = self._loss(pred, score)
            self._metric(pred, score)
            output_dict["loss"] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {'pearson': self._metric.get_metric(reset)}
class LstmTagger(Model):
    '''
    The other class you basically have to implement is Model, a subclass of torch.nn.Module.
    How it works is largely up to you; it mostly just needs a forward method that takes tensor
    inputs and produces a dictionary of tensor outputs, including the loss you use to train the
    model. As mentioned above, our model will consist of an embedding layer, a sequence encoder,
    and a feedforward network.
    '''
    '''
    One thing that may seem unusual is that we pass the embedder and the sequence encoder in as
    constructor parameters. This lets us experiment with different embedders and encoders without
    changing the model code.
    '''

    def __init__(self,
                 # The embedding layer is specified as an AllenNLP TextFieldEmbedder, which
                 # represents a general way of turning tokens into tensors. (Here we know we want
                 # to represent each unique word with a learned tensor, but using the general
                 # class lets us easily try different types of embeddings, e.g. ELMo.)
                 word_embeddings: TextFieldEmbedder,
                 # Similarly, the encoder is specified as a general Seq2SeqEncoder, even though we
                 # know we want to use an LSTM. Again, this makes it easy to try other sequence
                 # encoders, e.g. a Transformer.
                 encoder: Seq2SeqEncoder,
                 # Every AllenNLP model also needs a Vocabulary, which holds the namespaced
                 # mappings from tokens to indices and from labels to indices.
                 vocab: Vocabulary) -> None:
        # Note that we have to pass the vocab to the base class constructor.
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        # The feedforward layer is not passed in as a parameter; we construct it ourselves.
        # Note that it looks at the encoder to find the correct input dimension and at the
        # vocabulary (specifically, the label -> index mapping) to find the correct output dimension.
        self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                          out_features=vocab.get_vocab_size('labels'))
        # Finally, we also instantiate a CategoricalAccuracy metric, which we'll use to track
        # accuracy during each training and validation epoch.
        self.accuracy = CategoricalAccuracy()

    def forward(self,
                sentence: Dict[str, torch.Tensor],
                labels: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        # Next we need to implement forward, which is where the actual computation happens. Each
        # Instance in the dataset gets batched together with other Instances and fed into forward.
        # The forward method takes tensors as input, and their names should be the names of the
        # fields in the Instances. In this case we have a sentence field and (possibly) a labels
        # field, so we build our forward accordingly:

        # AllenNLP is designed to operate on batched inputs, but different input sequences have
        # different lengths. AllenNLP therefore pads the shorter inputs so the batch has a uniform
        # shape, which means our computation needs a mask to exclude the padding. Here we just use
        # the utility function get_text_field_mask, which returns a tensor of 0s and 1s
        # corresponding to the padded and unpadded positions.
        mask = get_text_field_mask(sentence)

        # We start by passing the sentence tensor (a sequence of token IDs for each sentence) to
        # the word_embeddings module, which converts each sentence into a sequence of embedded
        # tensors.
        embeddings = self.word_embeddings(sentence)

        # Next we pass the embedded tensors (and the mask) to the LSTM, which produces a sequence
        # of encoded outputs.
        encoder_out = self.encoder(embeddings, mask)

        # Finally, we pass each encoded output tensor to the feedforward layer to produce logits
        # corresponding to the various tags.
        tag_logits = self.hidden2tag(encoder_out)
        output = {"tag_logits": tag_logits}

        if labels is not None:
            self.accuracy(tag_logits, labels, mask)
            output["loss"] = sequence_cross_entropy_with_logits(tag_logits, labels, mask)

            logits_flat = tag_logits.view(-1, tag_logits.size(-1))
            # shape : (batch * sequence_length, num_classes)
            log_probs_flat = torch.nn.functional.log_softmax(logits_flat, dim=-1)
            # shape : (batch * max_len, 1)
            targets_flat = labels.view(-1, 1).long()
            negative_log_likelihood_flat = -torch.gather(log_probs_flat, dim=1, index=targets_flat)
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood_flat.view(*labels.size())
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood * mask.float()

            from allennlp.training.metrics import PearsonCorrelation
            self.m = PearsonCorrelation()
            self.m(predictions=negative_log_likelihood, gold_labels=labels.float())

        # As before, the labels are optional, since we might want to run this model to make
        # predictions on unlabeled data. If we do have labels, we use them to update our accuracy
        # metric and to compute the "loss" in the output.
        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {
            "accuracy": self.accuracy.get_metric(reset),
            "pearson_correlation": self.m.get_metric(reset)
        }
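# A minimal sketch of how LstmTagger might be constructed, assuming the AllenNLP 0.x-style API
# that this snippet is written against. The dimensions are illustrative, and `vocab` is assumed
# to have been built from the dataset beforehand.
import torch
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

EMBEDDING_DIM = 6  # illustrative sizes only
HIDDEN_DIM = 6

token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
model = LstmTagger(word_embeddings, lstm, vocab)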
class STSBRegressor(Model):
    """
    This ``Model`` implements a basic text regressor. After embedding the text into a text field,
    we will optionally encode the embeddings with a ``Seq2SeqEncoder``. The resulting sequence is
    pooled using a ``Seq2VecEncoder`` and then passed to a linear regression layer, which projects
    into a single value. If a ``Seq2SeqEncoder`` is not provided, we will pass the embedded text
    directly to the ``Seq2VecEncoder``.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the input text into a ``TextField``
    seq2seq_encoder : ``Seq2SeqEncoder``, optional (default=``None``)
        Optional Seq2Seq encoder layer for the input text.
    seq2vec_encoder : ``Seq2VecEncoder``
        Required Seq2Vec encoder layer. If `seq2seq_encoder` is provided, this encoder will pool
        its output. Otherwise, this encoder will operate directly on the output of the
        `text_field_embedder`.
    dropout : ``float``, optional (default = ``None``)
        Dropout percentage to use.
    scale : ``float``, optional (default = 1)
        The regression result is scaled to lie between 0 and ``scale``.
    label_namespace : ``str``, optional (default = "labels")
        Vocabulary namespace corresponding to labels. By default, we use the "labels" namespace.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        If provided, will be used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 seq2seq_encoder: Seq2SeqEncoder = None,
                 dropout: float = None,
                 scale: float = 1,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder

        if seq2seq_encoder:
            self._seq2seq_encoder = seq2seq_encoder
        else:
            self._seq2seq_encoder = None

        self._seq2vec_encoder = seq2vec_encoder
        # run the encoder separately on each sentence and concatenate the results
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim() * 2

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
            self._dropout_a = torch.nn.Dropout(dropout)
            self._dropout_b = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace
        self._num_labels = 1  # because we're running a regression task
        self._scale = scale
        self.__first = True

        self._mlp_dims = [self._classifier_input_dim] * 3
        self._mlp_layers = torch.nn.ModuleList()
        for i, j in zip(self._mlp_dims, self._mlp_dims[1:]):
            self._mlp_layers.append(torch.nn.Linear(i, j))
            self._mlp_layers.append(torch.nn.ReLU())
            if dropout:
                self._mlp_layers.append(torch.nn.Dropout(dropout))

        self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels)
        self._metric = PearsonCorrelation()
        self._similarity = torch.nn.CosineSimilarity()
        self._loss = torch.nn.MSELoss()
        initializer(self)

    def forward(self,  # type: ignore
                tokens_a: Dict[str, torch.LongTensor],
                tokens_b: Dict[str, torch.LongTensor],
                label: torch.IntTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor]
            From a ``TextField``
        label : torch.IntTensor, optional (default = None)
            From a ``LabelField``

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, 1)`` representing unnormalized log probabilities of
            the label.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        tokens = {
            "tokens_a": tokens_a["tokens_a"],
            "tokens_b": tokens_b["tokens_b"]
        }
        if self.__first:
            self.__first = False
            print("tokens: \n")
            print(tokens)

        # It is unclear why tokens_a and tokens_b each include keys named after the other.
        tokens_a = {"tokens_a": tokens_a["tokens_a"]}
        tokens_b = {"tokens_b": tokens_b["tokens_b"]}

        embedded_text = self._text_field_embedder(tokens)
        embedded_text_a = embedded_text["tokens_a"]  # TODO: check the shape for this
        mask_a = get_text_field_mask(tokens_a).float()
        embedded_text_b = embedded_text["tokens_b"]
        mask_b = get_text_field_mask(tokens_b).float()

        if self._seq2seq_encoder:
            embedded_text_a = self._seq2seq_encoder(embedded_text_a, mask=mask_a)
            embedded_text_b = self._seq2seq_encoder(embedded_text_b, mask=mask_b)

        embedded_text_a = self._seq2vec_encoder(embedded_text_a, mask=mask_a)
        embedded_text_b = self._seq2vec_encoder(embedded_text_b, mask=mask_b)
        # embedded_text = torch.cat([embedded_text_a, embedded_text_b], dim=-1)

        if self._dropout:
            embedded_text_a = self._dropout_a(embedded_text_a)
            embedded_text_b = self._dropout_b(embedded_text_b)

        '''
        if self._mlp_layers:
            for l in self._mlp_layers:
                embedded_text = l(embedded_text)
        logits = self._classification_layer(embedded_text)
        '''
        logits = self._similarity(embedded_text_a, embedded_text_b) * 5
        output_dict = {"logits": logits}

        if label is not None:
            # convert the label into a float number and update the metric
            label_to_str = lambda l: self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(l)
            label_tensor = torch.tensor(
                [float(label_to_str(int(label[i]))) for i in range(label.shape[0])],
                device=logits.device,
                requires_grad=True)  # make sure loss.backward has something to update
            loss = self._loss(logits.view(-1), label_tensor)
            output_dict["loss"] = loss
            self._metric(logits, label_tensor)

        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the probabilities, converts the index to a string label, and
        adds a ``"label"`` key to the dictionary with the result.
        """
        # update this part to generate a float number result as a similarity score
        predictions = output_dict["logits"]
        if predictions.dim() == 2:
            predictions_list = [predictions[i] for i in range(predictions.shape[0])]
        else:
            predictions_list = [predictions]
        classes = []
        for prediction in predictions_list:
            label_idx = "{:.1f}".format(prediction.long())
            label_str = (self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(label_idx, str(label_idx)))
            classes.append(label_str)
        output_dict["label"] = classes
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = {'PearsonCorrelation': self._metric.get_metric(reset)}
        return metrics