def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             posclass_weight: Optional[float] = 1,
             use_power: Optional[bool] = False,
             dropout: Optional[float] = 0) -> None:
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    if use_power:
        self.classifier = torch.nn.Linear(
            in_features=encoder.get_output_dim() + 1,
            out_features=vocab.get_vocab_size('labels')
        )
    else:
        self.classifier = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels')
        )
    self.use_power = use_power
    self.f1_lie = F1Measure(vocab.get_token_index('False', 'labels'))
    self.f1_truth = F1Measure(vocab.get_token_index('True', 'labels'))
    self.micro_f1 = FBetaMeasure(average='micro')
    self.macro_f1 = FBetaMeasure(average='macro')
    weights = [1, 1]
    weights[vocab.get_token_index('False', 'labels')] = posclass_weight
    self.loss = torch.nn.CrossEntropyLoss(weight=torch.Tensor(weights))
    self.dropout = torch.nn.Dropout(dropout)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    seq2vec_encoder: Seq2VecEncoder,
    seq2seq_encoder: Seq2SeqEncoder = None,
    feedforward: Optional[FeedForward] = None,
    dropout: float = None,
    num_labels: int = None,
    label_namespace: str = "labels",
    loss: str = None,  # focal_loss
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder

    if seq2seq_encoder:
        self._seq2seq_encoder = seq2seq_encoder
    else:
        self._seq2seq_encoder = None

    self._seq2vec_encoder = seq2vec_encoder
    self._feedforward = feedforward
    if feedforward is not None:
        self._classifier_input_dim = self._feedforward.get_output_dim()
    else:
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None

    self._label_namespace = label_namespace

    if num_labels:
        self._num_labels = num_labels
    else:
        self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)
    self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels)
    self._accuracy = CategoricalAccuracy()
    if loss is None:
        self._loss = torch.nn.CrossEntropyLoss()
    elif loss == 'focal_loss':
        self._loss = FocalLoss(alpha=0.25, num_classes=self._num_labels)  # focal loss
    elif loss == 'cross_entropy_loss':
        self._loss = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError('wrong loss type')
    self._f1_measure = FBetaMeasure()
    initializer(self)
class SimpleClassifier(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
        self.accuracy = CategoricalAccuracy()
        self.macrof1 = FBetaMeasure(average='macro')
        self.microf1 = FBetaMeasure(average='micro')
        self.weightedf1 = FBetaMeasure(average='weighted')

    def forward(self,
                text: Dict[str, torch.Tensor],
                label: torch.Tensor) -> Dict[str, torch.Tensor]:
        # Shape: (batch_size, num_tokens, embedding_dim)
        embedded_text = self.embedder(text)
        # Shape: (batch_size, num_tokens)
        mask = util.get_text_field_mask(text)
        # Shape: (batch_size, encoding_dim)
        encoded_text = self.encoder(embedded_text, mask)
        # Shape: (batch_size, num_labels)
        logits = self.classifier(encoded_text)
        self.accuracy(logits, label)
        self.macrof1(logits, label)
        self.microf1(logits, label)
        self.weightedf1(logits, label)
        # Shape: (batch_size, num_labels)
        probs = torch.nn.functional.softmax(logits, dim=-1)
        # Shape: (1,)
        loss = torch.nn.functional.cross_entropy(logits, label)
        return {'loss': loss, 'probs': probs}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result_macro = self.macrof1.get_metric(reset)
        result_micro = self.microf1.get_metric(reset)
        result_weighted = self.weightedf1.get_metric(reset)
        return {
            "accuracy": self.accuracy.get_metric(reset),
            "macrof1_precision": result_macro["precision"],
            "macrof1_recall": result_macro["recall"],
            "macrof1_fscore": result_macro["fscore"],
            "microf1_precision": result_micro["precision"],
            "microf1_recall": result_micro["recall"],
            "microf1_fscore": result_micro["fscore"],
            "weightedf1_precision": result_weighted["precision"],
            "weightedf1_recall": result_weighted["recall"],
            "weightedf1_fscore": result_weighted["fscore"],
        }
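# Illustrative sketch (not part of the snippet above): one way SimpleClassifier could be
# wired together with a toy vocabulary. The embedding size, the bag-of-embeddings encoder,
# and the example tokens/labels are assumptions chosen only for demonstration.
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder

vocab = Vocabulary()
vocab.add_tokens_to_namespace(["the", "movie", "was", "great"], "tokens")
vocab.add_tokens_to_namespace(["pos", "neg"], "labels")

embedder = BasicTextFieldEmbedder(
    {"tokens": Embedding(embedding_dim=16, num_embeddings=vocab.get_vocab_size("tokens"))}
)
encoder = BagOfEmbeddingsEncoder(embedding_dim=16)
model = SimpleClassifier(vocab, embedder, encoder)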
def __init__(
    self,
    vocab: Vocabulary,
    vocab_path: str = None,
    config_path: str = None,
    model_path: str = None,
    dropout: float = None,
    label_namespace: str = "labels",
    num_labels: int = None,
    loss: str = None,  # focal_loss
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    config, tokenizer, model = get_albert_total(config_path, vocab_path, model_path)
    self._bert = model
    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None

    self._label_namespace = label_namespace

    if num_labels:
        self._num_labels = num_labels
    else:
        self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)
    self._classification_layer = torch.nn.Linear(config.hidden_size, config.num_labels)
    self._accuracy = CategoricalAccuracy()
    if loss is None:
        self._loss = torch.nn.CrossEntropyLoss()
    elif loss == 'focal_loss':
        self._loss = FocalLoss(alpha=0.25, num_classes=self._num_labels)  # focal loss
    elif loss == 'cross_entropy_loss':
        self._loss = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError('wrong loss type')
    self._f1_measure = FBetaMeasure()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, AutoModel],
    embedding_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    label_smoothing: float = None,
    ignore_span_metric: bool = False,
    srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
    restrict_frames: bool = False,
    restrict_roles: bool = False,
    inventory: str = "verbatlas",
    **kwargs,
) -> None:
    # bypass SrlBert constructor
    Model.__init__(self, vocab, **kwargs)
    self.lemma_frame_dict = load_lemma_frame(LEMMA_FRAME_PATH)
    self.frame_role_dict = load_role_frame(FRAME_ROLE_PATH)
    self.restrict_frames = restrict_frames
    self.restrict_roles = restrict_roles
    self.transformer = AutoModel.from_pretrained(bert_model)
    self.frame_criterion = nn.CrossEntropyLoss()
    if inventory == "verbatlas":
        # add missing labels
        frame_list = load_label_list(FRAME_LIST_PATH)
        self.vocab.add_tokens_to_namespace(frame_list, "frames_labels")
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.frame_num_classes = self.vocab.get_vocab_size("frames_labels")
    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None
    self.f1_frame_metric = FBetaMeasure(average="micro")
    self.tag_projection_layer = nn.Linear(self.transformer.config.hidden_size, self.num_classes)
    self.frame_projection_layer = nn.Linear(
        self.transformer.config.hidden_size, self.frame_num_classes
    )
    self.embedding_dropout = nn.Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    model_name: Union[str, AutoModel],
    embedding_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    label_smoothing: float = None,
    ignore_span_metric: bool = False,
    srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
    restrict_frames: bool = False,
    restrict_roles: bool = False,
    **kwargs,
) -> None:
    # bypass SrlBert constructor
    Model.__init__(self, vocab, **kwargs)
    self.lemma_frame_dict = load_lemma_frame(LEMMA_FRAME_PATH)
    self.frame_role_dict = load_role_frame(FRAME_ROLE_PATH)
    self.restrict_frames = restrict_frames
    self.restrict_roles = restrict_roles
    if isinstance(model_name, str):
        self.transformer = AutoModel.from_pretrained(model_name)
    else:
        self.transformer = model_name
    # loss
    self.role_criterion = nn.CrossEntropyLoss(ignore_index=0)
    self.frame_criterion = nn.CrossEntropyLoss()
    # number of classes
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.frame_num_classes = self.vocab.get_vocab_size("frames_labels")
    # metrics
    role_set = self.vocab.get_token_to_index_vocabulary("labels")
    role_set_filter = [v for k, v in role_set.items() if k != "O"]
    self.f1_role_metric = FBetaMeasure(average="micro", labels=role_set_filter)
    self.f1_frame_metric = FBetaMeasure(average="micro")
    # output layer
    self.tag_projection_layer = nn.Linear(self.transformer.config.hidden_size, self.num_classes)
    self.frame_projection_layer = nn.Linear(
        self.transformer.config.hidden_size, self.frame_num_classes
    )
    self.embedding_dropout = nn.Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
    self.macrof1 = FBetaMeasure(average='macro')
    self.microf1 = FBetaMeasure(average='micro')
    self.weightedf1 = FBetaMeasure(average='weighted')
class TransformerSrlSpan(SrlBert):
    """
    # Parameters

    vocab : `Vocabulary`, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    model : `Union[str, AutoModel]`, required.
        A string describing the BERT model to load or an already constructed AutoModel.
    initializer : `InitializerApplicator`, optional (default=`InitializerApplicator()`)
        Used to initialize the model parameters.
    label_smoothing : `float`, optional (default = `0.0`)
        Whether or not to use label smoothing on the labels when computing cross entropy loss.
    ignore_span_metric : `bool`, optional (default = `False`)
        Whether to calculate span loss, which is irrelevant when predicting BIO for Open Information Extraction.
    srl_eval_path : `str`, optional (default=`DEFAULT_SRL_EVAL_PATH`)
        The path to the srl-eval.pl script. By default, will use the srl-eval.pl included with allennlp,
        which is located at allennlp/tools/srl-eval.pl . If `None`, srl-eval.pl is not used.
    """

    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, AutoModel],
        embedding_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        inventory: str = "verbatlas",
        **kwargs,
    ) -> None:
        # bypass SrlBert constructor
        Model.__init__(self, vocab, **kwargs)
        self.transformer = AutoModel.from_pretrained(bert_model)
        self.frame_criterion = nn.CrossEntropyLoss()
        if inventory == "verbatlas":
            # add missing frame labels
            frame_list = load_label_list(FRAME_LIST_PATH)
            self.vocab.add_tokens_to_namespace(frame_list, "frames_labels")
            # add missing role labels
            role_list = load_label_list(ROLE_LIST_PATH)
            self.vocab.add_tokens_to_namespace(role_list, "labels")
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.frame_num_classes = self.vocab.get_vocab_size("frames_labels")
        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
        else:
            self.span_metric = None
        self.f1_frame_metric = FBetaMeasure(average="micro")
        self.tag_projection_layer = nn.Linear(self.transformer.config.hidden_size, self.num_classes)
        self.frame_projection_layer = nn.Linear(
            self.transformer.config.hidden_size, self.frame_num_classes
        )
        self.embedding_dropout = nn.Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric
        initializer(self)

    def forward(  # type: ignore
        self,
        tokens: TextFieldTensors,
        verb_indicator: torch.Tensor,
        frame_indicator: torch.Tensor,
        metadata: List[Any],
        tags: torch.LongTensor = None,
        frame_tags: torch.LongTensor = None,
    ):
        """
        # Parameters

        tokens : `TextFieldTensors`, required
            The output of `TextField.as_array()`, which should typically be passed directly to a
            `TextFieldEmbedder`. For this model, this must be a `SingleIdTokenIndexer` which
            indexes wordpieces from the BERT vocabulary.
        verb_indicator : `torch.LongTensor`, required.
            An integer `SequenceFeatureField` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        frame_indicator : `torch.LongTensor`, required.
            An integer `SequenceFeatureField` representation of the position of the frame
            in the sentence. This should have shape (batch_size, num_tokens). Similar to
            verb_indicator, but handles the BERT wordpiece tokenizer by considering only
            the first subtoken of a frame.
        tags : `torch.LongTensor`, optional (default = `None`)
            A torch tensor representing the sequence of integer gold class labels
            of shape `(batch_size, num_tokens)`
        frame_tags : `torch.LongTensor`, optional (default = `None`)
            A torch tensor representing the gold frames of shape `(batch_size, num_tokens)`
        metadata : `List[Dict[str, Any]]`, optional, (default = `None`)
            metadata containing the original words in the sentence, the verb to compute the
            frame for, and start offsets for converting wordpieces back to a sequence of words,
            under 'words', 'verb' and 'offsets' keys, respectively.

        # Returns

        An output dictionary consisting of:

        logits : `torch.FloatTensor`
            A tensor of shape `(batch_size, num_tokens, tag_vocab_size)` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : `torch.FloatTensor`
            A tensor of shape `(batch_size, num_tokens, tag_vocab_size)` representing
            a distribution of the tag classes per word.
        loss : `torch.FloatTensor`, optional
            A scalar loss to be optimised.
        """
        mask = get_text_field_mask(tokens)
        input_ids = util.get_token_ids_from_text_field_tensors(tokens)
        bert_embeddings, _ = self.transformer(
            input_ids=input_ids,
            token_type_ids=verb_indicator,
            attention_mask=mask,
            return_dict=False,
        )
        # extract embeddings
        embedded_text_input = self.embedding_dropout(bert_embeddings)
        frame_embeddings = embedded_text_input[frame_indicator == 1]
        # get sizes
        batch_size, sequence_length, _ = embedded_text_input.size()
        # outputs
        logits = self.tag_projection_layer(embedded_text_input)
        frame_logits = self.frame_projection_layer(frame_embeddings)

        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
            [batch_size, sequence_length, self.num_classes]
        )
        frame_probabilities = F.softmax(frame_logits, dim=-1)
        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.make_output_human_readable.
        output_dict = {
            "logits": logits,
            "frame_logits": frame_logits,
            "class_probabilities": class_probabilities,
            "frame_probabilities": frame_probabilities,
            "mask": mask,
        }
        # We add in the offsets here so we can compute the un-wordpieced tags.
        words, verbs, offsets = zip(*[(x["words"], x["verb"], x["offsets"]) for x in metadata])
        lemmas = [l for x in metadata for l in x["lemmas"]]
        output_dict["words"] = list(words)
        output_dict["lemma"] = list(lemmas)
        output_dict["verb"] = list(verbs)
        output_dict["wordpiece_offsets"] = list(offsets)

        if tags is not None:
            # compute role loss
            role_loss = sequence_cross_entropy_with_logits(
                logits, tags, mask, label_smoothing=self._label_smoothing
            )
            # compute frame loss
            frame_tags_filtered = frame_tags[frame_indicator == 1]
            frame_loss = self.frame_criterion(frame_logits, frame_tags_filtered)
            if not self.ignore_span_metric and self.span_metric is not None and not self.training:
                batch_verb_indices = [
                    example_metadata["verb_index"] for example_metadata in metadata
                ]
                batch_sentences = [example_metadata["words"] for example_metadata in metadata]
                # Get the BIO tags from make_output_human_readable()
                batch_bio_predicted_tags = self.make_output_human_readable(output_dict).pop("tags")
                from allennlp_models.structured_prediction.models.srl import (
                    convert_bio_tags_to_conll_format,
                )

                batch_conll_predicted_tags = [
                    convert_bio_tags_to_conll_format(tags) for tags in batch_bio_predicted_tags
                ]
                batch_bio_gold_tags = [
                    example_metadata["gold_tags"] for example_metadata in metadata
                ]
                batch_conll_gold_tags = [
                    convert_bio_tags_to_conll_format(tags) for tags in batch_bio_gold_tags
                ]
                self.span_metric(
                    batch_verb_indices,
                    batch_sentences,
                    batch_conll_predicted_tags,
                    batch_conll_gold_tags,
                )
            self.f1_frame_metric(frame_logits, frame_tags_filtered)
            output_dict["frame_loss"] = frame_loss
            output_dict["role_loss"] = role_loss
            output_dict["loss"] = (role_loss + frame_loss) / 2
        return output_dict

    def decode_frames(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # frame prediction
        frame_probabilities = output_dict["frame_probabilities"]
        frame_predictions = frame_probabilities.argmax(dim=-1).cpu().data.numpy()
        output_dict["frame_tags"] = [
            self.vocab.get_token_from_index(f, namespace="frames_labels") for f in frame_predictions
        ]
        output_dict["frame_scores"] = [
            fp[f] for f, fp in zip(frame_predictions, frame_probabilities)
        ]
        return output_dict

    @overrides
    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        output_dict = self.decode_frames(output_dict)
        output_dict = super().make_output_human_readable(output_dict)
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False):
        if self.ignore_span_metric:
            # Return an empty dictionary if ignoring the
            # span metric
            return {}
        else:
            metric_dict = self.span_metric.get_metric(reset=reset)
            frame_metric_dict = self.f1_frame_metric.get_metric(reset=reset)
            # This can be a lot of metrics, as there are 3 per class.
            # we only really care about the overall metrics, so we filter for them here.
            metric_dict_filtered = {
                x.split("-")[0] + "_role": y for x, y in metric_dict.items() if "overall" in x
            }
            frame_metric_dict = {x + "_frame": y for x, y in frame_metric_dict.items()}
            return {**metric_dict_filtered, **frame_metric_dict}

    def _get_label_tokens(self, namespace: str = "labels"):
        return self.vocab.get_token_to_index_vocabulary(namespace).keys()

    def _get_label_ids(self, namespace: str = "labels"):
        return self.vocab.get_index_to_token_vocabulary(namespace).keys()

    default_predictor = "transformer_srl"
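# Illustrative sketch (toy sizes are assumptions, not from the snippet above): how the
# `embedded_text_input[frame_indicator == 1]` indexing in forward() selects exactly one
# hidden vector per marked predicate before it is fed to frame_projection_layer.
import torch

embedded_text_input = torch.randn(2, 6, 4)             # (batch_size, num_tokens, hidden_size)
frame_indicator = torch.tensor([[0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0]])    # one marked predicate per sentence
frame_embeddings = embedded_text_input[frame_indicator == 1]
print(frame_embeddings.shape)                           # torch.Size([2, 4]): one row per predicate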
class BasicClassifierF1(Model):
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2vec_encoder: Seq2VecEncoder,
        seq2seq_encoder: Seq2SeqEncoder = None,
        dropout: float = None,
        num_labels: int = None,
        label_namespace: str = "labels",
        loss: str = None,  # focal_loss
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder

        if seq2seq_encoder:
            self._seq2seq_encoder = seq2seq_encoder
        else:
            self._seq2seq_encoder = None

        self._seq2vec_encoder = seq2vec_encoder
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace

        if num_labels:
            self._num_labels = num_labels
        else:
            self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)
        self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels)
        self._accuracy = CategoricalAccuracy()
        if loss is None:
            self._loss = torch.nn.CrossEntropyLoss()
        elif loss == 'focal_loss':
            self._loss = FocalLoss(alpha=0.25, num_classes=self._num_labels)  # focal loss
        elif loss == 'cross_entropy_loss':
            self._loss = torch.nn.CrossEntropyLoss()
        else:
            raise ValueError('wrong loss type')
        self._f1_measure = FBetaMeasure()
        initializer(self)

    def forward(  # type: ignore
        self,
        tokens: Dict[str, torch.LongTensor],
        label: torch.IntTensor = None,
    ) -> Dict[str, torch.Tensor]:
        embedded_text = self._text_field_embedder(tokens)
        mask = get_text_field_mask(tokens).float()

        if self._seq2seq_encoder:
            embedded_text = self._seq2seq_encoder(embedded_text, mask=mask)

        embedded_text = self._seq2vec_encoder(embedded_text, mask=mask)

        if self._dropout:
            embedded_text = self._dropout(embedded_text)

        logits = self._classification_layer(embedded_text)
        probs = F.softmax(logits, dim=-1)

        output_dict = {"logits": logits, "probs": probs}

        if label is not None:
            loss = self._loss(logits, label.long().view(-1))
            output_dict["loss"] = loss
            self._accuracy(logits, label)
            self._f1_measure(logits, label)

        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the probabilities, converts the index to a string label,
        and adds a ``"label"`` key to the dictionary with the result.
        """
        predictions = output_dict["probs"]
        if predictions.dim() == 2:
            predictions_list = [predictions[i] for i in range(predictions.shape[0])]
        else:
            predictions_list = [predictions]
        labels = []
        for prediction in predictions_list:
            label_idx = prediction.argmax(dim=-1).item()
            label_str = self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(label_idx, str(label_idx))
            labels.append(label_str)
        output_dict["label"] = labels
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        f1_dict = self._f1_measure.get_metric(reset)
        output = {'accuracy': self._accuracy.get_metric(reset=reset)}
        counter = 0
        for precision, recall, fscore in zip(f1_dict['precision'],
                                             f1_dict['recall'],
                                             f1_dict['fscore']):
            output[str(counter) + '_precision'] = precision
            output[str(counter) + '_recall'] = recall
            output[str(counter) + '_fscore'] = fscore
            counter += 1
        return output
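# Illustrative sketch: with no `average` argument, FBetaMeasure (as used for self._f1_measure
# above) returns per-class lists, which get_metrics() then flattens into keys like
# "0_precision", "0_recall", "0_fscore". The toy logits and labels below are assumptions
# for demonstration only; printed values are approximate.
import torch
from allennlp.training.metrics import FBetaMeasure

metric = FBetaMeasure()                                  # per-class precision/recall/fscore
logits = torch.tensor([[2.0, 0.1], [0.2, 1.5], [1.2, 0.3]])
labels = torch.tensor([0, 1, 1])
metric(logits, labels)
print(metric.get_metric(reset=True))
# e.g. {'precision': [0.5, 1.0], 'recall': [1.0, 0.5], 'fscore': [~0.67, ~0.67]}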
def __init__(
    self,
    vocab: Vocabulary,
    task_type: str,
    model_type: str,
    random_init_bert: bool,  # set True to shuffle the BERT encoder and get a random init
    initializer: InitializerApplicator = InitializerApplicator(),
) -> None:
    super().__init__(vocab)
    assert task_type in ["unary", "binary"]  # unary or binary edges
    assert model_type in ["clf", "reg"]      # classification or regression
    self.task_type = task_type
    self.model_type = model_type

    mix_params = None
    if self.task_type == "binary":
        # for binary tasks train two separate mixes
        self.bert_embedder = PretrainedBertEmbedderSplitMix(
            BERT_MODEL_NAME,
            requires_grad=False,
            top_layer_only=False,
            scalar_mix_parameters=mix_params)
    else:
        # for the unary task train a single mix
        self.bert_embedder = PretrainedBertEmbedder(
            BERT_MODEL_NAME,
            requires_grad=False,
            top_layer_only=False,
            scalar_mix_parameters=mix_params)
    if random_init_bert:
        self.bert_embedder.bert_model.apply(init_weights)

    self.vocab = vocab
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.num_classes = self.num_classes if self.num_classes > 0 else 1

    self.span_projection_dim = self.bert_embedder.output_dim
    # represent each span by its first wordpiece token
    self.span_extractor = EndpointSpanExtractor(self.span_projection_dim, combination="x")

    if self.task_type == "binary":
        clf_input_dim = self.span_projection_dim * 2
    else:
        clf_input_dim = self.span_projection_dim
    self.classifier = Linear(clf_input_dim, self.num_classes)  # just a linear tag projection layer

    if self.model_type == "clf":
        self.loss = torch.nn.CrossEntropyLoss()  # cross-entropy for classification
    else:
        self.loss = torch.nn.SmoothL1Loss()      # smooth L1 for regression

    self.m_acc = CategoricalAccuracy()
    self.m_fmicro = FBetaMeasure(average="micro")
    self.mse = MeanSquaredError()
    initializer(self)
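# Illustrative sketch: with combination="x", the EndpointSpanExtractor configured above
# represents each span by its start endpoint only (here, the span's first wordpiece).
# The dimensions and span indices below are assumptions for demonstration.
import torch
from allennlp.modules.span_extractors import EndpointSpanExtractor

extractor = EndpointSpanExtractor(input_dim=8, combination="x")
sequence = torch.randn(2, 5, 8)                 # (batch_size, num_tokens, dim)
spans = torch.tensor([[[0, 2]], [[1, 4]]])      # (batch_size, num_spans, 2), inclusive indices
span_reps = extractor(sequence, spans)
print(span_reps.shape)                          # torch.Size([2, 1, 8])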
class TransformerSrlDependency(Model):
    """
    # Parameters

    vocab : `Vocabulary`, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    model : `Union[str, AutoModel]`, required.
        A string describing the BERT model to load or an already constructed AutoModel.
    initializer : `InitializerApplicator`, optional (default=`InitializerApplicator()`)
        Used to initialize the model parameters.
    label_smoothing : `float`, optional (default = `0.0`)
        Whether or not to use label smoothing on the labels when computing cross entropy loss.
    ignore_span_metric : `bool`, optional (default = `False`)
        Whether to calculate span loss, which is irrelevant when predicting BIO for Open Information Extraction.
    srl_eval_path : `str`, optional (default=`DEFAULT_SRL_EVAL_PATH`)
        The path to the srl-eval.pl script. By default, will use the srl-eval.pl included with allennlp,
        which is located at allennlp/tools/srl-eval.pl . If `None`, srl-eval.pl is not used.
    """

    def __init__(
        self,
        vocab: Vocabulary,
        model_name: Union[str, AutoModel],
        embedding_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        restrict_frames: bool = False,
        restrict_roles: bool = False,
        **kwargs,
    ) -> None:
        # bypass SrlBert constructor
        Model.__init__(self, vocab, **kwargs)
        self.lemma_frame_dict = load_lemma_frame(LEMMA_FRAME_PATH)
        self.frame_role_dict = load_role_frame(FRAME_ROLE_PATH)
        self.restrict_frames = restrict_frames
        self.restrict_roles = restrict_roles
        if isinstance(model_name, str):
            self.transformer = AutoModel.from_pretrained(model_name)
        else:
            self.transformer = model_name
        # loss
        self.role_criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.frame_criterion = nn.CrossEntropyLoss()
        # number of classes
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.frame_num_classes = self.vocab.get_vocab_size("frames_labels")
        # metrics
        role_set = self.vocab.get_token_to_index_vocabulary("labels")
        role_set_filter = [v for k, v in role_set.items() if k != "O"]
        self.f1_role_metric = FBetaMeasure(average="micro", labels=role_set_filter)
        self.f1_frame_metric = FBetaMeasure(average="micro")
        # output layer
        self.tag_projection_layer = nn.Linear(self.transformer.config.hidden_size, self.num_classes)
        self.frame_projection_layer = nn.Linear(
            self.transformer.config.hidden_size, self.frame_num_classes
        )
        self.embedding_dropout = nn.Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        initializer(self)

    def forward(  # type: ignore
        self,
        tokens: TextFieldTensors,
        verb_indicator: torch.Tensor,
        frame_indicator: torch.Tensor,
        metadata: List[Any],
        tags: torch.LongTensor = None,
        frame_tags: torch.LongTensor = None,
    ):
        """
        # Parameters

        tokens : `TextFieldTensors`, required
            The output of `TextField.as_array()`, which should typically be passed directly to a
            `TextFieldEmbedder`. For this model, this must be a `SingleIdTokenIndexer` which
            indexes wordpieces from the BERT vocabulary.
        verb_indicator : `torch.LongTensor`, required.
            An integer `SequenceFeatureField` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        tags : `torch.LongTensor`, optional (default = `None`)
            A torch tensor representing the sequence of integer gold class labels
            of shape `(batch_size, num_tokens)`
        frame_tags : `torch.LongTensor`, optional (default = `None`)
            A torch tensor representing the gold frames of shape `(batch_size, num_tokens)`
        metadata : `List[Dict[str, Any]]`, optional, (default = `None`)
            metadata containing the original words in the sentence, the verb to compute the
            frame for, and start offsets for converting wordpieces back to a sequence of words,
            under 'words', 'verb' and 'offsets' keys, respectively.

        # Returns

        An output dictionary consisting of:

        logits : `torch.FloatTensor`
            A tensor of shape `(batch_size, num_tokens, tag_vocab_size)` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : `torch.FloatTensor`
            A tensor of shape `(batch_size, num_tokens, tag_vocab_size)` representing
            a distribution of the tag classes per word.
        loss : `torch.FloatTensor`, optional
            A scalar loss to be optimised.
        """
        mask = get_text_field_mask(tokens)
        bert_embeddings, _ = self.transformer(
            input_ids=util.get_token_ids_from_text_field_tensors(tokens),
            token_type_ids=verb_indicator,
            attention_mask=mask,
        )
        # extract embeddings
        embedded_text_input = self.embedding_dropout(bert_embeddings)
        frame_embeddings = embedded_text_input[frame_indicator == 1]
        # get sizes
        batch_size, sequence_length, _ = embedded_text_input.size()
        # outputs
        logits = self.tag_projection_layer(embedded_text_input)
        frame_logits = self.frame_projection_layer(frame_embeddings)

        reshaped_log_probs = logits.view(-1, self.num_classes)
        role_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
            [batch_size, sequence_length, self.num_classes]
        )
        frame_probabilities = F.softmax(frame_logits, dim=-1)
        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.make_output_human_readable.
        output_dict = {
            "logits": logits,
            "frame_logits": frame_logits,
            "role_probabilities": role_probabilities,
            "frame_probabilities": frame_probabilities,
            "mask": mask,
        }
        # We add in the offsets here so we can compute the un-wordpieced tags.
        words, verbs = zip(*[(x["words"], x["verb"]) for x in metadata])
        lemmas = [l for x in metadata for l in x["lemmas"]]
        output_dict["words"] = list(words)
        output_dict["verb"] = list(verbs)
        output_dict["lemma"] = list(lemmas)

        if tags is not None:
            # compute role loss
            # role_loss = sequence_cross_entropy_with_logits(
            #     logits, tags, mask, label_smoothing=self._label_smoothing
            # )
            role_loss = self.role_criterion(logits.view(-1, self.num_classes), tags.view(-1))
            # compute frame loss
            frame_tags_filtered = frame_tags[frame_indicator == 1]
            frame_loss = self.frame_criterion(frame_logits, frame_tags_filtered)
            self.f1_role_metric(role_probabilities, tags)
            self.f1_frame_metric(frame_logits, frame_tags_filtered)
            output_dict["frame_loss"] = frame_loss
            output_dict["role_loss"] = role_loss
            output_dict["loss"] = (role_loss + frame_loss) / 2
        return output_dict

    def decode_frames(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # frame prediction
        frame_probabilities = output_dict["frame_probabilities"]
        if self.restrict_frames:
            frame_probabilities = frame_probabilities.cpu().data.numpy()
            lemmas = output_dict["lemma"]
            candidate_labels = [self.lemma_frame_dict.get(l, []) for l in lemmas]
            # clear candidates from unknowns
            label_set = set(k for k in self._get_label_tokens("frames_labels"))
            candidate_labels_ids = [
                [
                    self.vocab.get_token_index(l, namespace="frames_labels")
                    for l in cl
                    if l in label_set
                ]
                for cl in candidate_labels
            ]
            frame_predictions = []
            for cl, fp in zip(candidate_labels_ids, frame_probabilities):
                # restrict candidates from verbatlas inventory
                fp_candidates = np.take(fp, cl)
                if fp_candidates.size > 0:
                    frame_predictions.append(cl[fp_candidates.argmax(axis=-1)])
                else:
                    frame_predictions.append(fp.argmax(axis=-1))
        else:
            frame_predictions = frame_probabilities.argmax(dim=-1).cpu().data.numpy()

        output_dict["frame_tags"] = [
            self.vocab.get_token_from_index(f, namespace="frames_labels") for f in frame_predictions
        ]
        output_dict["frame_scores"] = [
            fp[f] for f, fp in zip(frame_predictions, frame_probabilities)
        ]
        return output_dict

    @overrides
    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor], restrict: bool = True
    ) -> Dict[str, torch.Tensor]:
        output_dict = self.decode_frames(output_dict)
        # if self.restrict:
        #     output_dict = self._mask_args(output_dict)
        # output_dict = super().make_output_human_readable(output_dict)
        roles_probabilities = output_dict["role_probabilities"]
        roles_predictions = roles_probabilities.argmax(dim=-1).cpu().data.numpy()
        output_dict["tags"] = [
            [self.vocab.get_token_from_index(r, namespace="labels") for r in roles]
            for roles in roles_predictions
        ]
        return output_dict

    def _mask_args(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        class_probs = output_dict["class_probabilities"]
        device = get_device_of(class_probs)
        lemmas = output_dict["lemma"]
        frames = output_dict["frame_tags"]
        candidate_mask = torch.ones_like(class_probs, dtype=torch.bool).to(device)
        for i, (l, f) in enumerate(zip(lemmas, frames)):
            candidates = self.frame_role_dict.get((l, f), [])
            if candidates:
                candidate_ids = [
                    self.vocab.get_token_index(r, namespace="labels") for r in candidates
                ]
                candidate_ids = torch.tensor(candidate_ids).to(device)
                candidate_ids = candidate_ids.repeat(candidate_mask.shape[1], 1)
                candidate_mask[i].scatter_(1, candidate_ids, False)
            else:
                candidate_mask[i].fill_(False)
        class_probs.masked_fill_(candidate_mask, 0)
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False):
        role_metric_dict = self.f1_role_metric.get_metric(reset=reset)
        frame_metric_dict = self.f1_frame_metric.get_metric(reset=reset)
        # This can be a lot of metrics, as there are 3 per class.
        # we only really care about the overall metrics, so we filter for them here.
        # metric_dict_filtered = {
        #     x.split("-")[0] + "_role": y for x, y in metric_dict.items() if "overall" in x
        # }
        metric_dict = {
            "f1_role": role_metric_dict["fscore"],
            "f1_frame": frame_metric_dict["fscore"],
        }
        return metric_dict

    def _get_label_tokens(self, namespace: str = "labels"):
        return self.vocab.get_token_to_index_vocabulary(namespace).keys()

    def _get_label_ids(self, namespace: str = "labels"):
        return self.vocab.get_index_to_token_vocabulary(namespace).keys()

    default_predictor = "transformer_srl"
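# Illustrative sketch of the flattened role loss used in forward() above:
# CrossEntropyLoss(ignore_index=0) skips positions whose gold tag index is 0, which this
# snippet appears to treat as the padding index (an assumption). Shapes and values below
# are toy assumptions for demonstration only.
import torch
import torch.nn as nn

num_classes = 5
logits = torch.randn(2, 3, num_classes)         # (batch_size, num_tokens, num_classes)
tags = torch.tensor([[1, 2, 0],
                     [3, 0, 0]])                # 0 = ignored positions
role_criterion = nn.CrossEntropyLoss(ignore_index=0)
role_loss = role_criterion(logits.view(-1, num_classes), tags.view(-1))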
class AlbertClassifierF1(Model):
    """
    Text classification model.
    """

    def __init__(
        self,
        vocab: Vocabulary,
        vocab_path: str = None,
        config_path: str = None,
        model_path: str = None,
        dropout: float = None,
        label_namespace: str = "labels",
        num_labels: int = None,
        loss: str = None,  # focal_loss
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)
        config, tokenizer, model = get_albert_total(config_path, vocab_path, model_path)
        self._bert = model
        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace

        if num_labels:
            self._num_labels = num_labels
        else:
            self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)
        self._classification_layer = torch.nn.Linear(config.hidden_size, config.num_labels)
        self._accuracy = CategoricalAccuracy()
        if loss is None:
            self._loss = torch.nn.CrossEntropyLoss()
        elif loss == 'focal_loss':
            self._loss = FocalLoss(alpha=0.25, num_classes=self._num_labels)  # focal loss
        elif loss == 'cross_entropy_loss':
            self._loss = torch.nn.CrossEntropyLoss()
        else:
            raise ValueError('wrong loss type')
        self._f1_measure = FBetaMeasure()
        initializer(self)

    def forward(  # type: ignore
        self,
        tokens: Dict[str, torch.LongTensor],
        label: torch.IntTensor = None,
    ) -> Dict[str, torch.Tensor]:
        # print(tokens)
        outputs = self._bert(tokens['bert'],
                             attention_mask=None,
                             token_type_ids=None,
                             position_ids=None,
                             head_mask=None)
        # use the pooled output; apply dropout only if it was configured
        embedded_text = outputs[1]
        if self._dropout:
            embedded_text = self._dropout(embedded_text)

        logits = self._classification_layer(embedded_text)
        probs = torch.nn.functional.softmax(logits, dim=-1)

        output_dict = {"logits": logits, "probs": probs}

        if label is not None:
            loss = self._loss(logits, label.long().view(-1))
            output_dict["loss"] = loss
            self._accuracy(logits, label)
            self._f1_measure(logits, label)

        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the probabilities, converts the index to a string label,
        and adds a ``"label"`` key to the dictionary with the result.
        """
        predictions = output_dict["probs"]
        if predictions.dim() == 2:
            predictions_list = [predictions[i] for i in range(predictions.shape[0])]
        else:
            predictions_list = [predictions]
        classes = []
        for prediction in predictions_list:
            label_idx = prediction.argmax(dim=-1).item()
            label_str = self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(label_idx, str(label_idx))
            classes.append(label_str)
        output_dict["label"] = classes
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        f1_dict = self._f1_measure.get_metric(reset)
        output = {}
        output['accuracy'] = self._accuracy.get_metric(reset=reset)
        counter = 0
        for precision, recall, fscore in zip(f1_dict['precision'],
                                             f1_dict['recall'],
                                             f1_dict['fscore']):
            output[str(counter) + '_precision'] = precision
            output[str(counter) + '_recall'] = recall
            output[str(counter) + '_fscore'] = fscore
            counter += 1
        return output
class LieDetector(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 posclass_weight: Optional[float] = 1,
                 use_power: Optional[bool] = False,
                 dropout: Optional[float] = 0) -> None:
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        if use_power:
            self.classifier = torch.nn.Linear(
                in_features=encoder.get_output_dim() + 1,
                out_features=vocab.get_vocab_size('labels')
            )
        else:
            self.classifier = torch.nn.Linear(
                in_features=encoder.get_output_dim(),
                out_features=vocab.get_vocab_size('labels')
            )
        self.use_power = use_power
        self.f1_lie = F1Measure(vocab.get_token_index('False', 'labels'))
        self.f1_truth = F1Measure(vocab.get_token_index('True', 'labels'))
        self.micro_f1 = FBetaMeasure(average='micro')
        self.macro_f1 = FBetaMeasure(average='macro')
        weights = [1, 1]
        weights[vocab.get_token_index('False', 'labels')] = posclass_weight
        self.loss = torch.nn.CrossEntropyLoss(weight=torch.Tensor(weights))
        self.dropout = torch.nn.Dropout(dropout)

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        lie_precision, lie_recall, lie_fscore = self.f1_lie.get_metric(reset)
        truth_precision, truth_recall, truth_fscore = self.f1_truth.get_metric(reset)
        micro_metrics = self.micro_f1.get_metric(reset)
        macro_metrics = self.macro_f1.get_metric(reset)
        return {
            'truth_precision': truth_precision,
            'truth_recall': truth_recall,
            'truth_fscore': truth_fscore,
            'lie_precision': lie_precision,
            'lie_recall': lie_recall,
            'lie_fscore': lie_fscore,
            'macro_fscore': macro_metrics['fscore'],
            'micro_precision': micro_metrics['precision'],
            'micro_recall': micro_metrics['recall'],
            'micro_fscore': micro_metrics['fscore'],
        }

    def forward(self,
                message: Dict[str, torch.Tensor],
                score_delta: torch.Tensor,
                label: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(message)
        embedded = self.embedder(message)
        # embedded = self._dropout(embedded)
        encoded = self.encoder(embedded, mask)
        if self.use_power:
            encoded = torch.cat((score_delta.view(-1, 1), encoded), 1)
        encoded = self.dropout(encoded)
        classified = self.classifier(encoded)

        output = {}
        output["logits"] = classified

        if label is not None:
            self.f1_lie(classified, label)
            self.f1_truth(classified, label)
            self.micro_f1(classified, label)
            self.macro_f1(classified, label)
            output["loss"] = self.loss(classified, label)

        return output
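# Illustrative sketch: when use_power is True, forward() above prepends the scalar power
# feature (score_delta) to the encoded message, which is why the classifier's in_features
# is encoder.get_output_dim() + 1. The sizes below are assumptions for demonstration.
import torch

encoded = torch.randn(4, 32)                             # (batch_size, encoder_output_dim)
score_delta = torch.tensor([1.0, -2.0, 0.0, 3.0])        # one power score per message
encoded_with_power = torch.cat((score_delta.view(-1, 1), encoded), dim=1)
print(encoded_with_power.shape)                          # torch.Size([4, 33])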