def test_squad_em_and_f1(self):
    metric = SquadEmAndF1()
    metric("this is the best span", ["this is a good span", "something irrelevant"])
    exact_match, f1_score = metric.get_metric()
    assert exact_match == 0.0
    assert f1_score == 0.75
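
# For intuition, the 0.75 above comes from SQuAD's token-level F1, which lowercases,
# strips punctuation, and drops articles ("a", "an", "the") before counting token overlap.
# A minimal standalone sketch (not the library's implementation; squad_f1 is an
# illustrative name):
import string
from collections import Counter

def squad_f1(prediction: str, gold: str) -> float:
    def normalize(text: str) -> list:
        text = "".join(ch for ch in text.lower() if ch not in string.punctuation)
        return [tok for tok in text.split() if tok not in ("a", "an", "the")]

    pred_tokens, gold_tokens = normalize(prediction), normalize(gold)
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)

# {this, is, span} overlap, 4 tokens on each side once "the"/"a" are dropped: F1 = 0.75.
assert squad_f1("this is the best span", "this is a good span") == 0.75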
def __init__(self, submodels: List[BidirectionalAttentionFlow]) -> None:
    vocab = submodels[0].vocab
    for submodel in submodels:
        if submodel.vocab != vocab:
            raise ConfigurationError("Vocabularies in ensemble differ")
    super().__init__(vocab, None)

    # Using ModuleList propagates calls to .eval() so dropout is disabled on the
    # submodels in evaluation and prediction.
    self.submodels = torch.nn.ModuleList(submodels)

    self._squad_metrics = SquadEmAndF1()
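
# A hypothetical usage sketch for the ensemble constructor above. The archive paths are
# placeholders and the enclosing class is assumed to be AllenNLP's BidafEnsemble (inferred
# from the signature); load_archive is the standard AllenNLP loading helper:
from allennlp.models.archival import load_archive

submodels = [load_archive(path).model for path in ("run1/model.tar.gz", "run2/model.tar.gz")]
ensemble = BidafEnsemble(submodels)  # raises ConfigurationError if the vocabularies differ
ensemble.eval()  # ModuleList propagates this call, disabling dropout in every submodel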
def __init__(
    self, vocab: Vocabulary, transformer_model_name: str = "bert-base-cased", **kwargs
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = BasicTextFieldEmbedder(
        {"tokens": PretrainedTransformerEmbedder(transformer_model_name)}
    )
    self._linear_layer = nn.Linear(self._text_field_embedder.get_output_dim(), 2)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._per_instance_metrics = SquadEmAndF1()
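
# Why nn.Linear(..., 2): each token gets one start logit and one end logit for span
# prediction. A minimal shape sketch with illustrative sizes (an assumption about the
# forward pass, which isn't shown here):
import torch
import torch.nn as nn

batch, seq_len, hidden_dim = 2, 16, 768             # illustrative sizes
linear_layer = nn.Linear(hidden_dim, 2)             # mirrors self._linear_layer
embedded = torch.randn(batch, seq_len, hidden_dim)  # stand-in for the embedder output
logits = linear_layer(embedded)                     # (batch, seq_len, 2)
span_start_logits = logits[..., 0]                  # (batch, seq_len)
span_end_logits = logits[..., 1]                    # (batch, seq_len)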
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention_layer: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    dropout_prob: float = 0.1,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)

    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim, bias=False)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim, bias=False)
    self._modeling_layer = modeling_layer
    self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._metrics = SquadEmAndF1()
    self._dropout = torch.nn.Dropout(p=dropout_prob) if dropout_prob > 0 else lambda x: x

    initializer(self)
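
# The * 2 on the span predictors' input reflects concatenating two passes of the shared
# modeling layer, following the QANet paper (start scores use [M0; M1], end scores use
# [M0; M2]). An illustrative shape check (an assumption about the forward pass, which
# isn't shown here):
import torch

batch, seq_len, modeling_out_dim = 2, 50, 128
m0 = torch.randn(batch, seq_len, modeling_out_dim)  # first modeling pass
m1 = torch.randn(batch, seq_len, modeling_out_dim)  # second modeling pass
span_start_input = torch.cat([m0, m1], dim=-1)      # (batch, seq_len, modeling_out_dim * 2)
span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
span_start_logits = span_start_predictor(span_start_input).squeeze(-1)  # (batch, seq_len)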
def test_distributed_squad_em_and_f1(self):
    best_span_string = ["this is the best span", "this is another span"]
    answer_strings = [
        ["this is a good span", "something irrelevant"],
        ["this is another span", "this one is less perfect"],
    ]
    metric_kwargs = {"best_span_string": best_span_string, "answer_strings": answer_strings}
    # The first prediction has EM 0 and F1 0.75 (see the test above); the second exactly
    # matches its first answer (EM 1, F1 1.0), so the averages are 1 / 2 and 1.75 / 2.
    desired_values = (1 / 2, 1.75 / 2)
    run_distributed_test(
        [-1, -1],
        global_distributed_metric,
        SquadEmAndF1(),
        metric_kwargs,
        desired_values,
        exact=True,
    )
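
# The desired values can be sanity-checked without the distributed harness; a sketch
# reusing the per-instance scores noted above (the metric keeps the best score over the
# candidate answers and averages across calls):
metric = SquadEmAndF1()
metric("this is the best span", ["this is a good span", "something irrelevant"])      # EM 0, F1 0.75
metric("this is another span", ["this is another span", "this one is less perfect"])  # EM 1, F1 1.0
exact_match, f1_score = metric.get_metric()
assert (exact_match, f1_score) == (1 / 2, 1.75 / 2)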
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    span_end_encoder: Seq2SeqEncoder,
    dropout: float = 0.2,
    mask_lstms: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers)
    )
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # BiDAF has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(
        modeling_layer.get_input_dim(),
        4 * encoding_dim,
        "modeling layer input dim",
        "4 * encoding dim",
    )
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        phrase_layer.get_input_dim(),
        "text field embedder output dim",
        "phrase layer input dim",
    )
    check_dimensions_match(
        span_end_encoder.get_input_dim(),
        4 * encoding_dim + 3 * modeling_dim,
        "span end encoder input dim",
        "4 * encoding dim + 3 * modeling dim",
    )

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
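
# Where span_start_input_dim = encoding_dim * 4 + modeling_dim comes from: per the BiDAF
# paper, the query-aware passage representation G has width 4 * encoding_dim and is
# concatenated with the modeling output M before scoring start positions. Illustrative
# shapes only (an assumption about the forward pass, which isn't shown here):
import torch

batch, passage_len, encoding_dim, modeling_dim = 2, 60, 100, 200
g = torch.randn(batch, passage_len, encoding_dim * 4)  # query-aware passage representation
m = torch.randn(batch, passage_len, modeling_dim)      # modeling layer output
span_start_input = torch.cat([g, m], dim=-1)           # (batch, passage_len, 4 * encoding_dim + modeling_dim)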