Example #1
File: elmo.py Project: vin-ivar/allennlp
    def __init__(
        self,
        options_file: str,
        weight_file: str,
        num_output_representations: int,
        requires_grad: bool = False,
        do_layer_norm: bool = False,
        dropout: float = 0.5,
        vocab_to_cache: List[str] = None,
        keep_sentence_boundaries: bool = False,
        scalar_mix_parameters: List[float] = None,
        module: torch.nn.Module = None,
    ) -> None:
        super().__init__()

        logger.info("Initializing ELMo")
        if module is not None:
            if options_file is not None or weight_file is not None:
                raise ConfigurationError("Don't provide options_file or weight_file with module")
            self._elmo_lstm = module
        else:
            self._elmo_lstm = _ElmoBiLm(
                options_file,
                weight_file,
                requires_grad=requires_grad,
                vocab_to_cache=vocab_to_cache,
            )
        self._has_cached_vocab = vocab_to_cache is not None
        self._keep_sentence_boundaries = keep_sentence_boundaries
        self._dropout = Dropout(p=dropout)
        self._scalar_mixes: Any = []
        for k in range(num_output_representations):
            scalar_mix = ScalarMix(
                self._elmo_lstm.num_layers,
                do_layer_norm=do_layer_norm,
                initial_scalar_parameters=scalar_mix_parameters,
                trainable=scalar_mix_parameters is None,
            )
            self.add_module("scalar_mix_{}".format(k), scalar_mix)
            self._scalar_mixes.append(scalar_mix)
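The constructor above wires one ScalarMix per requested output representation. A minimal usage sketch, assuming AllenNLP's public Elmo/batch_to_ids API and placeholder file paths:

from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = "elmo_options.json"   # placeholder path
weight_file = "elmo_weights.hdf5"    # placeholder path

# Two output representations -> two independent ScalarMix modules over the biLM layers.
elmo = Elmo(options_file, weight_file, num_output_representations=2, dropout=0.5)

sentences = [["The", "cat", "sat"], ["Dogs", "bark"]]
character_ids = batch_to_ids(sentences)          # (batch, num_tokens, 50) character ids
output = elmo(character_ids)
# output["elmo_representations"]: list of 2 tensors, each (batch, num_tokens, embedding_dim)
# output["mask"]: (batch, num_tokens)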
Example #2
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 binary_feature_dim: int,
                 embedding_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_smoothing: float = None,
                 ignore_span_metric: bool = False,
                 srl_eval_path: str = DEFAULT_SRL_EVAL_PATH) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path,
                                             ignore_classes=["V"])
        else:
            self.span_metric = None

        self.encoder = encoder
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_classes))
        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric

        check_dimensions_match(
            text_field_embedder.get_output_dim() + binary_feature_dim,
            encoder.get_input_dim(),
            "text embedding dim + verb indicator embedding dim",
            "encoder input dim")
        initializer(self)
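A hedged, plain-PyTorch sketch (illustrative sizes, not the model's actual forward pass) of why the check_dimensions_match call above holds: the verb-indicator embedding is concatenated onto each token embedding before entering the encoder:

import torch

batch, seq_len, text_dim, binary_dim = 2, 7, 100, 16
embedded_text = torch.randn(batch, seq_len, text_dim)           # output of the text_field_embedder
verb_indicator = torch.randint(0, 2, (batch, seq_len))          # 1 at the predicate position, else 0
binary_feature_embedding = torch.nn.Embedding(2, binary_dim)
embedded_verb = binary_feature_embedding(verb_indicator)        # (batch, seq_len, binary_dim)

encoder_input = torch.cat([embedded_text, embedded_verb], dim=-1)
assert encoder_input.size(-1) == text_dim + binary_dim          # encoder input dim must equal this sum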
Example #3
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            stacked_encoder: Seq2SeqEncoder,
            #######
            config_path: str,
            vocab_path: str,
            model_path: str,
            #########
            predicate_feature_dim: int,
            dim_hidden: int = 100,
            embedding_dropout: float = 0.0,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None):
        super(SpanDetector, self).__init__(vocab, regularizer)
        ##############
        _, _, model_bert = get_bert_total(config_path, vocab_path, model_path)
        self.bert = model_bert

        # self.bert = bert_load_state_dict(self.bert, torch.load("bert-base-uncased/pytorch_model.bin", map_location='cpu'))
        ###############
        self.dim_hidden = dim_hidden

        self.text_field_embedder = text_field_embedder
        self.predicate_feature_embedding = Embedding(
            2, predicate_feature_dim)  #100

        self.embedding_dropout = Dropout(p=embedding_dropout)

        self.threshold_metric = ThresholdMetric()

        self.stacked_encoder = stacked_encoder

        self.span_hidden = SpanRepAssembly(
            self.stacked_encoder.get_output_dim(),
            self.stacked_encoder.get_output_dim(), self.dim_hidden)
        self.pred = TimeDistributed(Linear(self.dim_hidden, 1))
Example #4
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 span_feedforward: FeedForward,
                 max_span_width: int,
                 span_width_feature_size: int,
                 label_namespace: str = "labels",
                 embedding_dropout: float = 0.2,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ConstitLabeler, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.embedding_dropout = Dropout(p=embedding_dropout)
        self.stacked_encoder = stacked_encoder

        if text_field_embedder.get_output_dim() != stacked_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the phrase_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            stacked_encoder.get_input_dim()))

        self.max_span_width = max_span_width
        self.span_width_embedding = Embedding(max_span_width, span_width_feature_size)

        self.span_feedforward = TimeDistributed(span_feedforward)
        self.head_scorer = TimeDistributed(
            torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        self.tag_projection_layer = TimeDistributed(Linear(span_feedforward.get_output_dim(),
                                                           self.num_classes))

        # self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)
        # Using accuracy as the metric, span F1 is overkill.
        self.span_metric = {"accuracy": CategoricalAccuracy()}

        initializer(self)
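A hedged sketch of what the TimeDistributed wrappers above effectively do: a module that expects (batch, features) inputs is applied at every span position of a (batch, num_spans, features) tensor (the real AllenNLP wrapper performs this reshaping generically):

import torch
from torch.nn import Linear

batch, num_spans, feature_dim, num_classes = 2, 6, 32, 10
span_representations = torch.randn(batch, num_spans, feature_dim)
tag_projection = Linear(feature_dim, num_classes)

flat_logits = tag_projection(span_representations.view(batch * num_spans, feature_dim))
logits = flat_logits.view(batch, num_spans, num_classes)        # per-span class scores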
Example #5
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 predicate_feature_dim: int,
                 dim_hidden: int = 100,
                 embedding_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None):
        super(SpanDetector, self).__init__(vocab, regularizer)

        self.dim_hidden = dim_hidden

        self.text_field_embedder = text_field_embedder
        self.predicate_feature_embedding = Embedding(2, predicate_feature_dim)

        self.embedding_dropout = Dropout(p=embedding_dropout)

        self.threshold_metric = ThresholdMetric()

        self.stacked_encoder = stacked_encoder

        self.span_hidden = SpanRepAssembly(self.stacked_encoder.get_output_dim(), self.stacked_encoder.get_output_dim(), self.dim_hidden)
        self.pred = TimeDistributed(Linear(self.dim_hidden, 1))
Example #6
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 num_output_representations: int,
                 requires_grad: bool = False,
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 module: torch.nn.Module = None) -> None:
        super(Elmo, self).__init__()

        logging.info("Initializing ELMo")
        if module is not None:
            if options_file is not None or weight_file is not None:
                raise ConfigurationError(
                        "Don't provide options_file or weight_file with module")
            self._elmo_lstm = module
        else:
            self._elmo_lstm = _ElmoBiLm(options_file, weight_file, requires_grad=requires_grad)
        self._dropout = Dropout(p=dropout)
        self._scalar_mixes: Any = []
        for k in range(num_output_representations):
            scalar_mix = ScalarMix(self._elmo_lstm.num_layers, do_layer_norm=do_layer_norm)
            self.add_module('scalar_mix_{}'.format(k), scalar_mix)
            self._scalar_mixes.append(scalar_mix)
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 max_decoding_steps: int,
                 attention: Attention = None,
                 attention_function: SimilarityFunction = None,
                 beam_size: int = None,
                 target_namespace: str = "tokens",
                 target_embedding_dim: int = None,
                 scheduled_sampling_ratio: float = 0.,
                 use_bleu: bool = True,
                 emb_dropout: float = 0.5) -> None:
        super(Seq2Seq, self).__init__(vocab)
        self._target_namespace = target_namespace
        self._scheduled_sampling_ratio = scheduled_sampling_ratio

        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)

        if use_bleu:
            pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace)  # pylint: disable=protected-access
            self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
        else:
            self._bleu = None

        self._token_based_metric = TokenSequenceAccuracy()

        # At prediction time, we use a beam search to find the most likely sequence of target tokens.
        beam_size = beam_size or 1
        self._max_decoding_steps = max_decoding_steps
        self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size)

        # Dense embedding of source vocab tokens.
        self._source_embedder = source_embedder
        self._emb_dropout = Dropout(p=emb_dropout)

        # Encodes the sequence of source embeddings into a sequence of hidden states.
        self._encoder = encoder

        num_classes = self.vocab.get_vocab_size(self._target_namespace)

        # Attention mechanism applied to the encoder output for each step.
        if attention:
            if attention_function:
                raise ConfigurationError("You can only specify an attention module or an "
                                         "attention function, but not both.")
            self._attention = attention
        elif attention_function:
            self._attention = LegacyAttention(attention_function)
        else:
            self._attention = None

        # Dense embedding of vocab words in the target space.
        target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
        self._target_embedder = Embedding(num_classes, target_embedding_dim)

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        self._encoder_output_dim = self._encoder.get_output_dim()
        self._decoder_output_dim = self._encoder_output_dim

        if self._attention:
            # If using attention, a weighted average over encoder outputs will be concatenated
            # to the previous target embedding to form the input to the decoder at each
            # time step.
            self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim
        else:
            # Otherwise, the input to the decoder is just the previous target embedding.
            self._decoder_input_dim = target_embedding_dim

        # We'll use an LSTM cell as the recurrent cell that produces a hidden state
        # for the decoder at each time step.
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)

        # We project the hidden state from the decoder into the output vocabulary space
        # in order to get log probabilities of each target token, at each time step.
        self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
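A hedged, plain-PyTorch sketch (illustrative dimensions) of the decoder-input sizing chosen above: with attention, the previous target embedding is concatenated with a weighted average of encoder outputs before entering the LSTM cell:

import torch
from torch.nn import LSTMCell

batch, encoder_output_dim, target_embedding_dim = 4, 256, 128
decoder_output_dim = encoder_output_dim                          # decoder is seeded with the encoder state
decoder_input_dim = decoder_output_dim + target_embedding_dim    # attention case

decoder_cell = LSTMCell(decoder_input_dim, decoder_output_dim)
prev_target_embedding = torch.randn(batch, target_embedding_dim)
attended_encoder_output = torch.randn(batch, encoder_output_dim)  # weighted sum over encoder states

decoder_input = torch.cat([attended_encoder_output, prev_target_embedding], dim=-1)
hidden, context = decoder_cell(decoder_input)                     # hidden: (batch, decoder_output_dim)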
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 span_feedforward: FeedForward,
                 binary_feature_dim: int,
                 max_span_width: int,
                 binary_feature_size: int,
                 distance_feature_size: int,
                 ontology_path: str,
                 embedding_dropout: float = 0.2,
                 srl_label_namespace: str = "labels",
                 constit_label_namespace: str = "constit_labels",
                 fast_mode: bool = True,
                 loss_type: str = "hamming",
                 unlabeled_constits: bool = False,
                 np_pp_constits: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ScaffoldedFrameSrl, self).__init__(vocab, regularizer)

        # Base token-level encoding.
        self.text_field_embedder = text_field_embedder
        self.embedding_dropout = Dropout(p=embedding_dropout)
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.stacked_encoder = stacked_encoder
        if text_field_embedder.get_output_dim(
        ) + binary_feature_dim != stacked_encoder.get_input_dim():
            raise ConfigurationError(
                "The input dimension of the stacked_encoder must be equal to "
                "the output dimension of the text_field_embedder plus binary_feature_dim.")

        # Span-level encoding.
        self.max_span_width = max_span_width
        self.span_width_embedding = Embedding(max_span_width,
                                              binary_feature_size)
        # Based on the average sentence length in FN train.
        self.span_distance_bin = 25
        self.span_distance_embedding = Embedding(self.span_distance_bin,
                                                 distance_feature_size)
        self.span_direction_embedding = Embedding(2, binary_feature_size)
        self.span_feedforward = TimeDistributed(span_feedforward)
        self.head_scorer = TimeDistributed(
            torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

        self.num_srl_args = self.vocab.get_vocab_size(srl_label_namespace)
        self.not_a_span_tag = self.vocab.get_token_index(
            "*", srl_label_namespace)
        self.outside_span_tag = self.vocab.get_token_index(
            "O", srl_label_namespace)
        self.semi_crf = SemiMarkovConditionalRandomField(
            num_tags=self.num_srl_args,
            max_span_width=max_span_width,
            default_tag=self.not_a_span_tag,
            outside_span_tag=self.outside_span_tag,
            loss_type=loss_type)
        # self.crf = ConditionalRandomField(self.num_classes)
        self.unlabeled_constits = unlabeled_constits
        self.np_pp_constits = np_pp_constits
        self.constit_label_namespace = constit_label_namespace

        assert not (unlabeled_constits and np_pp_constits)
        if unlabeled_constits:
            self.num_constit_tags = 2
        elif np_pp_constits:
            self.num_constit_tags = 3
        else:
            self.num_constit_tags = self.vocab.get_vocab_size(
                constit_label_namespace)

        # Topmost MLP.
        self.srl_arg_projection_layer = TimeDistributed(
            Linear(span_feedforward.get_output_dim(), self.num_srl_args))
        self.constit_arg_projection_layer = TimeDistributed(
            Linear(span_feedforward.get_output_dim(), self.num_constit_tags))

        # Evaluation.
        self.metrics = {
            "constituents":
            NonBioSpanBasedF1Measure(vocab,
                                     tag_namespace=constit_label_namespace,
                                     ignore_classes=["*"]),
            "srl":
            NonBioSpanBasedF1Measure(vocab,
                                     tag_namespace=srl_label_namespace,
                                     ignore_classes=["O", "*"],
                                     ontology_path=ontology_path)
        }

        # Mode for the model, if turned on it only evaluates on dev and calculates loss for train.
        self.fast_mode = fast_mode
        initializer(self)
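A hedged sketch of feeding the 25-bin span-distance embedding above; the clamping rule here is an assumption for illustration, only the bin count comes from the constructor:

import torch

span_distance_bin = 25
distance_feature_size = 20
distances = torch.tensor([0, 3, 12, 40, 97])                 # distances from each span to the predicate
buckets = distances.clamp(max=span_distance_bin - 1)         # fold long distances into the last bin (assumed rule)
span_distance_embedding = torch.nn.Embedding(span_distance_bin, distance_feature_size)
embedded_distances = span_distance_embedding(buckets)        # (5, distance_feature_size)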
Example #9
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            encoder: Seq2SeqEncoder,
            arc_representation_dim: int,
            tag_representation_dim: int,
            rank: int,
            capsule_dim: int,
            iter_num: int,
            arc_feedforward: FeedForward = None,
            tag_feedforward: FeedForward = None,
            pos_tag_embedding: Embedding = None,
            #dep_tag_embedding: Embedding = None,
            predicate_embedding: Embedding = None,
            delta_type: str = "hinge_ce",
            subtract_gold: bool = False,
            dropout: float = 0.0,
            input_dropout: float = 0.0,
            edge_prediction_threshold: float = 0.5,
            gumbel_t: float = 1,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            double_loss: bool = True,
            base_average: bool = False,
            bilinear_matrix_capsule: bool = True,
            using_global: bool = False,
            passing_type: str = 'plain',
            global_node: bool = False,
            comments: str = "") -> None:
        super(SRLGraphParserBase, self).__init__(vocab, regularizer)
        self.capsule_dim = capsule_dim
        num_labels = self.vocab.get_vocab_size("arc_types")
        # print("num_labels", num_labels)

        if global_node == True:
            self.get_global_layer = Plain_Feedforward(
                (num_labels + 1) * capsule_dim, capsule_dim,
                Activation.by_name('relu')())
            self.bilinear_matrix_capsule_layer_for_global_node = BilinearMatrix(
                capsule_dim, capsule_dim)
        self.global_node = global_node

        if using_global == True:
            self.capsule_dim = int(self.capsule_dim / 2)
            if passing_type == 'plain':
                self.get_global_layer = Plain_Feedforward(
                    (num_labels + 1) * capsule_dim,
                    (num_labels + 1) * self.capsule_dim,
                    Activation.by_name('relu')())
            elif passing_type == 'attention':
                self.get_global_layer = Attention_Feedforward(
                    self.capsule_dim, capsule_dim, self.capsule_dim)
            else:
                self.get_global_layer = None
        self.using_global = using_global
        self.passing_type = passing_type

        self.iter_num = iter_num
        self.double_loss = double_loss
        self.base_average = base_average
        self.bilinear_matrix_capsule = bilinear_matrix_capsule

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.subtract_gold = subtract_gold
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    #   print ("predicates",self.vocab._index_to_token["predicates"])
    #   print ("arc_types",self.vocab._index_to_token["arc_types"])
        self.delta_type = delta_type

        self.gumbel_t = gumbel_t
        node_dim = predicate_embedding.get_output_dim()
        encoder_dim = encoder.get_output_dim()
        #self.arg_arc_feedforward = arc_feedforward or \
        #                           FeedForward(encoder_dim, 1,
        #                                       arc_representation_dim,
        #                                       Activation.by_name("elu")())
        #self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)

        #self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
        #arc_representation_dim,
        #label_dim=capsule_dim,
        #use_input_biases=True)

        self.arg_tag_feedforward = tag_feedforward or \
                                   FeedForward(encoder_dim, 1,
                                               tag_representation_dim,
                                               Activation.by_name("elu")())
        self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention_Lowrank(
            tag_representation_dim,
            tag_representation_dim,
            rank,
            label_dim=(num_labels + 1) * self.capsule_dim,
            use_input_biases=True)  #,activation=Activation.by_name("tanh")()
        if self.bilinear_matrix_capsule == True:
            self.bilinear_matrix_capsule_layer = BilinearMatrix(
                capsule_dim, capsule_dim)
        self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                                Activation.by_name("elu")())
        self._pos_tag_embedding = pos_tag_embedding or None
        #self._dep_tag_embedding = dep_tag_embedding or None
        self._pred_embedding = predicate_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        #   check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")

        self._labelled_f1 = IterativeLabeledF1Measure(
            negative_label=0,
            negative_pred=0,
            selected_metrics=["F", "l_F", "p_F", "u_F"])
        self._tag_loss = torch.nn.NLLLoss(reduction="none")  # ,ignore_index=-1
        self._sense_loss = torch.nn.NLLLoss(
            reduction="none")  # ,ignore_index=-1
        initializer(self)
Example #10
    def __init__(self,
                 vocab: Vocabulary,
                 context_field_embedder: TextFieldEmbedder,
                 left_text_encoder: Seq2VecEncoder,
                 right_text_encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 target_encoder: Optional[Seq2VecEncoder] = None,
                 inter_target_encoding: Optional[InterTarget] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels',
                 loss_weights: Optional[List[float]] = None) -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes 
                      for input/output projections.
        :param context_field_embedder: Used to embed the text and target text if
                                       target_field_embedder is None but the 
                                       target_encoder is NOT None.
        :param left_text_encoder: Encoder that will create the representation 
                                  of the tokens left of the target and  
                                  the target itself if included from the 
                                  dataset reader.
        :param right_text_encoder: Encoder that will create the representation 
                                   of the tokens right of the target and the 
                                   target itself if included from the 
                                   dataset reader.
        :param feedforward: An optional feed forward layer to apply after the 
                            encoder.
        :param target_field_embedder: Used to embed the target text to give as 
                                      input to the target_encoder. Thus this 
                                      allows a separate embedding for text and 
                                      target text.
        :param target_encoder: Encoder that will create the representation of 
                               target text tokens.
        :param inter_target_encoding: Whether to model the relationship between 
                                      targets/aspect.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param dropout: To apply dropout after each layer apart from the last 
                        layer. All dropout applied to time-based data will be 
                        `variational dropout`_; all else will be 
                        standard dropout.
        :param label_name: Name of the label name space.
        :param loss_weights: The amount of weight to give the negative, neutral,
                             positive classes respectively. e.g. [0.2, 0.5, 0.3]
                             would weight the negative class by a factor of 
                             0.2, neutral by 0.5 and positive by 0.3. NOTE It 
                             assumes the sentiment labels are the following:
                             [negative, neutral, positive].
        
        Without the target encoder this will be the standard TDLSTM method 
        from `Effective LSTM's for Target-Dependent Sentiment classification`_
        . With the target encoder this will then become the TCLSTM method 
        from `Effective LSTM's for Target-Dependent Sentiment classification`_.
        .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        .. _Effective LSTM's for Target-Dependent Sentiment classification:
           https://aclanthology.coli.uni-saarland.de/papers/C16-1311/c16-1311
        '''

        self.label_name = label_name
        self.context_field_embedder = context_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.left_text_encoder = left_text_encoder
        self.right_text_encoder = right_text_encoder
        self.target_encoder = target_encoder
        self.feedforward = feedforward

        # Set the loss weights (have to sort them by order of label index in
        # the vocab)
        self.loss_weights = target_sentiment.util.loss_weight_order(
            self, loss_weights, self.label_name)

        # Inter target modelling
        self.inter_target_encoding = inter_target_encoding

        left_out_dim = self.left_text_encoder.get_output_dim()
        right_out_dim = self.right_text_encoder.get_output_dim()
        left_right_out_dim = left_out_dim + right_out_dim
        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        elif self.inter_target_encoding is not None:
            output_dim = self.inter_target_encoding.get_output_dim()
        else:
            output_dim = left_right_out_dim
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        for label_index, _label_name in label_index_name.items():
            _label_name = f'F1_{_label_name.capitalize()}'
            self.f1_metrics[_label_name] = F1Measure(label_index)
        # Dropout
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)

        # Ensure that the input to the right_text_encoder and left_text_encoder
        # is the size of the target encoder output plus the size of the text
        # embedding output.
        if self.target_encoder is not None:
            right_in_dim = self.right_text_encoder.get_input_dim()
            left_in_dim = self.left_text_encoder.get_input_dim()

            target_dim = self.target_encoder.get_output_dim()
            text_dim = self.context_field_embedder.get_output_dim()
            total_out_dim = target_dim + text_dim
            config_err_msg = (
                "As the target is being encoded the output of the"
                " target encoder is concatenated onto each word "
                " vector for the left and right contexts "
                "therefore the input of the right_text_encoder"
                "/left_text_encoder is the output dimension of "
                "the target encoder + the dimension of the word "
                "embeddings for the left and right contexts.")

            if (total_out_dim != right_in_dim or total_out_dim != left_in_dim):
                raise ConfigurationError(config_err_msg)
        # Ensure that the target field embedder has an output dimension the
        # same as the input dimension to the target encoder.
        if self.target_encoder and self.target_field_embedder:
            target_embed_out = self.target_field_embedder.get_output_dim()
            target_in = self.target_encoder.get_input_dim()
            check_dimensions_match(target_in, target_embed_out,
                                   'target_field_embedder output',
                                   'target_encoder input')

        if self.inter_target_encoding:
            check_dimensions_match(left_right_out_dim,
                                   self.inter_target_encoding.get_input_dim(),
                                   'Output from the left and right encoders',
                                   'Inter Target encoder input dim')

        # TimeDistributed everything as we are processing multiple Targets at
        # once as the input is a sentence containing one or more targets
        self.left_text_encoder = TimeDistributed(self.left_text_encoder)
        self.right_text_encoder = TimeDistributed(self.right_text_encoder)
        if self.target_encoder is not None:
            self.target_encoder = TimeDistributed(self.target_encoder)
        if self.feedforward is not None:
            self.feedforward = TimeDistributed(self.feedforward)
        self.label_projection = TimeDistributed(self.label_projection)
        self._time_variational_dropout = TimeDistributed(
            self._variational_dropout)
        self._naive_dropout = TimeDistributed(self._naive_dropout)

        initializer(self)
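A hedged sketch of how the loss_weights documented above could become a weighted criterion (the model itself resolves label ordering via target_sentiment.util.loss_weight_order; the values are the docstring's example):

import torch

loss_weights = [0.2, 0.5, 0.3]                     # negative, neutral, positive (docstring example)
criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(loss_weights))

logits = torch.randn(4, 3)                         # (batch, num_classes)
labels = torch.tensor([0, 2, 1, 1])
loss = criterion(logits, labels)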
Example #11
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 treebank_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 use_treebank_embedding: bool = False,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParserMonolingual, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._treebank_embedding = treebank_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()
        if treebank_embedding is not None:
            representation_dim += treebank_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim", "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim", "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
        self.use_treebank_embedding = use_treebank_embedding

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                    "Ignoring words with these POS tags for evaluation.")
        
        if self.use_treebank_embedding:
            tbids = self.vocab.get_token_to_index_vocabulary("tbids")
            tbid_indices = {tb: index for tb, index in tbids.items()}
            self._tbids = set(tbid_indices.values())
            logger.info(f"Found TBIDs corresponding to the following treebanks : {tbid_indices}. "
                        "Embedding these as additional features.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
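A hedged, plain-tensor sketch of the biaffine arc scorer configured above: head and child arc representations, each with an appended bias feature (mirroring use_input_biases=True), are combined through a learned matrix into a (batch, len, len) attachment score table:

import torch

batch, seq_len, arc_dim = 2, 5, 8
head_arc = torch.randn(batch, seq_len, arc_dim)      # output of head_arc_feedforward
child_arc = torch.randn(batch, seq_len, arc_dim)     # output of child_arc_feedforward

W = torch.randn(arc_dim + 1, arc_dim + 1)            # +1 row/col for the input biases
ones = torch.ones(batch, seq_len, 1)
head_b = torch.cat([head_arc, ones], dim=-1)
child_b = torch.cat([child_arc, ones], dim=-1)
arc_scores = head_b @ W @ child_b.transpose(1, 2)    # (batch, seq_len, seq_len)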
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 output_encoder: Seq2VecEncoder,
                 num_cpt_layers: Optional[int] = 2,
                 cpt_highway: bool = True,
                 target_encoder: Optional[Seq2SeqEncoder] = None,
                 feedforward: Optional[FeedForward] = None,
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 share_text_target_encoder: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 word_dropout: float = 0.0,
                 dropout: float = 0.0) -> None:
        '''
        Useful acronyms:

        CPT - Context-Preserving Transformation 

        :param vocab: vocab : A Vocabulary, required in order to compute sizes 
                              for input/output projections.
        :param text_field_embedder: Used to embed the text and target text if
                                    target_field_embedder is None but the 
                                    target_encoder is not None.
        :param text_encoder: Sequence Encoder that will create the 
                             representation of each token in the context 
                             sentence.
        :param output_encoder: The encoder that takes as input the words after 
                               they have been transformed through the CPT 
                               layers. In the original paper this would be a 
                               CNN.
        :param num_cpt_layers: Number of times to perform the CPT layer to the 
                               hidden representation of the words.
        :param cpt_highway: highway adds the contextualised word vector (input 
                            word representation to CPT) to the transformed word 
                            vector (output word representation of CPT). Setting 
                            this is the equivalent of using Lossless Forwarding 
                            (LF) from the original paper.
        :param target_encoder: Encoder that will create the representation of 
                               target text tokens.
        :param feedforward: An optional feed forward layer to apply after
                            either the text encoder if target encoder is None. 
                            Else it would be after the target and the text 
                            encoded representations have been concatenated.
        :param target_field_embedder: Used to embed the target text to give as 
                                      input to the target_encoder. Thus this 
                                      allows a separate embedding for text and 
                                      target text.
        :param share_text_target_encoder: Whether or not to use the same 
                                          encoder for the text and the target.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param word_dropout: Dropout that is applied after the embedding of the 
                             tokens/words. It will drop entire words with this 
                             probability.
        :param dropout: To apply dropout after each layer apart from the last 
                        layer. All dropout applied to time-based data will be 
                        `variational dropout`_; all else will be 
                        standard dropout.
        
        The classifier is based on the model in `Transformation Networks for 
        Target-Oriented Sentiment Classification 
        <https://aclweb.org/anthology/P18-1087>`_. If the 
        `share_text_target_encoder` is `True` and `cpt_highway` is True this 
        model would be equivalent to the TNet-LF model within the original 
        paper.

        .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        '''
        super().__init__(vocab, regularizer)

        if share_text_target_encoder and (target_encoder is not None):
            config_err = ("The target encoder will not be used when sharing. "
                          "Set the target_encoder to None (default)")
            raise ConfigurationError(config_err)
        elif (not share_text_target_encoder) and (target_encoder is None):
            config_err = ('As the target and text are not sharing the encoder '
                          'an encoder is required for the target text')
            raise ConfigurationError(config_err)

        self.text_field_embedder = text_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        if share_text_target_encoder:
            target_encoder = text_encoder
        self.target_encoder = target_encoder
        self.output_encoder = output_encoder

        text_enc_out = text_encoder.get_output_dim()
        target_enc_out = target_encoder.get_output_dim()
        self.cpt = TimeDistributed(
            CPT(num_cpt_layers,
                text_enc_out,
                target_enc_out,
                cpt_highway,
                dropout=dropout))

        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = self.output_encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
        for label_index, label_name in label_index_name.items():
            label_name = f'F1_{label_name.capitalize()}'
            self.f1_metrics[label_name] = F1Measure(label_index)

        self._word_dropout = WordDrouput(word_dropout)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)

        self.loss = torch.nn.CrossEntropyLoss()

        # Ensure that the dimensions of the text field embedder and text encoder
        # match
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               text_encoder.get_input_dim(),
                               "text field embedding dim",
                               "text encoder input dim")
        # Ensure that the dimensions of the target or text field embedder and
        # the target encoder match
        target_field_embedder_dim = text_field_embedder.get_output_dim()
        target_field_error = "text field embedding dim"
        if self.target_field_embedder:
            target_field_embedder_dim = target_field_embedder.get_output_dim()
            target_field_error = "target field embedding dim"

        check_dimensions_match(target_field_embedder_dim,
                               target_encoder.get_input_dim(),
                               target_field_error, "target encoder input dim")
        initializer(self)
Example #13
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 span_feedforward: FeedForward,
                 binary_feature_dim: int,
                 max_span_width: int,
                 binary_feature_size: int,
                 distance_feature_size: int,
                 embedding_dropout: float = 0.2,
                 srl_label_namespace: str = "labels",
                 constit_label_namespace: str = "constit_labels",
                 mixing_ratio: float = 1.0,
                 cutoff_epoch: int = -1,
                 fast_mode: bool = True,
                 loss_type: str = "logloss",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PropBankScaffoldSpanSrl, self).__init__(vocab, regularizer)

        # Base token-level encoding.
        self.text_field_embedder = text_field_embedder
        self.embedding_dropout = Dropout(p=embedding_dropout)
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.stacked_encoder = stacked_encoder
        if text_field_embedder.get_output_dim(
        ) + binary_feature_dim != stacked_encoder.get_input_dim():
            raise ConfigurationError(
                "The SRL Model uses a binary verb indicator feature, meaning "
                "the input dimension of the stacked_encoder must be equal to "
                "the output dimension of the text_field_embedder + binary_feature_dim.")

        # Span-level encoding.
        self.max_span_width = max_span_width
        self.span_width_embedding = Embedding(max_span_width,
                                              binary_feature_size)
        # Based on the average sentence length in FN train. TODO(Swabha): find out for OntoNotes.
        self.span_distance_bin = 25
        self.span_distance_embedding = Embedding(self.span_distance_bin,
                                                 distance_feature_size)
        self.span_direction_embedding = Embedding(2, binary_feature_size)
        self.span_feedforward = TimeDistributed(span_feedforward)
        self.head_scorer = TimeDistributed(
            torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

        self.num_srl_args = self.vocab.get_vocab_size(srl_label_namespace)
        not_a_span_tag = self.vocab.get_token_index("*", srl_label_namespace)
        outside_span_tag = self.vocab.get_token_index("O", srl_label_namespace)
        self.semi_crf = SemiMarkovConditionalRandomField(
            num_tags=self.num_srl_args,
            max_span_width=max_span_width,
            loss_type=loss_type,
            default_tag=not_a_span_tag,
            outside_span_tag=outside_span_tag)
        # self.crf = ConditionalRandomField(self.num_classes)
        self.num_constit_tags = self.vocab.get_vocab_size(
            constit_label_namespace)

        # Topmost MLP.
        self.srl_arg_projection_layer = TimeDistributed(
            Linear(span_feedforward.get_output_dim(), self.num_srl_args))
        self.constit_arg_projection_layer = TimeDistributed(
            Linear(span_feedforward.get_output_dim(), self.num_constit_tags))

        self.mixing_ratio = mixing_ratio
        self.cutoff_batch = cutoff_epoch
        self.batch = 0

        # Evaluation.
        self.metrics = {
            "constituents":
            NonBioSpanBasedF1Measure(vocab,
                                     tag_namespace=constit_label_namespace,
                                     ignore_classes=["*"]),
            "srl":
            NonBioSpanBasedF1Measure(vocab,
                                     tag_namespace=srl_label_namespace,
                                     ignore_classes=["V", "*"])
        }

        # Mode for the model, if turned on it only evaluates on dev and calculates loss for train.
        self.fast_mode = fast_mode
        initializer(self)
Example #14
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 attention: Attention,
                 beam_size: int,
                 max_decoding_steps: int,
                 target_embedding_dim: int = 30,
                 copy_token: str = "@COPY@",
                 source_namespace: str = "tokens",
                 target_namespace: str = "target_tokens",
                 tensor_based_metric: Metric = None,
                 token_based_metric: Metric = None,
                 emb_dropout: float = 0.0,
                 dec_dropout: float = 0.0,
                 target_pretrained_file: str = None) -> None:
        super().__init__(vocab)
        self._source_namespace = source_namespace
        self._target_namespace = target_namespace
        self._src_start_index = self.vocab.get_token_index(
            START_SYMBOL, self._source_namespace)
        self._src_end_index = self.vocab.get_token_index(
            END_SYMBOL, self._source_namespace)
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        self._oov_index = self.vocab.get_token_index(self.vocab._oov_token,
                                                     self._target_namespace)  # pylint: disable=protected-access
        self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                     self._target_namespace)  # pylint: disable=protected-access
        self._copy_index = self.vocab.add_token_to_namespace(
            copy_token, self._target_namespace)

        self._tensor_based_metric = tensor_based_metric or \
            BLEU(exclude_indices={self._pad_index, self._end_index, self._start_index})
        self._token_based_metric = token_based_metric

        self._target_vocab_size = self.vocab.get_vocab_size(
            self._target_namespace)

        # Encoding modules.
        self._source_embedder = source_embedder
        self._emb_dropout = Dropout(p=emb_dropout)
        self._dec_dropout = Dropout(p=dec_dropout)

        self._encoder = encoder

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
        self.encoder_output_dim = self._encoder.get_output_dim()
        self.decoder_output_dim = self.encoder_output_dim
        self.decoder_input_dim = self.decoder_output_dim

        target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

        # The decoder input will be a function of the embedding of the previous predicted token,
        # an attended encoder hidden state called the "attentive read", and another
        # weighted sum of the encoder hidden state called the "selective read".
        # While the weights for the attentive read are calculated by an `Attention` module,
        # the weights for the selective read are simply the predicted probabilities
        # corresponding to each token in the source sentence that matches the target
        # token from the previous timestep.
        self._target_embedder = Embedding(
            target_vocab_size,
            target_embedding_dim,
            vocab_namespace=self._target_namespace,
            pretrained_file=target_pretrained_file)
        self._attention = attention
        self._input_projection_layer = Linear(
            target_embedding_dim + self.encoder_output_dim * 2,
            self.decoder_input_dim)

        # We then run the projected decoder input through an LSTM cell to produce
        # the next hidden state.
        self._decoder_cell = LSTMCell(self.decoder_input_dim,
                                      self.decoder_output_dim)

        # We create a "generation" score for each token in the target vocab
        # with a linear projection of the decoder hidden state.
        self._output_generation_layer = Linear(self.decoder_output_dim,
                                               target_vocab_size)

        # We create a "copying" score for each source token by applying a non-linearity
        # (tanh) to a linear projection of the encoded hidden state for that token,
        # and then taking the dot product of the result with the decoder hidden state.
        self._output_copying_layer = Linear(self.encoder_output_dim,
                                            self.decoder_output_dim)

        # At prediction time, we'll use a beam search to find the best target sequence.
        self._beam_search = BeamSearch(self._end_index,
                                       max_steps=max_decoding_steps,
                                       beam_size=beam_size)
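A hedged sketch (illustrative dimensions) of the decoder input assembled above: the previous target embedding plus the attentive and selective reads are concatenated, projected to the decoder input size, and passed to the LSTM cell:

import torch
from torch.nn import Linear, LSTMCell

batch = 4
target_embedding_dim, encoder_output_dim = 30, 200
decoder_input_dim = decoder_output_dim = encoder_output_dim

input_projection = Linear(target_embedding_dim + encoder_output_dim * 2, decoder_input_dim)
decoder_cell = LSTMCell(decoder_input_dim, decoder_output_dim)

prev_embedding = torch.randn(batch, target_embedding_dim)
attentive_read = torch.randn(batch, encoder_output_dim)      # attention-weighted encoder states
selective_read = torch.randn(batch, encoder_output_dim)      # copy-probability-weighted encoder states
projected = input_projection(torch.cat([prev_embedding, attentive_read, selective_read], dim=-1))
hidden, context = decoder_cell(projected)                    # hidden: (batch, decoder_output_dim)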
Example #15
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 encoder,
                 tag_representation_dim,
                 arc_representation_dim,
                 tag_feedforward=None,
                 arc_feedforward=None,
                 pos_tag_embedding=None,
                 use_mst_decoding_for_validation=True,
                 dropout=0.0,
                 input_dropout=0.0,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or\
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name(u"elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size(u"head_tags")

        self.head_tag_feedforward = tag_feedforward or\
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name(u"elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               u"text field embedding dim",
                               u"encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               u"tag representation dim",
                               u"tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               u"arc representation dim",
                               u"arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary(u"pos")
        punctuation_tag_indices = dict((tag, index)
                                       for tag, index in list(tags.items())
                                       if tag in POS_TO_IGNORE)
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            u"Found POS tags corresponding to the following punctuation : {}. "
            u"Ignoring words with these POS tags for evaluation.".format(punctuation_tag_indices))

        self._attachment_scores = AttachmentScores()
        initializer(self)
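
The head/child feedforward projections feeding a bilinear attention with input biases form the core biaffine arc scorer used throughout these examples. As a rough standalone illustration (not the AllenNLP BilinearMatrixAttention itself; every tensor name and size below is invented), the scorer yields one score per (head, dependent) pair:

import torch

# Minimal sketch of biaffine arc scoring: batch=2, seq_len=5, arc_representation_dim=8.
batch, seq_len, arc_dim = 2, 5, 8
head_arc = torch.randn(batch, seq_len, arc_dim)   # stands in for FF_head(encoder_out)
child_arc = torch.randn(batch, seq_len, arc_dim)  # stands in for FF_child(encoder_out)

# use_input_biases=True is commonly realised by appending a constant 1 feature
# to both inputs before the bilinear product.
ones = head_arc.new_ones(batch, seq_len, 1)
head_b = torch.cat([head_arc, ones], dim=-1)      # (batch, seq, arc_dim + 1)
child_b = torch.cat([child_arc, ones], dim=-1)

U = torch.randn(arc_dim + 1, arc_dim + 1)         # learned bilinear weight in the real model
# score[b, i, j] = head_b[b, i] @ U @ child_b[b, j]
arc_scores = torch.einsum("bid,de,bje->bij", head_b, U, child_b)
print(arc_scores.shape)  # torch.Size([2, 5, 5]): one score per (head, dependent) pair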
Example #16
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()
        self.head_arc_projection = torch.nn.Linear(encoder_dim,
                                                   arc_representation_dim)
        self.child_arc_projection = torch.nn.Linear(encoder_dim,
                                                    arc_representation_dim)
        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")
        self.head_tag_projection = torch.nn.Linear(encoder_dim,
                                                   tag_representation_dim)
        self.child_tag_projection = torch.nn.Linear(encoder_dim,
                                                    tag_representation_dim)
        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags correspoding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
Example #17
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder_0: Seq2SeqEncoder,
                 encoder_1: Seq2SeqEncoder,
                 encoder_2: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 use_layer_normalization: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        a = vocab.get_index_to_token_vocabulary(namespace='tokens')
        # glyph_config['idx2word'] = {k: v for k, v in a.items()}

        # self.glyph = GlyphEmbedding(glyph_config)

        self.text_field_embedder = text_field_embedder

        self.encoder_0 = encoder_0
        self.encoder_1 = encoder_1
        self.encoder_2 = encoder_2

        encoder_dim = self.encoder_2.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        # self._dropout = Dropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, self.encoder_2.get_output_dim()]))

        self.use_layer_normalization = use_layer_normalization

        if use_layer_normalization:
            self.norm_input = torch.nn.LayerNorm(
                self.encoder_0.get_input_dim())
            self.norm_hidden = torch.nn.LayerNorm(
                self.encoder_0.get_output_dim())

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        # check_dimensions_match(representation_dim, encoder.get_input_dim(),
        #                        "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
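
Example #17 mainly differs in stacking three encoders and optionally applying LayerNorm to the embedded input and between encoder layers. Below is a minimal sketch of that stacking pattern, assuming plain bidirectional LSTMs stand in for the AllenNLP Seq2SeqEncoders and all dimensions are made up:

import torch
import torch.nn as nn

# Hypothetical dimensions; the real encoders are AllenNLP Seq2SeqEncoders.
input_dim, hidden_dim, batch, seq_len = 16, 16, 2, 7

norm_input = nn.LayerNorm(input_dim)    # plays the role of self.norm_input
norm_hidden = nn.LayerNorm(hidden_dim)  # plays the role of self.norm_hidden
encoder_0 = nn.LSTM(input_dim, hidden_dim // 2, batch_first=True, bidirectional=True)
encoder_1 = nn.LSTM(hidden_dim, hidden_dim // 2, batch_first=True, bidirectional=True)
encoder_2 = nn.LSTM(hidden_dim, hidden_dim // 2, batch_first=True, bidirectional=True)

x = torch.randn(batch, seq_len, input_dim)
h0, _ = encoder_0(norm_input(x))    # normalise the embedded input first
h1, _ = encoder_1(norm_hidden(h0))  # then normalise between encoder layers
h2, _ = encoder_2(norm_hidden(h1))
print(h2.shape)                     # (2, 7, 16): fed to the arc/tag feedforwards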
Example #18
File: adnc.py Project: Fuchai/mayoehr
    def __init__(self, x, h, L, v_t, W, R, N, dropout_rate=0.8, prior=None):
        super(APDNC, self).__init__()

        # debugging usages
        self.last_state_dict = None
        '''PARAMETERS'''
        # input vector size x_t
        # dataset specific
        self.x = x
        # single hidden unit output size h^l_t
        # state size
        # output size, forget gate size, input gate size are all equal to state size s
        # all weight matrices in equation 1-5 then has dimension (s, x+2*h)
        # by equation 5, h=s=o
        self.h = h
        # Controller RNN layers count
        # refers to the number of parallel RNN units
        self.L = L
        # Controller output v_t size
        # dataset specific
        self.v_t = v_t
        # Memory location width
        # Memory read heads count R
        # Controller interface epsilon_t size, derived
        self.W = W
        self.R = R
        # Total memory address count
        # Total memory block (N, W)
        self.N = N
        self.bs = None
        self.E_t = W * R + 3 * W + 3 * R + 3
        '''CONTROLLER'''
        # self.RNN_list = nn.ModuleList()
        # for _ in range(self.L):
        #     self.RNN_list.append(LSTM_Unit(self.x, self.R, self.W, self.h, self.bs))
        self.W_y = Parameter(
            torch.Tensor(self.L * self.h * 2, self.v_t).cuda())
        self.W_E = Parameter(
            torch.Tensor(self.L * self.h * 2, self.E_t).cuda())
        self.controller = Stock_LSTM(self.x, self.R, self.W, self.h, self.L,
                                     self.v_t)
        # every time step every layer has 1 channel*space only
        self.layernorm = LayerNorm(1)
        self.dropout = Dropout(p=dropout_rate)
        '''COMPUTER'''
        self.W_r = Parameter(torch.Tensor(self.W * self.R, self.v_t).cuda())
        # print("Using 0.4.1 PyTorch BatchNorm1d")
        # self.bn = nn.BatchNorm1d(self.x, eps=1e-3, momentum=1e-10, affine=False)
        self.bn = nn.BatchNorm1d(self.x)
        self.reset_parameters()
        '''States'''
        self.hidden_previous_timestep = None
        # self.precedence_weighting=None
        # self.temporal_memory_linkage=None
        self.memory = None
        self.last_read_weightings = None
        self.last_usage_vector = None
        self.last_write_weighting = None
        self.last_read_vector = None
        self.not_first_t_flag = None
        '''prior'''
        # this is the prior probability of each label predicting true
        # this is added to the logit
        self.prior = prior
        if self.prior is not None:
            if isinstance(self.prior, np.ndarray):
                self.prior = torch.from_numpy(self.prior).float()
                self.prior = Variable(self.prior, requires_grad=False)
            elif isinstance(self.prior, torch.Tensor):
                self.prior = Variable(self.prior, requires_grad=False)
            else:
                assert (isinstance(self.prior, Variable))

            # transform to logits
            # because we are using sigmoid, not softmax, self.prior=log(P(y))-log(P(not y))
            # sigmoid_input = z + self.prior
            # z = log(P(x|y)) - log(P(x|not y))
            # sigmoid output is the posterior positive
            self.prior = self.prior.clamp(1e-8, 1 - 1e-8)
            self.prior = torch.log(self.prior) - torch.log(1 - self.prior)
            a = Variable(torch.Tensor([0]))
            self.prior = torch.cat((a, self.prior))
            self.prior = self.prior.cuda()

            print("Using DNC with prior probability")
Example #19
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 lemmatize_helper: LemmatizeHelper,
                 task_config: TaskConfig,
                 morpho_vector_dim: int = 0,
                 gram_val_representation_dim: int = -1,
                 lemma_representation_dim: int = -1,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DependencyParser, self).__init__(vocab, regularizer)

        self.TopNCnt = 3

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.lemmatize_helper = lemmatize_helper
        self.task_config = task_config

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        assert self.task_config.params.get("use_pos_tag",
                                           False) == (self._pos_tag_embedding
                                                      is not None)

        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        if gram_val_representation_dim <= 0:
            self._gram_val_output = torch.nn.Linear(
                encoder_dim, self.vocab.get_vocab_size("grammar_value_tags"))
        else:
            self._gram_val_output = torch.nn.Sequential(
                Dropout(dropout),
                torch.nn.Linear(encoder_dim, gram_val_representation_dim),
                Dropout(dropout),
                torch.nn.Linear(
                    gram_val_representation_dim,
                    self.vocab.get_vocab_size("grammar_value_tags")))

        if lemma_representation_dim <= 0:
            self._lemma_output = torch.nn.Linear(encoder_dim,
                                                 len(lemmatize_helper))
        else:
            # Feed the grammar value prediction output into the lemmatizer input -- EXPERIMENTAL
            #actual_input_dim = encoder_dim
            actual_input_dim = encoder_dim + self.vocab.get_vocab_size(
                "grammar_value_tags")
            self._lemma_output = torch.nn.Sequential(
                Dropout(dropout),
                torch.nn.Linear(actual_input_dim, lemma_representation_dim),
                Dropout(dropout),
                torch.nn.Linear(lemma_representation_dim,
                                len(lemmatize_helper)))

        representation_dim = text_field_embedder.get_output_dim(
        ) + morpho_vector_dim
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info("HELLO FROM INIT")
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        self._gram_val_prediction_accuracy = CategoricalAccuracy()
        self._lemma_prediction_accuracy = CategoricalAccuracy()

        initializer(self)
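
The experimental branch above (see the translated comment) feeds the grammar-value logits into the lemmatizer head by concatenating them with the encoder output. A hedged sketch of that wiring with invented sizes, using a plain torch.nn stack in place of the full model:

import torch
import torch.nn as nn

# Hypothetical sizes; in the example they come from the encoder and the vocab.
encoder_dim, num_gram_vals, lemma_dim, num_lemma_rules = 32, 10, 16, 50
batch, seq_len = 2, 6

gram_val_output = nn.Linear(encoder_dim, num_gram_vals)
lemma_output = nn.Sequential(
    nn.Dropout(0.1),
    nn.Linear(encoder_dim + num_gram_vals, lemma_dim),
    nn.Dropout(0.1),
    nn.Linear(lemma_dim, num_lemma_rules),
)

encoded = torch.randn(batch, seq_len, encoder_dim)
gram_val_logits = gram_val_output(encoded)                   # per-token tag scores
lemma_input = torch.cat([encoded, gram_val_logits], dim=-1)  # the experimental concat
lemma_logits = lemma_output(lemma_input)
print(lemma_logits.shape)  # (2, 6, 50)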
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 hidden_dim: int,
                 action_dim: int,
                 ratio_dim: int,
                 num_layers: int,
                 word_dim: int = 0,
                 text_field_embedder: TextFieldEmbedder = None,
                 mces_metric: Metric = None,
                 recurrent_dropout_probability: float = 0.0,
                 layer_dropout_probability: float = 0.0,
                 same_dropout_mask_per_instance: bool = True,
                 input_dropout: float = 0.0,
                 lemma_text_field_embedder: TextFieldEmbedder = None,
                 pos_tag_embedding: Embedding = None,
                 deprel_embedding: Embedding = None,
                 bios_embedding: Embedding = None,
                 lexcat_embedding: Embedding = None,
                 ss_embedding: Embedding = None,
                 ss2_embedding: Embedding = None,
                 action_embedding: Embedding = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None
                 ) -> None:

        super(TransitionParser, self).__init__(vocab, regularizer)

        self._primary_labeled_correct = 0
        self._primary_unlabeled_correct = 0
        self._primary_total_edges_predicted = 0
        self._primary_total_edges_actual = 0
        self._primary_exact_labeled_correct = 0
        self._primary_exact_unlabeled_correct = 0

        self._remote_labeled_correct = 0
        self._remote_unlabeled_correct = 0
        self._remote_total_edges_predicted = 0
        self._remote_total_edges_actual = 0
        self._remote_exact_labeled_correct = 0
        self._remote_exact_unlabeled_correct = 0

        self._total_sentences = 0

        self.num_actions = vocab.get_vocab_size('actions')
        self.text_field_embedder = text_field_embedder
        self.lemma_text_field_embedder = lemma_text_field_embedder
        self._pos_tag_embedding = pos_tag_embedding
        self._deprel_embedding = deprel_embedding
        self._bios_embedding = bios_embedding
        self._lexcat_embedding = lexcat_embedding
        self._ss_embedding = ss_embedding
        self._ss2_embedding = ss2_embedding
        self._mces_metric = mces_metric

        node_dim = 0
        if self.text_field_embedder:
            node_dim += word_dim
        for embedding in pos_tag_embedding, deprel_embedding, bios_embedding, lexcat_embedding, ss_embedding, \
                         ss2_embedding:
            if embedding:
                node_dim += embedding.output_dim
        self.node_dim = node_dim
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.ratio_dim = ratio_dim
        self.action_dim = action_dim

        self.action_embedding = action_embedding

        if action_embedding is None:
            self.action_embedding = Embedding(num_embeddings=self.num_actions,
                                              embedding_dim=self.action_dim,
                                              trainable=False)

        # syntactic composition
        self.p_comp = torch.nn.Linear(self.hidden_dim * 5 + self.ratio_dim, node_dim)
        # parser state to hidden
        self.p_s2h = torch.nn.Linear(self.hidden_dim * 3 + self.ratio_dim, self.hidden_dim)
        # hidden to action
        self.p_act = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.num_actions)

        self.update_concept_node = torch.nn.Linear(self.hidden_dim + self.ratio_dim, node_dim)

        self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))
        self.proot_stack_emb = torch.nn.Parameter(torch.randn(node_dim))
        self.pempty_action_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))
        self.pempty_stack_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))

        self._input_dropout = Dropout(input_dropout)

        self.buffer = StackRnn(input_size=node_dim,
                               hidden_size=self.hidden_dim,
                               num_layers=num_layers,
                               recurrent_dropout_probability=recurrent_dropout_probability,
                               layer_dropout_probability=layer_dropout_probability,
                               same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.stack = StackRnn(input_size=node_dim,
                              hidden_size=self.hidden_dim,
                              num_layers=num_layers,
                              recurrent_dropout_probability=recurrent_dropout_probability,
                              layer_dropout_probability=layer_dropout_probability,
                              same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.action_stack = StackRnn(input_size=self.action_dim,
                                     hidden_size=self.hidden_dim,
                                     num_layers=num_layers,
                                     recurrent_dropout_probability=recurrent_dropout_probability,
                                     layer_dropout_probability=layer_dropout_probability,
                                     same_dropout_mask_per_instance=same_dropout_mask_per_instance)
        initializer(self)
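
In this transition-based parser, an action is chosen by concatenating summaries of the stack, buffer and action history with a ratio feature, projecting to a hidden state, and then to action logits (the p_s2h and p_act layers above). A minimal sketch of that scoring step, with invented shapes and random tensors standing in for the StackRnn outputs:

import torch
import torch.nn as nn

# Illustrative sizes only; the real inputs come from the StackRnn summaries.
hidden_dim, ratio_dim, num_actions, batch = 64, 1, 12, 2

p_s2h = nn.Linear(hidden_dim * 3 + ratio_dim, hidden_dim)  # parser state -> hidden
p_act = nn.Linear(hidden_dim + ratio_dim, num_actions)     # hidden -> action scores

stack_top = torch.randn(batch, hidden_dim)    # stands in for the stack summary
buffer_top = torch.randn(batch, hidden_dim)   # stands in for the buffer summary
action_top = torch.randn(batch, hidden_dim)   # stands in for the action history
ratio = torch.rand(batch, ratio_dim)          # e.g. proportion of tokens consumed

state = torch.cat([stack_top, buffer_top, action_top, ratio], dim=-1)
hidden = torch.relu(p_s2h(state))
action_logits = p_act(torch.cat([hidden, ratio], dim=-1))
print(action_logits.shape)  # (2, 12): scored against the legal transitions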
Example #21
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 target_encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 target_concat_text_embedding: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 word_dropout: float = 0.0,
                 dropout: float = 0.0) -> None:
        '''
        :param vocab: vocab : A Vocabulary, required in order to compute sizes 
                              for input/output projections.
        :param text_field_embedder: Used to embed the text and target text if
                                    target_field_embedder is None but the 
                                    target_encoder is not None.
        :param text_encoder: Sequence Encoder that will create the 
                             representation of each token in the context 
                             sentence.
        :param target_encoder: Encoder that will create the representation of 
                               target text tokens.
        :param feedforward: An optional feed forward layer to apply after
                            either the text encoder if target encoder is None. 
                            Else it would be after the target and the text 
                            encoded representations have been concatenated.
        :param target_field_embedder: Used to embed the target text to give as 
                                      input to the target_encoder. Thus this 
                                      allows a separate embedding for text and 
                                      target text.
        :param target_concat_text_embedding: Whether or not the target should be 
                                             concatenated to each word 
                                             embedding within the text before 
                                             being encoded.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param word_dropout: Dropout that is applied after the embedding of the 
                             tokens/words. It will drop entire words with this 
                             probability.
        :param dropout: To apply dropout after each layer apart from the last 
                        layer. All dropout that is applied to time-based data 
                        will be `variational dropout`_; all else will be 
                        standard dropout.
        
        This class is all based around the following paper `Attention-based 
        LSTM for Aspect-level Sentiment Classification 
        <https://www.aclweb.org/anthology/D16-1058>`_. The default model here 
        is the equivalent to the AT-LSTM within this paper (Figure 2). If the 
        `target_concat_text_embedding` argument is `True` then the model becomes 
        the ATAE-LSTM within the cited paper (Figure 3).

        The only difference between this model and the attention based models 
        in the paper is that the final sentence representation is `r` rather 
        than `h* = tanh(Wpr + WxhN)`, as we found this projection did not help 
        the performance.

        .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        '''
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.target_encoder = target_encoder
        self.feedforward = feedforward

        target_text_encoder_dim = (target_encoder.get_output_dim() +
                                   text_encoder.get_output_dim())
        self.encoded_target_text_fusion = TimeDistributed(
            Linear(target_text_encoder_dim, target_text_encoder_dim))
        self.attention_vector = Parameter(
            torch.Tensor(target_text_encoder_dim))
        self.attention_layer = DotProductAttention(normalize=True)

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = text_encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
        for label_index, label_name in label_index_name.items():
            label_name = f'F1_{label_name.capitalize()}'
            self.f1_metrics[label_name] = F1Measure(label_index)

        self._word_dropout = WordDrouput(word_dropout)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)

        self.target_concat_text_embedding = target_concat_text_embedding
        self.loss = torch.nn.CrossEntropyLoss()

        # Ensure the text encoder has the correct input dimension
        if target_concat_text_embedding:
            text_encoder_expected_in = (text_field_embedder.get_output_dim() +
                                        target_encoder.get_output_dim())
            check_dimensions_match(
                text_encoder_expected_in, text_encoder.get_input_dim(),
                "text field embedding dim + target encoder output dim",
                "text encoder input dim")
        else:
            check_dimensions_match(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim(),
                                   "text field embedding dim",
                                   "text encoder input dim")
        # Ensure that the dimensions of the target or text field embedder and
        # the target encoder match
        target_field_embedder_dim = text_field_embedder.get_output_dim()
        target_field_error = "text field embedding dim"
        if self.target_field_embedder:
            target_field_embedder_dim = target_field_embedder.get_output_dim()
            target_field_error = "target field embedding dim"

        check_dimensions_match(target_field_embedder_dim,
                               target_encoder.get_input_dim(),
                               target_field_error, "target encoder input dim")
        self.reset_parameters()
        initializer(self)
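
The docstring above describes AT-LSTM/ATAE-LSTM style attention: the pooled target is concatenated onto each encoded context token, passed through a fused projection, and scored against a learned attention vector to pool the sentence into `r`. The following is only a simplified sketch of that pooling (shapes and the exact scoring function are assumptions, not the model's code):

import torch
import torch.nn as nn

# Illustrative shapes; the real model uses AllenNLP encoders and attention.
batch, seq_len, text_dim, target_dim = 2, 9, 32, 16
fused_dim = text_dim + target_dim

encoded_text = torch.randn(batch, seq_len, text_dim)  # hidden states of the text encoder
target_vec = torch.randn(batch, target_dim)           # pooled target representation

# Concatenate the target onto every time step before computing attention.
target_rep = target_vec.unsqueeze(1).expand(-1, seq_len, -1)
fused = torch.cat([encoded_text, target_rep], dim=-1)  # (batch, seq, fused_dim)

fusion = nn.Linear(fused_dim, fused_dim)               # like the TimeDistributed fusion layer
attention_vector = nn.Parameter(torch.randn(fused_dim))

scores = fusion(fused) @ attention_vector              # (batch, seq)
weights = torch.softmax(scores, dim=-1)
sentence_rep = (weights.unsqueeze(-1) * encoded_text).sum(dim=1)  # plays the role of r
print(sentence_rep.shape)  # (2, 32)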
Example #22
    def __init__(self,
                 vocab: Vocabulary,
                 context_field_embedder: TextFieldEmbedder,
                 context_encoder: Seq2SeqEncoder,
                 target_encoder: Seq2SeqEncoder,
                 feedforward: Optional[FeedForward] = None,
                 context_attention_activation_function: str = 'tanh',
                 target_attention_activation_function: str = 'tanh',
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 inter_target_encoding: Optional[InterTarget] = None,
                 target_position_weight: Optional[TargetPositionWeight] = None,
                 target_position_embedding: Optional[TextFieldEmbedder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels',
                 loss_weights: Optional[List[float]] = None,
                 use_target_sequences: bool = False) -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes 
                      for input/output projections.
        :param context_field_embedder: Used to embed the context/sentence and 
                                       target text if target_field_embedder is 
                                       None but the target_encoder is NOT None.
        :param context_encoder: Encoder that will create the representation 
                                for the sentence/context that the target 
                                appears in.
        :param target_encoder: Encoder that will create the representation of 
                               target text tokens.
        :param feedforward: An optional feed forward layer to apply after the 
                            encoder.
        :param context_attention_activation_function: The attention method to be
                                                      used on the context.
        :param target_attention_activation_function: The attention method to be
                                                     used on the target text.
        :param target_field_embedder: Used to embed the target text to give as 
                                      input to the target_encoder. Thus this 
                                      allows a separate embedding for context 
                                      and target text.
        :param inter_target_encoding: Whether to model the relationship between 
                                      targets/aspect.
        :param target_position_weight: Whether to weight the output of the 
                                       context encoding based on the position 
                                       of the tokens to the target tokens. This 
                                       weighting is applied before any attention 
                                       is applied.
        :param target_position_embedding: Whether or not to concatenate a position
                                          embedding on to the input embeddings 
                                          before being an input to the 
                                          `context_encoder`.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param dropout: To apply dropout after each layer apart from the last 
                        layer. All dropout that is applied to timebased data 
                        will be `variational dropout 
                        <https://arxiv.org/abs/1512.05287>`_; all else will be 
                        standard dropout. Variational dropout is applied to the 
                        target vectors after they have been processed by the 
                        `inter_target_encoding` if this is set.
        :param label_name: Name of the label name space.
        :param loss_weights: The amount of weight to give the negative, neutral,
                             positive classes respectively. e.g. [0.2, 0.5, 0.3]
                             would weight the negative class by a factor of 
                             0.2, neutral by 0.5 and positive by 0.3. NOTE It 
                             assumes the sentiment labels are the following:
                             [negative, neutral, positive].
        :param use_target_sequences: Whether or not to use target tokens within 
                                     the context as the target's contextualized 
                                     word representation (CWR). This only makes 
                                     sense if the word representation, i.e. the 
                                     field embedder, is a contextualized embedder 
                                     such as ELMo. It also requires that the 
                                     dataset reader has the `target_sequences` 
                                     argument set to True. Another reason to use 
                                     this even without CWR is to obtain 
                                     contextualised POS/Dep tags etc.
        
        This is based on the `Interactive Attention Networks for Aspect-Level 
        Sentiment Classification 
        <https://www.ijcai.org/proceedings/2017/0568.pdf>`_. The model is also 
        known as `IAN`.

         .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        '''

        self.label_name = label_name
        self.context_field_embedder = context_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.target_encoder = target_encoder
        self.context_encoder = context_encoder
        self.feedforward = feedforward
        self._use_target_sequences = use_target_sequences
        if self._use_target_sequences and self.target_field_embedder:
            raise ConfigurationError(
                '`use_target_sequences` cannot be True at'
                ' the same time as a value for '
                '`target_field_embedder` as the embeddings'
                ' come from the context and not a separate embedder')

        context_attention_activation_function = Activation.by_name(
            f'{context_attention_activation_function}')()
        target_attention_activation_function = Activation.by_name(
            f'{target_attention_activation_function}')()

        target_encoder_out = self.target_encoder.get_output_dim()
        context_encoder_out = self.context_encoder.get_output_dim()
        self.context_attention_layer = BilinearAttention(
            target_encoder_out,
            context_encoder_out,
            context_attention_activation_function,
            normalize=True)
        self.target_attention_layer = BilinearAttention(
            context_encoder_out,
            target_encoder_out,
            target_attention_activation_function,
            normalize=True)
        # To be used as the pooled input into the target attention layer as
        # the query vector.
        self._context_averager = BagOfEmbeddingsEncoder(context_encoder_out,
                                                        averaged=True)
        # To be used as the pooled input into the context attention layer as
        # the query vector.
        self._target_averager = BagOfEmbeddingsEncoder(target_encoder_out,
                                                       averaged=True)

        # Set the loss weights (have to sort them by order of label index in
        # the vocab)
        self.loss_weights = target_sentiment.util.loss_weight_order(
            self, loss_weights, self.label_name)

        # Inter target modelling
        self.inter_target_encoding = inter_target_encoding

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        elif self.inter_target_encoding is not None:
            output_dim = self.inter_target_encoding.get_output_dim()
        else:
            output_dim = target_encoder_out + context_encoder_out
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        for label_index, _label_name in label_index_name.items():
            _label_name = f'F1_{_label_name.capitalize()}'
            self.f1_metrics[_label_name] = F1Measure(label_index)
        # Dropout
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)

        # position embeddings
        self.target_position_embedding = target_position_embedding
        # Ensure that the dimensions of the text field embedder and text encoder
        # match
        if self.target_position_embedding:
            context_and_position_dim = (
                context_field_embedder.get_output_dim() +
                self.target_position_embedding.get_output_dim())
            check_dimensions_match(
                context_and_position_dim, context_encoder.get_input_dim(),
                "context field embedding dim and the position embeddings",
                "text encoder input dim")
        else:
            check_dimensions_match(context_field_embedder.get_output_dim(),
                                   context_encoder.get_input_dim(),
                                   "context field embedding dim",
                                   "text encoder input dim")
        # Ensure that the dimensions of the target or text field embedder and
        # the target encoder match
        target_field_embedder_dim = context_field_embedder.get_output_dim()
        target_field_error = "context field embedding dim"
        if self.target_field_embedder:
            target_field_embedder_dim = target_field_embedder.get_output_dim()
            target_field_error = "target field embedding dim"

        check_dimensions_match(target_field_embedder_dim,
                               target_encoder.get_input_dim(),
                               target_field_error, "target encoder input dim")

        if self.inter_target_encoding:
            check_dimensions_match(target_encoder_out + context_encoder_out,
                                   self.inter_target_encoding.get_input_dim(),
                                   'Output from target and context encoders',
                                   'Inter Target encoder input dim')

        self.target_position_weight = target_position_weight
        # TimeDistributed anything that is related to the targets.
        if self.feedforward is not None:
            self.feedforward = TimeDistributed(self.feedforward)
        self.label_projection = TimeDistributed(self.label_projection)
        self._time_naive_dropout = TimeDistributed(self._naive_dropout)

        initializer(self)
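
For the IAN model above, the averaged target acts as the query for attention over the context and the averaged context as the query for attention over the target; the two pooled vectors are concatenated before the label projection. A rough sketch of that interactive attention with hand-rolled bilinear scoring (all weights and shapes are invented; the real model uses AllenNLP's BilinearAttention):

import torch

# Invented shapes; the real model uses AllenNLP encoders and attention layers.
batch, ctx_len, tgt_len, ctx_dim, tgt_dim = 2, 10, 3, 32, 16

context = torch.randn(batch, ctx_len, ctx_dim)  # encoded context tokens
target = torch.randn(batch, tgt_len, tgt_dim)   # encoded target tokens

ctx_avg = context.mean(dim=1)                   # query for attending over the target
tgt_avg = target.mean(dim=1)                    # query for attending over the context

# Bilinear scoring: score(q, k) = q^T W k, one weight matrix per direction.
W_ctx = torch.randn(tgt_dim, ctx_dim)           # target query attends over context
W_tgt = torch.randn(ctx_dim, tgt_dim)           # context query attends over target

ctx_scores = torch.einsum("bd,de,ble->bl", tgt_avg, W_ctx, context)
tgt_scores = torch.einsum("bd,de,ble->bl", ctx_avg, W_tgt, target)

ctx_pooled = (torch.softmax(ctx_scores, -1).unsqueeze(-1) * context).sum(1)
tgt_pooled = (torch.softmax(tgt_scores, -1).unsqueeze(-1) * target).sum(1)

features = torch.cat([tgt_pooled, ctx_pooled], dim=-1)  # fed to the label projection
print(features.shape)  # (2, 48)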
Example #23
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        lemma_tag_embedding: Embedding = None,
        upos_tag_embedding: Embedding = None,
        xpos_tag_embedding: Embedding = None,
        feats_tag_embedding: Embedding = None,
        head_information_embedding: Embedding = None,
        head_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("deps")
        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                    tag_representation_dim,
                                                    label_dim=num_labels)

        self._lemma_tag_embedding = lemma_tag_embedding or None
        self._upos_tag_embedding = upos_tag_embedding or None
        self._xpos_tag_embedding = xpos_tag_embedding or None
        self._feats_tag_embedding = feats_tag_embedding or None
        self._head_tag_embedding = head_tag_embedding or None
        self._head_information_embedding = head_information_embedding or None

        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        # add a head sentinel to accommodate for extra root token in EUD graphs
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if lemma_tag_embedding is not None:
            representation_dim += lemma_tag_embedding.get_output_dim()
        if upos_tag_embedding is not None:
            representation_dim += upos_tag_embedding.get_output_dim()
        if xpos_tag_embedding is not None:
            representation_dim += xpos_tag_embedding.get_output_dim()
        if feats_tag_embedding is not None:
            representation_dim += feats_tag_embedding.get_output_dim()
        if head_tag_embedding is not None:
            representation_dim += head_tag_embedding.get_output_dim()
        if head_information_embedding is not None:
            representation_dim += head_information_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self._enhanced_attachment_scores = EnhancedAttachmentScores()
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
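
Because this graph parser scores every head-dependent pair independently and trains arcs with BCEWithLogitsLoss, a natural reading of edge_prediction_threshold is that an edge is predicted wherever the sigmoid of its arc score exceeds the threshold. A tiny sketch of that decision rule (random scores; the example's actual decoding in the forward pass is not shown here):

import torch

# Random arc logits for a one-sentence batch of length 5 (head x dependent).
arc_logits = torch.randn(1, 5, 5)
edge_prediction_threshold = 0.5  # must lie strictly between 0 and 1

edge_probs = torch.sigmoid(arc_logits)
predicted_edges = edge_probs > edge_prediction_threshold  # boolean adjacency matrix
print(predicted_edges.shape, predicted_edges.dtype)       # (1, 5, 5), torch.bool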
Example #24
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, AutoModel],
        mismatched_embedder: TokenEmbedder = None,
        lp: bool = False,
        lpsmap: bool = False,
        lpsmap_core_roles_only: bool = True,
        validation_inference: bool = True,
        batch_size: int = None,
        encoder: Seq2SeqEncoder = None,
        reinitialize_pos_embedding: bool = False,
        embedding_dropout: float = 0.0,
        mlp_hidden_size: int = 300,
        initializer: InitializerApplicator = InitializerApplicator(),
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        label_encoding: str = "BIO",
        constrain_crf_decoding: bool = None,
        include_start_end_transitions: bool = True,
        label_namespace: str = "labels",
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        if isinstance(bert_model, str):
            if mismatched_embedder is None:
                self.bert_model = AutoModel.from_pretrained(bert_model)
            self.bert_config = AutoConfig.from_pretrained(bert_model)
        else:
            if mismatched_embedder is None:
                self.bert_model = bert_model
            self.bert_config = bert_model.config
        if reinitialize_pos_embedding:
            self.bert_model._init_weights(
                self.bert_model.embeddings.position_embeddings)
            # self.bert_model._init_weights(self.bert_model.embeddings.token_type_embeddings)
        if mismatched_embedder is not None:
            self.bert_model = mismatched_embedder

        self._label_namespace = label_namespace
        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path,
                                             ignore_classes=["V"])
        else:
            self.span_metric = None

        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None

        self.label_encoding = label_encoding
        self.constrain_crf_decoding = constrain_crf_decoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but no label_encoding was specified."
                )
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_classes,
            constraints,
            include_start_end_transitions=include_start_end_transitions)
        self._encoder = encoder
        representation_size = self.bert_config.hidden_size
        if self.bert_config.type_vocab_size == 1:
            representation_size = self.bert_config.hidden_size * 2
        if encoder is None:
            self.tag_projection_layer = torch.nn.Sequential(
                Linear(representation_size, mlp_hidden_size), torch.nn.ReLU(),
                Linear(mlp_hidden_size, self.num_classes))
        else:
            self.tag_projection_layer = torch.nn.Sequential(
                Linear(encoder.get_output_dim() * 2, mlp_hidden_size),
                torch.nn.ReLU(), Linear(mlp_hidden_size, self.num_classes))

        self.embedding_dropout = Dropout(p=embedding_dropout)
        self.predicate_embedding = torch.nn.Embedding(num_embeddings=2,
                                                      embedding_dim=10)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric
        self._lp = lp
        self._lpsmap = lpsmap
        self._lpsmap_core_only = lpsmap_core_roles_only
        self._val_inference = validation_inference
        if self._lpsmap:
            self._core_roles = []
            for i in range(6):
                try:
                    self._core_roles.append(
                        self.vocab.get_token_index(
                            "B-ARG" + str(i), namespace=self._label_namespace))
                except:
                    logger.info("B-ARG" + str(i) + " is not in labels")
            self._r_roles = []
            self._c_roles = []
            for i in range(self.num_classes):
                token = self.vocab.get_token_from_index(
                    i, namespace=self._label_namespace)
                if token[:4] == "B-R-" and token[4:] != "ARG1":
                    try:
                        base_arg_index = self.vocab.get_token_index(
                            "B-" + token[4:], namespace=self._label_namespace)
                        self._r_roles.append((i, base_arg_index))
                    except:
                        logger.info("B-" + token[4:] + " is not in labels")
                elif token[:4] == "B-C-" and token[4:] != "ARG1":
                    try:
                        base_arg_index = self.vocab.get_token_index(
                            "B-" + token[4:], namespace=self._label_namespace)
                        self._c_roles.append((i, base_arg_index))
                    except:
                        logger.info("B-" + token[4:] + " is not in labels")
            # self._core_roles = [index for index in range(self.vocab.get_vocab_size("labels")) if index in [self.vocab.get_token_index("B-ARG"+str(i), namespace="labels") for i in range(3)]]
            self.lpsmap = None
        if lp:
            """self._layer_list = []
            self.length_map = {}
            self.lengths = []
            for max_sequence_length in [70, 100, 200, 300]:
                x = cp.Variable((max_sequence_length, self.vocab.get_vocab_size(namespace="labels")))
                S = cp.Parameter((max_sequence_length, self.vocab.get_vocab_size(namespace="labels")))
                constraints = [x >= 0, cp.sum(x, axis=1) == 1]
                objective = cp.Maximize(cp.sum(cp.multiply(x, S)))
                problem = cp.Problem(objective, constraints)
                assert problem.is_dpp()
                lp_layer = CvxpyLayer(problem, parameters=[S], variables=[x])
                self._layer_list.append(lp_layer)
                self.length_map[max_sequence_length] = len(self._layer_list)-1
                self.lengths.append(max_sequence_length)
            self._layer_list = torch.nn.ModuleList(self._layer_list)"""
            pass
        initializer(self)
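
When constrain_crf_decoding is enabled above, the label vocabulary plus the BIO encoding determine which tag-to-tag transitions the CRF may use (for instance, I-ARG0 can only follow B-ARG0 or I-ARG0). A hedged, hand-rolled sketch of computing such constraints for a toy label set, without AllenNLP's allowed_transitions helper (which additionally handles START/END transitions):

# Toy BIO label space; indices mimic vocab.get_index_to_token_vocabulary("labels").
labels = {0: "O", 1: "B-ARG0", 2: "I-ARG0", 3: "B-ARG1", 4: "I-ARG1"}

def bio_allowed(prev_label: str, next_label: str) -> bool:
    """Legal BIO transition: I-X may only follow B-X or I-X."""
    if not next_label.startswith("I-"):
        return True
    entity = next_label[2:]
    return prev_label in (f"B-{entity}", f"I-{entity}")

constraints = [
    (i, j)
    for i, prev in labels.items()
    for j, nxt in labels.items()
    if bio_allowed(prev, nxt)
]
print(len(constraints), "allowed transitions out of", len(labels) ** 2)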
Example #25
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("labels")
        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                    tag_representation_dim,
                                                    label_dim=num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self._unlabelled_f1 = F1Measure(positive_label=1)
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
Example #26
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 word_dim: int,
                 hidden_dim: int,
                 action_dim: int,
                 num_layers: int,
                 mces_metric: Metric = None,
                 recurrent_dropout_probability: float = 0.0,
                 layer_dropout_probability: float = 0.0,
                 same_dropout_mask_per_instance: bool = True,
                 input_dropout: float = 0.0,
                 lemma_text_field_embedder: TextFieldEmbedder = None,
                 pos_tag_embedding: Embedding = None,
                 action_embedding: Embedding = None,
                 frame_tagger_encoder: Seq2SeqEncoder = None,
                 pos_tagger_encoder: Seq2SeqEncoder = None,
                 node_label_tagger_encoder: Seq2SeqEncoder = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(TransitionParser, self).__init__(vocab, regularizer)

        self._unlabeled_correct = 0
        self._labeled_correct = 0
        self._total_edges_predicted = 0
        self._total_edges_actual = 0
        self._exact_unlabeled_correct = 0
        self._exact_labeled_correct = 0
        self._total_sentences = 0

        self.num_actions = vocab.get_vocab_size('actions')
        self.text_field_embedder = text_field_embedder
        self.pos_tag_embedding = pos_tag_embedding
        self._mces_metric = mces_metric

        self.action_embedding = action_embedding

        if action_embedding is None:
            self.action_embedding = Embedding(num_embeddings=self.num_actions,
                                              embedding_dim=action_dim,
                                              trainable=False)
        # syntactic composition
        self.p_comp = torch.nn.Linear(hidden_dim * 4, word_dim)
        # parser state to hidden
        self.p_s2h = torch.nn.Linear(hidden_dim * 4, hidden_dim)
        # hidden to action
        self.p_act = torch.nn.Linear(hidden_dim, self.num_actions)
        self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(hidden_dim))
        self.proot_stack_emb = torch.nn.Parameter(torch.randn(word_dim))
        self.pempty_action_emb = torch.nn.Parameter(torch.randn(hidden_dim))
        self.pempty_deque_emb = torch.nn.Parameter(torch.randn(hidden_dim))

        self._input_dropout = Dropout(input_dropout)

        self.frame_tagger_encoder = frame_tagger_encoder
        self.pos_tagger_encoder = pos_tagger_encoder
        self.node_label_tagger_encoder = node_label_tagger_encoder

        self.buffer = StackRnn(
            input_size=word_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            recurrent_dropout_probability=recurrent_dropout_probability,
            layer_dropout_probability=layer_dropout_probability,
            same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.stack = StackRnn(
            input_size=word_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            recurrent_dropout_probability=recurrent_dropout_probability,
            layer_dropout_probability=layer_dropout_probability,
            same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.deque = StackRnn(
            input_size=word_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            recurrent_dropout_probability=recurrent_dropout_probability,
            layer_dropout_probability=layer_dropout_probability,
            same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.action_stack = StackRnn(
            input_size=action_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            recurrent_dropout_probability=recurrent_dropout_probability,
            layer_dropout_probability=layer_dropout_probability,
            same_dropout_mask_per_instance=same_dropout_mask_per_instance)

        self.frame_tagger = SimpleTagger(
            vocab=vocab,
            text_field_embedder=text_field_embedder,
            encoder=self.frame_tagger_encoder,
            label_namespace='frame')

        self.pos_tagger = SimpleTagger(vocab=vocab,
                                       text_field_embedder=text_field_embedder,
                                       encoder=self.pos_tagger_encoder,
                                       label_namespace='pos_tag')

        self.node_label_tagger = SimpleTagger(
            vocab=vocab,
            text_field_embedder=text_field_embedder,
            encoder=self.node_label_tagger_encoder,
            label_namespace='node_label')

        initializer(self)
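
The four StackRnn modules each summarise one part of the parser state (buffer, stack, deque, and action history), which is why p_s2h takes an input of size hidden_dim * 4 before p_act maps to action logits. The sketch below shows only that state-to-action step with plain tensors; the state summaries, the ReLU nonlinearity, and the toy sizes are assumptions for illustration and not the StackRnn API.

import torch

hidden_dim, num_actions = 16, 7            # toy sizes
p_s2h = torch.nn.Linear(hidden_dim * 4, hidden_dim)
p_act = torch.nn.Linear(hidden_dim, num_actions)

# Stand-in top-of-structure summaries from buffer, stack, deque and action history.
buffer_top = torch.randn(hidden_dim)
stack_top = torch.randn(hidden_dim)
deque_top = torch.randn(hidden_dim)
action_top = torch.randn(hidden_dim)

state = torch.cat([buffer_top, stack_top, deque_top, action_top])  # (hidden_dim * 4,)
hidden = torch.relu(p_s2h(state))                                  # parser state to hidden
action_log_probs = torch.log_softmax(p_act(hidden), dim=-1)        # hidden to action
print(action_log_probs.shape)  # torch.Size([7])
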
Example #27
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        model_name: str = None,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        use_mst_decoding_for_validation: bool = True,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        word_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        if model_name:
            from src.data.token_indexers import PretrainedAutoTokenizer
            self._tokenizer = PretrainedAutoTokenizer.load(model_name)

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._word_dropout = word_dropout
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )

        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation: {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
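
In this biaffine parser the arc attention selects a head for every word, and tag_bilinear then scores dependency labels for each selected (head, child) pair. The sketch below illustrates only that labelling step with torch.nn.Bilinear; the gather over predicted heads and the toy sizes are assumptions for illustration, not the model's forward pass.

import torch

batch, seq_len, tag_dim, num_labels = 2, 6, 10, 4   # toy sizes
tag_bilinear = torch.nn.Bilinear(tag_dim, tag_dim, num_labels)

head_tag = torch.randn(batch, seq_len, tag_dim)   # stands in for head_tag_feedforward output
child_tag = torch.randn(batch, seq_len, tag_dim)  # stands in for child_tag_feedforward output
predicted_heads = torch.randint(0, seq_len, (batch, seq_len))  # one head index per word

# Gather the representation of each word's predicted head, then score labels pairwise.
index = predicted_heads.unsqueeze(-1).expand(-1, -1, tag_dim)
selected_heads = head_tag.gather(1, index)               # (batch, seq_len, tag_dim)
label_logits = tag_bilinear(selected_heads, child_tag)   # (batch, seq_len, num_labels)
print(label_logits.shape)  # torch.Size([2, 6, 4])
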
Example #28
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 span_feedforward: FeedForward,
                 binary_feature_dim: int,
                 max_span_width: int,
                 binary_feature_size: int,
                 distance_feature_size: int,
                 embedding_dropout: float = 0.2,
                 label_namespace: str = "labels",
                 fast_mode: bool = False,
                 loss_type: str = "logloss",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SemiCrfSemanticRoleLabeler, self).__init__(vocab, regularizer)

        # Base token-level encoding.
        self.text_field_embedder = text_field_embedder
        self.embedding_dropout = Dropout(p=embedding_dropout)
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.stacked_encoder = stacked_encoder
        if (text_field_embedder.get_output_dim() + binary_feature_dim
                != stacked_encoder.get_input_dim()):
            raise ConfigurationError(
                "The SRL Model uses a binary verb indicator feature, meaning "
                "the input dimension of the stacked_encoder must be equal to "
                "the output dimension of the text_field_embedder + "
                "binary_feature_dim.")

        # Span-level encoding.
        self.max_span_width = max_span_width
        self.span_width_embedding = Embedding(max_span_width,
                                              binary_feature_size)
        # Based on the average sentence length in FN train. TODO(Swabha): find out for OntoNotes.
        self.span_distance_bin = 25
        self.span_distance_embedding = Embedding(self.span_distance_bin,
                                                 distance_feature_size)
        self.span_direction_embedding = Embedding(2, binary_feature_size)
        self.span_feedforward = TimeDistributed(span_feedforward)
        self.head_scorer = TimeDistributed(
            torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        self.not_a_span_tag = self.vocab.get_token_index("*", label_namespace)
        self.outside_span_tag = self.vocab.get_token_index(
            "O", label_namespace)
        self.semi_crf = SemiMarkovConditionalRandomField(
            num_tags=self.num_classes,
            max_span_width=max_span_width,
            loss_type=loss_type,
            default_tag=self.not_a_span_tag,
            outside_span_tag=self.outside_span_tag)

        # Topmost MLP.
        self.tag_projection_layer = TimeDistributed(
            Linear(span_feedforward.get_output_dim(), self.num_classes))

        # Evaluation.
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.non_bio_span_metric = NonBioSpanBasedF1Measure(
            vocab, tag_namespace=label_namespace, ignore_classes=["V", "*"])

        # Mode for the model, if turned on it only evaluates on dev and calculates loss for train.
        self.fast_mode = fast_mode
        initializer(self)
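
Each candidate span receives a width embedding, a bucketed distance-to-predicate embedding (capped by span_distance_bin = 25), and a direction embedding before the span feedforward. The toy sketch below shows how those three lookups might be combined for a single span; the exact bucketing and direction rules are assumptions inferred from the constants above, not the repository's featurisation code.

import torch

max_span_width, distance_bins = 15, 25
feature_size = 8                       # toy embedding size
width_emb = torch.nn.Embedding(max_span_width, feature_size)
distance_emb = torch.nn.Embedding(distance_bins, feature_size)
direction_emb = torch.nn.Embedding(2, feature_size)

span_start, span_end, predicate_index = 3, 6, 10   # one toy span and its predicate

width = span_end - span_start                                   # 0-based width index
distance = min(abs(span_start - predicate_index), distance_bins - 1)
direction = 1 if span_start >= predicate_index else 0           # span left/right of predicate

features = torch.cat([
    width_emb(torch.tensor(width)),
    distance_emb(torch.tensor(distance)),
    direction_emb(torch.tensor(direction)),
])
print(features.shape)  # torch.Size([24])
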
Example #29
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 left_text_encoder: Seq2VecEncoder,
                 right_text_encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 target_encoder: Optional[Seq2VecEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 word_dropout: float = 0.0,
                 dropout: float = 0.0) -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes
                      for input/output projections.
        :param text_field_embedder: Used to embed the text, and also the target
                                    text if target_field_embedder is None but
                                    the target_encoder is not None.
        :param left_text_encoder: Encoder that creates the representation of
                                  the tokens left of the target (and of the
                                  target itself, if the dataset reader
                                  includes it).
        :param right_text_encoder: Encoder that creates the representation of
                                   the tokens right of the target (and of the
                                   target itself, if the dataset reader
                                   includes it).
        :param feedforward: An optional feed-forward layer applied after the
                            encoders.
        :param target_field_embedder: Used to embed the target text to give as
                                      input to the target_encoder, allowing a
                                      separate embedding for text and target
                                      text.
        :param target_encoder: Encoder that creates the representation of the
                               target text tokens.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the
                            regularization penalty during training.
        :param word_dropout: Dropout applied after the embedding of the
                             tokens/words; it drops entire words with this
                             probability.
        :param dropout: Dropout applied after each layer apart from the last
                        one. Dropout applied to time-based data uses
                        `variational dropout`_; everything else uses standard
                        dropout.

        Without the target encoder this is the standard TDLSTM method from
        `Effective LSTMs for Target-Dependent Sentiment Classification`_.
        With the target encoder it becomes the TCLSTM method from the same
        paper. A toy sketch of the left/right context wiring follows this
        constructor.

        .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        .. _Effective LSTMs for Target-Dependent Sentiment Classification:
           https://aclanthology.coli.uni-saarland.de/papers/C16-1311/c16-1311
        '''

        self.text_field_embedder = text_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.left_text_encoder = left_text_encoder
        self.right_text_encoder = right_text_encoder
        self.target_encoder = target_encoder
        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            left_out_dim = self.left_text_encoder.get_output_dim()
            right_out_dim = self.right_text_encoder.get_output_dim()
            output_dim = left_out_dim + right_out_dim
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
        for label_index, label_name in label_index_name.items():
            label_name = f'F1_{label_name.capitalize()}'
            self.f1_metrics[label_name] = F1Measure(label_index)
        # Dropout
        self._word_dropout = WordDrouput(word_dropout)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)

        self.loss = torch.nn.CrossEntropyLoss()

        # Ensure that the input to the right_text_encoder and left_text_encoder
        # is the size of the target encoder output plus the size of the text
        # embedding output.
        if self.target_encoder:
            right_text_out_dim = self.right_text_encoder.get_input_dim()
            left_text_out_dim = self.left_text_encoder.get_input_dim()

            target_dim = self.target_encoder.get_output_dim()
            text_dim = self.text_field_embedder.get_output_dim()
            total_out_dim = target_dim + text_dim
            config_err_msg = (
                "As the target is being encoded, the output of the target "
                "encoder is concatenated onto each word vector for the left "
                "and right contexts; therefore the input dimension of the "
                "right_text_encoder/left_text_encoder must equal the output "
                "dimension of the target encoder plus the dimension of the "
                "word embeddings for the left and right contexts.")

            if (total_out_dim != right_text_out_dim
                    or total_out_dim != left_text_out_dim):
                raise ConfigurationError(config_err_msg)
        # Ensure that the target field embedder has an output dimension the
        # same as the input dimension to the target encoder.
        if self.target_encoder and self.target_field_embedder:
            target_embed_out = self.target_field_embedder.get_output_dim()
            target_in = self.target_encoder.get_input_dim()
            config_embed_err_msg = ("The Target field embedder should have"
                                    " the same output size "
                                    f"{target_embed_out} as the input to "
                                    f"the target encoder {target_in}")
            if target_embed_out != target_in:
                raise ConfigurationError(config_embed_err_msg)

        initializer(self)
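
As the docstring explains, the left and right encoders each read their context (plus the target), and TCLSTM additionally concatenates the target encoding onto every context token, which is exactly what the dimension check at the end of this constructor enforces. Below is a toy sketch of that wiring in plain PyTorch; the LSTM encoders, toy sizes, and direct tensor inputs are stand-ins for the Seq2VecEncoder and embedder classes used above, not the model's own forward code.

import torch

embed_dim, target_dim, hidden_dim, num_classes = 12, 6, 16, 3   # toy sizes
left_tokens = torch.randn(1, 4, embed_dim)    # "<left context> <target>"
right_tokens = torch.randn(1, 3, embed_dim)   # "<target> <right context>", usually reversed
target_vec = torch.randn(1, target_dim)       # output of the target encoder (TCLSTM only)

# TCLSTM: tile the target vector onto every token, so each encoder sees embed_dim + target_dim.
left_in = torch.cat([left_tokens, target_vec.unsqueeze(1).expand(-1, 4, -1)], dim=-1)
right_in = torch.cat([right_tokens, target_vec.unsqueeze(1).expand(-1, 3, -1)], dim=-1)

left_lstm = torch.nn.LSTM(embed_dim + target_dim, hidden_dim, batch_first=True)
right_lstm = torch.nn.LSTM(embed_dim + target_dim, hidden_dim, batch_first=True)
_, (left_h, _) = left_lstm(left_in)
_, (right_h, _) = right_lstm(right_in)

# Concatenate the two final states and project to the label space.
label_projection = torch.nn.Linear(hidden_dim * 2, num_classes)
logits = label_projection(torch.cat([left_h[-1], right_h[-1]], dim=-1))
print(logits.shape)  # torch.Size([1, 3])
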
Example #30
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 arc_representation_dim: int,
                 tag_representation_dim: int,
                 r_lambda: float = 1e-2,
                 normalize: bool = False,
                 arc_feedforward: FeedForward = None,
                 tag_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 dep_tag_embedding: Embedding = None,
                 predicate_embedding: Embedding = None,
                 delta_type: str = "hinge_ce",
                 subtract_gold: float = 0.0,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 gumbel_t: float = 0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SRLGraphParserBase, self).__init__(vocab, regularizer)
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.r_lambda = r_lambda
        self.normalize = normalize
        self.as_base = False
        #   print ("predicates",self.vocab._index_to_token["predicates"])
        #   print ("tags",self.vocab._index_to_token["tags"])
        self.subtract_gold = subtract_gold
        self.delta_type = delta_type
        num_labels = self.vocab.get_vocab_size("tags")
        print("num_labels", num_labels)
        self.gumbel_t = gumbel_t
        node_dim = predicate_embedding.get_output_dim()
        encoder_dim = encoder.get_output_dim()
        self.arg_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        self.arg_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(
            tag_representation_dim,
            tag_representation_dim,
            label_dim=num_labels,
            use_input_biases=True)  #,activation=Activation.by_name("tanh")()

        self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                                Activation.by_name("elu")())
        self._pos_tag_embedding = pos_tag_embedding or None
        self._dep_tag_embedding = dep_tag_embedding or None
        self._pred_embedding = predicate_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        #   check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")

        self._labelled_f1 = IterativeLabeledF1Measure(
            negative_label=0,
            negative_pred=0,
            selected_metrics=["F", "p_F", "l_P", "l_R"])
        self._tag_loss = torch.nn.NLLLoss(reduction="none")  # ,ignore_index=-1
        self._sense_loss = torch.nn.NLLLoss(
            reduction="none")  # ,ignore_index=-1
        initializer(self)
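
Here tag_bilinear scores every (predicate, argument) pair once per label, producing a tensor with the label dimension second, which is the layout NLLLoss expects after a log-softmax over labels. The compact sketch below shows only that final loss step in plain PyTorch; the shapes, the log-softmax placement, and the all-ones mask are assumptions for illustration rather than the repository's loss code.

import torch

batch, seq_len, num_labels = 2, 5, 4          # toy sizes; label 0 plays the "no edge" role
label_scores = torch.randn(batch, num_labels, seq_len, seq_len)  # tag_bilinear-style output
gold_labels = torch.randint(0, num_labels, (batch, seq_len, seq_len))

# NLLLoss(reduction="none") consumes log-probabilities with the class dim second.
log_probs = torch.log_softmax(label_scores, dim=1)
tag_loss = torch.nn.NLLLoss(reduction="none")
per_edge_loss = tag_loss(log_probs, gold_labels)   # (batch, seq_len, seq_len)

# Mask out padded positions before reducing, e.g. with a sentence-length mask.
mask = torch.ones(batch, seq_len, seq_len)
loss = (per_edge_loss * mask).sum() / mask.sum()
print(per_edge_loss.shape, loss.item() >= 0)
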