def __init__(self,
             options_file: str,
             weight_file: str,
             num_output_representations: int,
             requires_grad: bool = False,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             vocab_to_cache: List[str] = None,
             keep_sentence_boundaries: bool = False,
             scalar_mix_parameters: List[float] = None,
             module: torch.nn.Module = None) -> None:
    super().__init__()

    logger.info("Initializing ELMo")
    if module is not None:
        if options_file is not None or weight_file is not None:
            raise ConfigurationError("Don't provide options_file or weight_file with module")
        self._elmo_lstm = module
    else:
        self._elmo_lstm = _ElmoBiLm(options_file,
                                    weight_file,
                                    requires_grad=requires_grad,
                                    vocab_to_cache=vocab_to_cache)
    self._has_cached_vocab = vocab_to_cache is not None
    self._keep_sentence_boundaries = keep_sentence_boundaries
    self._dropout = Dropout(p=dropout)
    self._scalar_mixes: Any = []
    for k in range(num_output_representations):
        scalar_mix = ScalarMix(self._elmo_lstm.num_layers,
                               do_layer_norm=do_layer_norm,
                               initial_scalar_parameters=scalar_mix_parameters,
                               trainable=scalar_mix_parameters is None)
        self.add_module("scalar_mix_{}".format(k), scalar_mix)
        self._scalar_mixes.append(scalar_mix)
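# --- Usage sketch for the Elmo module above. Illustrative only, not part of
# the model file: it assumes allennlp is installed and points at the published
# small-ELMo option/weight files from the AllenNLP release.
import torch
from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = ("https://allennlp.s3.amazonaws.com/elmo/2x1024_128_2048cnn_1xhighway/"
                "elmo_2x1024_128_2048cnn_1xhighway_options.json")
weight_file = ("https://allennlp.s3.amazonaws.com/elmo/2x1024_128_2048cnn_1xhighway/"
               "elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5")

elmo = Elmo(options_file, weight_file, num_output_representations=2, dropout=0.0)
character_ids = batch_to_ids([["First", "sentence", "."], ["Another", "one"]])
output = elmo(character_ids)
# output["elmo_representations"] is a list of num_output_representations
# tensors (one per ScalarMix registered above), each (batch, seq_len, dim).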
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False,
             srl_eval_path: str = DEFAULT_SRL_EVAL_PATH) -> None:
    super().__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")

    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None

    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric

    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           "text embedding dim + verb indicator embedding dim",
                           "encoder input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             #######
             config_path: str,
             vocab_path: str,
             model_path: str,
             #########
             predicate_feature_dim: int,
             dim_hidden: int = 100,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(SpanDetector, self).__init__(vocab, regularizer)

    ##############
    _, _, model_bert = get_bert_total(config_path, vocab_path, model_path)
    self.bert = model_bert
    # self.bert = bert_load_state_dict(
    #     self.bert,
    #     torch.load("bert-base-uncased/pytorch_model.bin", map_location='cpu'))
    ###############

    self.dim_hidden = dim_hidden
    self.text_field_embedder = text_field_embedder
    self.predicate_feature_embedding = Embedding(2, predicate_feature_dim)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self.threshold_metric = ThresholdMetric()
    self.stacked_encoder = stacked_encoder
    self.span_hidden = SpanRepAssembly(self.stacked_encoder.get_output_dim(),
                                       self.stacked_encoder.get_output_dim(),
                                       self.dim_hidden)
    self.pred = TimeDistributed(Linear(self.dim_hidden, 1))
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             span_feedforward: FeedForward,
             max_span_width: int,
             span_width_feature_size: int,
             label_namespace: str = "labels",
             embedding_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(ConstitLabeler, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self.stacked_encoder = stacked_encoder
    if text_field_embedder.get_output_dim() != stacked_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the phrase_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        stacked_encoder.get_input_dim()))

    self.max_span_width = max_span_width
    self.span_width_embedding = Embedding(max_span_width, span_width_feature_size)
    self.span_feedforward = TimeDistributed(span_feedforward)
    self.head_scorer = TimeDistributed(
        torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.tag_projection_layer = TimeDistributed(
        Linear(span_feedforward.get_output_dim(), self.num_classes))

    # self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)
    # Using accuracy as the metric; span F1 is overkill.
    self.span_metric = {"accuracy": CategoricalAccuracy()}
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             predicate_feature_dim: int,
             dim_hidden: int = 100,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(SpanDetector, self).__init__(vocab, regularizer)

    self.dim_hidden = dim_hidden
    self.text_field_embedder = text_field_embedder
    self.predicate_feature_embedding = Embedding(2, predicate_feature_dim)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self.threshold_metric = ThresholdMetric()
    self.stacked_encoder = stacked_encoder
    self.span_hidden = SpanRepAssembly(self.stacked_encoder.get_output_dim(),
                                       self.stacked_encoder.get_output_dim(),
                                       self.dim_hidden)
    self.pred = TimeDistributed(Linear(self.dim_hidden, 1))
def __init__(self,
             options_file: str,
             weight_file: str,
             num_output_representations: int,
             requires_grad: bool = False,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             module: torch.nn.Module = None) -> None:
    super(Elmo, self).__init__()

    logging.info("Initializing ELMo")
    if module is not None:
        if options_file is not None or weight_file is not None:
            raise ConfigurationError(
                "Don't provide options_file or weight_file with module")
        self._elmo_lstm = module
    else:
        self._elmo_lstm = _ElmoBiLm(options_file, weight_file,
                                    requires_grad=requires_grad)
    self._dropout = Dropout(p=dropout)
    self._scalar_mixes: Any = []
    for k in range(num_output_representations):
        scalar_mix = ScalarMix(self._elmo_lstm.num_layers,
                               do_layer_norm=do_layer_norm)
        self.add_module('scalar_mix_{}'.format(k), scalar_mix)
        self._scalar_mixes.append(scalar_mix)
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             attention: Attention = None,
             attention_function: SimilarityFunction = None,
             beam_size: int = None,
             target_namespace: str = "tokens",
             target_embedding_dim: int = None,
             scheduled_sampling_ratio: float = 0.,
             use_bleu: bool = True,
             emb_dropout: float = 0.5) -> None:
    super(Seq2Seq, self).__init__(vocab)
    self._target_namespace = target_namespace
    self._scheduled_sampling_ratio = scheduled_sampling_ratio

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # the end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)

    if use_bleu:
        pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                               self._target_namespace)  # pylint: disable=protected-access
        self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
    else:
        self._bleu = None
    self._token_based_metric = TokenSequenceAccuracy()

    # At prediction time, we use a beam search to find the most likely sequence of target tokens.
    beam_size = beam_size or 1
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size)

    # Dense embedding of source vocab tokens.
    self._source_embedder = source_embedder
    self._emb_dropout = Dropout(p=emb_dropout)

    # Encodes the sequence of source embeddings into a sequence of hidden states.
    self._encoder = encoder

    num_classes = self.vocab.get_vocab_size(self._target_namespace)

    # Attention mechanism applied to the encoder output for each step.
    if attention:
        if attention_function:
            raise ConfigurationError("You can only specify an attention module or an "
                                     "attention function, but not both.")
        self._attention = attention
    elif attention_function:
        self._attention = LegacyAttention(attention_function)
    else:
        self._attention = None

    # Dense embedding of vocab words in the target space.
    target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
    self._target_embedder = Embedding(num_classes, target_embedding_dim)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    self._encoder_output_dim = self._encoder.get_output_dim()
    self._decoder_output_dim = self._encoder_output_dim

    if self._attention:
        # If using attention, a weighted average over encoder outputs will be concatenated
        # to the previous target embedding to form the input to the decoder at each
        # time step.
        self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim
    else:
        # Otherwise, the input to the decoder is just the previous target embedding.
        self._decoder_input_dim = target_embedding_dim

    # We'll use an LSTM cell as the recurrent cell that produces a hidden state
    # for the decoder at each time step.
    # TODO (pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)

    # We project the hidden state from the decoder into the output vocabulary space
    # in order to get log probabilities of each target token, at each time step.
    self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
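# --- Shape sketch (not part of the model above): with attention enabled, the
# decoder input at each step is the attended encoder context concatenated with
# the previous target embedding, which is why _decoder_input_dim equals
# _decoder_output_dim + target_embedding_dim. All dimensions here are made up.
import torch

encoder_output_dim, target_embedding_dim, batch = 200, 100, 4
attended_context = torch.zeros(batch, encoder_output_dim)       # weighted avg of encoder outputs
previous_target_emb = torch.zeros(batch, target_embedding_dim)  # embedding of last predicted token
decoder_input = torch.cat([attended_context, previous_target_emb], dim=-1)
assert decoder_input.size(-1) == encoder_output_dim + target_embedding_dim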
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             span_feedforward: FeedForward,
             binary_feature_dim: int,
             max_span_width: int,
             binary_feature_size: int,
             distance_feature_size: int,
             ontology_path: str,
             embedding_dropout: float = 0.2,
             srl_label_namespace: str = "labels",
             constit_label_namespace: str = "constit_labels",
             fast_mode: bool = True,
             loss_type: str = "hamming",
             unlabeled_constits: bool = False,
             np_pp_constits: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(ScaffoldedFrameSrl, self).__init__(vocab, regularizer)

    # Base token-level encoding.
    self.text_field_embedder = text_field_embedder
    self.embedding_dropout = Dropout(p=embedding_dropout)
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.stacked_encoder = stacked_encoder
    if (text_field_embedder.get_output_dim() + binary_feature_dim
            != stacked_encoder.get_input_dim()):
        raise ConfigurationError(
            "The input dimension of the stacked_encoder must be equal to "
            "the output dimension of the text_field_embedder plus the "
            "binary feature dimension.")

    # Span-level encoding.
    self.max_span_width = max_span_width
    self.span_width_embedding = Embedding(max_span_width, binary_feature_size)
    # Based on the average sentence length in FN train.
    self.span_distance_bin = 25
    self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size)
    self.span_direction_embedding = Embedding(2, binary_feature_size)
    self.span_feedforward = TimeDistributed(span_feedforward)
    self.head_scorer = TimeDistributed(
        torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

    self.num_srl_args = self.vocab.get_vocab_size(srl_label_namespace)
    self.not_a_span_tag = self.vocab.get_token_index("*", srl_label_namespace)
    self.outside_span_tag = self.vocab.get_token_index("O", srl_label_namespace)
    self.semi_crf = SemiMarkovConditionalRandomField(
        num_tags=self.num_srl_args,
        max_span_width=max_span_width,
        default_tag=self.not_a_span_tag,
        outside_span_tag=self.outside_span_tag,
        loss_type=loss_type)
    # self.crf = ConditionalRandomField(self.num_classes)

    self.unlabeled_constits = unlabeled_constits
    self.np_pp_constits = np_pp_constits
    self.constit_label_namespace = constit_label_namespace

    assert not (unlabeled_constits and np_pp_constits)
    if unlabeled_constits:
        self.num_constit_tags = 2
    elif np_pp_constits:
        self.num_constit_tags = 3
    else:
        self.num_constit_tags = self.vocab.get_vocab_size(constit_label_namespace)

    # Topmost MLP.
    self.srl_arg_projection_layer = TimeDistributed(
        Linear(span_feedforward.get_output_dim(), self.num_srl_args))
    self.constit_arg_projection_layer = TimeDistributed(
        Linear(span_feedforward.get_output_dim(), self.num_constit_tags))

    # Evaluation.
    self.metrics = {
        "constituents": NonBioSpanBasedF1Measure(vocab,
                                                 tag_namespace=constit_label_namespace,
                                                 ignore_classes=["*"]),
        "srl": NonBioSpanBasedF1Measure(vocab,
                                        tag_namespace=srl_label_namespace,
                                        ignore_classes=["O", "*"],
                                        ontology_path=ontology_path)
    }

    # Mode for the model; if turned on, it only evaluates on dev and calculates loss for train.
    self.fast_mode = fast_mode
    initializer(self)
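# --- Hedged sketch (illustrative only): one plausible way the 25-way span
# distance bin above could be applied downstream. The real bucketing lives
# outside this constructor, so the clamp rule here is an assumption.
import torch

span_distance_bin = 25
distances = torch.tensor([0, 3, 17, 120])               # e.g. |span position - predicate position|
bucketed = distances.clamp(max=span_distance_bin - 1)   # valid indices into span_distance_embedding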
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             arc_representation_dim: int,
             tag_representation_dim: int,
             rank: int,
             capsule_dim: int,
             iter_num: int,
             arc_feedforward: FeedForward = None,
             tag_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             # dep_tag_embedding: Embedding = None,
             predicate_embedding: Embedding = None,
             delta_type: str = "hinge_ce",
             subtract_gold: bool = False,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             gumbel_t: float = 1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             double_loss: bool = True,
             base_average: bool = False,
             bilinear_matrix_capsule: bool = True,
             using_global: bool = False,
             passing_type: str = 'plain',
             global_node: bool = False,
             comments: str = "") -> None:
    super(SRLGraphParserBase, self).__init__(vocab, regularizer)

    self.capsule_dim = capsule_dim
    num_labels = self.vocab.get_vocab_size("arc_types")
    # print("num_labels", num_labels)
    if global_node:
        self.get_global_layer = Plain_Feedforward(
            (num_labels + 1) * capsule_dim, capsule_dim,
            Activation.by_name('relu')())
        self.bilinear_matrix_capsule_layer_for_global_node = BilinearMatrix(
            capsule_dim, capsule_dim)
    self.global_node = global_node

    if using_global:
        self.capsule_dim = int(self.capsule_dim / 2)
        if passing_type == 'plain':
            self.get_global_layer = Plain_Feedforward(
                (num_labels + 1) * capsule_dim,
                (num_labels + 1) * self.capsule_dim,
                Activation.by_name('relu')())
        elif passing_type == 'attention':
            self.get_global_layer = Attention_Feedforward(
                self.capsule_dim, capsule_dim, self.capsule_dim)
        else:
            self.get_global_layer = None
    self.using_global = using_global
    self.passing_type = passing_type

    self.iter_num = iter_num
    self.double_loss = double_loss
    self.base_average = base_average
    self.bilinear_matrix_capsule = bilinear_matrix_capsule
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.subtract_gold = subtract_gold
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(
            f"edge_prediction_threshold must be between "
            f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    # print("predicates", self.vocab._index_to_token["predicates"])
    # print("arc_types", self.vocab._index_to_token["arc_types"])
    self.delta_type = delta_type
    self.gumbel_t = gumbel_t

    node_dim = predicate_embedding.get_output_dim()
    encoder_dim = encoder.get_output_dim()

    # self.arg_arc_feedforward = arc_feedforward or \
    #     FeedForward(encoder_dim, 1, arc_representation_dim,
    #                 Activation.by_name("elu")())
    # self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)
    # self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
    #                                              arc_representation_dim,
    #                                              label_dim=capsule_dim,
    #                                              use_input_biases=True)

    self.arg_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)

    self.tag_bilinear = BilinearMatrixAttention_Lowrank(
        tag_representation_dim,
        tag_representation_dim,
        rank,
        label_dim=(num_labels + 1) * self.capsule_dim,
        use_input_biases=True)  # , activation=Activation.by_name("tanh")()

    if self.bilinear_matrix_capsule:
        self.bilinear_matrix_capsule_layer = BilinearMatrix(
            capsule_dim, capsule_dim)

    self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                            Activation.by_name("elu")())

    self._pos_tag_embedding = pos_tag_embedding or None
    # self._dep_tag_embedding = dep_tag_embedding or None
    self._pred_embedding = predicate_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # check_dimensions_match(representation_dim, encoder.get_input_dim(),
    #                        "text field embedding dim", "encoder input dim")
    self._labelled_f1 = IterativeLabeledF1Measure(
        negative_label=0,
        negative_pred=0,
        selected_metrics=["F", "l_F", "p_F", "u_F"])
    self._tag_loss = torch.nn.NLLLoss(reduction="none")    # , ignore_index=-1
    self._sense_loss = torch.nn.NLLLoss(reduction="none")  # , ignore_index=-1
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             context_field_embedder: TextFieldEmbedder,
             left_text_encoder: Seq2VecEncoder,
             right_text_encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             target_field_embedder: Optional[TextFieldEmbedder] = None,
             target_encoder: Optional[Seq2VecEncoder] = None,
             inter_target_encoding: Optional[InterTarget] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             label_name: str = 'target-sentiment-labels',
             loss_weights: Optional[List[float]] = None) -> None:
    super().__init__(vocab, regularizer)
    '''
    :param vocab: A Vocabulary, required in order to compute sizes for
                  input/output projections.
    :param context_field_embedder: Used to embed the text and target text if
                                   target_field_embedder is None but the
                                   target_encoder is NOT None.
    :param left_text_encoder: Encoder that will create the representation of
                              the tokens left of the target, and of the target
                              itself if included by the dataset reader.
    :param right_text_encoder: Encoder that will create the representation of
                               the tokens right of the target, and of the
                               target itself if included by the dataset reader.
    :param feedforward: An optional feed-forward layer to apply after the
                        encoder.
    :param target_field_embedder: Used to embed the target text to give as
                                  input to the target_encoder. This allows a
                                  separate embedding for the text and the
                                  target text.
    :param target_encoder: Encoder that will create the representation of the
                           target text tokens.
    :param inter_target_encoding: Whether to model the relationship between
                                  targets/aspects.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the
                        regularization penalty during training.
    :param dropout: Dropout applied after each layer apart from the last.
                    Dropout applied to time-based data will be `variational
                    dropout`_; all else will be standard dropout.
    :param label_name: Name of the label namespace.
    :param loss_weights: The amount of weight to give the negative, neutral,
                         and positive classes respectively, e.g. [0.2, 0.5, 0.3]
                         weights the negative class by a factor of 0.2, neutral
                         by 0.5, and positive by 0.3. NOTE: it assumes the
                         sentiment labels are ordered [negative, neutral,
                         positive].

    Without the target encoder this is the standard TDLSTM method from
    `Effective LSTMs for Target-Dependent Sentiment Classification`_. With the
    target encoder it becomes the TCLSTM method from the same paper.

    .. _variational dropout:
       https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
    .. _Effective LSTMs for Target-Dependent Sentiment Classification:
       https://aclanthology.coli.uni-saarland.de/papers/C16-1311/c16-1311
    '''
    self.label_name = label_name
    self.context_field_embedder = context_field_embedder
    self.target_field_embedder = target_field_embedder
    self.num_classes = self.vocab.get_vocab_size(self.label_name)
    self.left_text_encoder = left_text_encoder
    self.right_text_encoder = right_text_encoder
    self.target_encoder = target_encoder
    self.feedforward = feedforward

    # Set the loss weights (have to sort them by order of label index in
    # the vocab).
    self.loss_weights = target_sentiment.util.loss_weight_order(
        self, loss_weights, self.label_name)

    # Inter-target modelling
    self.inter_target_encoding = inter_target_encoding

    left_out_dim = self.left_text_encoder.get_output_dim()
    right_out_dim = self.right_text_encoder.get_output_dim()
    left_right_out_dim = left_out_dim + right_out_dim
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    elif self.inter_target_encoding is not None:
        output_dim = self.inter_target_encoding.get_output_dim()
    else:
        output_dim = left_right_out_dim
    self.label_projection = Linear(output_dim, self.num_classes)

    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.f1_metrics = {}
    # F1 scores
    label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name)
    for label_index, _label_name in label_index_name.items():
        _label_name = f'F1_{_label_name.capitalize()}'
        self.f1_metrics[_label_name] = F1Measure(label_index)

    # Dropout
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)

    # Ensure that the input to the right_text_encoder and left_text_encoder
    # is the size of the target encoder output plus the size of the text
    # embedding output.
    if self.target_encoder is not None:
        right_in_dim = self.right_text_encoder.get_input_dim()
        left_in_dim = self.left_text_encoder.get_input_dim()
        target_dim = self.target_encoder.get_output_dim()
        text_dim = self.context_field_embedder.get_output_dim()
        total_out_dim = target_dim + text_dim
        config_err_msg = ("As the target is being encoded, the output of the "
                          "target encoder is concatenated onto each word "
                          "vector for the left and right contexts; therefore "
                          "the input of the right_text_encoder/"
                          "left_text_encoder must be the output dimension of "
                          "the target encoder + the dimension of the word "
                          "embeddings for the left and right contexts.")
        if (total_out_dim != right_in_dim or total_out_dim != left_in_dim):
            raise ConfigurationError(config_err_msg)

    # Ensure that the target field embedder has an output dimension the
    # same as the input dimension to the target encoder.
    if self.target_encoder and self.target_field_embedder:
        target_embed_out = self.target_field_embedder.get_output_dim()
        target_in = self.target_encoder.get_input_dim()
        check_dimensions_match(target_in, target_embed_out,
                               'target_encoder input',
                               'target_field_embedder output')

    if self.inter_target_encoding:
        check_dimensions_match(left_right_out_dim,
                               self.inter_target_encoding.get_input_dim(),
                               'Output from the left and right encoders',
                               'Inter Target encoder input dim')

    # TimeDistributed everything, as we process multiple targets at once:
    # the input is a sentence containing one or more targets.
    self.left_text_encoder = TimeDistributed(self.left_text_encoder)
    self.right_text_encoder = TimeDistributed(self.right_text_encoder)
    if self.target_encoder is not None:
        self.target_encoder = TimeDistributed(self.target_encoder)
    if self.feedforward is not None:
        self.feedforward = TimeDistributed(self.feedforward)
    self.label_projection = TimeDistributed(self.label_projection)
    self._time_variational_dropout = TimeDistributed(self._variational_dropout)
    self._naive_dropout = TimeDistributed(self._naive_dropout)

    initializer(self)
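# --- Minimal shape demo of the TimeDistributed wrapping above (assumes
# allennlp is installed; dimensions are made up). Folding the num_targets
# dimension into the batch lets per-sentence modules run over every target.
import torch
from allennlp.modules import TimeDistributed
from torch.nn import Linear

projection = TimeDistributed(Linear(10, 3))
x = torch.randn(2, 4, 10)    # (batch, num_targets, feature_dim)
y = projection(x)
assert y.shape == (2, 4, 3)  # the num_targets dimension is preserved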
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             treebank_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             use_treebank_embedding: bool = False,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParserMonolingual, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._treebank_embedding = treebank_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    if treebank_embedding is not None:
        representation_dim += treebank_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim,
                           self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim,
                           self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self.use_treebank_embedding = use_treebank_embedding

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")

    if self.use_treebank_embedding:
        tbids = self.vocab.get_token_to_index_vocabulary("tbids")
        tbid_indices = {tb: index for tb, index in tbids.items()}
        self._tbids = set(tbid_indices.values())
        logger.info(f"Found TBIDs corresponding to the following treebanks : {tbid_indices}. "
                    "Embedding these as additional features.")

    self._attachment_scores = AttachmentScores()
    initializer(self)
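# --- Shape sketch of the biaffine arc scorer used above (assumes allennlp is
# installed; dimensions are made up): BilinearMatrixAttention produces one
# score per (head, child) token pair.
import torch
from allennlp.modules.matrix_attention import BilinearMatrixAttention

arc_attention = BilinearMatrixAttention(50, 50, use_input_biases=True)
head_repr = torch.randn(2, 9, 50)    # head_arc_feedforward output
child_repr = torch.randn(2, 9, 50)   # child_arc_feedforward output
scores = arc_attention(head_repr, child_repr)
assert scores.shape == (2, 9, 9)     # (batch, num_tokens, num_tokens)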
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2SeqEncoder,
             output_encoder: Seq2VecEncoder,
             num_cpt_layers: Optional[int] = 2,
             cpt_highway: bool = True,
             target_encoder: Optional[Seq2SeqEncoder] = None,
             feedforward: Optional[FeedForward] = None,
             target_field_embedder: Optional[TextFieldEmbedder] = None,
             share_text_target_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             word_dropout: float = 0.0,
             dropout: float = 0.0) -> None:
    '''
    Useful acronyms:

    CPT - Context-Preserving Transformation

    :param vocab: A Vocabulary, required in order to compute sizes for
                  input/output projections.
    :param text_field_embedder: Used to embed the text and target text if
                                target_field_embedder is None but the
                                target_encoder is not None.
    :param text_encoder: Sequence encoder that will create the representation
                         of each token in the context sentence.
    :param output_encoder: The encoder that takes as input the words after
                           they have been transformed through the CPT layers.
                           In the original paper this would be a CNN.
    :param num_cpt_layers: Number of times to apply the CPT layer to the
                           hidden representation of the words.
    :param cpt_highway: Highway adds the contextualised word vector (the input
                        word representation to CPT) to the transformed word
                        vector (the output word representation of CPT).
                        Setting this is the equivalent of using Lossless
                        Forwarding (LF) from the original paper.
    :param target_encoder: Encoder that will create the representation of the
                           target text tokens.
    :param feedforward: An optional feed-forward layer applied after the text
                        encoder if the target encoder is None; otherwise
                        applied after the target and text encoded
                        representations have been concatenated.
    :param target_field_embedder: Used to embed the target text to give as
                                  input to the target_encoder. This allows a
                                  separate embedding for text and target text.
    :param share_text_target_encoder: Whether or not to use the same encoder
                                      for the text and the target.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the
                        regularization penalty during training.
    :param word_dropout: Dropout applied after the embedding of the
                         tokens/words. It drops entire words with this
                         probability.
    :param dropout: Dropout applied after each layer apart from the last.
                    Dropout applied to time-based data will be `variational
                    dropout`_; all else will be standard dropout.

    The classifier is based on the model in `Transformation Networks for
    Target-Oriented Sentiment Classification
    <https://aclweb.org/anthology/P18-1087>`_. If `share_text_target_encoder`
    is True and `cpt_highway` is True, this model is equivalent to the TNet-LF
    model within the original paper.

    .. _variational dropout:
       https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
    '''
    super().__init__(vocab, regularizer)

    if share_text_target_encoder and (target_encoder is not None):
        config_err = ("The target encoder will not be used when sharing. "
                      "Set the target_encoder to None (default)")
        raise ConfigurationError(config_err)
    elif (not share_text_target_encoder) and (target_encoder is None):
        config_err = ('As the target and text are not sharing the encoder, '
                      'an encoder is required for the target text')
        raise ConfigurationError(config_err)

    self.text_field_embedder = text_field_embedder
    self.target_field_embedder = target_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    if share_text_target_encoder:
        target_encoder = text_encoder
    self.target_encoder = target_encoder
    self.output_encoder = output_encoder

    text_enc_out = text_encoder.get_output_dim()
    target_enc_out = target_encoder.get_output_dim()
    self.cpt = TimeDistributed(CPT(num_cpt_layers, text_enc_out,
                                   target_enc_out, cpt_highway,
                                   dropout=dropout))
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.output_encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)

    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.f1_metrics = {}
    # F1 scores
    label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
    for label_index, label_name in label_index_name.items():
        label_name = f'F1_{label_name.capitalize()}'
        self.f1_metrics[label_name] = F1Measure(label_index)

    self._word_dropout = WordDrouput(word_dropout)
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)

    self.loss = torch.nn.CrossEntropyLoss()

    # Ensure that the dimensions of the text field embedder and text encoder
    # match.
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           text_encoder.get_input_dim(),
                           "text field embedding dim",
                           "text encoder input dim")
    # Ensure that the dimensions of the target (or text) field embedder and
    # the target encoder match.
    target_field_embedder_dim = text_field_embedder.get_output_dim()
    target_field_error = "text field embedding dim"
    if self.target_field_embedder:
        target_field_embedder_dim = target_field_embedder.get_output_dim()
        target_field_error = "target field embedding dim"
    check_dimensions_match(target_field_embedder_dim,
                           target_encoder.get_input_dim(),
                           target_field_error,
                           "target encoder input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             span_feedforward: FeedForward,
             binary_feature_dim: int,
             max_span_width: int,
             binary_feature_size: int,
             distance_feature_size: int,
             embedding_dropout: float = 0.2,
             srl_label_namespace: str = "labels",
             constit_label_namespace: str = "constit_labels",
             mixing_ratio: float = 1.0,
             cutoff_epoch: int = -1,
             fast_mode: bool = True,
             loss_type: str = "logloss",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(PropBankScaffoldSpanSrl, self).__init__(vocab, regularizer)

    # Base token-level encoding.
    self.text_field_embedder = text_field_embedder
    self.embedding_dropout = Dropout(p=embedding_dropout)
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.stacked_encoder = stacked_encoder
    if (text_field_embedder.get_output_dim() + binary_feature_dim
            != stacked_encoder.get_input_dim()):
        raise ConfigurationError(
            "The SRL Model uses a binary verb indicator feature, meaning "
            "the input dimension of the stacked_encoder must be equal to "
            "the output dimension of the text_field_embedder plus the "
            "binary feature dimension.")

    # Span-level encoding.
    self.max_span_width = max_span_width
    self.span_width_embedding = Embedding(max_span_width, binary_feature_size)
    # Based on the average sentence length in FN train. TODO(Swabha): find out for OntoNotes.
    self.span_distance_bin = 25
    self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size)
    self.span_direction_embedding = Embedding(2, binary_feature_size)
    self.span_feedforward = TimeDistributed(span_feedforward)
    self.head_scorer = TimeDistributed(
        torch.nn.Linear(stacked_encoder.get_output_dim(), 1))

    self.num_srl_args = self.vocab.get_vocab_size(srl_label_namespace)
    not_a_span_tag = self.vocab.get_token_index("*", srl_label_namespace)
    outside_span_tag = self.vocab.get_token_index("O", srl_label_namespace)
    self.semi_crf = SemiMarkovConditionalRandomField(
        num_tags=self.num_srl_args,
        max_span_width=max_span_width,
        loss_type=loss_type,
        default_tag=not_a_span_tag,
        outside_span_tag=outside_span_tag)
    # self.crf = ConditionalRandomField(self.num_classes)

    self.num_constit_tags = self.vocab.get_vocab_size(constit_label_namespace)

    # Topmost MLP.
    self.srl_arg_projection_layer = TimeDistributed(
        Linear(span_feedforward.get_output_dim(), self.num_srl_args))
    self.constit_arg_projection_layer = TimeDistributed(
        Linear(span_feedforward.get_output_dim(), self.num_constit_tags))

    self.mixing_ratio = mixing_ratio
    self.cutoff_batch = cutoff_epoch
    self.batch = 0

    # Evaluation.
    self.metrics = {
        "constituents": NonBioSpanBasedF1Measure(vocab,
                                                 tag_namespace=constit_label_namespace,
                                                 ignore_classes=["*"]),
        "srl": NonBioSpanBasedF1Measure(vocab,
                                        tag_namespace=srl_label_namespace,
                                        ignore_classes=["V", "*"])
    }

    # Mode for the model; if turned on, it only evaluates on dev and calculates loss for train.
    self.fast_mode = fast_mode
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             attention: Attention,
             beam_size: int,
             max_decoding_steps: int,
             target_embedding_dim: int = 30,
             copy_token: str = "@COPY@",
             source_namespace: str = "tokens",
             target_namespace: str = "target_tokens",
             tensor_based_metric: Metric = None,
             token_based_metric: Metric = None,
             emb_dropout: float = 0.0,
             dec_dropout: float = 0.0,
             target_pretrained_file: str = None) -> None:
    super().__init__(vocab)
    self._source_namespace = source_namespace
    self._target_namespace = target_namespace
    self._src_start_index = self.vocab.get_token_index(START_SYMBOL, self._source_namespace)
    self._src_end_index = self.vocab.get_token_index(END_SYMBOL, self._source_namespace)
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self._oov_index = self.vocab.get_token_index(self.vocab._oov_token,
                                                 self._target_namespace)  # pylint: disable=protected-access
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                 self._target_namespace)  # pylint: disable=protected-access
    self._copy_index = self.vocab.add_token_to_namespace(copy_token, self._target_namespace)

    self._tensor_based_metric = tensor_based_metric or \
        BLEU(exclude_indices={self._pad_index, self._end_index, self._start_index})
    self._token_based_metric = token_based_metric

    self._target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

    # Encoding modules.
    self._source_embedder = source_embedder
    self._emb_dropout = Dropout(p=emb_dropout)
    self._dec_dropout = Dropout(p=dec_dropout)
    self._encoder = encoder

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
    self.encoder_output_dim = self._encoder.get_output_dim()
    self.decoder_output_dim = self.encoder_output_dim
    self.decoder_input_dim = self.decoder_output_dim

    target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

    # The decoder input will be a function of the embedding of the previous predicted token,
    # an attended encoder hidden state called the "attentive read", and another
    # weighted sum of the encoder hidden state called the "selective read".
    # While the weights for the attentive read are calculated by an `Attention` module,
    # the weights for the selective read are simply the predicted probabilities
    # corresponding to each token in the source sentence that matches the target
    # token from the previous timestep.
    self._target_embedder = Embedding(target_vocab_size,
                                      target_embedding_dim,
                                      vocab_namespace=self._target_namespace,
                                      pretrained_file=target_pretrained_file)
    self._attention = attention
    self._input_projection_layer = Linear(
        target_embedding_dim + self.encoder_output_dim * 2,
        self.decoder_input_dim)

    # We then run the projected decoder input through an LSTM cell to produce
    # the next hidden state.
    self._decoder_cell = LSTMCell(self.decoder_input_dim, self.decoder_output_dim)

    # We create a "generation" score for each token in the target vocab
    # with a linear projection of the decoder hidden state.
    self._output_generation_layer = Linear(self.decoder_output_dim, target_vocab_size)

    # We create a "copying" score for each source token by applying a non-linearity
    # (tanh) to a linear projection of the encoded hidden state for that token,
    # and then taking the dot product of the result with the decoder hidden state.
    self._output_copying_layer = Linear(self.encoder_output_dim, self.decoder_output_dim)

    # At prediction time, we'll use a beam search to find the best target sequence.
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size)
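# --- Shape sketch (illustrative, dims made up) of the decoder-input
# construction described in the comments above: the previous token embedding,
# the attentive read, and the selective read are concatenated before the
# input projection, matching target_embedding_dim + encoder_output_dim * 2.
import torch

batch, target_embedding_dim, encoder_output_dim = 4, 30, 200
previous_emb = torch.zeros(batch, target_embedding_dim)
attentive_read = torch.zeros(batch, encoder_output_dim)   # attention-weighted encoder states
selective_read = torch.zeros(batch, encoder_output_dim)   # copy-probability-weighted encoder states
decoder_input = torch.cat([previous_emb, attentive_read, selective_read], dim=-1)
assert decoder_input.size(-1) == target_embedding_dim + encoder_output_dim * 2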
def __init__(self,
             vocab,
             text_field_embedder,
             encoder,
             tag_representation_dim,
             arc_representation_dim,
             tag_feedforward=None,
             arc_feedforward=None,
             pos_tag_embedding=None,
             use_mst_decoding_for_validation=True,
             dropout=0.0,
             input_dropout=0.0,
             initializer=InitializerApplicator(),
             regularizer=None):
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name(u"elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size(u"head_tags")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name(u"elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           u"text field embedding dim", u"encoder input dim")
    check_dimensions_match(tag_representation_dim,
                           self.head_tag_feedforward.get_output_dim(),
                           u"tag representation dim", u"tag feedforward output dim")
    check_dimensions_match(arc_representation_dim,
                           self.head_arc_feedforward.get_output_dim(),
                           u"arc representation dim", u"arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary(u"pos")
    punctuation_tag_indices = dict((tag, index) for tag, index in list(tags.items())
                                   if tag in POS_TO_IGNORE)
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(u"Found POS tags corresponding to the following punctuation : {}. "
                u"Ignoring words with these POS tags for evaluation."
                .format(punctuation_tag_indices))
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    encoder_dim = encoder.get_output_dim()
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder_0: Seq2SeqEncoder,
             encoder_1: Seq2SeqEncoder,
             encoder_2: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             use_layer_normalization: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

    a = vocab.get_index_to_token_vocabulary(namespace='tokens')
    # glyph_config['idx2word'] = {k: v for k, v in a.items()}
    # self.glyph = GlyphEmbedding(glyph_config)

    self.text_field_embedder = text_field_embedder
    self.encoder_0 = encoder_0
    self.encoder_1 = encoder_1
    self.encoder_2 = encoder_2

    encoder_dim = self.encoder_2.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    # self._dropout = Dropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, self.encoder_2.get_output_dim()]))

    self.use_layer_normalization = use_layer_normalization
    if use_layer_normalization:
        self.norm_input = torch.nn.LayerNorm(self.encoder_0.get_input_dim())
        self.norm_hidden = torch.nn.LayerNorm(self.encoder_0.get_output_dim())

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    # check_dimensions_match(representation_dim, encoder.get_input_dim(),
    #                        "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim,
                           self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim,
                           self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self, x, h, L, v_t, W, R, N, dropout_rate=0.8, prior=None):
    super(APDNC, self).__init__()

    # debugging usages
    self.last_state_dict = None

    '''PARAMETERS'''
    # input vector size x_t
    # dataset specific
    self.x = x
    # single hidden unit output size h^l_t
    # state size
    # output size, forget gate size, and input gate size are all equal to state size s;
    # all weight matrices in equations 1-5 then have dimension (s, x + 2*h);
    # by equation 5, h = s = o
    self.h = h
    # Controller RNN layer count;
    # refers to the number of parallel RNN units
    self.L = L
    # Controller output v_t size
    # dataset specific
    self.v_t = v_t
    # Memory location width W and memory read head count R;
    # the controller interface epsilon_t size is derived from them
    self.W = W
    self.R = R
    # Total memory address count N;
    # total memory block is (N, W)
    self.N = N
    self.bs = None
    self.E_t = W * R + 3 * W + 3 * R + 3

    '''CONTROLLER'''
    # self.RNN_list = nn.ModuleList()
    # for _ in range(self.L):
    #     self.RNN_list.append(LSTM_Unit(self.x, self.R, self.W, self.h, self.bs))
    self.W_y = Parameter(torch.Tensor(self.L * self.h * 2, self.v_t).cuda())
    self.W_E = Parameter(torch.Tensor(self.L * self.h * 2, self.E_t).cuda())
    self.controller = Stock_LSTM(self.x, self.R, self.W, self.h, self.L, self.v_t)
    # every time step, every layer has 1 channel * space only
    self.layernorm = LayerNorm(1)
    self.dropout = Dropout(p=dropout_rate)

    '''COMPUTER'''
    self.W_r = Parameter(torch.Tensor(self.W * self.R, self.v_t).cuda())
    # print("Using 0.4.1 PyTorch BatchNorm1d")
    # self.bn = nn.BatchNorm1d(self.x, eps=1e-3, momentum=1e-10, affine=False)
    self.bn = nn.BatchNorm1d(self.x)

    self.reset_parameters()

    '''States'''
    self.hidden_previous_timestep = None
    # self.precedence_weighting = None
    # self.temporal_memory_linkage = None
    self.memory = None
    self.last_read_weightings = None
    self.last_usage_vector = None
    self.last_write_weighting = None
    self.last_read_vector = None
    self.not_first_t_flag = None

    '''prior'''
    # This is the prior probability of each label predicting true;
    # it is added to the logit.
    self.prior = prior
    if self.prior is not None:
        if isinstance(self.prior, np.ndarray):
            self.prior = torch.from_numpy(self.prior).float()
            self.prior = Variable(self.prior, requires_grad=False)
        elif isinstance(self.prior, torch.Tensor):
            self.prior = Variable(self.prior, requires_grad=False)
        else:
            assert isinstance(self.prior, Variable)

        # Transform to logits. Because we are using sigmoid, not softmax,
        # self.prior = log(P(y)) - log(P(not y)).
        # sigmoid_input = z + self.prior, where z = log(P(x|y)) - log(P(x|not y));
        # the sigmoid output is the positive posterior.
        self.prior = self.prior.clamp(1e-8, 1 - 1e-8)
        self.prior = torch.log(self.prior) - torch.log(1 - self.prior)
        a = Variable(torch.Tensor([0]))
        self.prior = torch.cat((a, self.prior))
        self.prior = self.prior.cuda()
        print("Using DNC with prior probability")
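# --- Numeric check (not part of the module above) of the prior-to-logit
# transform: adding log(p) - log(1 - p) to a zero logit makes the sigmoid
# output equal the prior p.
import torch

p = torch.tensor([0.1, 0.5, 0.9]).clamp(1e-8, 1 - 1e-8)
prior_logit = torch.log(p) - torch.log(1 - p)
assert torch.allclose(torch.sigmoid(prior_logit), p)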
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             lemmatize_helper: LemmatizeHelper,
             task_config: TaskConfig,
             morpho_vector_dim: int = 0,
             gram_val_representation_dim: int = -1,
             lemma_representation_dim: int = -1,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DependencyParser, self).__init__(vocab, regularizer)

    self.TopNCnt = 3

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.lemmatize_helper = lemmatize_helper
    self.task_config = task_config

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    assert self.task_config.params.get("use_pos_tag", False) == (self._pos_tag_embedding is not None)
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))

    if gram_val_representation_dim <= 0:
        self._gram_val_output = torch.nn.Linear(
            encoder_dim, self.vocab.get_vocab_size("grammar_value_tags"))
    else:
        self._gram_val_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, gram_val_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(gram_val_representation_dim,
                            self.vocab.get_vocab_size("grammar_value_tags")))

    if lemma_representation_dim <= 0:
        self._lemma_output = torch.nn.Linear(encoder_dim, len(lemmatize_helper))
    else:
        # Feed the grammatical-value prediction output into the lemmatizer
        # input -- EXPERIMENTAL.
        # actual_input_dim = encoder_dim
        actual_input_dim = encoder_dim + self.vocab.get_vocab_size("grammar_value_tags")
        self._lemma_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(actual_input_dim, lemma_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(lemma_representation_dim, len(lemmatize_helper)))

    representation_dim = text_field_embedder.get_output_dim() + morpho_vector_dim
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim,
                           self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim,
                           self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")

    self._attachment_scores = AttachmentScores()
    self._gram_val_prediction_accuracy = CategoricalAccuracy()
    self._lemma_prediction_accuracy = CategoricalAccuracy()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             hidden_dim: int,
             action_dim: int,
             ratio_dim: int,
             num_layers: int,
             word_dim: int = 0,
             text_field_embedder: TextFieldEmbedder = None,
             mces_metric: Metric = None,
             recurrent_dropout_probability: float = 0.0,
             layer_dropout_probability: float = 0.0,
             same_dropout_mask_per_instance: bool = True,
             input_dropout: float = 0.0,
             lemma_text_field_embedder: TextFieldEmbedder = None,
             pos_tag_embedding: Embedding = None,
             deprel_embedding: Embedding = None,
             bios_embedding: Embedding = None,
             lexcat_embedding: Embedding = None,
             ss_embedding: Embedding = None,
             ss2_embedding: Embedding = None,
             action_embedding: Embedding = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(TransitionParser, self).__init__(vocab, regularizer)

    self._primary_labeled_correct = 0
    self._primary_unlabeled_correct = 0
    self._primary_total_edges_predicted = 0
    self._primary_total_edges_actual = 0
    self._primary_exact_labeled_correct = 0
    self._primary_exact_unlabeled_correct = 0

    self._remote_labeled_correct = 0
    self._remote_unlabeled_correct = 0
    self._remote_total_edges_predicted = 0
    self._remote_total_edges_actual = 0
    self._remote_exact_labeled_correct = 0
    self._remote_exact_unlabeled_correct = 0

    self._total_sentences = 0

    self.num_actions = vocab.get_vocab_size('actions')
    self.text_field_embedder = text_field_embedder
    self.lemma_text_field_embedder = lemma_text_field_embedder
    self._pos_tag_embedding = pos_tag_embedding
    self._deprel_embedding = deprel_embedding
    self._bios_embedding = bios_embedding
    self._lexcat_embedding = lexcat_embedding
    self._ss_embedding = ss_embedding
    self._ss2_embedding = ss2_embedding
    self._mces_metric = mces_metric

    node_dim = 0
    if self.text_field_embedder:
        node_dim += word_dim
    for embedding in (pos_tag_embedding, deprel_embedding, bios_embedding,
                      lexcat_embedding, ss_embedding, ss2_embedding):
        if embedding:
            node_dim += embedding.output_dim
    self.node_dim = node_dim
    self.word_dim = word_dim
    self.hidden_dim = hidden_dim
    self.ratio_dim = ratio_dim
    self.action_dim = action_dim

    self.action_embedding = action_embedding
    if action_embedding is None:
        self.action_embedding = Embedding(num_embeddings=self.num_actions,
                                          embedding_dim=self.action_dim,
                                          trainable=False)

    # syntactic composition
    self.p_comp = torch.nn.Linear(self.hidden_dim * 5 + self.ratio_dim, node_dim)
    # parser state to hidden
    self.p_s2h = torch.nn.Linear(self.hidden_dim * 3 + self.ratio_dim, self.hidden_dim)
    # hidden to action
    self.p_act = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.num_actions)

    self.update_concept_node = torch.nn.Linear(self.hidden_dim + self.ratio_dim, node_dim)

    self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))
    self.proot_stack_emb = torch.nn.Parameter(torch.randn(node_dim))
    self.pempty_action_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))
    self.pempty_stack_emb = torch.nn.Parameter(torch.randn(self.hidden_dim))

    self._input_dropout = Dropout(input_dropout)

    self.buffer = StackRnn(input_size=node_dim,
                           hidden_size=self.hidden_dim,
                           num_layers=num_layers,
                           recurrent_dropout_probability=recurrent_dropout_probability,
                           layer_dropout_probability=layer_dropout_probability,
                           same_dropout_mask_per_instance=same_dropout_mask_per_instance)

    self.stack = StackRnn(input_size=node_dim,
                          hidden_size=self.hidden_dim,
                          num_layers=num_layers,
                          recurrent_dropout_probability=recurrent_dropout_probability,
                          layer_dropout_probability=layer_dropout_probability,
                          same_dropout_mask_per_instance=same_dropout_mask_per_instance)

    self.action_stack = StackRnn(input_size=self.action_dim,
                                 hidden_size=self.hidden_dim,
                                 num_layers=num_layers,
                                 recurrent_dropout_probability=recurrent_dropout_probability,
                                 layer_dropout_probability=layer_dropout_probability,
                                 same_dropout_mask_per_instance=same_dropout_mask_per_instance)

    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, text_encoder: Seq2SeqEncoder, target_encoder: Seq2VecEncoder, feedforward: Optional[FeedForward] = None, target_field_embedder: Optional[TextFieldEmbedder] = None, target_concat_text_embedding: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, word_dropout: float = 0.0, dropout: float = 0.0) -> None: ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param text_field_embedder: Used to embed the text and target text if target_field_embedder is None but the target_encoder is not None. :param text_encoder: Sequence Encoder that will create the representation of each token in the context sentence. :param target_encoder: Encoder that will create the representation of target text tokens. :param feedforward: An optional feed forward layer applied after the text encoder if the target encoder is None; otherwise it is applied after the target and text encoded representations have been concatenated. :param target_field_embedder: Used to embed the target text to give as input to the target_encoder. Thus this allows a separate embedding for text and target text. :param target_concat_text_embedding: Whether or not the target should be concatenated to each word embedding within the text before being encoded. :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param word_dropout: Dropout that is applied after the embedding of the tokens/words. It will drop entire words with this probability. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to time-based data will be `variational dropout`_ all else will be standard dropout. This class is based on the paper `Attention-based LSTM for Aspect-level Sentiment Classification <https://www.aclweb.org/anthology/D16-1058>`_. The default model here is equivalent to the AT-LSTM within this paper (Figure 2). If the `target_concat_text_embedding` argument is `True` then the model becomes the ATAE-LSTM within the cited paper (Figure 3). The only difference between this model and the attention-based models in the paper is that the final sentence representation is `r` rather than `h* = tanh(Wpr + WxhN)`, as we found this projection did not help performance. ..
_variational dropout: https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf ''' super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.target_field_embedder = target_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.text_encoder = text_encoder self.target_encoder = target_encoder self.feedforward = feedforward target_text_encoder_dim = (target_encoder.get_output_dim() + text_encoder.get_output_dim()) self.encoded_target_text_fusion = TimeDistributed( Linear(target_text_encoder_dim, target_text_encoder_dim)) self.attention_vector = Parameter( torch.Tensor(target_text_encoder_dim)) self.attention_layer = DotProductAttention(normalize=True) if feedforward is not None: output_dim = self.feedforward.get_output_dim() else: output_dim = text_encoder.get_output_dim() self.label_projection = Linear(output_dim, self.num_classes) self.metrics = {"accuracy": CategoricalAccuracy()} self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary('labels') for label_index, label_name in label_index_name.items(): label_name = f'F1_{label_name.capitalize()}' self.f1_metrics[label_name] = F1Measure(label_index) self._word_dropout = WordDrouput(word_dropout) self._variational_dropout = InputVariationalDropout(dropout) self._naive_dropout = Dropout(dropout) self.target_concat_text_embedding = target_concat_text_embedding self.loss = torch.nn.CrossEntropyLoss() # Ensure the text encoder has the correct input dimension if target_concat_text_embedding: text_encoder_expected_in = (text_field_embedder.get_output_dim() + target_encoder.get_output_dim()) check_dimensions_match( text_encoder_expected_in, text_encoder.get_input_dim(), "text field embedding dim + target encoder output dim", "text encoder input dim") else: check_dimensions_match(text_field_embedder.get_output_dim(), text_encoder.get_input_dim(), "text field embedding dim", "text encoder input dim") # Ensure that the dimensions of the target or text field embedder and # the target encoder match target_field_embedder_dim = text_field_embedder.get_output_dim() target_field_error = "text field embedding dim" if self.target_field_embedder: target_field_embedder_dim = target_field_embedder.get_output_dim() target_field_error = "target field embedding dim" check_dimensions_match(target_field_embedder_dim, target_encoder.get_input_dim(), target_field_error, "target encoder input dim") self.reset_parameters() initializer(self)
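# A hedged configuration sketch (dimensions and encoder choices are
# assumptions, not the repo's config): with target_concat_text_embedding=True
# the pooled target vector is concatenated onto every word embedding, so the
# text encoder input must equal word_dim + the target encoder output dim,
# which is exactly what the dimension check above enforces.
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

word_dim, target_out = 300, 50
target_encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(word_dim, target_out, batch_first=True))
# ATAE-LSTM style: the context LSTM consumes word + target dimensions.
text_encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(word_dim + target_out, 150, batch_first=True))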
def __init__(self, vocab: Vocabulary, context_field_embedder: TextFieldEmbedder, context_encoder: Seq2SeqEncoder, target_encoder: Seq2SeqEncoder, feedforward: Optional[FeedForward] = None, context_attention_activation_function: str = 'tanh', target_attention_activation_function: str = 'tanh', target_field_embedder: Optional[TextFieldEmbedder] = None, inter_target_encoding: Optional[InterTarget] = None, target_position_weight: Optional[TargetPositionWeight] = None, target_position_embedding: Optional[TextFieldEmbedder] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, dropout: float = 0.0, label_name: str = 'target-sentiment-labels', loss_weights: Optional[List[float]] = None, use_target_sequences: bool = False) -> None: super().__init__(vocab, regularizer) ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param context_field_embedder: Used to embed the context/sentence and target text if target_field_embedder is None but the target_encoder is NOT None. :param context_encoder: Encoder that will create the representation for the sentence/context that the target appears in. :param target_encoder: Encoder that will create the representation of target text tokens. :param feedforward: An optional feed forward layer to apply after the encoder. :param context_attention_activation_function: The attention method to be used on the context. :param target_attention_activation_function: The attention method to be used on the target text. :param target_field_embedder: Used to embed the target text to give as input to the target_encoder. Thus this allows a separate embedding for context and target text. :param inter_target_encoding: Whether to model the relationship between targets/aspects. :param target_position_weight: Whether to weight the output of the context encoding based on the position of the tokens relative to the target tokens. This weighting is applied before any attention is applied. :param target_position_embedding: Whether or not to concatenate a position embedding on to the input embeddings before being input to the `context_encoder`. :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to time-based data will be `variational dropout <https://arxiv.org/abs/1512.05287>`_ all else will be standard dropout. Variational dropout is applied to the target vectors after they have been processed by the `inter_target_encoding` if this is set. :param label_name: Name of the label name space. :param loss_weights: The amount of weight to give the negative, neutral, positive classes respectively. e.g. [0.2, 0.5, 0.3] would weight the negative class by a factor of 0.2, neutral by 0.5 and positive by 0.3. NOTE: it assumes the sentiment labels are the following: [negative, neutral, positive]. :param use_target_sequences: Whether or not to use target tokens within the context as the target's contextualized word representation (CWR). This would only make sense to use if the word representation i.e. the field embedder is a contextualized embedder e.g. ELMo. This also requires that the dataset reader has the following argument set to True `target_sequences`. Another reason to use this, even when not using a CWR, is to obtain contextualised POS/Dep tags etc.
This is based on the `Interactive Attention Networks for Aspect-Level Sentiment Classification <https://www.ijcai.org/proceedings/2017/0568.pdf>`_. The model is also known as `IAN`. .. _variational dropout: https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf ''' self.label_name = label_name self.context_field_embedder = context_field_embedder self.target_field_embedder = target_field_embedder self.num_classes = self.vocab.get_vocab_size(self.label_name) self.target_encoder = target_encoder self.context_encoder = context_encoder self.feedforward = feedforward self._use_target_sequences = use_target_sequences if self._use_target_sequences and self.target_field_embedder: raise ConfigurationError( '`use_target_sequences` cannot be True at' ' the same time as a value for ' '`target_field_embedder` as the embeddings' ' come from the context and not a separate embedder') context_attention_activation_function = Activation.by_name( f'{context_attention_activation_function}')() target_attention_activation_function = Activation.by_name( f'{target_attention_activation_function}')() target_encoder_out = self.target_encoder.get_output_dim() context_encoder_out = self.context_encoder.get_output_dim() self.context_attention_layer = BilinearAttention( target_encoder_out, context_encoder_out, context_attention_activation_function, normalize=True) self.target_attention_layer = BilinearAttention( context_encoder_out, target_encoder_out, target_attention_activation_function, normalize=True) # To be used as the pooled input into the target attention layer as # the query vector. self._context_averager = BagOfEmbeddingsEncoder(context_encoder_out, averaged=True) # To be used as the pooled input into the context attention layer as # the query vector. 
self._target_averager = BagOfEmbeddingsEncoder(target_encoder_out, averaged=True) # Set the loss weights (have to sort them by order of label index in # the vocab) self.loss_weights = target_sentiment.util.loss_weight_order( self, loss_weights, self.label_name) # Inter target modelling self.inter_target_encoding = inter_target_encoding if feedforward is not None: output_dim = self.feedforward.get_output_dim() elif self.inter_target_encoding is not None: output_dim = self.inter_target_encoding.get_output_dim() else: output_dim = target_encoder_out + context_encoder_out self.label_projection = Linear(output_dim, self.num_classes) self.metrics = {"accuracy": CategoricalAccuracy()} self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary( self.label_name) for label_index, _label_name in label_index_name.items(): _label_name = f'F1_{_label_name.capitalize()}' self.f1_metrics[_label_name] = F1Measure(label_index) # Dropout self._variational_dropout = InputVariationalDropout(dropout) self._naive_dropout = Dropout(dropout) # position embeddings self.target_position_embedding = target_position_embedding # Ensure that the dimensions of the text field embedder and text encoder # match if self.target_position_embedding: context_and_position_dim = ( context_field_embedder.get_output_dim() + self.target_position_embedding.get_output_dim()) check_dimensions_match( context_and_position_dim, context_encoder.get_input_dim(), "context field embedding dim and the position embeddings", "text encoder input dim") else: check_dimensions_match(context_field_embedder.get_output_dim(), context_encoder.get_input_dim(), "context field embedding dim", "text encoder input dim") # Ensure that the dimensions of the target or text field embedder and # the target encoder match target_field_embedder_dim = context_field_embedder.get_output_dim() target_field_error = "context field embedding dim" if self.target_field_embedder: target_field_embedder_dim = target_field_embedder.get_output_dim() target_field_error = "target field embedding dim" check_dimensions_match(target_field_embedder_dim, target_encoder.get_input_dim(), target_field_error, "target encoder input dim") if self.inter_target_encoding: check_dimensions_match(target_encoder_out + context_encoder_out, self.inter_target_encoding.get_input_dim(), 'Output from target and context encoders', 'Inter Target encoder input dim') self.target_position_weight = target_position_weight # TimeDistributed anything that is related to the targets. if self.feedforward is not None: self.feedforward = TimeDistributed(self.feedforward) self.label_projection = TimeDistributed(self.label_projection) self._time_naive_dropout = TimeDistributed(self._naive_dropout) initializer(self)
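# A hedged, standalone sketch (toy dimensions and tensors) of the
# interactive attention wiring set up above: the averaged target vector
# queries the context tokens through the bilinear attention layer, as in
# the IAN paper; the symmetric direction works the same way.
import torch
from allennlp.modules.attention import BilinearAttention
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder

ctx_dim, tgt_dim = 200, 100
context_attention = BilinearAttention(tgt_dim, ctx_dim, normalize=True)
target_averager = BagOfEmbeddingsEncoder(tgt_dim, averaged=True)
encoded_context = torch.randn(2, 30, ctx_dim)  # [batch, context_len, dim]
encoded_target = torch.randn(2, 4, tgt_dim)    # [batch, target_len, dim]
target_query = target_averager(encoded_target)              # [batch, tgt_dim]
weights = context_attention(target_query, encoded_context)  # [batch, context_len]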
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, lemma_tag_embedding: Embedding = None, upos_tag_embedding: Embedding = None, xpos_tag_embedding: Embedding = None, feats_tag_embedding: Embedding = None, head_information_embedding: Embedding = None, head_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError( f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("deps") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._lemma_tag_embedding = lemma_tag_embedding or None self._upos_tag_embedding = upos_tag_embedding or None self._xpos_tag_embedding = xpos_tag_embedding or None self._feats_tag_embedding = feats_tag_embedding or None self._head_tag_embedding = head_tag_embedding or None self._head_information_embedding = head_information_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) # add a head sentinel to accommodate the extra root token in EUD graphs self._head_sentinel = torch.nn.Parameter( torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if lemma_tag_embedding is not None: representation_dim += lemma_tag_embedding.get_output_dim() if upos_tag_embedding is not None: representation_dim += upos_tag_embedding.get_output_dim() if xpos_tag_embedding is not None: representation_dim += xpos_tag_embedding.get_output_dim() if feats_tag_embedding is not None: representation_dim += feats_tag_embedding.get_output_dim() if head_tag_embedding is not None: representation_dim += head_tag_embedding.get_output_dim() if head_information_embedding is not None: representation_dim += head_information_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self._enhanced_attachment_scores = EnhancedAttachmentScores() self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none") self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
initializer(self)
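# A hedged, standalone sketch (assumed shapes) of how a learned head
# sentinel like the parameter above is typically prepended: a single "ROOT"
# vector is expanded across the batch so every token can attach to an
# artificial root during graph prediction.
import torch

encoder_dim = 400
head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder_dim]))
encoded_text = torch.randn(8, 25, encoder_dim)  # [batch, seq_len, dim]
batch_size = encoded_text.size(0)
sentinel = head_sentinel.expand(batch_size, 1, encoder_dim)
encoded_text = torch.cat([sentinel, encoded_text], dim=1)  # [8, 26, dim]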
def __init__( self, vocab: Vocabulary, bert_model: Union[str, AutoModel], mismatched_embedder: TokenEmbedder = None, lp: bool = False, lpsmap: bool = False, lpsmap_core_roles_only: bool = True, validation_inference: bool = True, batch_size: int = None, encoder: Seq2SeqEncoder = None, reinitialize_pos_embedding: bool = False, embedding_dropout: float = 0.0, mlp_hidden_size: int = 300, initializer: InitializerApplicator = InitializerApplicator(), label_smoothing: float = None, ignore_span_metric: bool = False, srl_eval_path: str = DEFAULT_SRL_EVAL_PATH, label_encoding: str = "BIO", constrain_crf_decoding: bool = None, include_start_end_transitions: bool = True, label_namespace: str = "labels", **kwargs, ) -> None: super().__init__(vocab, **kwargs) if isinstance(bert_model, str): if mismatched_embedder is None: self.bert_model = AutoModel.from_pretrained(bert_model) self.bert_config = AutoConfig.from_pretrained(bert_model) else: if mismatched_embedder is None: self.bert_model = bert_model self.bert_config = bert_model.config if reinitialize_pos_embedding: self.bert_model._init_weights( self.bert_model.embeddings.position_embeddings) # self.bert_model._init_weights(self.bert_model.embeddings.token_type_embeddings) if mismatched_embedder is not None: self.bert_model = mismatched_embedder self._label_namespace = label_namespace self.num_classes = self.vocab.get_vocab_size(label_namespace) if srl_eval_path is not None: # For the span based evaluation, we don't want to consider labels # for verb, because the verb index is provided to the model. self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"]) else: self.span_metric = None if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None self.label_encoding = label_encoding self.constrain_crf_decoding = constrain_crf_decoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError( "constrain_crf_decoding is True, but no label_encoding was specified." 
) labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_classes, constraints, include_start_end_transitions=include_start_end_transitions) self._encoder = encoder representation_size = self.bert_config.hidden_size if self.bert_config.type_vocab_size == 1: representation_size = self.bert_config.hidden_size * 2 if encoder is None: self.tag_projection_layer = torch.nn.Sequential( Linear(representation_size, mlp_hidden_size), torch.nn.ReLU(), Linear(mlp_hidden_size, self.num_classes)) else: self.tag_projection_layer = torch.nn.Sequential( Linear(encoder.get_output_dim() * 2, mlp_hidden_size), torch.nn.ReLU(), Linear(mlp_hidden_size, self.num_classes)) self.embedding_dropout = Dropout(p=embedding_dropout) self.predicate_embedding = torch.nn.Embedding(num_embeddings=2, embedding_dim=10) self._label_smoothing = label_smoothing self.ignore_span_metric = ignore_span_metric self._lp = lp self._lpsmap = lpsmap self._lpsmap_core_only = lpsmap_core_roles_only self._val_inference = validation_inference if self._lpsmap: self._core_roles = [] for i in range(6): try: self._core_roles.append( self.vocab.get_token_index( "B-ARG" + str(i), namespace=self._label_namespace)) except KeyError: logger.info("B-ARG" + str(i) + " is not in labels") self._r_roles = [] self._c_roles = [] for i in range(self.num_classes): token = self.vocab.get_token_from_index( i, namespace=self._label_namespace) if token[:4] == "B-R-" and token[4:] != "ARG1": try: base_arg_index = self.vocab.get_token_index( "B-" + token[4:], namespace=self._label_namespace) self._r_roles.append((i, base_arg_index)) except KeyError: logger.info("B-" + token[4:] + " is not in labels") elif token[:4] == "B-C-" and token[4:] != "ARG1": try: base_arg_index = self.vocab.get_token_index( "B-" + token[4:], namespace=self._label_namespace) self._c_roles.append((i, base_arg_index)) except KeyError: logger.info("B-" + token[4:] + " is not in labels") self.lpsmap = None initializer(self)
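# A hedged, standalone illustration of the CRF constraint setup above: with
# BIO encoding, allowed_transitions() omits pairs such as O -> I-ARG0, so
# constrained Viterbi decoding can never emit an I- tag without a compatible
# preceding B-/I- tag. The toy label space is an assumption.
from allennlp.modules.conditional_random_field import allowed_transitions

labels = {0: "O", 1: "B-ARG0", 2: "I-ARG0"}
constraints = allowed_transitions("BIO", labels)
assert (0, 2) not in constraints  # O -> I-ARG0 is forbidden
assert (1, 2) in constraints      # B-ARG0 -> I-ARG0 is allowed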
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError( f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self._unlabelled_f1 = F1Measure(positive_label=1) self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none") self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none") initializer(self)
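# A hedged shape sketch (standalone, toy dimensions) of the biaffine edge
# scorer assembled above: BilinearMatrixAttention over head and child
# projections yields a [batch, seq, seq] score matrix that
# edge_prediction_threshold binarizes at inference time.
import torch
from allennlp.modules.matrix_attention import BilinearMatrixAttention

arc_dim, seq_len = 100, 7
arc_attention = BilinearMatrixAttention(arc_dim, arc_dim, use_input_biases=True)
head_arc = torch.randn(2, seq_len, arc_dim)   # head representations
child_arc = torch.randn(2, seq_len, arc_dim)  # child (dependent) representations
arc_scores = arc_attention(head_arc, child_arc)  # [2, 7, 7]
edges = arc_scores.sigmoid() > 0.5  # mirrors edge_prediction_threshold = 0.5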
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, word_dim: int, hidden_dim: int, action_dim: int, num_layers: int, mces_metric: Metric = None, recurrent_dropout_probability: float = 0.0, layer_dropout_probability: float = 0.0, same_dropout_mask_per_instance: bool = True, input_dropout: float = 0.0, lemma_text_field_embedder: TextFieldEmbedder = None, pos_tag_embedding: Embedding = None, action_embedding: Embedding = None, frame_tagger_encoder: Seq2SeqEncoder = None, pos_tagger_encoder: Seq2SeqEncoder = None, node_label_tagger_encoder: Seq2SeqEncoder = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(TransitionParser, self).__init__(vocab, regularizer) self._unlabeled_correct = 0 self._labeled_correct = 0 self._total_edges_predicted = 0 self._total_edges_actual = 0 self._exact_unlabeled_correct = 0 self._exact_labeled_correct = 0 self._total_sentences = 0 self.num_actions = vocab.get_vocab_size('actions') self.text_field_embedder = text_field_embedder self.pos_tag_embedding = pos_tag_embedding self._mces_metric = mces_metric self.action_embedding = action_embedding if action_embedding is None: self.action_embedding = Embedding(num_embeddings=self.num_actions, embedding_dim=action_dim, trainable=False) # syntactic composition self.p_comp = torch.nn.Linear(hidden_dim * 4, word_dim) # parser state to hidden self.p_s2h = torch.nn.Linear(hidden_dim * 4, hidden_dim) # hidden to action self.p_act = torch.nn.Linear(hidden_dim, self.num_actions) self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(hidden_dim)) self.proot_stack_emb = torch.nn.Parameter(torch.randn(word_dim)) self.pempty_action_emb = torch.nn.Parameter(torch.randn(hidden_dim)) self.pempty_deque_emb = torch.nn.Parameter(torch.randn(hidden_dim)) self._input_dropout = Dropout(input_dropout) self.frame_tagger_encoder = frame_tagger_encoder self.pos_tagger_encoder = pos_tagger_encoder self.node_label_tagger_encoder = node_label_tagger_encoder self.buffer = StackRnn( input_size=word_dim, hidden_size=hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.stack = StackRnn( input_size=word_dim, hidden_size=hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.deque = StackRnn( input_size=word_dim, hidden_size=hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.action_stack = StackRnn( input_size=action_dim, hidden_size=hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.frame_tagger = SimpleTagger( vocab=vocab, text_field_embedder=text_field_embedder, encoder=self.frame_tagger_encoder, label_namespace='frame') self.pos_tagger = SimpleTagger(vocab=vocab, text_field_embedder=text_field_embedder, encoder=self.pos_tagger_encoder, label_namespace='pos_tag') self.node_label_tagger = SimpleTagger( vocab=vocab, text_field_embedder=text_field_embedder, encoder=self.node_label_tagger_encoder, 
label_namespace='node_label') initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, model_name: str = None, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, use_mst_decoding_for_validation: bool = True, dropout: float = 0.0, input_dropout: float = 0.0, word_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder if model_name: from src.data.token_indexers import PretrainedAutoTokenizer self._tokenizer = PretrainedAutoTokenizer.load(model_name) encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("head_tags") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim, tag_representation_dim, num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) self._word_dropout = word_dropout self._head_sentinel = torch.nn.Parameter( torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self.use_mst_decoding_for_validation = use_mst_decoding_for_validation tags = self.vocab.get_token_to_index_vocabulary("pos") punctuation_tag_indices = { tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE } self._pos_to_ignore = set(punctuation_tag_indices.values()) logger.info( f"Found POS tags corresponding to the following punctuation: {punctuation_tag_indices}. " "Ignoring words with these POS tags for evaluation.") self._attachment_scores = AttachmentScores() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, stacked_encoder: Seq2SeqEncoder, span_feedforward: FeedForward, binary_feature_dim: int, max_span_width: int, binary_feature_size: int, distance_feature_size: int, embedding_dropout: float = 0.2, label_namespace: str = "labels", fast_mode: bool = False, loss_type: str = "logloss", initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(SemiCrfSemanticRoleLabeler, self).__init__(vocab, regularizer) # Base token-level encoding. self.text_field_embedder = text_field_embedder self.embedding_dropout = Dropout(p=embedding_dropout) # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.stacked_encoder = stacked_encoder if text_field_embedder.get_output_dim( ) + binary_feature_dim != stacked_encoder.get_input_dim(): raise ConfigurationError( "The SRL Model uses a binary verb indicator feature, meaning " "the input dimension of the stacked_encoder must be equal to " "the output dimension of the text_field_embedder + binary_feature_dim.") # Span-level encoding. self.max_span_width = max_span_width self.span_width_embedding = Embedding(max_span_width, binary_feature_size) # Based on the average sentence length in FN train. TODO(Swabha): find out for OntoNotes. self.span_distance_bin = 25 self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size) self.span_direction_embedding = Embedding(2, binary_feature_size) self.span_feedforward = TimeDistributed(span_feedforward) self.head_scorer = TimeDistributed( torch.nn.Linear(stacked_encoder.get_output_dim(), 1)) self.num_classes = self.vocab.get_vocab_size(label_namespace) self.not_a_span_tag = self.vocab.get_token_index("*", label_namespace) self.outside_span_tag = self.vocab.get_token_index( "O", label_namespace) self.semi_crf = SemiMarkovConditionalRandomField( num_tags=self.num_classes, max_span_width=max_span_width, loss_type=loss_type, default_tag=self.not_a_span_tag, outside_span_tag=self.outside_span_tag) # Topmost MLP. self.tag_projection_layer = TimeDistributed( Linear(span_feedforward.get_output_dim(), self.num_classes)) # Evaluation. # For the span based evaluation, we don't want to consider labels # for verb, because the verb index is provided to the model. self.non_bio_span_metric = NonBioSpanBasedF1Measure( vocab, tag_namespace=label_namespace, ignore_classes=["V", "*"]) # Mode for the model, if turned on it only evaluates on dev and calculates loss for train. self.fast_mode = fast_mode initializer(self)
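# A hedged sketch (assumed downstream usage, toy tensors) of the distance
# binning implied by span_distance_bin above: distances from each span to
# the predicate are clipped into span_distance_bin buckets before the
# embedding lookup, so all far-away spans share the last bucket.
import torch

span_distance_bin = 25
span_distance_embedding = torch.nn.Embedding(span_distance_bin, 20)
span_starts = torch.tensor([0, 3, 40])
predicate_index = 5
distances = (span_starts - predicate_index).abs()       # [5, 2, 35]
binned = distances.clamp(max=span_distance_bin - 1)     # 35 -> bucket 24
features = span_distance_embedding(binned)              # [3, 20]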
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, left_text_encoder: Seq2VecEncoder, right_text_encoder: Seq2VecEncoder, feedforward: Optional[FeedForward] = None, target_field_embedder: Optional[TextFieldEmbedder] = None, target_encoder: Optional[Seq2VecEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, word_dropout: float = 0.0, dropout: float = 0.0) -> None: super().__init__(vocab, regularizer) ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param text_field_embedder: Used to embed the text and target text if target_field_embedder is None but the target_encoder is not None. :param left_text_encoder: Encoder that will create the representation of the tokens left of the target and the target itself if included from the dataset reader. :param right_text_encoder: Encoder that will create the representation of the tokens right of the target and the target itself if included from the dataset reader. :param feedforward: An optional feed forward layer to apply after the encoder. :param target_field_embedder: Used to embed the target text to give as input to the target_encoder. Thus this allows a separate embedding for text and target text. :param target_encoder: Encoder that will create the representation of target text tokens. :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param word_dropout: Dropout that is applied after the embedding of the tokens/words. It will drop entire words with this probability. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to time-based data will be `variational dropout`_ all else will be standard dropout. Without the target encoder this will be the standard TDLSTM method from `Effective LSTMs for Target-Dependent Sentiment Classification`_. With the target encoder this will then become the TCLSTM method from `Effective LSTMs for Target-Dependent Sentiment Classification`_. .. _variational dropout: https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf ..
_Effective LSTMs for Target-Dependent Sentiment Classification: https://aclanthology.coli.uni-saarland.de/papers/C16-1311/c16-1311 ''' self.text_field_embedder = text_field_embedder self.target_field_embedder = target_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.left_text_encoder = left_text_encoder self.right_text_encoder = right_text_encoder self.target_encoder = target_encoder self.feedforward = feedforward if feedforward is not None: output_dim = self.feedforward.get_output_dim() else: left_out_dim = self.left_text_encoder.get_output_dim() right_out_dim = self.right_text_encoder.get_output_dim() output_dim = left_out_dim + right_out_dim self.label_projection = Linear(output_dim, self.num_classes) self.metrics = {"accuracy": CategoricalAccuracy()} self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary('labels') for label_index, label_name in label_index_name.items(): label_name = f'F1_{label_name.capitalize()}' self.f1_metrics[label_name] = F1Measure(label_index) # Dropout self._word_dropout = WordDrouput(word_dropout) self._variational_dropout = InputVariationalDropout(dropout) self._naive_dropout = Dropout(dropout) self.loss = torch.nn.CrossEntropyLoss() # Ensure that the input to the right_text_encoder and left_text_encoder # is the size of the target encoder output plus the size of the text # embedding output. if self.target_encoder: right_text_in_dim = self.right_text_encoder.get_input_dim() left_text_in_dim = self.left_text_encoder.get_input_dim() target_dim = self.target_encoder.get_output_dim() text_dim = self.text_field_embedder.get_output_dim() total_out_dim = target_dim + text_dim config_err_msg = ( "As the target is being encoded the output of the" " target encoder is concatenated onto each word" " vector for the left and right contexts," " therefore the input of the right_text_encoder" "/left_text_encoder is the output dimension of" " the target encoder + the dimension of the word" " embeddings for the left and right contexts.") if (total_out_dim != right_text_in_dim or total_out_dim != left_text_in_dim): raise ConfigurationError(config_err_msg) # Ensure that the target field embedder has an output dimension the # same as the input dimension to the target encoder. if self.target_encoder and self.target_field_embedder: target_embed_out = self.target_field_embedder.get_output_dim() target_in = self.target_encoder.get_input_dim() config_embed_err_msg = ("The Target field embedder should have" " the same output size " f"{target_embed_out} as the input to " f"the target encoder {target_in}") if target_embed_out != target_in: raise ConfigurationError(config_embed_err_msg) initializer(self)
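# A hedged configuration sketch (dimensions assumed) of the TCLSTM
# constraint checked above: with a target encoder present, each left/right
# context token is concatenated with the pooled target vector, so both
# context encoders must accept word_dim + target_dim inputs.
import torch
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

word_dim, target_dim = 300, 300
target_encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(word_dim, target_dim, batch_first=True))
left_text_encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(word_dim + target_dim, 150, batch_first=True))
right_text_encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(word_dim + target_dim, 150, batch_first=True))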
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, arc_representation_dim: int, tag_representation_dim: int, r_lambda: float = 1e-2, normalize: bool = False, arc_feedforward: FeedForward = None, tag_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dep_tag_embedding: Embedding = None, predicate_embedding: Embedding = None, delta_type: str = "hinge_ce", subtract_gold: float = 0.0, dropout: float = 0.0, input_dropout: float = 0.0, gumbel_t: float = 0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(SRLGraphParserBase, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder self.r_lambda = r_lambda self.normalize = normalize self.as_base = False self.subtract_gold = subtract_gold self.delta_type = delta_type num_labels = self.vocab.get_vocab_size("tags") self.gumbel_t = gumbel_t node_dim = predicate_embedding.get_output_dim() encoder_dim = encoder.get_output_dim() self.arg_arc_feedforward = arc_feedforward or \ FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) self.arg_tag_feedforward = tag_feedforward or \ FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention( tag_representation_dim, tag_representation_dim, label_dim=num_labels, use_input_biases=True) self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim, Activation.by_name("elu")()) self._pos_tag_embedding = pos_tag_embedding or None self._dep_tag_embedding = dep_tag_embedding or None self._pred_embedding = predicate_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) self._labelled_f1 = IterativeLabeledF1Measure( negative_label=0, negative_pred=0, selected_metrics=["F", "p_F", "l_P", "l_R"]) self._tag_loss = torch.nn.NLLLoss(reduction="none") self._sense_loss = torch.nn.NLLLoss( reduction="none") initializer(self)
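# A hedged, standalone sketch (toy tensors) of how losses built with
# reduction="none", like the NLLLoss instances above, are typically
# combined with a padding mask before averaging.
import torch

nll = torch.nn.NLLLoss(reduction="none")
log_probs = torch.log_softmax(torch.randn(4, 5), dim=-1)  # [tokens, labels]
gold = torch.tensor([1, 0, 3, 2])
mask = torch.tensor([1.0, 1.0, 0.0, 1.0])  # third token is padding
loss = (nll(log_probs, gold) * mask).sum() / mask.sum()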