def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Wire up the BiDAF sub-modules and run the layer-dimension sanity checks."""
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    enc_dim = phrase_layer.get_output_dim()
    model_dim = modeling_layer.get_output_dim()
    # Span-start scorer consumes the merged passage representation (4 * encoding)
    # concatenated with the modeling-layer output.
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(enc_dim * 4 + model_dim, 1))
    # Span-end scorer consumes the merged representation plus the span-end encoding.
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(enc_dim * 4 + span_end_encoder.get_output_dim(), 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * enc_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * enc_dim + 3 * model_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # Identity function stands in for dropout when it is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             max_decoding_steps: int,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    """Set up the WikiTables semantic parser's embedders, decoder bootstrap
    parameters, metrics and entity-linking layers.

    ``rule_namespace`` names the vocabulary namespace holding grammar
    production rules; ``tables_directory`` is handed to the denotation
    accuracy metric.
    """
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    # Wrapped in TimeDistributed so the Seq2Vec encoder runs over each entity's
    # token sequence independently — presumably entities arrive as
    # (batch, num_entities, num_tokens, ...); confirm at the call site.
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    # Identity function stands in for dropout when it is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()
    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    # Separate embeddings for input-side actions, output-side actions, and a
    # scalar bias per action.
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)
    check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                           "entity word average embedding dim", "question embedding dim")
    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)
    # Linking features are collapsed to a single score; skipped when none are used.
    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None
    # The extra scalar gates only exist when neighbour similarity contributes
    # to entity linking.
    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             feedforward: FeedForward = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a CRF tagger: embedder -> encoder -> (optional feedforward) -> tag projection -> CRF."""
    super().__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self._feedforward = feedforward

    # The projection layer maps whatever feeds the CRF — the feedforward
    # output when one is configured, otherwise the raw encoder output — down
    # to one logit per tag.
    if feedforward is not None:
        projection_input_dim = feedforward.get_output_dim()
    else:
        projection_input_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(projection_input_dim, self.num_tags))

    # Restrict CRF transitions to valid label sequences when an encoding
    # scheme (e.g. BIO/BIOUL) is given.
    transition_constraints = None
    if constraint_type is not None:
        index_to_label = self.vocab.get_index_to_token_vocabulary(label_namespace)
        transition_constraints = allowed_transitions(constraint_type, index_to_label)
    self.crf = ConditionalRandomField(
        self.num_tags,
        transition_constraints,
        include_start_end_transitions=include_start_end_transitions
    )
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Assemble the encoder, action embedders and transition-based decoder of the SQL parser."""
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)

    # Evaluation metrics, all simple running averages.
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    # the padding value used by IndexField
    self._action_padding_index = -1
    num_actions = vocab.get_vocab_size("rule_labels")
    # Input action embeddings carry one extra dimension when a bias is used.
    input_action_dim = action_embedding_dim + 1 if self._add_action_bias else action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        predict_start_type_separately=False,
        add_action_bias=self._add_action_bias,
        dropout=dropout)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Construct the biaffine dependency parser.

    Head/child projections feed a bilinear arc scorer and a bilinear label
    scorer over the ``head_tags`` namespace. POS tags listed in
    ``POS_TO_IGNORE`` are excluded from attachment-score evaluation.
    """
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    # Separate head/child projections feed the biaffine arc attention.
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    # Fixed typo in the log message ("correspoding" -> "corresponding").
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             encoder: Seq2SeqEncoder,
             feedforward_layer: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             evalb_directory_path: str = None) -> None:
    """Construct the span-based constituency parser.

    Tokens (plus optional POS-tag embeddings) are contextualized by ``encoder``,
    span representations come from ``span_extractor``, and span labels are
    projected from either the optional feedforward or the raw span extractor
    output. Dimension mismatches raise via ``check_dimensions_match``.
    """
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward_layer is not None:
        output_dim = feedforward_layer.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim,
                           encoder.get_input_dim(),
                           "representation dim (tokens + optional POS tags)",
                           "encoder input dim")
    # BUG FIX: this check compares the encoder's *output* dim against the span
    # extractor's input dim, but the error message previously said
    # "encoder input dim", sending users to the wrong configuration key.
    check_dimensions_match(encoder.get_output_dim(),
                           span_extractor.get_input_dim(),
                           "encoder output dim",
                           "span extractor input dim")
    if feedforward_layer is not None:
        check_dimensions_match(span_extractor.get_output_dim(),
                               feedforward_layer.get_input_dim(),
                               "span extractor output dim",
                               "feedforward input dim")
    self.tag_accuracy = CategoricalAccuracy()
    # EVALB bracketing score is optional; it needs the compiled scorer's directory.
    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Set up span extraction, mention pruning and antecedent scoring modules
    for end-to-end coreference resolution.

    ``feature_size`` sizes both the span-width and the distance embeddings;
    ``spans_per_word`` and ``max_antecedents`` bound pruning at scoring time
    (used outside this constructor).
    """
    super(CoreferenceResolver, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    # Mention scorer: feedforward followed by a single-logit linear layer.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = SpanPruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))
    # Endpoint extractor concatenates the contextualized start/end states
    # ("x,y") and adds a span-width embedding (widths not bucketed).
    self._endpoint_span_extractor = EndpointSpanExtractor(context_layer.get_output_dim(),
                                                          combination="x,y",
                                                          num_width_embeddings=max_span_width,
                                                          span_width_embedding_dim=feature_size,
                                                          bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(input_dim=text_field_embedder.get_output_dim())
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    # Dropout applied to word embeddings; identity when disabled.
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Assemble the ESIM-style entailment model and validate layer dimensions."""
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    # A falsy dropout rate disables both regular and variational dropout.
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    # Fail fast on configuration mismatches between stacked components.
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sent_encoder: Seq2SeqEncoder,
             classifier_feedforward: FeedForward,
             encoder_attention: Attention = None,
             label_namespace: str = "labels",
             using_extra_len_feature=True,
             class_weight=None,
             dropout: Optional[float] = None,
             calculate_f1: bool = None,
             calculate_auc: bool = None,
             calculate_auc_pr: bool = None,
             positive_label: int = 1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Sentence-attention classifier over an encoded sequence.

    Fixes relative to the previous version (defaults stay backward-compatible):
    - ``encoder_attention`` no longer defaults to a module instance created at
      function-definition time (one shared object across every model built
      with the default); it is now constructed per instance.
    - ``class_weight`` no longer uses a mutable list default.
    - the feedforward input-dim check no longer compares against 0 when
      ``using_extra_len_feature`` is False (operator-precedence bug in
      ``a + 2 if flag else 0``).
    """
    super(SentAtt, self).__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.num_tags = self.vocab.get_vocab_size()
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.sent_encoder = sent_encoder
    # Build the default attention per instance instead of sharing one module.
    self.attention = encoder_attention if encoder_attention is not None \
        else DotProductAttention(normalize=True)
    self.using_extra_len_feature = using_extra_len_feature
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.classifier_feedforward = classifier_feedforward
    self.metrics = {"accuracy": CategoricalAccuracy()}
    if class_weight is None:
        class_weight = [1.0, 1.0]
    # A non-empty list yields a weighted loss; anything else falls back to
    # the unweighted loss.
    if isinstance(class_weight, list) and len(class_weight) > 0:
        self.loss = torch.nn.CrossEntropyLoss(
            weight=torch.FloatTensor(class_weight))
    else:
        self.loss = torch.nn.CrossEntropyLoss()
    self.positive_label = positive_label
    self.calculate_f1 = calculate_f1
    self.calculate_auc = calculate_auc
    self.calculate_auc_pr = calculate_auc_pr
    if calculate_f1:
        self._f1_metric = F1Measure(positive_label)
    if calculate_auc:
        self._auc = Auc(positive_label)
    if calculate_auc_pr:
        self._auc_pr = AucPR(positive_label)
    if classifier_feedforward is not None:
        # The extra-length feature adds 2 dimensions when enabled, otherwise
        # nothing — the conditional must be parenthesized.
        check_dimensions_match(
            sent_encoder.get_output_dim() + (2 if using_extra_len_feature else 0),
            classifier_feedforward.get_input_dim(),
            "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             num_context_answers: int = 0,
             marker_embedding_dim: int = 10,
             max_span_length: int = 30,
             max_turn_length: int = 12) -> None:
    """Conversational span-extraction QA model with yes/no and follow-up heads.

    When ``num_context_answers`` > 0, previous-turn answers are encoded with
    marker embeddings of size ``marker_embedding_dim`` per context answer.
    """
    super().__init__(vocab)
    self._num_context_answers = num_context_answers
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._marker_embedding_dim = marker_embedding_dim
    self._encoding_dim = phrase_layer.get_output_dim()
    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))
    self._residual_encoder = residual_encoder
    # Marker embeddings only exist when previous-turn answers are used.
    if num_context_answers > 0:
        self._question_num_marker = torch.nn.Embedding(max_turn_length,
                                                       marker_embedding_dim * num_context_answers)
        self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim)
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    # Per-token prediction heads over the encoded passage.
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3))
    self._span_followup_predictor = TimeDistributed(self._followup_lin)
    check_dimensions_match(phrase_layer.get_input_dim(),
                           text_field_embedder.get_output_dim() +
                           marker_embedding_dim * num_context_answers,
                           "phrase layer input dim",
                           "embedding dim + marker dim * num context answers")
    initializer(self)
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._span_followup_accuracy = CategoricalAccuracy()
    self._span_gt_yesno_accuracy = CategoricalAccuracy()
    self._span_gt_followup_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self,
             vocab: Vocabulary,
             task: str,
             encoder: Seq2SeqEncoder,
             prev_task: str,
             prev_task_embed_dim: int = None,
             label_smoothing: float = 0.0,
             dropout: float = 0.0,
             adaptive: bool = False,
             features: List[str] = None,
             metric: str = "acc",
             loss_weight: float = 1.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             threshold: float = 0.5,
             max_heads: int = 2,
             focal_gamma: float = None,
             focal_alpha: float = None) -> None:
    """Multi-label tag decoder for one task in a multi-task tagger.

    Optionally embeds the previous task's predicted tags
    (``prev_task_tag_embedding``) and supports either a plain linear output
    layer or an adaptive softmax (``adaptive=True``).
    """
    super(MultiTagDecoder, self).__init__(vocab, regularizer)
    self.task = task
    self.dropout = torch.nn.Dropout(p=dropout)
    self.encoder = encoder
    self.output_dim = encoder.get_output_dim()
    self.label_smoothing = label_smoothing
    self.num_classes = self.vocab.get_vocab_size(task)
    self.adaptive = adaptive
    self.metric = metric
    self._loss3 = torch.nn.BCEWithLogitsLoss()
    self.threshold = threshold
    self.max_heads = max_heads
    self.gamma = focal_gamma
    self.alpha = focal_alpha
    self.loss_weight = loss_weight

    # A: add all possible relative encoding to vocabulary
    if self.vocab.get_token_index('100,root') == 1:
        for head in self.vocab.get_token_to_index_vocabulary('head_tags').keys():
            all_encodings = get_all_relative_encodings(head)
            self.vocab.add_tokens_to_namespace(tokens=all_encodings, namespace='dep_encoded')
        # make sure to put end token '100,root'
        self.vocab.add_token_to_namespace(token='100,root', namespace='dep_encoded')

    self.prev_task_tag_embedding = None
    # BUG FIX: `prev_task_embed_dim is not 0` compared identity with an int
    # literal (a CPython small-int implementation detail, SyntaxWarning on
    # 3.8+); use `!= 0` for value comparison.
    if prev_task_embed_dim is not None and prev_task_embed_dim != 0 and prev_task is not None:
        if prev_task == 'rependency':
            self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size('dep_encoded'),
                                                     prev_task_embed_dim)
        else:
            self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size(prev_task),
                                                     prev_task_embed_dim)

    # Choose the metric to use for the evaluation (from the defined
    # "metric" value of the task). If not specified, default to accuracy.
    if self.metric == "acc":
        self.metrics = {"acc": CategoricalAccuracy()}
    elif self.metric == "multi_span_f1":
        self.metrics = {"multi_span_f1": MultiSpanBasedF1Measure(
            self.vocab,
            tag_namespace=self.task,
            label_encoding="BIO",
            threshold=self.threshold,
            max_heads=self.max_heads)}
    else:
        logger.warning(f"ERROR. Metric: {self.metric} unrecognized. Using accuracy instead.")
        self.metrics = {"acc": CategoricalAccuracy()}

    if self.adaptive:
        # TODO
        adaptive_cutoffs = [round(self.num_classes / 15), 3 * round(self.num_classes / 15)]
        self.task_output = AdaptiveLogSoftmaxWithLoss(self.output_dim,
                                                      self.num_classes,
                                                      cutoffs=adaptive_cutoffs,
                                                      div_value=4.0)
    else:
        self.task_output = TimeDistributed(Linear(self.output_dim, self.num_classes))
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Construct a graph-based parser with biaffine arc and label scorers.

    ``edge_prediction_threshold`` must lie strictly in (0, 1); it is stored
    here and presumably applied at decode time (not used further in this
    constructor). The arc/tag feedforwards default to single-layer ELU
    projections of the encoder output.
    """
    super(GraphParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    # Head and child arc towers; deepcopy yields a second module with the same
    # architecture and initial weights but independent parameters.
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1,
                    arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1,
                    tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    # Fail fast when configured dimensions disagree.
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Graph parser with biaffine arc and label scorers over the encoder output."""
    super(GraphParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(
            f"edge_prediction_threshold must be between 0 and 1 (exclusive) "
            f"but found {edge_prediction_threshold}.")

    hidden_dim = encoder.get_output_dim()
    # Arc towers: head side is the configured (or default ELU) feedforward,
    # child side is a deep copy so both start identical but train independently.
    arc_ff = arc_feedforward or FeedForward(hidden_dim, 1,
                                            arc_representation_dim,
                                            Activation.by_name("elu")())
    self.head_arc_feedforward = arc_ff
    self.child_arc_feedforward = copy.deepcopy(arc_ff)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("labels")
    # Label towers, mirroring the arc towers.
    tag_ff = tag_feedforward or FeedForward(hidden_dim, 1,
                                            tag_representation_dim,
                                            Activation.by_name("elu")())
    self.head_tag_feedforward = tag_ff
    self.child_tag_feedforward = copy.deepcopy(tag_ff)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             char_field_embedder: TextFieldEmbedder,
             # num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             char_rnn: Seq2SeqEncoder,
             hops: int,
             hidden_dim: int,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Mnemonic-reader style QA model: ``hops`` rounds of interactive and
    self alignment (each followed by an SFU fusion and a BiLSTM aggregation),
    finished by a memory-based answer pointer.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._char_field_embedder = char_field_embedder
    # Binary feature embedding (2 ids -> 5 dims); exact feature semantics are
    # defined by the caller — confirm against the forward pass.
    self._features_embedder = nn.Embedding(2, 5)
    # self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim() + 5 * 3,
    #                                               num_highway_layers))
    self._phrase_layer = phrase_layer
    self._encoding_dim = phrase_layer.get_output_dim()
    # self._stacked_brnn = PytorchSeq2SeqWrapper(
    #     StackedBidirectionalLstm(input_size=self._encoding_dim, hidden_size=hidden_dim,
    #                              num_layers=3, recurrent_dropout_probability=0.2))
    self._char_rnn = char_rnn
    self.hops = hops
    # One module of each kind per hop, registered via ModuleList.
    self.interactive_aligners = nn.ModuleList()
    self.interactive_SFUs = nn.ModuleList()
    self.self_aligners = nn.ModuleList()
    self.self_SFUs = nn.ModuleList()
    self.aggregate_rnns = nn.ModuleList()
    for i in range(hops):
        # interactive aligner
        self.interactive_aligners.append(layers.SeqAttnMatch(self._encoding_dim))
        self.interactive_SFUs.append(layers.SFU(self._encoding_dim, 3 * self._encoding_dim))
        # self aligner
        self.self_aligners.append(layers.SelfAttnMatch(self._encoding_dim))
        self.self_SFUs.append(layers.SFU(self._encoding_dim, 3 * self._encoding_dim))
        # aggregating
        self.aggregate_rnns.append(
            PytorchSeq2SeqWrapper(
                nn.LSTM(input_size=self._encoding_dim,
                        hidden_size=hidden_dim,
                        num_layers=1,
                        dropout=0.2,
                        bidirectional=True,
                        batch_first=True)))
    # Memory-based Answer Pointer
    self.mem_ans_ptr = layers.MemoryAnsPointer(x_size=self._encoding_dim,
                                               y_size=self._encoding_dim,
                                               hidden_size=hidden_dim,
                                               hop=hops,
                                               dropout_rate=0.2,
                                               normalize=True)
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # Identity function stands in for dropout when it is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             feed_forward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a SQuAD-style span-prediction model with a no-answer head.

    Parameters
    ----------
    vocab : model vocabulary, passed to the base ``Model``.
    text_field_embedder : token embedder, fed through a highway network.
    num_highway_layers : depth of the highway network over embeddings.
    phrase_layer : contextual encoder; its output dim sizes all predictors.
    attention_similarity_function : currently unused — kept for config
        compatibility with the disabled matrix-attention layers below.
    residual_encoder / span_start_encoder / span_end_encoder : encoders used
        downstream in forward (stored here).
    feed_forward : stored feed-forward module.
    dropout : dropout probability; 0 disables (identity used instead).
    mask_lstms : stored flag; presumably controls masking in forward — TODO confirm.
    initializer / regularizer : AllenNLP initialization / regularization hooks.
    """
    super(ModelSQUAD, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    #self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._residual_encoder = residual_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder
    self._feed_forward = feed_forward
    encoding_dim = phrase_layer.get_output_dim()
    # All three heads project from the phrase-layer encoding dim.
    # (A previously unused local computing span_end_encoder.get_output_dim()
    # was removed — it was never read.)
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    #self._self_matrix_attention = MatrixAttention(attention_similarity_function)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))
    # Learned fusion weight vectors, initialized with a Xavier-style
    # uniform bound computed once and reused for all six vectors.
    self._w_p = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_q = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_pq = torch.nn.Parameter(torch.Tensor(encoding_dim))
    std = math.sqrt(6 / (encoding_dim * 3 + 1))
    self._w_p.data.uniform_(-std, std)
    self._w_q.data.uniform_(-std, std)
    self._w_pq.data.uniform_(-std, std)
    self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))
    # Same bound as above (encoding_dim unchanged).
    self._w_x.data.uniform_(-std, std)
    self._w_y.data.uniform_(-std, std)
    self._w_xy.data.uniform_(-std, std)
    # Training/validation metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # Identity fallback keeps call sites uniform when dropout is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__( self, use_citation_graph_embeddings: str, citation_embedding_file: str, doc_to_idx_mapping_file: str, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, context_layer: Seq2SeqEncoder, modules: Params, loss_weights: Dict[str, int], lexical_dropout: float = 0.2, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, display_metrics: List[str] = None, ) -> None: super(RelationsOnlyModel, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._context_layer = context_layer self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout) if use_citation_graph_embeddings is True or ( isinstance(use_citation_graph_embeddings, str) and use_citation_graph_embeddings.lower() == "true"): if citation_embedding_file == "" or doc_to_idx_mapping_file == "": raise ValueError( "Must supply citation embedding files to use graph embedding features" ) self._document_embedding = initialize_graph_embeddings( citation_embedding_file, finetune_embedding=False) self._doc_to_idx_mapping = json.load(open(doc_to_idx_mapping_file)) else: self._document_embedding = None self._doc_to_idx_mapping = None modules = Params(modules) self._cluster_n_ary_relation = NAryRelationExtractor.from_params( vocab=vocab, params=modules.pop("n_ary_relation"), document_embedding=self._document_embedding, doc_to_idx_mapping=self._doc_to_idx_mapping) self._endpoint_span_extractor = EndpointSpanExtractor( context_layer.get_output_dim(), combination="x,y") self._attentive_span_extractor = SelfAttentiveSpanExtractor( input_dim=context_layer.get_output_dim()) for k in loss_weights: loss_weights[k] = float(loss_weights[k]) self._loss_weights = loss_weights self._permanent_loss_weights = copy.deepcopy(self._loss_weights) self._display_metrics = display_metrics self._multi_task_loss_metrics = { k: Average() for k in ["n_ary_relation"] } self.training_mode = True self.prediction_mode = False initializer(self)
def __init__(self,
             vocab: Vocabulary,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_embed_dim: int = None,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a biaffine dependency decoder (arc + label scorers).

    Parameters
    ----------
    vocab : must contain "upos" (if POS embedding used), "head_tags", "pos".
    encoder : contextual encoder whose output feeds the feedforwards.
    tag_representation_dim / arc_representation_dim : sizes of the label/arc
        representations; must match the feedforward output dims (checked).
    pos_embed_dim : if given, adds a learned UPOS-tag embedding.
    tag_feedforward / arc_feedforward : optional custom projections; default
        to a single ELU layer over the encoder output.
    use_mst_decoding_for_validation : use MST decoding at eval time.
    dropout : rate used for BOTH dropout members (see note below).
    """
    super(DependencyDecoder, self).__init__(vocab, regularizer)
    self.pos_tag_embedding = None
    if pos_embed_dim is not None:
        self.pos_tag_embedding = Embedding(self.vocab.get_vocab_size("upos"), pos_embed_dim)
    # NOTE: two dropout members exist — self.dropout (standard, elementwise)
    # and self._dropout (variational, below); both use the same rate.
    self.dropout = torch.nn.Dropout(p=dropout)
    self.encoder = encoder
    encoder_output_dim = encoder.get_output_dim()
    # Arc scorer: separate head/child projections into a biaffine attention.
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_output_dim, 1,
                    arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    # Label scorer: head/child projections into a bilinear over label classes.
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_output_dim, 1,
                    tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    self._dropout = InputVariationalDropout(dropout)
    # Learned sentinel vector prepended as the artificial ROOT token.
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder_output_dim]))
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    # Punctuation POS tags are excluded from attachment-score evaluation.
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             lemmatize_helper: LemmatizeHelper,
             task_config: TaskConfig,
             morpho_vector_dim: int = 0,
             gram_val_representation_dim: int = -1,
             lemma_representation_dim: int = -1,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a multi-task dependency parser with grammar-value and lemma heads.

    Parameters
    ----------
    vocab : must contain "head_tags", "grammar_value_tags", "pos" namespaces.
    text_field_embedder / encoder : token embedding + contextual encoding.
    tag_representation_dim / arc_representation_dim : label/arc projection
        sizes; must match feedforward output dims (checked below).
    lemmatize_helper : provides the lemma label space (via ``len()``).
    task_config : task flags; "use_pos_tag" must agree with pos_tag_embedding.
    morpho_vector_dim : extra morphology-feature dims appended to embeddings.
    gram_val_representation_dim / lemma_representation_dim : if <= 0, the
        corresponding head is a single Linear; otherwise a 2-layer MLP with
        dropout.
    dropout / input_dropout : variational dropout rates (hidden / input).
    """
    super(DependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.lemmatize_helper = lemmatize_helper
    self.task_config = task_config
    encoder_dim = encoder.get_output_dim()
    # Biaffine arc scorer: separate head/child projections.
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1,
                    arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    # Bilinear label scorer over head-tag classes.
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1,
                    tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    # Config consistency: POS embedding presence must match the task flag.
    # NOTE(review): assert is stripped under `python -O`; a raise would be safer.
    assert self.task_config.params.get("use_pos_tag", False) == (self._pos_tag_embedding is not None)
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # Learned sentinel vector prepended as the artificial ROOT token.
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))
    # Grammar-value head: single Linear, or 2-layer MLP when a positive
    # intermediate dim is configured.
    if gram_val_representation_dim <= 0:
        self._gram_val_output = torch.nn.Linear(
            encoder_dim, self.vocab.get_vocab_size("grammar_value_tags"))
    else:
        self._gram_val_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, gram_val_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(gram_val_representation_dim,
                            self.vocab.get_vocab_size("grammar_value_tags")))
    # Lemma head: same single-Linear vs MLP choice.
    if lemma_representation_dim <= 0:
        self._lemma_output = torch.nn.Linear(encoder_dim, len(lemmatize_helper))
    else:
        self._lemma_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, lemma_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(lemma_representation_dim, len(lemmatize_helper)))
    # Encoder input = token embeddings (+ morphology vector) (+ POS embedding).
    representation_dim = text_field_embedder.get_output_dim(
    ) + morpho_vector_dim
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    # Punctuation POS tags are excluded from attachment-score evaluation.
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {
        tag: index
        for tag, index in tags.items() if tag in POS_TO_IGNORE
    }
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(
        f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
        "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    self._gram_val_prediction_accuracy = CategoricalAccuracy()
    self._lemma_prediction_accuracy = CategoricalAccuracy()
    initializer(self)
def __init__(self,
             question_embedder: TextFieldEmbedder,
             input_memory_embedder: TextFieldEmbedder,
             output_memory_embedder: TextFieldEmbedder,
             question_encoder: Seq2SeqEncoder,
             input_memory_encoder: Seq2VecEncoder,
             output_memory_encoder: Seq2VecEncoder,
             decoder_beam_search: BeamSearch,
             input_attention: Attention,
             past_attention: Attention,
             action_embedding_dim: int,
             max_decoding_steps: int,
             nhop: int,
             decoding_nhop: int,
             vocab: Vocabulary,
             dataset_path: str = 'dataset',
             parse_sql_on_decoding: bool = True,
             training_beam_size: int = None,
             add_action_bias: bool = True,
             decoder_self_attend: bool = True,
             decoder_num_layers: int = 1,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels') -> None:
    """Build a memory-augmented text-to-SQL semantic parser.

    Wires question/memory embedders and encoders, grammar-action embeddings,
    a transition function (self-attending over past schema items when
    ``decoder_self_attend`` is set), beam search, and evaluation metrics.

    Parameters (selected)
    ---------------------
    nhop / decoding_nhop : number of memory-attention hops at encoding /
        decoding time.
    dataset_path : root containing ``database/`` and ``tables.json`` used by
        the SQL evaluator.
    add_action_bias : if True, action-input embeddings get one extra dim
        (serving as a learned bias term).
    rule_namespace : vocabulary namespace of grammar production rules.
    """
    super().__init__(vocab)
    self.question_embedder = question_embedder
    self._input_mm_embedder = input_memory_embedder
    self._output_mm_embedder = output_memory_embedder
    self._question_encoder = question_encoder
    self._input_mm_encoder = TimeDistributed(input_memory_encoder)
    self._output_mm_encoder = TimeDistributed(output_memory_encoder)
    self.parse_sql_on_decoding = parse_sql_on_decoding
    self._self_attend = decoder_self_attend
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._rule_namespace = rule_namespace
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    # Extra dim acts as a learned per-action bias.
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._input_action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    # Fixed count of schema-entity types; separate embeddings for decoder
    # input/output and for the encoder side (half the encoder output dim).
    self._num_entity_types = 9
    self._entity_type_decoder_input_embedding = Embedding(
        self._num_entity_types, action_embedding_dim)
    self._entity_type_decoder_output_embedding = Embedding(
        self._num_entity_types, action_embedding_dim)
    self._entity_type_encoder_embedding = Embedding(
        self._num_entity_types, (int)(question_encoder.get_output_dim() / 2))
    self._decoder_num_layers = decoder_num_layers
    self._action_embedding_dim = action_embedding_dim
    self._ent2ent_ff = FeedForward(action_embedding_dim, 1,
                                   action_embedding_dim,
                                   Activation.by_name('relu')())
    # Identity fallback keeps call sites uniform when dropout is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    # Learned start-of-decoding vectors (no previous action/attention yet).
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(question_encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)
    if self._self_attend:
        self._transition_function = AttendPastSchemaItemsTransitionFunction(
            encoder_output_dim=question_encoder.get_output_dim(),
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            past_attention=past_attention,
            decoding_nhop=decoding_nhop,
            predict_start_type_separately=False,
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers)
    else:
        self._transition_function = LinkingTransitionFunction(
            encoder_output_dim=question_encoder.get_output_dim(),
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            predict_start_type_separately=False,
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers)
    self._mm_attn = MemAttn(question_encoder.get_output_dim(), nhop)
    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._action_padding_index = -1  # the padding value used by IndexField
    # Evaluation metrics.
    self._exact_match = Average()
    self._sql_evaluator_match = Average()
    self._action_similarity = Average()
    self._acc_single = Average()
    self._acc_multi = Average()
    self._beam_hit = Average()
    # TODO: Remove hard-coded dirs
    self._evaluate_func = partial(
        evaluate,
        db_dir=os.path.join(dataset_path, 'database'),
        table=os.path.join(dataset_path, 'tables.json'),
        check_valid=False)
def __init__(
        self,
        # Vocabulary.
        vocab: Vocabulary,
        # Embeddings.
        source_field_embedder: TextFieldEmbedder,
        target_embedding_size: int,
        # Encoders and Decoders.
        encoder: Seq2SeqEncoder,
        decoder_type: str,
        output_projection_layer: FeedForward,
        source_namespace: str = "source",
        target_namespace: str = "target",
        # Hyperparameters and flags.
        decoder_attention_function: BilinearAttention = None,
        decoder_is_bidirectional: bool = False,
        decoder_num_layers: int = 1,
        apply_attention: Optional[bool] = False,
        max_decoding_steps: int = 100,
        scheduled_sampling_ratio: float = 0.4,
        # Logistical.
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a generic encoder-decoder sequence-to-sequence model.

    Validates dimension compatibility, then wires the source embedder and
    encoder, the target embedder, and a (possibly attention-augmented)
    recurrent decoder chosen from ``SequenceToSequence.DECODERS``.

    Raises
    ------
    ConfigurationError : when the encoder input dim does not match the
        source embedder output dim, the projection layer does not match the
        target vocabulary size, or ``decoder_type`` is unknown.
    """
    super().__init__(vocab, regularizer)
    if encoder.get_input_dim() != source_field_embedder.get_output_dim():
        raise ConfigurationError(
            "The input dimension of the encoder must match the embedding"
            "size of the source_field_embedder. Found {} and {}, respectively."
            .format(encoder.get_input_dim(),
                    source_field_embedder.get_output_dim()))
    if output_projection_layer.get_output_dim() != vocab.get_vocab_size(
            target_namespace):
        # FIX: message previously said "French vocabulary" — a leftover from
        # a translation tutorial; this model is namespace-generic.
        raise ConfigurationError(
            "The output dimension of the output_projection_layer must match the "
            "size of the target vocabulary. Found {} and {}, "
            "respectively.".format(
                output_projection_layer.get_output_dim(),
                vocab.get_vocab_size(target_namespace)))
    if decoder_type not in SequenceToSequence.DECODERS:
        raise ConfigurationError(
            "Unrecognized decoder option '{}'".format(decoder_type))
    # For dealing with input.
    self.source_vocab_size = vocab.get_vocab_size(source_namespace)
    self.target_vocab_size = vocab.get_vocab_size(target_namespace)
    # NOTE(review): falling back to a bare TextFieldEmbedder() looks dubious
    # (abstract base), but source_field_embedder is a required argument so
    # the fallback is effectively unreachable — kept for compatibility.
    self.source_field_embedder = source_field_embedder or TextFieldEmbedder(
    )
    self.encoder = encoder
    # For dealing with / producing output.
    # (A duplicate re-assignment of self.target_vocab_size was removed here.)
    self.target_embedder = Embedding(self.target_vocab_size,
                                     target_embedding_size)
    # Input size will either be the target embedding size or the target embedding size plus the
    # encoder hidden size to attend on the input.
    #
    # When making a custom attention function that uses neither of those input sizes, you will
    # have to define the decoder yourself.
    decoder_input_size = target_embedding_size
    if apply_attention:
        decoder_input_size += encoder.get_output_dim()
    # Hidden size of the encoder and decoder should match.
    decoder_hidden_size = encoder.get_output_dim()
    self.decoder = SequenceToSequence.DECODERS[decoder_type](
        decoder_input_size,
        decoder_hidden_size,
        num_layers=decoder_num_layers,
        batch_first=True,
        bias=True,
        bidirectional=decoder_is_bidirectional)
    self.output_projection_layer = output_projection_layer
    self.apply_attention = apply_attention
    self.decoder_attention_function = decoder_attention_function or BilinearAttention(
        matrix_dim=encoder.get_output_dim(),
        vector_dim=encoder.get_output_dim())
    # Hyperparameters.
    self._max_decoding_steps = max_decoding_steps
    self._scheduled_sampling_ratio = scheduled_sampling_ratio
    # Used for prepping the translation primer (initialization of the target word-level
    # encoder's hidden state).
    #
    # If the decoder is an LSTM, both hidden states and cell states must be initialized.
    # Also, hidden states that prime translation via this encoder must be duplicated
    # across by number of layers they has.
    self._decoder_is_lstm = isinstance(self.decoder, torch.nn.LSTM)
    self._decoder_num_layers = decoder_num_layers
    self._start_index = vocab.get_token_index(START_SYMBOL, target_namespace)
    self._end_index = vocab.get_token_index(END_SYMBOL, target_namespace)
    self._source_namespace = source_namespace
    self._target_namespace = target_namespace
    self._batch_size = None
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             intent_encoder: Seq2SeqEncoder = None,
             tag_encoder: Seq2SeqEncoder = None,
             attention: Attention = None,
             attention_function: SimilarityFunction = None,
             context_for_intent: bool = True,
             context_for_tag: bool = True,
             attention_for_intent: bool = True,
             attention_for_tag: bool = True,
             sequence_label_namespace: str = "labels",
             intent_label_namespace: str = "intent_labels",
             feedforward: Optional[FeedForward] = None,
             label_encoding: Optional[str] = None,
             include_start_end_transitions: bool = True,
             crf_decoding: bool = False,
             constrain_crf_decoding: bool = None,
             focal_loss_gamma: float = None,
             nongeneral_intent_weight: float = 5.,
             num_train_examples: float = None,
             calculate_span_f1: bool = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a joint intent-classification + slot-tagging model.

    Intent logits use a multi-label BCE loss (optionally class-weighted from
    retained vocab counts); tags are decoded greedily or with a CRF.

    Parameters (selected)
    ---------------------
    attention / attention_function : mutually exclusive ways to supply the
        attention module (a ConfigurationError is raised if both are given).
    context_for_* / attention_for_* : each enabled flag widens the matching
        projection layer by one encoder-output dim.
    nongeneral_intent_weight : BCE positive weight applied to "Request"
        intents when per-class counts are unavailable.
    num_train_examples : enables count-based positive weighting.
    focal_loss_gamma : currently unused — kept for config compatibility.
    crf_decoding : build a ConditionalRandomField for tag decoding.
    """
    super().__init__(vocab, regularizer)
    self.context_for_intent = context_for_intent
    self.context_for_tag = context_for_tag
    self.attention_for_intent = attention_for_intent
    self.attention_for_tag = attention_for_tag
    self.sequence_label_namespace = sequence_label_namespace
    self.intent_label_namespace = intent_label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(sequence_label_namespace)
    self.num_intents = self.vocab.get_vocab_size(intent_label_namespace)
    self.encoder = encoder
    self.intent_encoder = intent_encoder
    # FIX: was `self.tag_encoder = intent_encoder`, which silently ignored
    # the tag_encoder argument (it was never used anywhere else).
    self.tag_encoder = tag_encoder
    self._feedforward = feedforward
    self._verbose_metrics = verbose_metrics
    self.rl = False
    if attention:
        if attention_function:
            raise ConfigurationError("You can only specify an attention module or an "
                                     "attention function, but not both.")
        self.attention = attention
    elif attention_function:
        self.attention = LegacyAttention(attention_function)
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    # Intent head: base dim from the (optional) feedforward or the encoder,
    # widened once per enabled context/attention feature.
    projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim()
    if self.context_for_intent:
        projection_input_dim += self.encoder.get_output_dim()
    if self.attention_for_intent:
        projection_input_dim += self.encoder.get_output_dim()
    self.intent_projection_layer = Linear(projection_input_dim, self.num_intents)
    # Positive-class weights for the multi-label intent loss.
    if num_train_examples:
        try:
            # log-odds of each intent's absence, from retained vocab counts.
            pos_weight = torch.tensor([log10((num_train_examples - self.vocab._retained_counter[intent_label_namespace][t]) /
                                             self.vocab._retained_counter[intent_label_namespace][t])
                                       for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()])
        # FIX: was a bare `except:`; narrowed so Ctrl-C etc. still propagate.
        # Counts may be missing (_retained_counter unset) or zero.
        except Exception:
            pos_weight = torch.tensor([1. for i, t in
                                       self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()])
    else:
        # pos_weight = torch.tensor([(lambda t: 1. if "general" in t else nongeneral_intent_weight)(t) for i, t in
        pos_weight = torch.tensor([(lambda t: nongeneral_intent_weight if "Request" in t else 1.)(t)
                                   for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()])
    self.intent_loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction="none")
    # Tag head: widened the same way as the intent head.
    tag_projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim()
    if self.context_for_tag:
        tag_projection_input_dim += self.encoder.get_output_dim()
    if self.attention_for_tag:
        tag_projection_input_dim += self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(tag_projection_input_dim, self.num_tags))
    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(sequence_label_namespace)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None
    self.include_start_end_transitions = include_start_end_transitions
    if crf_decoding:
        self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=include_start_end_transitions
        )
    else:
        self.crf = None
    self._intent_f1_metric = MultiLabelF1Measure(vocab,
                                                 namespace=intent_label_namespace)
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=sequence_label_namespace,
                                             label_encoding=label_encoding)
    self._dai_f1_metric = DialogActItemF1Measure()
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    activation = Activation.by_name("tanh")(),
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    use_mst_decoding_for_validation: bool = False,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """Build a graph-based dependency parser with linear edge/label scorers.

    Parameters
    ----------
    vocab : must contain "head_tags" and "pos" namespaces.
    encoder : contextual encoder over (optionally POS-augmented) embeddings.
    tag_representation_dim / arc_representation_dim : projection sizes;
        must match the feedforward output dims (checked below).
    activation : stored activation. NOTE(review): the default instance is
        created once at import time and shared across models — fine only if
        Activation instances are stateless; confirm.
    edge_prediction_threshold : stored by callers? Not used in this block —
        presumably consumed in decoding; verify.
    **kwargs : forwarded to the base Model (e.g. regularizer).
    """
    super().__init__(vocab, **kwargs)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.activation = activation
    encoder_dim = encoder.get_output_dim()
    # edge FeedForward (head/child share architecture, separate weights)
    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim, Activation.by_name("tanh")()
    )
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    # label FeedForward
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim, Activation.by_name("tanh")()
    )
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.arc_out_layer = Linear(arc_representation_dim, 1)
    num_labels = self.vocab.get_vocab_size("head_tags")
    # NOTE(review): projects from arc_representation_dim, not
    # tag_representation_dim — only correct if forward() feeds arc-sized
    # features here or the two dims are configured equal; confirm.
    self.tag_out_layer = Linear(arc_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # add a head sentinel to accommodate for extra root token
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    check_dimensions_match(
        tag_representation_dim,
        self.head_tag_feedforward.get_output_dim(),
        "tag representation dim",
        "tag feedforward output dim",
    )
    check_dimensions_match(
        arc_representation_dim,
        self.head_arc_feedforward.get_output_dim(),
        "arc representation dim",
        "arc feedforward output dim",
    )
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    # Punctuation POS tags are excluded from attachment-score evaluation.
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {
        tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE
    }
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(
        f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
        "Ignoring words with these POS tags for evaluation."
    )
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2SeqEncoder,
             target_encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             target_field_embedder: Optional[TextFieldEmbedder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             target_scale: bool = False,
             context_preserving: bool = False) -> None:
    '''
    Targeted sentiment classifier: encodes the context sentence and each
    target, attends targets over the context with bilinear attention, and
    projects to the "labels" vocabulary with a cross-entropy loss.

    :param vocab: A Vocabulary, required in order to compute sizes
                  for input/output projections and the label space.
    :param text_field_embedder: Used to embed the text, and also the target
                                text when target_field_embedder is None.
    :param text_encoder: Sequence encoder producing a representation of each
                         token in the context sentence.
    :param target_encoder: Encoder producing the representation of the
                           target text tokens (applied per target via
                           TimeDistributed).
    :param feedforward: Optional feed-forward layer applied (time
                        distributed) before label projection; its output dim
                        replaces the encoder-derived projection input dim.
    :param target_field_embedder: Optional separate embedder for the target
                                  text fed to target_encoder, allowing
                                  different embeddings for text and targets.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, used to compute the regularization
                        penalty during training.
    :param dropout: Applied after layers (variational dropout for
                    time-based data, standard dropout otherwise). See
                    `variational dropout`_.
    :param target_scale: Stored flag; presumably scales target
                         contributions in forward — TODO confirm.
    :param context_preserving: When True (and no feedforward is given) the
                               output dim doubles, to concatenate the
                               encoded text with the attention-weighted
                               representation.

    .. _variational dropout:
       https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
    '''
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.target_field_embedder = target_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    # TimeDistributed: one encoding per target in the batch's target dim.
    self.target_encoder = TimeDistributed(target_encoder)
    self.feedforward = feedforward
    if self.feedforward:
        self.time_feedforward = TimeDistributed(self.feedforward)
    # Bilinear attention between context tokens and target encodings.
    self.attention_layer = BilinearMatrixAttention(
        text_encoder.get_output_dim(), target_encoder.get_output_dim())
    # Whether to concat the encoded text representation with the weighted
    # representation from the attention
    self.context_preserving = context_preserving
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        if self.context_preserving:
            output_dim = (text_encoder.get_output_dim() * 2)
        else:
            output_dim = text_encoder.get_output_dim()
    self.label_projection = TimeDistributed(
        Linear(output_dim, self.num_classes))
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.f1_metrics = {}
    # F1 Scores — one per label in the "labels" namespace.
    label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
    for label_index, label_name in label_index_name.items():
        label_name = f'F1_{label_name.capitalize()}'
        self.f1_metrics[label_name] = F1Measure(label_index)
    # Plain and variational dropout, plus TimeDistributed wrappers for
    # per-target application.
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)
    self._time_naive_dropout = TimeDistributed(self._naive_dropout)
    self._time_variational_dropout = TimeDistributed(
        self._variational_dropout)
    self.target_scale = target_scale
    self.loss = torch.nn.CrossEntropyLoss()
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           text_encoder.get_input_dim(),
                           "text field embedding dim",
                           "text encoder input dim")
    # Ensure that the dimensions of the target or text field embedder and
    # the target encoder match
    target_field_embedder_dim = text_field_embedder.get_output_dim()
    target_field_error = "text field embedding dim"
    if self.target_field_embedder:
        target_field_embedder_dim = target_field_embedder.get_output_dim()
        target_field_error = "target field embedding dim"
    check_dimensions_match(target_field_embedder_dim,
                           target_encoder.get_input_dim(),
                           target_field_error, "target encoder input dim")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    utterance_embedder: TextFieldEmbedder,
    action_embedding_dim: int,
    encoder: Seq2SeqEncoder,
    decoder_beam_search: BeamSearch,
    max_decoding_steps: int,
    input_attention: Attention,
    add_action_bias: bool = True,
    training_beam_size: int = None,
    decoder_num_layers: int = 1,
    dropout: float = 0.0,
    rule_namespace: str = "rule_labels",
    database_file="/atis/atis.db",
) -> None:
    """Set up the ATIS semantic parser: utterance encoding, grammar-action
    embeddings, learned first-step decoder inputs, and the beam-search
    decoding machinery, plus the SQL executor and evaluation metrics."""
    super().__init__(vocab)

    # Input side: embed and encode the utterance.
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    # Fall back to an identity function when no dropout is requested, so
    # callers can always apply self._dropout unconditionally.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._rule_namespace = rule_namespace

    # Evaluation metrics accumulated over the run.
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)
    self._action_padding_index = -1  # the padding value used by IndexField

    # Grammar-action embeddings. The input-side embedding carries one extra
    # dimension when a learned per-action bias is enabled.
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    input_action_dim = action_embedding_dim + 1 if self._add_action_bias else action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # Learned inputs for the very first decoding step, before any previous
    # action or previous utterance attention exists.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(
        num_embeddings=self._num_entity_types,
        embedding_dim=action_embedding_dim)
    self._decoder_num_layers = decoder_num_layers

    # Decoding: beam search plus maximum-marginal-likelihood training.
    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers,
    )
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    span_end_encoder: Seq2SeqEncoder,
    dropout: float = 0.2,
    mask_lstms: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """Wire up the BiDAF layers (embed -> highway -> phrase encoder ->
    bi-attention -> modeling -> span predictors) and verify that all
    adjacent layer dimensions agree before training starts."""
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    enc_dim = phrase_layer.get_output_dim()
    model_dim = modeling_layer.get_output_dim()

    # Span-start scores read the merged passage representation: four copies
    # of the encoding dim (attention combinations) plus the modeling output.
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(enc_dim * 4 + model_dim, 1))

    # Span-end scores additionally consume the span-end encoder's output.
    end_enc_dim = span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(enc_dim * 4 + end_enc_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(
        modeling_layer.get_input_dim(),
        4 * enc_dim,
        "modeling layer input dim",
        "4 * encoding dim",
    )
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        phrase_layer.get_input_dim(),
        "text field embedder output dim",
        "phrase layer input dim",
    )
    check_dimensions_match(
        span_end_encoder.get_input_dim(),
        4 * enc_dim + 3 * model_dim,
        "span end encoder input dim",
        "4 * encoding dim + 3 * modeling dim",
    )

    # Metrics: per-endpoint accuracy, exact-span accuracy, and SQuAD EM/F1.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity stand-in keeps the call sites unconditional when dropout is off.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             modules,  # TODO(dwadden) Add type.
             feature_size: int,
             max_span_width: int,
             loss_weights: Dict[str, int],
             lexical_dropout: float = 0.2,
             lstm_dropout: float = 0.4,
             use_attentive_span_extractor: bool = False,
             co_train: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             display_metrics: List[str] = None) -> None:
    """Construct the DyGIE multi-task IE model.

    Builds the shared token embedder and context encoder, instantiates the
    four task submodules (coref, NER, relation, events) from ``modules``,
    and sets up the span extractors the tasks share.

    Parameters
    ----------
    modules : per-task configuration; each sub-config is consumed via
        ``Params.pop``.
    feature_size : dimension of learned feature embeddings (span width).
    max_span_width : maximum width of candidate spans.
    loss_weights : per-task weights used when combining task losses.
    lexical_dropout : dropout applied to the embedded text.
    lstm_dropout : dropout applied manually to the encoder output.
    display_metrics : metric names to display; ``None`` shows everything.
    """
    super(DyGIE, self).__init__(vocab, regularizer)

    # Shared representation layers.
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._loss_weights = loss_weights
    # Deep copy kept separately — presumably the active weights can be
    # modified during training (e.g. co-training). TODO confirm.
    self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

    # Need to add this line so things don't break. TODO(dwadden) sort out what's happening.
    modules = Params(modules)
    # Task-specific submodules, each built from its own popped sub-params.
    self._coref = CorefResolver.from_params(vocab=vocab,
                                            feature_size=feature_size,
                                            params=modules.pop("coref"))
    self._ner = NERTagger.from_params(vocab=vocab,
                                      feature_size=feature_size,
                                      params=modules.pop("ner"))
    self._relation = RelationExtractor.from_params(vocab=vocab,
                                                   feature_size=feature_size,
                                                   params=modules.pop("relation"))
    self._events = EventExtractor.from_params(vocab=vocab,
                                              feature_size=feature_size,
                                              params=modules.pop("events"))

    # Make endpoint span extractor.
    # Spans are represented by their endpoint encodings ("x,y") plus a
    # learned width embedding.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    if use_attentive_span_extractor:
        # Optional attention-pooled span representation over the raw embeddings.
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim())
    else:
        self._attentive_span_extractor = None

    self._max_span_width = max_span_width
    self._display_metrics = display_metrics

    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        # Identity stand-in so the dropout can be applied unconditionally.
        self._lexical_dropout = lambda x: x

    # Do co-training if we're training on ACE and ontonotes.
    self._co_train = co_train

    # Big gotcha: PyTorch doesn't add dropout to the LSTM's output layer. We need to do this
    # manually.
    if lstm_dropout > 0:
        self._lstm_dropout = torch.nn.Dropout(p=lstm_dropout)
    else:
        self._lstm_dropout = lambda x: x

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             label_encoding: Optional[str] = None,
             include_start_end_transitions: bool = True,
             constrain_crf_decoding: bool = None,
             calculate_span_f1: bool = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """CRF sequence tagger: embed tokens, encode them, optionally pass through
    a feedforward, project to per-tag scores, and decode with a CRF.

    ``constrain_crf_decoding`` and ``calculate_span_f1`` default to whether
    ``label_encoding`` was supplied; both raise if enabled without it.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    # When True, all per-tag metrics are reported, not just the summary.
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    # The tag projection reads the feedforward output when one is configured,
    # otherwise the encoder output directly.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))

    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None

    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        # Restrict CRF transitions to those valid under the tagging scheme
        # (e.g. BIO/BIOUL), derived from the label vocabulary.
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions
    )

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)

    # Fail fast on mismatched layer dimensions rather than at first forward.
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder = None,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             use_upos_constraints: bool = True,
             use_lemma_constraints: bool = True,
             train_with_constraints: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Lexical-category tagger whose predictions can be constrained by each
    token's UPOS tag and, optionally, its lemma.

    When constraints are enabled, binary masks over the label space are
    precomputed here: one mask per UPOS, plus one mask per (lemma, UPOS)
    pair that marks *additionally* allowed labels. Lemma constraints require
    UPOS constraints.
    """
    super().__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    # Whether constraint masks are also applied during training, not only
    # at prediction time.
    self.train_with_constraints = train_with_constraints

    # The encoder is optional; without one, the projection reads the
    # embedder output directly.
    self.encoder = encoder
    if self.encoder is not None:
        encoder_output_dim = self.encoder.get_output_dim()
    else:
        encoder_output_dim = self.text_field_embedder.get_output_dim()
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = encoder_output_dim
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))
    self._label_namespace = label_namespace
    labels = self.vocab.get_index_to_token_vocabulary(
        self._label_namespace)
    self.use_upos_constraints = use_upos_constraints
    self.use_lemma_constraints = use_lemma_constraints

    if self.use_lemma_constraints and not self.use_upos_constraints:
        raise ConfigurationError(
            "If lemma constraints are applied, UPOS constraints must be applied as well."
        )

    if self.use_upos_constraints:
        # Get a dict with a mapping from UPOS to allowed LEXCAT here.
        self._upos_to_allowed_lexcats: Dict[
            str, Set[str]] = get_upos_allowed_lexcats(
                stronger_constraints=self.use_lemma_constraints)
        # Mapping from lemma to a dict of {UPOS: list of additionally allowed LEXCATs}.
        self._lemma_to_allowed_lexcats: Dict[str, Dict[
            str, List[str]]] = get_lemma_allowed_lexcats()

        # Use labels and the upos_to_allowed_lexcats to get a dict with
        # a mapping from UPOS to a mask with 1 at allowed label indices and 0 at
        # disallowed label indices.
        self._upos_to_label_mask: Dict[str, torch.Tensor] = {}
        for upos in ALL_UPOS:
            # Shape: (num_labels,)
            upos_label_mask = torch.zeros(
                len(labels),
                device=next(self.tag_projection_layer.parameters()).device)
            # Go through the labels and indices and fill in the values that are allowed.
            for label_index, label in labels.items():
                # Labels with no "-" separator are always allowed.
                if len(label.split("-")) == 1:
                    upos_label_mask[label_index] = 1
                    continue
                label_lexcat = label.split("-")[1]
                if not label.startswith("O-") and not label.startswith(
                        "o-"):
                    # Label does not start with O-/o-, always allowed.
                    upos_label_mask[label_index] = 1
                elif label_lexcat in self._upos_to_allowed_lexcats[upos]:
                    # Label starts with O-/o-, but the lexcat is in allowed
                    # lexcats for the current upos.
                    upos_label_mask[label_index] = 1
            self._upos_to_label_mask[upos] = upos_label_mask

        # Use labels and the lemma_to_allowed_lexcats to get a dict with
        # a mapping from lemma to a mask with 1 at an _additionally_ allowed label index
        # and 0 at disallowed label indices. If lemma_to_label_mask has a 0, and upos_to_label_mask
        # has a 0, the lexcat is not allowed for the (upos, lemma). If either lemma_to_label_mask or
        # upos_to_label_mask has a 1, the lexcat is allowed for the (upos, lemma) pair.
        self._lemma_upos_to_label_mask: Dict[Tuple[str, str],
                                             torch.Tensor] = {}
        for lemma in SPECIAL_LEMMAS:
            for upos_tag in ALL_UPOS:
                # No additional constraints for this pair; skip (mask absent).
                if upos_tag not in self._lemma_to_allowed_lexcats[lemma]:
                    continue
                # Shape: (num_labels,)
                lemma_upos_label_mask = torch.zeros(
                    len(labels),
                    device=next(
                        self.tag_projection_layer.parameters()).device)
                # Go through the labels and indices and fill in the values that are allowed.
                for label_index, label in labels.items():
                    # For ~i, etc. tags. We don't deal with them here.
                    if len(label.split("-")) == 1:
                        continue
                    label_lexcat = label.split("-")[1]
                    if not label.startswith("O-") and not label.startswith(
                            "o-"):
                        # Label does not start with O-/o-, so we don't deal with it here
                        continue
                    if label_lexcat in self._lemma_to_allowed_lexcats[
                            lemma][upos_tag]:
                        # Label starts with O-/o-, and the lexcat is in the
                        # additionally-allowed lexcats for this (lemma, upos).
                        lemma_upos_label_mask[label_index] = 1
                self._lemma_upos_to_label_mask[(
                    lemma, upos_tag)] = lemma_upos_label_mask

    self.accuracy_metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    # Fail fast if configured layer dimensions don't line up.
    if encoder is not None:
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim",
                               "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    phrase_layer: Seq2SeqEncoder,
    residual_encoder: Seq2SeqEncoder,
    span_start_encoder: Seq2SeqEncoder,
    span_end_encoder: Seq2SeqEncoder,
    initializer: Optional[InitializerApplicator] = None,
    dropout: float = 0.2,
    num_context_answers: int = 0,
    marker_embedding_dim: int = 10,
    max_span_length: int = 30,
    max_turn_length: int = 12,
) -> None:
    """Conversational reading-comprehension model predicting an answer span
    plus yes/no and follow-up labels for each question turn.

    When ``num_context_answers > 0``, marker embeddings for the question's
    turn number and for previous answers are added so earlier dialog turns
    can inform the current one.
    """
    super().__init__(vocab)
    self._num_context_answers = num_context_answers
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._marker_embedding_dim = marker_embedding_dim
    self._encoding_dim = phrase_layer.get_output_dim()

    # Passage/question bi-attention using the "x,y,x*y" linear similarity.
    self._matrix_attention = LinearMatrixAttention(self._encoding_dim,
                                                   self._encoding_dim,
                                                   "x,y,x*y")
    # Projects the 4-way merged attention output back down to encoding_dim.
    self._merge_atten = TimeDistributed(
        torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))

    self._residual_encoder = residual_encoder

    if num_context_answers > 0:
        # Marker embeddings for dialog history: which turn the question came
        # from, and where previous answers were located.
        self._question_num_marker = torch.nn.Embedding(
            max_turn_length, marker_embedding_dim * num_context_answers)
        self._prev_ans_marker = torch.nn.Embedding(
            (num_context_answers * 4) + 1, marker_embedding_dim)

    # Passage self-attention, followed by a 3-way merge projection.
    self._self_attention = LinearMatrixAttention(self._encoding_dim,
                                                 self._encoding_dim,
                                                 "x,y,x*y")
    self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
    self._merge_self_attention = TimeDistributed(
        torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    # Per-token predictors: span start/end scores, 3-way yes/no label, and
    # the follow-up label (sharing the linear layer above).
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(self._encoding_dim, 1))
    self._span_yesno_predictor = TimeDistributed(
        torch.nn.Linear(self._encoding_dim, 3))
    self._span_followup_predictor = TimeDistributed(self._followup_lin)

    # The phrase layer must accept the token embeddings plus any history
    # marker embeddings that get concatenated on.
    check_dimensions_match(
        phrase_layer.get_input_dim(),
        text_field_embedder.get_output_dim()
        + marker_embedding_dim * num_context_answers,
        "phrase layer input dim",
        "embedding dim + marker dim * num context answers",
    )

    if initializer is not None:
        initializer(self)

    # Accuracy metrics for each predicted quantity.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._span_followup_accuracy = CategoricalAccuracy()
    self._span_gt_yesno_accuracy = CategoricalAccuracy()
    self._span_gt_followup_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self,
             vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             mixture_feedforward: FeedForward,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             attention_function: SimilarityFunction,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    """WikiTables semantic parser: encode the question and table entities,
    link question tokens to entities, and decode a logical form with beam
    search trained by maximum marginal likelihood.
    """
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._beam_search = decoder_beam_search
    self._max_decoding_steps = max_decoding_steps
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so dropout can be applied unconditionally.
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    # Metrics: executed-denotation accuracy, action-sequence accuracy, and
    # whether a logical form was produced at all.
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)
    self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    # BUGFIX: use the in-place initializers. `torch.nn.init.normal` is
    # deprecated (and removed in recent PyTorch) in favor of `normal_`,
    # which the rest of this file already uses.
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)

    check_dimensions_match(entity_encoder.get_output_dim(),
                           question_embedder.get_output_dim(),
                           "entity word average embedding dim",
                           "question embedding dim")

    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None

    if self._use_neighbor_similarity_for_linking:
        # Scalar mixing weights for combining entity- and neighbor-based
        # linking scores.
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None

    self._decoder_trainer = MaximumMarginalLikelihood()
    self._decoder_step = WikiTablesDecoderStep(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        attention_function=attention_function,
        num_start_types=self._num_start_types,
        num_entity_types=self._num_entity_types,
        mixture_feedforward=mixture_feedforward,
        dropout=dropout)
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             training_beam_size: int = None,
             decoder_num_layers: int = 1,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels',
             database_file='/atis/atis.db') -> None:
    """ATIS text-to-SQL parser: encode the utterance, embed grammar actions,
    and decode SQL with beam search trained by maximum marginal likelihood.
    Denotations are checked against the database via ``SqlExecutor``.
    """
    # Atis semantic parser init
    super().__init__(vocab)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so dropout can be applied unconditionally.
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    # Evaluation metrics accumulated over the run.
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)
    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        # One extra input dimension carries a learned per-action bias.
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types,
                                                    action_embedding_dim)
    self._decoder_num_layers = decoder_num_layers

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        predict_start_type_separately=False,
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers)
def __init__(self,
             vocab: Vocabulary,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             decoder_beam_search: BeamSearch,
             question_embedder: TextFieldEmbedder,
             input_attention: Attention,
             past_attention: Attention,
             max_decoding_steps: int,
             action_embedding_dim: int,
             gnn: bool = True,
             decoder_use_graph_entities: bool = True,
             decoder_self_attend: bool = True,
             gnn_timesteps: int = 2,
             parse_sql_on_decoding: bool = True,
             add_action_bias: bool = True,
             use_neighbor_similarity_for_linking: bool = True,
             dataset_path: str = 'dataset',
             training_beam_size: int = None,
             decoder_num_layers: int = 1,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels',
             scoring_dev_params: dict = None,
             debug_parsing: bool = False) -> None:
    """Text-to-SQL semantic parser with a gated graph network over the
    database schema, and either a self-attending or a linking transition
    function for decoding.
    """
    super().__init__(vocab)
    self.vocab = vocab
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so dropout can be applied unconditionally.
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._question_embedder = question_embedder
    self._add_action_bias = add_action_bias
    self._scoring_dev_params = scoring_dev_params or {}
    self.parse_sql_on_decoding = parse_sql_on_decoding
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    self._self_attend = decoder_self_attend
    self._decoder_use_graph_entities = decoder_use_graph_entities
    self._action_padding_index = -1  # the padding value used by IndexField

    # Evaluation metrics accumulated over the run.
    self._exact_match = Average()
    self._sql_evaluator_match = Average()
    self._action_similarity = Average()
    self._acc_single = Average()
    self._acc_multi = Average()
    self._beam_hit = Average()

    self._action_embedding_dim = action_embedding_dim

    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        # One extra input dimension carries a learned per-action bias.
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    encoder_output_dim = encoder.get_output_dim()
    if gnn:
        # Graph-encoded entity information is concatenated onto the encoder
        # output, widening the effective encoder dimension.
        encoder_output_dim += action_embedding_dim

    # Learned inputs for the first decoding step, before any previous
    # action or attention exists.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder_output_dim))
    self._first_attended_output = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)
    torch.nn.init.normal_(self._first_attended_output)

    self._num_entity_types = 9
    self._embedding_dim = question_embedder.get_output_dim()

    self._entity_type_encoder_embedding = Embedding(self._num_entity_types,
                                                    self._embedding_dim)
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types,
                                                    action_embedding_dim)

    # Scores the 16 hand-crafted question/entity linking features.
    self._linking_params = torch.nn.Linear(16, 1)
    torch.nn.init.uniform_(self._linking_params.weight, 0, 1)

    num_edge_types = 3
    self._gnn = GatedGraphConv(self._embedding_dim,
                               gnn_timesteps,
                               num_edge_types=num_edge_types,
                               dropout=dropout)

    self._decoder_num_layers = decoder_num_layers

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)

    if decoder_self_attend:
        # Decoder that can attend over previously generated schema items.
        self._transition_function = AttendPastSchemaItemsTransitionFunction(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            past_attention=past_attention,
            predict_start_type_separately=False,
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers)
    else:
        self._transition_function = LinkingTransitionFunction(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            predict_start_type_separately=False,
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers)

    self._ent2ent_ff = FeedForward(action_embedding_dim, 1,
                                   action_embedding_dim,
                                   Activation.by_name('relu')())

    self._neighbor_params = torch.nn.Linear(self._embedding_dim,
                                            self._embedding_dim)

    # TODO: Remove hard-coded dirs
    self._evaluate_func = partial(
        evaluate,
        db_dir=os.path.join(dataset_path, 'database'),
        table=os.path.join(dataset_path, 'tables.json'),
        check_valid=False)

    self.debug_parsing = debug_parsing
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             lexical_feedforward: FeedForward,
             contextual_encoder: Seq2SeqEncoder,
             attention_feedforward: FeedForward,
             matrix_attention: MatrixAttention,
             memory_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             answer_steps: int = 5,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Memory-network style classifier: lexical feedforward -> contextual
    encoder -> matrix attention -> memory encoder -> multi-hop GRU answer
    module -> output feedforward/logit. All adjacent layer dimensions are
    verified at construction time.
    """
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._lexical_feedforward = TimeDistributed(lexical_feedforward)
    self._contextual_encoder = contextual_encoder
    self._attention_feedforward = TimeDistributed(attention_feedforward)
    self._matrix_attention = matrix_attention
    self._memory_encoder = memory_encoder
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._answer_steps = answer_steps

    # GRU cell that refines the answer state over `answer_steps` hops.
    self._answer_gru_cell = torch.nn.GRUCell(
        self._memory_encoder.get_output_dim(),
        self._memory_encoder.get_output_dim(),
    )
    self._answer_attention = TimeDistributed(
        torch.nn.Linear(self._memory_encoder.get_output_dim(), 1))
    self._answer_bilinear = BilinearAttention(
        self._memory_encoder.get_output_dim(),
        self._memory_encoder.get_output_dim(),
    )

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           lexical_feedforward.get_input_dim(),
                           "text field embedding dim",
                           "lexical feedforward input dim")
    # BUGFIX: this check compares the feedforward's *output* dimension, but
    # the old message labeled it "lexical feedforwrd input dim" (typo plus
    # wrong dimension name), producing a misleading configuration error.
    check_dimensions_match(lexical_feedforward.get_output_dim(),
                           contextual_encoder.get_input_dim(),
                           "lexical feedforward output dim",
                           "contextual layer input dim")
    check_dimensions_match(contextual_encoder.get_output_dim(),
                           attention_feedforward.get_input_dim(),
                           "contextual layer output dim",
                           "attention feedforward input dim")
    check_dimensions_match(contextual_encoder.get_output_dim() * 2,
                           memory_encoder.get_input_dim(),
                           "contextual layer output dim",
                           "memory encoder input dim")
    check_dimensions_match(memory_encoder.get_output_dim() * 4,
                           output_feedforward.get_input_dim(),
                           "memory encoder output dim",
                           "output feedforward input")
    check_dimensions_match(output_feedforward.get_output_dim(),
                           output_logit.get_input_dim(),
                           "output feedforward output dim",
                           "output logit input")

    self._dropout = torch.nn.Dropout(dropout) if dropout else None
    self._accuracy = CategoricalAccuracy()
    # NLLLoss: the model is expected to emit log-probabilities, not logits.
    self._loss = torch.nn.NLLLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             label_encoding: Optional[str] = None,
             constraint_type: Optional[str] = None,
             include_start_end_transitions: bool = True,
             constrain_crf_decoding: bool = None,
             calculate_span_f1: bool = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """CRF sequence tagger: embed, encode, optionally feedforward, project to
    per-tag scores, and decode with a CRF.

    ``constraint_type`` is deprecated and, when given, is treated as
    ``label_encoding``. ``constrain_crf_decoding`` and ``calculate_span_f1``
    default to whether ``label_encoding`` was supplied.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    # When True, all per-tag metrics are reported, not just the summary.
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    # The tag projection reads the feedforward output when one is configured,
    # otherwise the encoder output directly.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))

    if constraint_type is not None:
        # Deprecated path: fold constraint_type into label_encoding.
        warnings.warn("'constraint_type' was removed and replaced with"
                      "'label_encoding', 'constrain_crf_decoding', and "
                      "'calculate_span_f1' in version 0.6.1. It will be "
                      "removed in version 0.8.", DeprecationWarning)
        label_encoding = constraint_type

    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None

    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        # Restrict CRF transitions to those valid under the tagging scheme
        # (e.g. BIO/BIOUL), derived from the label vocabulary.
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions
    )

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
    elif constraint_type is not None:
        # Maintain deprecated behavior if constraint_type is provided
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=constraint_type)

    # Fail fast on mismatched layer dimensions rather than at first forward.
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             arc_representation_dim: int,
             tag_representation_dim: int,
             capsule_dim: int,
             iter_num: int,
             arc_feedforward: FeedForward = None,
             tag_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             predicate_embedding: Embedding = None,
             delta_type: str = "hinge_ce",
             subtract_gold: bool = False,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             gumbel_t: float = 1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    SRL graph parser using bilinear arc/label attention with capsule-style
    label scoring (``capsule_dim`` capsules per label, ``iter_num`` routing
    iterations).

    Raises
    ------
    ConfigurationError
        If ``edge_prediction_threshold`` is not strictly between 0 and 1.
    """
    super(SRLGraphParserBase, self).__init__(vocab, regularizer)

    # Validate before storing anything so a bad config fails fast.
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(
            f"edge_prediction_threshold must be between "
            f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    self.capsule_dim = capsule_dim
    self.iter_num = iter_num
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.subtract_gold = subtract_gold
    self.edge_prediction_threshold = edge_prediction_threshold
    self.delta_type = delta_type
    self.gumbel_t = gumbel_t

    num_labels = self.vocab.get_vocab_size("arc_types")
    # BUGFIX: removed a stray debug ``print("num_labels", num_labels)`` that
    # wrote to stdout on every model construction.

    # NOTE(review): `predicate_embedding` defaults to None but is dereferenced
    # here, so in practice it is a required argument — TODO confirm callers.
    node_dim = predicate_embedding.get_output_dim()
    encoder_dim = encoder.get_output_dim()

    self.arg_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")())
    self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    self.arg_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)
    # One bilinear score per (label, capsule) pair.
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels * capsule_dim,
                                                use_input_biases=True)

    # NOTE: the (misspelled) "predicte" attribute name is kept because other
    # methods of this class reference it.
    self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                            Activation.by_name("elu")())

    # The original used ``x or None``, which is redundant: an Embedding
    # instance is always truthy, so this is just the argument itself.
    self._pos_tag_embedding = pos_tag_embedding
    self._pred_embedding = predicate_embedding

    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    self._labelled_f1 = IterativeLabeledF1Measure(
        negative_label=0, negative_pred=0,
        selected_metrics=["F", "l_F", "p_F", "u_F"])
    self._tag_loss = torch.nn.NLLLoss(reduction="none")
    self._sense_loss = torch.nn.NLLLoss(reduction="none")
    initializer(self)
def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        modeling_layer: Seq2SeqEncoder,
        modeling_layer_memory: Seq2SeqEncoder,
        margin: float,
        # NOTE(review): `max` shadows the builtin; kept for interface compatibility.
        max: float,
        dropout: float = 0.2,
        mask_lstms: bool = False,
        memory_enabled: bool = False,
        memory_update: bool = True,
        memory_concat: bool = False,
        save_memory_snapshots: bool = False,
        save_entity_embeddings: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        answer_layer_image: Seq2SeqEncoder = None,
        answer_layer_text: Seq2SeqEncoder = None,
        question_image_encoder: Seq2SeqEncoder = None,
        step_layer: Seq2SeqEncoder = None,
        num_heads: int = 2,
        num_slots: int = 61,  # Maximum number of entities in the training set.
        last_layer_hidden_dims: List[int] = None,
        last_layer_num_layers: int = 4,
        projection_input_dim: int = 2048,
        projection_hidden_dims: List[int] = None,
        save_step_wise_attentions: bool = False) -> None:
    """
    Procedural Reasoning Network for RecipeQA: a BiDAF-style encoder
    (highway + phrase layer + legacy matrix attention) optionally augmented
    with a RelationalMemory module over recipe entities.

    NOTE(review): this constructor hard-codes CUDA placement (`.cuda()`,
    `.cuda(0)`) and so cannot run on a CPU-only machine — confirm intended.
    It also deletes `memory_snapshots_by_recipe.pkl` /
    `entity_embeddings_final.pkl` from the working directory when the
    corresponding save flags are set.
    """
    super(ProceduralReasoningNetworksforRecipeQA, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._modeling_layer_memory = modeling_layer_memory
    # Ranking-loss constants, placed on GPU up front.
    self.margin = torch.FloatTensor([margin]).cuda()
    self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6).cuda()
    self.for_max = torch.FloatTensor([max]).cuda()
    self._memory_enabled = memory_enabled
    self._memory_update = memory_update
    self._memory_concat = memory_concat
    self._save_memory_snapshots = save_memory_snapshots
    self._save_entity_embeddings = save_entity_embeddings
    self._step_layer = step_layer
    self._label_acc = CategoricalAccuracy()
    self.save_step_wise_attentions = save_step_wise_attentions
    if self._memory_enabled:
        # Each memory head gets an equal slice of the step-layer output.
        head_size = int(step_layer.get_output_dim() / num_heads)
        self.mem_module = RelationalMemory(
            mem_slots=num_slots,
            head_size=head_size,
            input_size=head_size * num_heads,
            num_heads=num_heads,
            num_blocks=1,
            forget_bias=1.,
            input_bias=0.,
        ).cuda(0)
        # With memory enabled the final classifier sees twice the features.
        last_layer_input_dim = 10 * modeling_layer.get_output_dim()
    else:
        last_layer_input_dim = 5 * modeling_layer.get_output_dim()
    self._activation = torch.nn.Tanh()
    self._last_layer = FeedForward(last_layer_input_dim,
                                   last_layer_num_layers,
                                   last_layer_hidden_dims,
                                   self._activation,
                                   dropout)
    self._answer_layer_image = answer_layer_image  # uses image encoder for image input
    self._answer_layer_text = answer_layer_text  # uses text encoder for text input
    self._question_image_encoder = question_image_encoder  # converts question image inputs to encoding dim
    self._vocab = vocab
    # TODO: Replace hard coded parameters with config parameters.
    # NOTE(review): crashes if projection_hidden_dims is left at its None
    # default — presumably always supplied via config; verify.
    self._mlp_projector = TimeDistributed(
        torch.nn.Sequential(
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_input_dim, projection_hidden_dims[0]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[0], projection_hidden_dims[1]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[1], projection_hidden_dims[2]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[2], projection_hidden_dims[3]),
        ))
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    if self._save_memory_snapshots:
        if os.path.isfile('memory_snapshots_by_recipe.pkl'
                          ):  # make sure we start with a clean file
            os.remove('memory_snapshots_by_recipe.pkl')
    if self._save_entity_embeddings:
        if os.path.isfile('entity_embeddings_final.pkl'
                          ):  # make sure we start with a clean file
            os.remove('entity_embeddings_final.pkl')
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             highway_embedding_size: int,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             matrix_attention_layer: MatrixAttention,
             modeling_layer: Seq2SeqEncoder,
             pointer_net: PointerNet,
             span_end_lstm: Seq2SeqEncoder,
             language: str = 'en',
             ptr_dim: int = 200,
             dropout: float = 0.2,
             loss_ratio: float = 0.3,
             max_num_passages: int = 5,
             max_num_character: int = 4,
             max_passage_len: int = 4,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Multi-passage reading-comprehension model: elastic highway embedding,
    dot-product passage/question attention, pointer layers for span
    prediction and a passage-ranking head, evaluated with ROUGE and BLEU.
    (`span_end_lstm` is currently accepted but unused.)
    """
    super().__init__(vocab, regularizer)

    # Plain configuration values.
    self.language = language
    self.loss_ratio = loss_ratio
    self.max_num_character = max_num_character
    self.relu = torch.nn.ReLU()
    self.max_num_passages = max_num_passages
    self.max_passage_len = max_passage_len
    self.ptr_dim = ptr_dim

    # Embedding / encoding stack.
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        ElasticHighway(text_field_embedder.get_output_dim(),
                       highway_embedding_size,
                       num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = DotProductMatrixAttention()
    self._modeling_layer = modeling_layer

    # Fused representation: 4x the phrase encoding plus the modeled passage.
    fused_dim = phrase_layer.get_output_dim() * 4 + modeling_layer.get_output_dim()
    self._ptr_layer_1 = TimeDistributed(torch.nn.Linear(fused_dim, 1))
    self._ptr_layer_2 = TimeDistributed(torch.nn.Linear(fused_dim, 1))
    self._content_layer_1 = TimeDistributed(torch.nn.Linear(fused_dim, ptr_dim))
    self._content_layer_2 = TimeDistributed(torch.nn.Linear(ptr_dim, 1))

    self._passages_matrix_attention = matrix_attention_layer
    self._pointer_net = pointer_net
    self._passage_predictor = TimeDistributed(
        torch.nn.Linear(self.max_num_passages, 1))
    # Learned initial decoder hidden state.
    self._start_h_embedding = torch.nn.Parameter(
        data=torch.zeros(1, 1, 1).float(), requires_grad=True)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._rouge_metrics = MsmarcoRouge()
    self._bleu_metrics = DureaderBleu()

    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    citation_text_encoder: Seq2SeqEncoder,
    classifier_feedforward: FeedForward,
    classifier_feedforward_2: FeedForward,
    classifier_feedforward_3: FeedForward,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    report_auxiliary_metrics: bool = False,
    predict_mode: bool = False,
) -> None:
    """
    Citation-intent classifier with two scaffold tasks (section labels and
    cite-worthiness), each with its own feedforward head and per-label F1.

    Additional Args:
        report_auxiliary_metrics: report metrics for the auxiliary tasks
        predict_mode: predict unlabeled examples
    """
    super(ScaffoldBilstmAttentionClassifier, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.citation_text_encoder = citation_text_encoder
    self.classifier_feedforward = classifier_feedforward
    self.classifier_feedforward_2 = classifier_feedforward_2
    self.classifier_feedforward_3 = classifier_feedforward_3

    # Class counts for the main task and the two scaffold tasks.
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.num_classes_sections = self.vocab.get_vocab_size("section_labels")
    self.num_classes_cite_worthiness = self.vocab.get_vocab_size(
        "cite_worthiness_labels")

    self.label_accuracy = CategoricalAccuracy()
    # One F1Measure per class, keyed by the label's surface token.
    self.label_f1_metrics = {
        vocab.get_token_from_index(index=i, namespace="labels"):
            F1Measure(positive_label=i)
        for i in range(self.num_classes)
    }
    self.label_f1_metrics_sections = {
        vocab.get_token_from_index(index=i, namespace="section_labels"):
            F1Measure(positive_label=i)
        for i in range(self.num_classes_sections)
    }
    self.label_f1_metrics_cite_worthiness = {
        vocab.get_token_from_index(index=i, namespace="cite_worthiness_labels"):
            F1Measure(positive_label=i)
        for i in range(self.num_classes_cite_worthiness)
    }

    self.loss = torch.nn.CrossEntropyLoss()
    self.attention_seq2seq = Attention(citation_text_encoder.get_output_dim())
    self.report_auxiliary_metrics = report_auxiliary_metrics
    self.predict_mode = predict_mode
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             matrix_attention_layer: MatrixAttention,
             modeling_layer: Seq2SeqEncoder,
             dropout_prob: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    QANet-style span extractor: projection + highway embedding, phrase
    encoder, matrix attention, modeling layer and two span predictors, with
    SQuAD EM/F1 plus BLEU-1..4 and ROUGE bookkeeping for evaluation.
    """
    super().__init__(vocab, regularizer)

    # Layer dimensions, read once up front.
    embed_dim = text_field_embedder.get_output_dim()
    enc_in = phrase_layer.get_input_dim()
    enc_out = phrase_layer.get_output_dim()
    model_in = modeling_layer.get_input_dim()
    model_out = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder
    self._embedding_proj_layer = torch.nn.Linear(embed_dim, enc_in)
    self._highway_layer = Highway(enc_in, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(enc_in, enc_in)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    self._modeling_proj_layer = torch.nn.Linear(enc_out * 4, model_in)
    self._modeling_layer = modeling_layer
    self._span_start_predictor = torch.nn.Linear(model_out * 2, 1)
    self._span_end_predictor = torch.nn.Linear(model_out * 2, 1)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._metrics = SquadEmAndF1()
    self._dropout = torch.nn.Dropout(
        p=dropout_prob) if dropout_prob > 0 else lambda x: x

    # ---- evaluation bookkeeping ----
    # BLEU-1..4, each tracked with a running average.
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {name: Average()
                         for name in self._bleu_score_types_to_use}

    # ROUGE via pyrouge; "rouge-n" expands to rouge-1 .. rouge-<max_rouge_n>.
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']
    max_rouge_n = 4
    ngram_names = ([f"rouge-{k}" for k in range(1, max_rouge_n + 1)]
                   if "rouge-n" in self._rouge_score_types_to_use else [])
    score_names = ngram_names + [t for t in self._rouge_score_types_to_use
                                 if t != 'rouge-n']
    self._rouge_scores = {name: Average() for name in score_names}
    self._rouge_evaluator = rouge.Rouge(
        metrics=self._rouge_score_types_to_use,
        max_n=max_rouge_n,
        limit_length=True,
        length_limit=100,
        length_limit_type='words',
        apply_avg=False,
        apply_best=False,
        alpha=0.5,  # Default F1_score
        weight_factor=1.2,
        stemming=True)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             intent_encoder: Seq2SeqEncoder = None,
             sequence_label_namespace: str = "labels",
             intent_label_namespace: str = "intent_labels",
             feedforward: Optional[FeedForward] = None,
             label_encoding: Optional[str] = None,
             include_start_end_transitions: bool = True,
             crf_decoding: bool = False,
             constrain_crf_decoding: bool = None,
             focal_loss_gamma: float = None,
             calculate_span_f1: bool = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Joint slot tagger / multi-label intent classifier. Tags are decoded with
    an optional (optionally transition-constrained) CRF; intents use BCE or
    focal loss. Span F1 and dialog-act F1 are tracked as metrics.
    """
    super().__init__(vocab, regularizer)

    self.sequence_label_namespace = sequence_label_namespace
    self.intent_label_namespace = intent_label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(sequence_label_namespace)
    self.num_intents = self.vocab.get_vocab_size(intent_label_namespace)
    self.encoder = encoder
    self.intent_encoder = intent_encoder
    self._verbose_metrics = verbose_metrics
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self._feedforward = feedforward

    # Tag projection always sits on the raw encoder output.
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_tags))

    # Intent projection uses the feedforward head when one is configured.
    intent_in_dim = (feedforward.get_output_dim()
                     if self._feedforward is not None
                     else self.encoder.get_output_dim())
    self.intent_projection_layer = Linear(intent_in_dim, self.num_intents)

    # Focal loss optionally replaces plain BCE for the intent head.
    self.intent_loss = (FocalBCEWithLogitsLoss(gamma=focal_loss_gamma)
                        if focal_loss_gamma is not None
                        else torch.nn.BCEWithLogitsLoss())

    # Default the CRF/F1 switches to "on iff a label_encoding was given".
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding

    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        tag_vocab = self.vocab.get_index_to_token_vocabulary(
            sequence_label_namespace)
        constraints = allowed_transitions(label_encoding, tag_vocab)
    else:
        constraints = None

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = (ConditionalRandomField(
                    self.num_tags, constraints,
                    include_start_end_transitions=include_start_end_transitions)
                if crf_decoding else None)

    self._intent_f1_metric = MultiLabelF1Measure(
        vocab, namespace=intent_label_namespace)
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(
            vocab,
            tag_namespace=sequence_label_namespace,
            label_encoding=label_encoding)
    self._dai_f1_metric = DialogActItemF1Measure()

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             max_decoding_steps: int,
             add_action_bias: bool = True,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels') -> None:
    """
    Grammar-based semantic parser: embeds the question and grammar actions,
    encodes entities, and links question tokens to table entities either via
    learned linking features or neighbor similarity.
    """
    super().__init__(vocab)

    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._rule_namespace = rule_namespace

    # Metrics.
    self._denotation_accuracy = Average()
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()

    self._action_padding_index = -1  # the padding value used by IndexField

    # Action embeddings (separate tables for input and output sides).
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        self._action_biases = Embedding(num_embeddings=num_actions,
                                        embedding_dim=1)
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # Learned inputs for the first decoding step, when there is no previous
    # action or previous question attention yet.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)

    check_dimensions_match(entity_encoder.get_output_dim(),
                           question_embedder.get_output_dim(),
                           "entity word average embedding dim",
                           "question embedding dim")

    self._num_entity_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 3  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._entity_type_encoder_embedding = Embedding(self._num_entity_types,
                                                    self._embedding_dim)
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types,
                                                    action_embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim,
                                            self._embedding_dim)

    self._linking_params = (torch.nn.Linear(num_linking_features, 1)
                            if num_linking_features > 0 else None)

    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, BertModel],
    mention_feedforward: FeedForward,
    context_layer: Seq2SeqEncoder = None,
    embedding_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    max_span_width: int = 30,
    feature_size: int = 10,
    spans_per_word: float = 100,
    label_smoothing: float = None,
    ignore_span_metric: bool = False,
    srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
    **kwargs,
) -> None:
    """
    BERT-based SRL span model: scores candidate argument spans extracted
    from BERT token representations (attentive extractor by default, or
    endpoint features when a ``context_layer`` is supplied).
    """
    super().__init__(vocab, **kwargs)

    # Accept either a pretrained-model name or an already-built BertModel.
    self.bert_model = (BertModel.from_pretrained(bert_model)
                       if isinstance(bert_model, str) else bert_model)
    bert_dim = self.bert_model.config.hidden_size

    self.num_classes = self.vocab.get_vocab_size("span_labels")

    # For the span based evaluation, we don't want to consider labels for
    # "V", because the verb index is provided to the model.
    self.span_metric = (SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
                        if srl_eval_path is not None else None)

    self.tag_projection_layer = Linear(bert_dim, self.num_classes)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric

    self._mention_feedforward = TimeDistributed(mention_feedforward)
    self._mention_scorer = TimeDistributed(
        torch.nn.Linear(mention_feedforward.get_output_dim(), 1))
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=bert_dim)
    self.span_representation_dim = self._attentive_span_extractor.get_output_dim()

    # An optional context layer switches span features to endpoint-based
    # ones, overriding the representation dimension above.
    self._context_layer = context_layer
    if context_layer is not None:
        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False,
        )
        self.span_representation_dim = self._endpoint_span_extractor.get_output_dim()

    self.hidden_layer = torch.nn.Sequential(
        torch.nn.Linear(self.span_representation_dim + bert_dim,
                        self.span_representation_dim,
                        bias=False),
        torch.nn.ReLU())
    self.output_layer = torch.nn.Linear(self.span_representation_dim,
                                        self.num_classes - 1,
                                        bias=False)

    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._ce_loss = torch.nn.CrossEntropyLoss(reduction='none')
    self._bce_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    initializer(self)