def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             pair2vec_dropout: float = 0.15,
             max_span_length: int = 30,
             pair2vec_model_file: str = None,
             pair2vec_config_file: str = None) -> None:
    super().__init__(vocab)
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = phrase_layer.get_output_dim()

    # Pretrained pair2vec module providing word-pair embeddings.
    self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file, pair2vec_model_file)
    self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

    # Passage-question attention and merge layer.
    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    # atten_dim = self._encoding_dim * 4 + 600 if ablation_type == 'attn_over_rels' else self._encoding_dim * 4
    atten_dim = self._encoding_dim * 4 + 600
    self._merge_atten = TimeDistributed(torch.nn.Linear(atten_dim, self._encoding_dim))

    # Residual self-attention block over the merged passage representation.
    self._residual_encoder = residual_encoder
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    # Span prediction heads.
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))

    self._squad_metrics = SquadEmAndF1()
    initializer(self)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()
    self._span_accuracy = BooleanAccuracy()
    self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             num_context_answers: int = 0,
             marker_embedding_dim: int = 10,
             max_span_length: int = 30) -> None:
    super().__init__(vocab)
    self._num_context_answers = num_context_answers
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._marker_embedding_dim = marker_embedding_dim
    self._encoding_dim = phrase_layer.get_output_dim()
    max_turn_length = 12

    # Passage-question attention and merge layer.
    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))

    self._residual_encoder = residual_encoder

    # Markers for the question's turn number and for answers from previous turns.
    if num_context_answers > 0:
        self._question_num_marker = torch.nn.Embedding(max_turn_length,
                                                       marker_embedding_dim * num_context_answers)
        self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim)

    # Self-attention over the merged passage representation.
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    # Span, yes/no, and follow-up prediction heads.
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3))
    self._span_followup_predictor = TimeDistributed(self._followup_lin)

    initializer(self)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._span_followup_accuracy = CategoricalAccuracy()
    self._span_gt_yesno_accuracy = CategoricalAccuracy()
    self._span_gt_followup_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)
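# A minimal sketch (not taken from the source) of how a LinearMatrixAttention such as
# self._matrix_attention above is typically consumed in a BiDAF-style forward pass.
# Tensor and function names are illustrative assumptions; only the AllenNLP utilities
# are real API. The 4-way fusion shown here is one common choice whose width matches a
# merge layer like _merge_atten; the exact combination in the source may differ
# (the pair2vec variant earlier additionally appends 600-dim pair features).
import torch
from allennlp.modules.matrix_attention import LinearMatrixAttention
from allennlp.nn import util

def bidirectional_attention_sketch(encoded_passage: torch.Tensor,   # (batch, p_len, dim)
                                   encoded_question: torch.Tensor,  # (batch, q_len, dim)
                                   question_mask: torch.Tensor,     # (batch, q_len)
                                   matrix_attention: LinearMatrixAttention) -> torch.Tensor:
    # Raw similarity between every passage token and every question token.
    similarity = matrix_attention(encoded_passage, encoded_question)          # (batch, p_len, q_len)
    # Normalize over question tokens, ignoring padding.
    passage_question_attention = util.masked_softmax(similarity, question_mask)
    # Question-aware representation of each passage token.
    passage_question_vectors = util.weighted_sum(encoded_question, passage_question_attention)
    # Concatenation of width 4 * dim, the input size expected by the merge layer.
    return torch.cat([encoded_passage,
                      passage_question_vectors,
                      encoded_passage * passage_question_vectors,
                      encoded_passage - passage_question_vectors], dim=-1)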
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2VecEncoder,
             answers_encoder: Seq2VecEncoder,
             captions_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size('labels')
    self.question_encoder = question_encoder
    self.answers_encoder = TimeDistributed(answers_encoder)
    self.captions_encoder = TimeDistributed(captions_encoder)
    self.classifier_feedforward = classifier_feedforward
    # self.classifier_feedforward = TimeDistributed(classifier_feedforward)

    self._encoding_dim = captions_encoder.get_output_dim()
    self.ques_cap_att = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')

    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             input_dim: int,
             integrator_x: Seq2SeqEncoder,
             integrator_y: Seq2SeqEncoder,
             tie_integrator: bool = False,
             integrator_dropout: float = 0.0,
             combination: str = 'x,y') -> None:
    super(BiAttentionEncoder, self).__init__()
    self._self_attention = LinearMatrixAttention(input_dim, input_dim, combination)
    self._integrator_x = integrator_x
    self._integrator_y = integrator_y
    if tie_integrator:
        self._integrator_y = self._integrator_x
    self._integrator_dropout = torch.nn.Dropout(integrator_dropout)
    self._x_linear_layers = torch.nn.Linear(integrator_x.get_output_dim(), 1)
    self._y_linear_layers = torch.nn.Linear(integrator_y.get_output_dim(), 1)
    self._output_dim = input_dim
    self.input_dim = input_dim
def __init__(self, input_dim, rnn_size, dropout):
    super(GCATLayer, self).__init__()
    self.input_dim = input_dim
    self.rnn_size = rnn_size
    self.dropout = dropout
    self.fs = nn.Linear(self.input_dim, self.input_dim)
    # self.coattn = nn.Linear(3 * self.input_dim, 1, bias=False)
    self.coattn = LinearMatrixAttention(self.input_dim, self.input_dim, combination='x,y,x*y')
    self.proj = nn.Linear(4 * self.input_dim, self.input_dim)
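# A minimal sketch (an assumption, not taken from the source) of how the coattn and proj
# modules above could be wired together: attend from x over y and fuse with a 4-way
# concatenation whose width matches proj's 4 * input_dim input. Function and tensor names
# are hypothetical; the source's forward pass may combine the pieces differently.
import torch
import torch.nn.functional as F

def coattention_fuse_sketch(x: torch.Tensor,       # (batch, x_len, input_dim)
                            y: torch.Tensor,       # (batch, y_len, input_dim)
                            coattn, proj) -> torch.Tensor:
    scores = coattn(x, y)                           # (batch, x_len, y_len)
    attn = F.softmax(scores, dim=-1)                # attention of each x token over y
    attended = torch.bmm(attn, y)                   # (batch, x_len, input_dim)
    fused = torch.cat([x, attended, x * attended, x - attended], dim=-1)  # 4 * input_dim
    return proj(fused)                              # project back to input_dim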
def __init__(self,
             input_dim: int,
             combination: str = 'x,y',
             dropout_prob: float = 0.0) -> None:
    super(AttentionEncoder, self).__init__()
    self._self_attention = LinearMatrixAttention(input_dim, input_dim, combination)
    self._linear_layers = torch.nn.Linear(input_dim, 1)
    self._dropout = torch.nn.Dropout(dropout_prob)
    self._output_dim = input_dim
    self.input_dim = input_dim
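# A minimal sketch (an assumption) of the attentive pooling that the Linear(input_dim, 1)
# scorer above suggests: score each timestep, softmax over the sequence, and return a
# weighted sum. Names other than the module attributes are hypothetical; only the
# AllenNLP utilities are real API.
import torch
from allennlp.nn import util

def attentive_pool_sketch(sequence: torch.Tensor,   # (batch, seq_len, input_dim)
                          mask: torch.Tensor,       # (batch, seq_len)
                          scorer: torch.nn.Linear) -> torch.Tensor:
    scores = scorer(sequence).squeeze(-1)            # (batch, seq_len)
    weights = util.masked_softmax(scores, mask)      # ignore padded positions
    return util.weighted_sum(sequence, weights)      # (batch, input_dim)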
def __init__(self, vocab: Vocabulary,
             base_dim,
             loss_scale_by_num_values,
             use_pre_calc_elmo_embeddings,
             elmo_embedding_path,
             domain_slot_list_path,
             word_embeddings,
             token_indexers: Dict[str, TokenIndexer],
             text_field_embedder: TextFieldEmbedder,
             text_field_char_embedder: TextFieldEmbedder,
             symbol_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             class_prediction_layer: FeedForward,
             span_prediction_layer: FeedForward,
             span_start_encoder: FeedForward,
             span_end_encoder: FeedForward,
             span_label_predictor: FeedForward,
             initializer: InitializerApplicator,
             use_graph,
             bi_dropout: float = 0.2,
             dropout: float = 0.2) -> None:
    super().__init__(vocab)
    self._is_in_training_mode = False
    self._loss_scale_by_num_values = loss_scale_by_num_values
    self._use_pre_calc_elmo_embeddings = use_pre_calc_elmo_embeddings
    self._word_embeddings = word_embeddings
    self._is_use_elmo = True if self._word_embeddings == "elmo" else False
    self._is_use_graph = use_graph

    # Pre-computed ELMo embeddings for the dialog, combined with a trainable scalar mix.
    if self._is_use_elmo and use_pre_calc_elmo_embeddings:
        self._dialog_elmo_embeddings = self.load_elmo_embeddings(elmo_embedding_path)
        self._dialog_scalar_mix = ScalarMix(mixture_size=3, trainable=True)

    # Domain-slot and value vocabularies.
    self._domains, self._ds_id2text, self._ds_text2id, self.value_file_path, \
        self._ds_type, self._ds_use_value_list, num_ds_use_value, self._ds_masked \
        = self.read_domain_slot_list(domain_slot_list_path)
    self._value_id2text, self._value_text2id = self.load_value_list(domain_slot_list_path)
    self._span_id2text, self._class_id2text = dstqa_util.gen_id2text(self._ds_id2text, self._ds_type)

    self._token_indexers = token_indexers
    self._text_field_embedder = text_field_embedder
    self._text_field_char_embedder = text_field_char_embedder
    self._symbol_embedder = symbol_embedder

    # Attention between dialog, domain-slot, and slot-value representations.
    self._ds_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dialog_dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._ds_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')

    # Aggregation layers for the (optional) graph component.
    self._agg_value = torch.nn.Linear(base_dim, base_dim)
    self._agg_nodes = torch.nn.Linear(base_dim, base_dim)
    self._graph_gamma = torch.nn.Linear(base_dim, 1)

    # Prediction layers and encoders.
    self._class_prediction_layer = class_prediction_layer
    self._span_prediction_layer = span_prediction_layer
    self._span_label_predictor = span_label_predictor
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._phrase_layer = phrase_layer

    # Loss, metrics, and dropout.
    self._cross_entropy = CrossEntropyLoss(ignore_index=-1)
    self._accuracy = Accuracy(self._ds_id2text, self._ds_type)
    self._dropout = torch.nn.Dropout(dropout)
    self._bi_dropout = torch.nn.Dropout(bi_dropout)
    self._dropout2 = torch.nn.Dropout(0.1)
    self._sigmoid = torch.nn.Sigmoid()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             ctx_q_encoder: ContextualizedQuestionEncoder,  # this is used to get some m_t
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             num_context_answers: int = 0,
             marker_embedding_dim: int = 10,
             max_span_length: int = 30) -> None:
    super().__init__(vocab)
    print('INIT MODEL')
    self._num_context_answers = num_context_answers
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._marker_embedding_dim = marker_embedding_dim
    self._encoding_dim = phrase_layer.get_output_dim()
    self._train_coref_module = True
    # combine memory with question
    max_turn_length = 12
    self._ctx_q_encoder = ctx_q_encoder

    # Passage-question attention and merge layer.
    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))

    self._residual_encoder = residual_encoder

    # Markers for the question's turn number and for answers from previous turns.
    if num_context_answers > 0:
        self._question_num_marker = torch.nn.Embedding(max_turn_length, marker_embedding_dim)
        self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim)

    # Optional POS-tag embeddings when the contextualized question encoder uses linguistic features.
    if self._ctx_q_encoder.use_ling:
        pos_tags = self.vocab.get_vocab_size('pos_tags')
        self._pos_emb = torch.nn.Embedding(pos_tags, marker_embedding_dim)

    # Self-attention over the merged passage representation.
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    # Span, yes/no, and follow-up prediction heads.
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3))
    self._span_followup_predictor = TimeDistributed(self._followup_lin)

    initializer(self)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._span_followup_accuracy = CategoricalAccuracy()
    self._span_gt_yesno_accuracy = CategoricalAccuracy()
    self._span_gt_followup_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)