def __init__(self,
             vocab: Vocabulary,
             action_embedding_dim: int,
             text_field_embedder: TextFieldEmbedder = None,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels',
             debug: bool = False,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DROPParserBase, self).__init__(vocab=vocab, regularizer=regularizer)

    self._denotation_accuracy = Average()
    self._consistency = Average()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    # This flag turns on the debugging mode which prints a bunch of stuff in
    # self.decode (inside functions as well)
    self._debug = debug

    self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace),
                                      embedding_dim=action_embedding_dim,
                                      vocab_namespace=self._rule_namespace)
    self._action_embedding_dim = action_embedding_dim

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding, mean=0.0, std=0.001)
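# Hedged sketch (not part of the snippet above; plain torch only): the two patterns the
# constructor relies on, a dropout module that falls back to the identity when dropout == 0,
# and a learned "first action" embedding initialised from a small normal distribution.
import torch

dropout = 0.0
action_embedding_dim = 8

_dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)

first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
torch.nn.init.normal_(first_action_embedding, mean=0.0, std=0.001)

x = torch.randn(2, action_embedding_dim)
assert torch.equal(_dropout(x), x)  # identity path, since dropout == 0 here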
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: float = 0.2,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    encoding_dim = text_field_embedder.get_output_dim()
    self._dropout = torch.nn.Dropout(p=dropout)
    self._squad_metrics = SquadEmAndF1()

    self.linear_start = nn.Linear(encoding_dim, 1)
    self.linear_end = nn.Linear(encoding_dim, 1)
    self.linear_type = nn.Linear(encoding_dim, 3)

    self._loss_trackers = {'loss': Average(),
                           'start_loss': Average(),
                           'end_loss': Average(),
                           'type_loss': Average()}
def __init__(self,
             vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             max_decoding_steps: int,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)

    check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                           "entity word average embedding dim", "question embedding dim")

    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None

    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
def __init__(self,
             vocab: Vocabulary,
             mydatabase: str,
             schema_path: str,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)

    self._exact_match = Average()
    self._action_similarity = Average()
    self._valid_sql_query = SqlValidity(mydatabase=mydatabase)
    self._token_match = TokenSequenceAccuracy()
    self._kb_match = KnowledgeBaseConstsAccuracy(schema_path=schema_path)
    self._schema_free_match = GlobalTemplAccuracy(schema_path=schema_path)
    self._coverage_loss = CoverageAttentionLossMetric()

    # the padding value used by IndexField
    self._action_padding_index = -1
    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                        action_embedding_dim=action_embedding_dim,
                                                        input_attention=input_attention,
                                                        add_action_bias=self._add_action_bias,
                                                        dropout=dropout)
    initializer(self)
def __init__(self,
             num_entities: int,
             num_relations: int,
             embedding_dim: int,
             box_type: str = 'SigmoidBoxTensor',
             softbox_temp: float = 10.,
             margin: float = 0.0,
             # we don't need vocab but some api relies on its presence as an argument
             vocab: Optional[Vocabulary] = None,
             debug: bool = False) -> None:
    super().__init__()
    self.debug = debug
    self.num_entities = num_entities
    self.num_relations = num_relations
    self.embedding_dim = embedding_dim
    self.box_type = box_type
    self.create_embeddings_layer(num_entities, num_relations, embedding_dim)
    self.loss_f = torch.nn.MarginRankingLoss(  # type: ignore
        margin=margin, reduction='mean')
    self.softbox_temp = softbox_temp
    self.margin = margin

    # used only during eval
    self.precesion_parent = Average()
    self.recall_parent = Average()
    self.precesion_child = Average()
    self.recall_child = Average()
def __init__(
    self,
    text_field_embedder: TextFieldEmbedder,
    vocab: Vocabulary,
    seq2vec_encoder: Seq2VecEncoder = None,
    sym_size: int = None,  # number of output classes; assumed parameter, missing from the original snippet
    dropout: float = None,
    regularizer: RegularizerApplicator = None,
):
    super().__init__(vocab, regularizer)
    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None
    self.sym_size = sym_size
    self.embeddings = text_field_embedder
    self.vec_encoder = seq2vec_encoder
    self.hidden_dim = self.vec_encoder.get_output_dim()
    self.linear_class = torch.nn.Linear(self.hidden_dim, self.sym_size)
    # self.f_linear = torch.nn.Linear(self.hidden_dim * 2, self.hidden_dim * 2)
    with open('data/gcn_graph.pk', 'rb') as f:
        self.graph = torch.tensor(pickle.load(f)).cuda()
    self.topic_acc = Average()
    self.topic_rec = Average()
    self.topic_f1 = Average()
    self.macro_f = MacroF(self.sym_size)
    self.turn_acc = Average()
    # self.micro_f = FBetaMeasure(beta=1, average='micro')
    # self.macro_f = FBetaMeasure(beta=1, average='macro')
    self.future_acc = Average()
def __init__(self,
             vocab: Vocabulary,
             sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels') -> None:
    super(NlvrSemanticParser, self).__init__(vocab=vocab)

    self._sentence_embedder = sentence_embedder
    self._denotation_accuracy = Average()
    self._consistency = Average()
    self._encoder = encoder
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace),
                                      embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding)
def __init__(self,
             vocab: Vocabulary,
             passage_attention_to_span: Seq2SeqEncoder,
             scaling: bool = False,
             dropout: float = 0.0,
             initializers: InitializerApplicator = InitializerApplicator()) -> None:
    super(PassageAttnToSpan, self).__init__(vocab=vocab)

    self._scaling = scaling
    self.scaling_vals = [1, 2, 5, 10]

    self.passage_attention_to_span = passage_attention_to_span

    if self._scaling:
        assert len(self.scaling_vals) == self.passage_attention_to_span.get_input_dim()

    self._span_rnn_hsize = self.passage_attention_to_span.get_output_dim()

    self.passage_startend_predictor = torch.nn.Linear(self.passage_attention_to_span.get_output_dim(), 2)

    self.start_acc_metric = Average()
    self.end_acc_metric = Average()
    self.span_acc_metric = Average()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x

    initializers(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             gate_sent_encoder: Seq2SeqEncoder,
             gate_self_attention_layer: Seq2SeqEncoder,
             span_gate: Seq2SeqEncoder,
             dropout: float = 0.2,
             output_att_scores: bool = True,
             sent_labels_src: str = 'sp',
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(GateBidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._dropout = torch.nn.Dropout(p=dropout)
    self._output_att_scores = output_att_scores
    self._sent_labels_src = sent_labels_src

    self._span_gate = span_gate
    if span_gate._gate_self_att:
        self._gate_sent_encoder = gate_sent_encoder
        self._gate_self_attention_layer = gate_self_attention_layer
    else:
        self._gate_sent_encoder = None
        self._gate_self_attention_layer = None

    self._f1_metrics = F1Measure(1)
    self.evd_ans_metric = Average()
    self._loss_trackers = {'loss': Average()}
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             gate_sent_encoder: Seq2SeqEncoder,
             gate_self_attention_layer: Seq2SeqEncoder,
             bert_projection: FeedForward,
             span_gate: Seq2SeqEncoder,
             dropout: float = 0.2,
             output_att_scores: bool = True,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(PTNChainBidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._dropout = torch.nn.Dropout(p=dropout)
    self._output_att_scores = output_att_scores

    self._span_gate = span_gate
    self._bert_projection = bert_projection
    # self._gate_sent_encoder = gate_sent_encoder
    self._gate_self_attention_layer = gate_self_attention_layer
    self._gate_sent_encoder = None
    self._gate_self_attention_layer = None

    self._f1_metrics = AttF1Measure(0.5, top_k=False)
    self._loss_trackers = {'loss': Average(), 'rl_loss': Average()}
    self.evd_sup_acc_metric = ChainAccuracy()
    self.evd_ans_metric = Average()
    self.evd_beam_ans_metric = Average()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             initializer: InitializerApplicator,
             max_span_length: int = 30,
             use_multi_label_loss: bool = False,
             stats_report_freq: float = None,
             debug_experiment_name: str = None) -> None:
    super().__init__(vocab)
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._stats_report_freq = stats_report_freq
    self._debug_experiment_name = debug_experiment_name
    self._use_multi_label_loss = use_multi_label_loss

    # see usage below for explanation
    self.qa_outputs = torch.nn.Linear(self._text_field_embedder.get_output_dim(), 2)
    self.qa_yesno = torch.nn.Linear(self._text_field_embedder.get_output_dim(), 3)

    initializer(self)

    self._official_f1 = Average()
    self._official_EM = Average()
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             training_beam_size: int = None,
             decoder_num_layers: int = 1,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels',
             database_file='/atis/atis.db') -> None:
    # Atis semantic parser init
    super().__init__(vocab)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim)
    self._decoder_num_layers = decoder_num_layers

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                          action_embedding_dim=action_embedding_dim,
                                                          input_attention=input_attention,
                                                          add_action_bias=self._add_action_bias,
                                                          dropout=dropout,
                                                          num_layers=self._decoder_num_layers)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    seq2seq_encoder: Seq2SeqEncoder,
    feedforward_encoder: Seq2SeqEncoder,
    dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
):
    super(SimpleGeneratorModel, self).__init__(vocab, regularizer)
    self._vocabulary = vocab
    self._text_field_embedder = text_field_embedder
    self._seq2seq_encoder = seq2seq_encoder
    self._dropout = torch.nn.Dropout(p=dropout)

    self._feedforward_encoder = feedforward_encoder
    self._classifier_input_dim = feedforward_encoder.get_output_dim()
    self._classification_layer = torch.nn.Linear(self._classifier_input_dim, 1)

    self._rationale_f1_metric = F1Measure(positive_label=1)
    self._rationale_length = Average()
    self._rationale_supervision_loss = Average()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             max_span_length: int = 30,
             predictions_file=None,
             use_multi_label_loss: bool = False,
             stats_report_freq: float = None,
             debug_experiment_name: str = None) -> None:
    super().__init__(vocab)
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._stats_report_freq = stats_report_freq
    self._debug_experiment_name = debug_experiment_name
    self._use_multi_label_loss = use_multi_label_loss
    self._predictions_file = predictions_file

    # TODO move to predict
    if predictions_file is not None and os.path.isfile(predictions_file):
        os.remove(predictions_file)

    # see usage below for explanation
    self._all_qa_count = 0
    self._qas_used_fraction = 1.0
    self.qa_outputs = torch.nn.Linear(self._text_field_embedder.get_output_dim(), 2)

    initializer(self)

    self._official_f1 = Average()
    self._official_EM = Average()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             pair2vec_dropout: float = 0.15,
             max_span_length: int = 30,
             pair2vec_model_file: str = None,
             pair2vec_config_file: str = None) -> None:
    super().__init__(vocab)
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = phrase_layer.get_output_dim()

    self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file, pair2vec_model_file)
    self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')

    # atten_dim = self._encoding_dim * 4 + 600 if ablation_type == 'attn_over_rels' else self._encoding_dim * 4
    atten_dim = self._encoding_dim * 4 + 600
    self._merge_atten = TimeDistributed(torch.nn.Linear(atten_dim, self._encoding_dim))

    self._residual_encoder = residual_encoder
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._squad_metrics = SquadEmAndF1()
    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()
    self._span_accuracy = BooleanAccuracy()
    self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self, path, max_seq_len, name="sts_benchmark"):
    '''STS Benchmark task.'''
    super(STSBTask, self).__init__(name, 1)
    self.categorical = 0
    self.val_metric = "%s_accuracy" % self.name
    self.val_metric_decreases = False
    self.scorer1 = Average()
    self.scorer2 = Average()
    self.load_data(path, max_seq_len)
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             database_file: str,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)

    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)

    # the padding value used by IndexField
    self._action_padding_index = -1
    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                        action_embedding_dim=action_embedding_dim,
                                                        input_attention=input_attention,
                                                        predict_start_type_separately=False,
                                                        add_action_bias=self._add_action_bias,
                                                        dropout=dropout)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             evaluation_json_file: str = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    if evaluation_json_file:
        logger.info("Prepping official evaluation dataset from %s", evaluation_json_file)
        with open(evaluation_json_file) as dataset_file:
            dataset_json = json.load(dataset_file)
            question_to_answers = {}
            for article in dataset_json['data']:
                for paragraph in article['paragraphs']:
                    for question in paragraph['qas']:
                        question_id = question['id']
                        answers = [answer['text'] for answer in question['answers']]
                        question_to_answers[question_id] = answers
            self._official_eval_dataset = question_to_answers
    else:
        self._official_eval_dataset = None
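# Hedged sketch (hypothetical data, not from the original repo): the nested SQuAD-style JSON
# layout that the evaluation loop above expects, flattened into a question_id -> answers dict.
dataset_json = {
    "data": [
        {"paragraphs": [
            {"qas": [
                {"id": "q1", "answers": [{"text": "42"}, {"text": "forty-two"}]},
            ]},
        ]},
    ]
}
question_to_answers = {}
for article in dataset_json['data']:
    for paragraph in article['paragraphs']:
        for question in paragraph['qas']:
            question_to_answers[question['id']] = [answer['text'] for answer in question['answers']]
assert question_to_answers == {"q1": ["42", "forty-two"]}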
def __init__(
    self,
    text_field_embedder: TextFieldEmbedder,
    vocab: Vocabulary,
    seq2vec_encoder: Seq2VecEncoder = None,
    sym_size: int = None,  # number of output classes; assumed parameter, missing from the original snippet
    dropout: float = None,
    regularizer: RegularizerApplicator = None,
):
    super().__init__(vocab, regularizer)
    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None
    self.sym_size = sym_size
    self.embeddings = text_field_embedder
    self.vec_encoder = seq2vec_encoder
    self.hidden_dim = self.vec_encoder.get_output_dim()
    self.linear_class = torch.nn.Linear(self.hidden_dim, self.sym_size)
    # self.f_linear = torch.nn.Linear(self.hidden_dim * 2, self.hidden_dim * 2)
    self.dim = [12, 62, 4, 40, 62]
    self.true_list = [Average() for i in range(5)]
    self.pre_total = [Average() for i in range(5)]
    self.pre_true = [Average() for i in range(5)]
    self.total_pre = Average()
    self.total_true = Average()
    self.total_pre_true = Average()
    self.total_future_true = Average()
    self.macro_f = MacroF(self.sym_size)
    self.turn_acc = Average()
    self.future_acc = Average()
def __init__(self,
             vocab: Vocabulary,
             encoder: VariationalEncoder,
             decoder: VariationalDecoder,
             generator: Model,
             discriminator: Model,
             mse_weight: float = 2.0,
             train_temperature: float = 1.0,
             inference_temperature: float = 1e-5,
             num_responses: int = 10) -> None:
    super().__init__(vocab)
    self._encoder = encoder
    self._decoder = decoder
    self._mse_weight = mse_weight
    self.train_temperature = train_temperature
    self.inference_temperature = inference_temperature
    self._num_responses = num_responses

    self._start_index = self.vocab.get_token_index(START_SYMBOL)
    self._end_index = self.vocab.get_token_index(END_SYMBOL)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access

    self.s_bleu4 = NLTKSentenceBLEU(n_hyps=self._num_responses,
                                    smoothing_function=SmoothingFunction().method7,
                                    exclude_indices={self._pad_index, self._end_index, self._start_index},
                                    prefix='_S_BLEU4')
    self.n_bleu2 = NLTKSentenceBLEU(ngram_weights=(1 / 2, 1 / 2),
                                    n_hyps=self._num_responses,
                                    exclude_indices={self._pad_index, self._end_index, self._start_index},
                                    prefix='_BLEU2')

    # We need our optimizer to know which parameters came from
    # which model, so we cheat by adding tags to them.
    for param in generator.parameters():
        setattr(param, '_generator', True)
    for param in discriminator.parameters():
        setattr(param, '_discriminator', True)

    self.generator = generator
    self.discriminator = discriminator

    self._disc_metrics = {
        "dfl": Average(),
        "dfacc": Average(),
        "drl": Average(),
        "dracc": Average(),
    }
    self._gen_metrics = {
        "_gl": Average(),
        "gce": Average(),
        "_gmse": Average(),
        "_mean": Average(),
        "_stdev": Average()
    }
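# Hedged sketch (assumed downstream usage, not from the original repo): once parameters are
# tagged with '_generator' / '_discriminator' as above, an optimizer builder can split them
# into two parameter groups without knowing anything about the module structure.
import torch

generator = torch.nn.Linear(4, 4)
discriminator = torch.nn.Linear(4, 1)
for param in generator.parameters():
    setattr(param, '_generator', True)
for param in discriminator.parameters():
    setattr(param, '_discriminator', True)

all_params = list(generator.parameters()) + list(discriminator.parameters())
gen_optimizer = torch.optim.Adam([p for p in all_params if getattr(p, '_generator', False)], lr=1e-3)
disc_optimizer = torch.optim.Adam([p for p in all_params if getattr(p, '_discriminator', False)], lr=1e-3)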
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             vocab: Vocabulary,
             args,
             e_dim,
             inv_temp: float = None,
             temp_inc: float = None,
             task_embed=None) -> None:
    super().__init__(vocab)
    self.args = args
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.vocab = vocab
    self.tasks_vocabulary = {"default": vocab}
    self.current_task = "default"
    self.num_task = 0
    self.task2id = {"default": 0}
    self.accuracy = CategoricalAccuracy()
    self.loss_function = torch.nn.CrossEntropyLoss()
    self.average = Average()
    self.micro_avg = Average()
    self.activations = []
    self.labels = []
    self.inv_temp = inv_temp
    self.temp_inc = temp_inc
    self.e_dim = e_dim
    self.task_embedder = True if task_embed else None

    # Use transformer style task encoding
    self.task_encoder = TaskEncoding(self.e_dim) if self.args.task_encode else None
    self.task_projection = TaskProjection(self.e_dim) if self.args.task_projection else None
    self.pos_embedding = PositionalEncoding(self.e_dim, 0.5) if self.args.position_embed else None

    self.args = args
    self.use_task_memory = args.use_task_memory
    self.task_memory = TaskMemory(self.encoder.get_output_dim(), args.mem_size)

    self.classifier_dim = self.encoder.get_output_dim()
    if self.use_task_memory:
        self.classifier_dim = self.task_memory.get_output_dim()

    self.classification_layers = torch.nn.ModuleList([
        torch.nn.Linear(in_features=self.classifier_dim,
                        out_features=self.vocab.get_vocab_size('labels'))
    ])

    self._len_dataset = None
    if self.args.ewc or self.args.oewc:
        self.ewc = EWC(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             modeling_layer: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    encoding_dim = phrase_layer.get_output_dim()
    self._modeling_layer = modeling_layer
    self._dropout = torch.nn.Dropout(p=dropout)

    self._squad_metrics = SquadEmAndF1()
    self._f1_metrics = F1Measure(1)

    self.linear_1 = nn.Sequential(nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())
    self.linear_2 = nn.Sequential(nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())

    self.qc_att = BiAttention(encoding_dim, dropout)

    self._coref_f1_metric = AttF1Measure(0.1)

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    self.linear_start = nn.Linear(encoding_dim, 1)
    self.linear_end = nn.Linear(encoding_dim, 1)
    self.linear_type = nn.Linear(encoding_dim * 3, 3)

    self._loss_trackers = {'loss': Average(),
                           'start_loss': Average(),
                           'end_loss': Average(),
                           'type_loss': Average()}
def __init__(self,
             vocab,
             text_field_embedder,
             phrase_layer,
             residual_encoder,
             span_start_encoder,
             span_end_encoder,
             initializer,
             dropout=0.2,
             mask_lstms=True):
    super(BiDAFSelfAttention, self).__init__(vocab)
    # Initialize layers.
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer

    # Initialize start/end span predictors.
    encoding_dim = phrase_layer.get_output_dim()
    self._matrix_attention = TriLinearAttention(encoding_dim)
    self._merge_atten = TimeDistributed(torch.nn.Linear(encoding_dim * 4, encoding_dim))

    self._residual_encoder = residual_encoder
    self._self_atten = TriLinearAttention(encoding_dim)
    self._merge_self_atten = TimeDistributed(torch.nn.Linear(encoding_dim * 3, encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._official_em = Average()
    self._official_f1 = Average()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
        # self._dropout = VariationalDropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
def __init__(self,
             vocab: Vocabulary,
             passage_attention_to_count: Seq2SeqEncoder,
             dropout: float = 0.2,
             initializers: InitializerApplicator = InitializerApplicator()) -> None:
    super(PassageAttnToCount, self).__init__(vocab=vocab)

    self.scaling_vals = [1, 2, 5, 10]

    self.passage_attention_to_count = passage_attention_to_count
    assert len(self.scaling_vals) == self.passage_attention_to_count.get_input_dim()

    self.num_counts = 10
    # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
    #                                                self.num_counts, bias=False)

    # We want to predict a score for each passage token
    self.passage_count_hidden2logits = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
                                                       1, bias=True)

    self.passagelength_to_bias = torch.nn.Linear(1, 1, bias=True)

    self.count_acc = Average()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x

    initializers(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    modules: Params,
    loss_weights: Dict[str, int],
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    display_metrics: List[str] = None,
) -> None:
    super(NEROnlyModel, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)

    modules = Params(modules)
    self._ner = NERTagger.from_params(vocab=vocab, params=modules.pop("ner"))

    for k in loss_weights:
        loss_weights[k] = float(loss_weights[k])
    self._loss_weights = loss_weights
    self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

    self._display_metrics = display_metrics
    self._multi_task_loss_metrics = {k: Average() for k in ["ner"]}

    self.training_mode = True
    self.prediction_mode = False

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             variational_encoder: VariationalEncoder,
             decoder: Decoder,
             kl_weight: LossWeight,
             temperature: float = 1.0,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(VAE, self).__init__(vocab)
    self._encoder = variational_encoder
    self._decoder = decoder

    self._latent_dim = variational_encoder.latent_dim
    self._encoder_output_dim = self._encoder.get_encoder_output_dim()

    self._start_index = self.vocab.get_token_index(START_SYMBOL)
    self._end_index = self.vocab.get_token_index(END_SYMBOL)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
    self._bleu = BLEU(exclude_indices={self._pad_index, self._end_index, self._start_index})
    self._kl_metric = Average()

    self.kl_weight = kl_weight
    self._temperature = temperature
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             loss_ratio: float = 1.0,
             remove_sos: bool = True,
             remove_eos: bool = False,
             target_namespace: str = "tokens",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CTCLayer, self).__init__(vocab, regularizer)
    self.loss_ratio = loss_ratio
    self._remove_sos = remove_sos
    self._remove_eos = remove_eos
    self._target_namespace = target_namespace

    self._num_classes = self.vocab.get_vocab_size(target_namespace)
    self._pad_index = self.vocab.get_token_index(DEFAULT_PADDING_TOKEN, self._target_namespace)
    self._loss = CTCLoss(blank=self._pad_index)

    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    exclude_indices = {self._pad_index, self._end_index, self._start_index}
    self._wer: Metric = WER(exclude_indices=exclude_indices)
    self._bleu: Metric = BLEU(exclude_indices=exclude_indices)
    self._dal: Metric = Average()

    initializer(self)
def __init__(self,
             num_entities: int,
             num_relations: int,
             embedding_dim: int,
             box_type: str = 'SigmoidBoxTensor',
             single_box: bool = False,
             softbox_temp: float = 10.,
             margin: float = 1.,
             number_of_negative_samples: int = 0,
             debug: bool = False,
             regularization_weight: float = 0,
             init_interval_center: float = 0.25,
             init_interval_delta: float = 0.1) -> None:
    super().__init__(num_entities,
                     num_relations,
                     embedding_dim,
                     box_type=box_type,
                     single_box=single_box,
                     softbox_temp=softbox_temp,
                     number_of_negative_samples=number_of_negative_samples,
                     debug=debug,
                     regularization_weight=regularization_weight,
                     init_interval_center=init_interval_center,
                     init_interval_delta=init_interval_delta)
    self.number_of_negative_samples = number_of_negative_samples
    self.centre_loss_metric = Average()
    self.loss_f_centre: torch.nn.modules._Loss = torch.nn.MarginRankingLoss(  # type: ignore
        margin=margin, reduction='mean')
def __init__(self,
             vocab,
             num_embeddings=None,  # Backwards compatibility.
             embedding_dim=50,
             rnn_dim=650,
             stack_dim=16,
             rnn_cell_type=torch.nn.LSTMCell,
             push_rnn_state=False,
             swap_push_pop=True,  # Backward compatibility.
             push_ones=True):
    super().__init__(vocab)
    self._vocab_size = vocab.get_vocab_size()
    if num_embeddings is None:
        num_embeddings = self._vocab_size
    embedding = torch.nn.Embedding(num_embeddings, embedding_dim)
    self._embedder = BasicTextFieldEmbedder({"tokens": embedding})
    self._rnn_dim = rnn_dim
    self._stack_dim = stack_dim
    self._push_rnn_state = push_rnn_state

    if rnn_cell_type == "gru":
        rnn_cell_type = torch.nn.GRUCell
    self._rnn_cell = rnn_cell_type(embedding_dim + stack_dim, rnn_dim)

    self._control_layer = ControlLayer(rnn_dim, stack_dim, vision=4)
    self._classifier = torch.nn.Linear(rnn_dim, 1)

    self._accuracy = BooleanAccuracy()
    self._pop_strength = Average()
    self._criterion = torch.nn.BCEWithLogitsLoss()

    self._push_ones = push_ones
    self._swap_push_pop = swap_push_pop
def __init__(self, task='node', emb_dim=256, input_dim=768, batch_size=64, epochs=30):
    super().__init__()
    self.batch_size = batch_size
    self.epochs = epochs
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.nclass = type_class_dict[task]  # mark
    self.model = ProjectedProber(nfeat=emb_dim,
                                 nclass=self.nclass,
                                 input_emb=input_dim,
                                 dropout=0.1).to(self.device)
    self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()),
                                lr=1e-4,
                                betas=(0.7, 0.99))
    self.criterion = nn.CrossEntropyLoss()
    self.scorer = Average()
    self.mask_emb = None
    if task == 'event2entity':
        self.mask_emb = nn.Embedding.from_pretrained(
            torch.from_numpy(get_argroletyping_masks())).to(self.device)
        self.mask_emb.weight.requires_grad = False