Example #1
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 share_encoder: Seq2VecEncoder = None,
                 private_encoder: Seq2VecEncoder = None,
                 dropout: float = None,
                 input_dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: RegularizerApplicator = None) -> None:
        super(JointSentimentClassifier, self).__init__(vocab=vocab,
                                                       regularizer=regularizer)

        self._text_field_embedder = text_field_embedder
        if share_encoder is None and private_encoder is None:
            share_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            share_encoder = PytorchSeq2SeqWrapper(share_rnn)
            private_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            private_encoder = PytorchSeq2SeqWrapper(private_rnn)
            logger.info("Using LSTM as encoder")
            self._domain_embeddings = Embedding(
                len(TASKS_NAME), self._text_field_embedder.get_output_dim())
        self._share_encoder = share_encoder

        self._s_domain_discriminator = Discriminator(
            share_encoder.get_output_dim(), len(TASKS_NAME))

        self._p_domain_discriminator = Discriminator(
            private_encoder.get_output_dim(), len(TASKS_NAME))

        # TODO individual valid discriminator
        self._valid_discriminator = Discriminator(
            self._domain_embeddings.get_output_dim(), 2)

        for task in TASKS_NAME:
            tagger = SentimentClassifier(
                vocab=vocab,
                text_field_embedder=self._text_field_embedder,
                share_encoder=self._share_encoder,
                private_encoder=copy.deepcopy(private_encoder),
                s_domain_discriminator=self._s_domain_discriminator,
                p_domain_discriminator=self._p_domain_discriminator,
                valid_discriminator=self._valid_discriminator,
                dropout=dropout,
                input_dropout=input_dropout,
                label_smoothing=0.1,
                initializer=initializer)
            self.add_module("_tagger_{}".format(task), tagger)

        logger.info("Multi-Task Learning Model has been instantiated.")
Example #2

 def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder):
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     num_labels = vocab.get_vocab_size("labels")
     print("==> encoded input shape: {}, output shape: {}\n".format(
         encoder.get_input_dim(), encoder.get_output_dim()))
     logger.info("==> encoded input shape: {}, output shape: {}\n".format(
         encoder.get_input_dim(), encoder.get_output_dim()))
     self.classifier = torch.nn.Linear(self.encoder.get_output_dim(),
                                       num_labels)
     self.accuracy = CategoricalAccuracy()
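For context, a hedged sketch of how the pieces above might be assembled; SimpleClassifier is a hypothetical name for the class this __init__ belongs to, and the dimensions are made up:

import torch
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

# In practice the vocabulary would come from Vocabulary.from_instances(...).
vocab = Vocabulary()
vocab.add_token_to_namespace("positive", "labels")
vocab.add_token_to_namespace("negative", "labels")

embedder = BasicTextFieldEmbedder(
    {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                         embedding_dim=100)})
encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(input_size=100, hidden_size=64, batch_first=True))

model = SimpleClassifier(vocab, embedder, encoder)  # hypothetical class name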
Example #3

    def __init__(self,
                 vocab: Vocabulary,
                 question_embedder: TextFieldEmbedder,
                 action_embedding_dim: int,
                 encoder: Seq2SeqEncoder,
                 entity_encoder: Seq2VecEncoder,
                 max_decoding_steps: int,
                 use_neighbor_similarity_for_linking: bool = False,
                 dropout: float = 0.0,
                 num_linking_features: int = 10,
                 rule_namespace: str = 'rule_labels',
                 tables_directory: str = '/wikitables/') -> None:
        super(WikiTablesSemanticParser, self).__init__(vocab)
        self._question_embedder = question_embedder
        self._encoder = encoder
        self._entity_encoder = TimeDistributed(entity_encoder)
        self._max_decoding_steps = max_decoding_steps
        self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._rule_namespace = rule_namespace
        self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
        self._action_sequence_accuracy = Average()
        self._has_logical_form = Average()

        self._action_padding_index = -1  # the padding value used by IndexField
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
        self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
        self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)

        # This is what we pass as input in the first step of decoding, when we don't have a
        # previous action, or a previous question attention.
        self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
        self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_question)

        check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                               "entity word average embedding dim", "question embedding dim")

        self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
        self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
        self._embedding_dim = question_embedder.get_output_dim()
        self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
        self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

        if num_linking_features > 0:
            self._linking_params = torch.nn.Linear(num_linking_features, 1)
        else:
            self._linking_params = None

        if self._use_neighbor_similarity_for_linking:
            self._question_entity_params = torch.nn.Linear(1, 1)
            self._question_neighbor_params = torch.nn.Linear(1, 1)
        else:
            self._question_entity_params = None
            self._question_neighbor_params = None
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 question_encoder: Seq2VecEncoder,
                 answers_encoder: Seq2VecEncoder,
                 captions_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size('labels')

        self.question_encoder = question_encoder
        self.answers_encoder = TimeDistributed(answers_encoder)
        self.captions_encoder = TimeDistributed(captions_encoder)
        self.classifier_feedforward = classifier_feedforward
        # self.classifier_feedforward = TimeDistributed(classifier_feedforward)

        self._encoding_dim = captions_encoder.get_output_dim()
        self.ques_cap_att = LinearMatrixAttention(self._encoding_dim,
                                                  self._encoding_dim,
                                                  'x,y,x*y')

        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
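A small sketch of what TimeDistributed does for the answers/captions encoders above; BagOfEmbeddingsEncoder is used purely as a stand-in Seq2VecEncoder and the shapes are illustrative:

import torch
from allennlp.modules import TimeDistributed
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder

# A Seq2VecEncoder expects (batch, seq_len, dim); TimeDistributed folds the
# extra num_answers dimension into the batch, runs the encoder, and unfolds it.
answers_encoder = TimeDistributed(BagOfEmbeddingsEncoder(embedding_dim=16))
answers = torch.randn(8, 4, 20, 16)   # (batch, num_answers, seq_len, dim)
encoded = answers_encoder(answers)    # -> (batch, num_answers, 16)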
Example #5
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 embedding_dropout: float,
                 encoder: Seq2VecEncoder,
                 encoder_dropout: float,
                 out_dim: int,
                 vocab: Vocabulary,
                 verbose=False) -> None:
        super().__init__(vocab)
        # Convert word ids to vector representations
        self._word_embeddings = word_embeddings
        self._embedding_dropout = torch.nn.Dropout(embedding_dropout)
        self._encoder = encoder
        self._encoder_dropout = torch.nn.Dropout(encoder_dropout)
        # The fully connected layer maps the encoder output dimension to the number of output classes
        self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=out_dim)

        # Evaluation metrics: classification accuracy and F1 score
        # self.accuracy = CategoricalAccuracy()
        # self.f1_measure = F1Measure(positive_label)

        # For classification tasks, cross entropy is used as the loss function.
        # PyTorch's CrossEntropyLoss already combines softmax and log likelihood loss internally, so there is no need to define an explicit softmax layer.
        # self.loss_function = torch.nn.CrossEntropyLoss()
        self.loss_function = torch.nn.BCEWithLogitsLoss()
        self._verbose = verbose
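A short sketch of the loss choice above, assuming a multi-label setup; unlike CrossEntropyLoss, BCEWithLogitsLoss treats each output unit as an independent binary decision and expects float 0/1 targets with the same shape as the logits:

import torch

logits = torch.randn(2, 5)                      # (batch_size, out_dim)
targets = torch.tensor([[1., 0., 1., 0., 0.],
                        [0., 0., 0., 1., 1.]])  # multi-hot labels
loss = torch.nn.BCEWithLogitsLoss()(logits, targets)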
Example #6
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator) -> None:
        super(BertModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.seq2vec_encoder = seq2vec_encoder

        self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
        self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(),
                                            self.num_types)

        self._type_accuracy = CategoricalAccuracy()

        self.type_f1_metrics = {}
        self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(
            "state_change_type_labels")
        for type_label in self.type_labels_vocab.values():
            self.type_f1_metrics["type_" + type_label] = F1Measure(
                self.vocab.get_token_index(type_label,
                                           "state_change_type_labels"))

        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #7
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2vec_encoder: Seq2VecEncoder,
        dropout: float = 0,
        label_namespace: str = 'label',
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        feedforward: Optional[FeedForward] = None,
    ) -> None:

        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder
        self._seq2vec_encoder = seq2vec_encoder
        self._feedforward = feedforward

        if feedforward is not None:
            self._classifier_input_dim = feedforward.get_output_dim()
        else:
            self._classifier_input_dim = seq2vec_encoder.get_output_dim()
        self.bn = nn.BatchNorm1d(num_features=self._classifier_input_dim)
        if dropout:
            self._dropout = nn.Dropout(dropout)
        else:
            self._dropout = None

        self._num_labels = vocab.get_vocab_size(namespace=label_namespace)
        self._classification_layer = nn.Linear(self._classifier_input_dim,
                                               self._num_labels)
        self._accuracy = CategoricalAccuracy()
        self._loss = nn.CrossEntropyLoss()

        initializer(self)
Example #8
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        # We need the embeddings to convert word IDs to their vector representations
        self.word_embeddings = word_embeddings

        self.encoder = encoder

        # After converting a sequence of vectors to a single vector, we feed it into
        # a fully-connected linear layer to reduce the dimension to the total number of labels.
        self.linear = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # Monitor the metrics - we use accuracy, as well as prec, rec, f1 for 4 (very positive)
        self.accuracy = CategoricalAccuracy()
        self.f1_measure_positive = F1Measure(
            vocab.get_token_index("positive", "labels"))
        self.f1_measure_negative = F1Measure(
            vocab.get_token_index("negative", "labels"))
        self.f1_measure_neutral = F1Measure(
            vocab.get_token_index("neutral", "labels"))

        # We use the cross entropy loss because this is a classification task.
        # Note that PyTorch's CrossEntropyLoss combines softmax and log likelihood loss,
        # which makes it unnecessary to add a separate softmax layer.
        self.loss_function = torch.nn.CrossEntropyLoss()
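A quick check of the comment above, showing that CrossEntropyLoss on raw logits matches an explicit log-softmax followed by NLLLoss:

import torch

logits = torch.randn(4, 3)           # (batch_size, num_labels)
labels = torch.tensor([0, 2, 1, 2])  # gold label indices

loss_a = torch.nn.CrossEntropyLoss()(logits, labels)
loss_b = torch.nn.NLLLoss()(torch.log_softmax(logits, dim=-1), labels)
assert torch.allclose(loss_a, loss_b)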
Example #9
File: models.py  Project: wjn922/allennlp
    def __init__(self,
                 vocab: Vocabulary,
                 source_text_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 tied_source_embedder_key: Optional[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 positive_label: str = "algebra",  
                 target_namespace: str = "tokens")-> None:

        super(TextClassifier, self).__init__(vocab, regularizer)

        self._source_text_embedder = source_text_embedder
        self._target_namespace = target_namespace
        self._encoder = encoder
        self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(), 
                                        out_features=vocab.get_vocab_size('labels'))
        self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
        
        self.accuracy = CategoricalAccuracy()
        positive_label = vocab.get_token_index(positive_label, namespace='labels')
        # for computing precision, recall and f1
        self.f1_measure = F1Measure(positive_label)

        # the loss function combines logsoftmax and NLLloss, the input to this function is logits
        self.loss_function = torch.nn.CrossEntropyLoss()  

        
        initializer(self)
Example #10

 def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder):
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     num_labels = vocab.get_vocab_size("labels")
     self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
Example #11
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 inner_encoder: Seq2VecEncoder,
                 outer_encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(HierarchicalRNN, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.inner_encoder = inner_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self._loss = torch.nn.CrossEntropyLoss()

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               inner_encoder.get_input_dim(),
                               'text field embedding dim',
                               'inner encoder input dim')
        check_dimensions_match(inner_encoder.get_output_dim(),
                               outer_encoder.get_input_dim(),
                               'inner encoder output dim',
                               'outer encoder input dim')
        initializer(self)
Example #13

    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels') -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes 
                      for input/output projections.
        :param embedder: Used to embed the text.
        :param encoder: Encodes the sentence/text. E.g. LSTM
        :param feedforward: An optional feed forward layer to apply after the 
                            encoder
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param dropout: Dropout to apply after each layer apart from the last
                        layer. All dropout applied to time-based data will be
                        `variational dropout`_; all else will be standard dropout.
        :param label_name: Name of the label name space.
        
        This is based on the LSTM model by 
        `Tang et al. 2016 <https://www.aclweb.org/anthology/C16-1311.pdf>`_
        
        '''
        self.label_name = label_name
        self.embedder = embedder
        self.encoder = encoder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        for label_index, _label_name in label_index_name.items():
            _label_name = f'F1_{_label_name.capitalize()}'
            self.f1_metrics[_label_name] = F1Measure(label_index)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)
        check_dimensions_match(embedder.get_output_dim(),
                               encoder.get_input_dim(), 'Embedding', 'Encoder')
        if self.feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(), 'Encoder',
                                   'FeedForward')
        initializer(self)
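A sketch of the two dropout flavours the docstring mentions; InputVariationalDropout reuses one mask across every timestep of a (batch, time, dim) tensor, while the naive Dropout draws an independent mask per element (shapes are illustrative):

import torch
from allennlp.modules import InputVariationalDropout

x = torch.randn(2, 7, 16)                  # (batch, time, dim)
variational_dropout = InputVariationalDropout(0.5)
naive_dropout = torch.nn.Dropout(0.5)

timewise_masked = variational_dropout(x)   # same units dropped at every timestep
elementwise_masked = naive_dropout(x)      # independent mask per element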
Example #14
    def __init__(
            self,
            vocab: Vocabulary,
            input_unit: Seq2VecEncoder,
            text_field_embedder: TextFieldEmbedder,
            # embedding_projection_dim: int = None,
            classifier_feedforward: FeedForward = None,
            max_step: int = 12,
            n_memories: int = 3,
            self_attention: bool = False,
            memory_gate: bool = False,
            dropout: float = 0.15,
            loss_weights=None,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)

        self.text_field_embedder = text_field_embedder

        self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                              input_unit.get_input_dim())
        self.input_unit = input_unit
        self.mac = MACCell(
            text_field_embedder.get_output_dim(
            ),  # input_unit.get_output_dim(),
            max_step=max_step,
            n_memories=n_memories,
            self_attention=self_attention,
            memory_gate=memory_gate,
            dropout=dropout,
            save_attns=False,
        )

        hidden_size = 2 * input_unit.get_output_dim()
        n_layers = 3
        self.classifier = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=n_layers,
            hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[dropout, dropout, 0.0])

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
            "fbeta": FBetaMeasure(average='micro')
        }

        weights = loss_weights and torch.FloatTensor(loss_weights)
        self.loss = nn.CrossEntropyLoss(weight=weights)

        initializer(self)
Example #15
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 label_namespace: str = "logical_form",
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_tags = self.vocab.get_vocab_size("logical_form")
        self.encoder = encoder
        
        self.text_field_embedder = text_field_embedder
        self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        
        
        # using crf as the estimator for sequential tags
        self.crf = ConditionalRandomField(
            self.num_tags, 
            include_start_end_transitions=False
        )
        
        self.softmax_layer = Softmax()
        self.ce_loss = CrossEntropyLoss()
        
        self.matched = 0
        self.all_pred = 0

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
            
        self.question_pred_layer = Linear(4*output_dim, 3*self.num_tags)
        # If constrain_crf_decoding and calculate_span_f1 are not provided
        # (i.e., they're None), set them to True if label_encoding is provided
        # and False if it isn't.

        self.metrics = {}
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(4*encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)
Example #16
 def __init__(self, vocab: Vocabulary, embedder: TokenEmbedder,
              seq2vec_encoder: Seq2VecEncoder, wbrun: Any):
     super().__init__(vocab)
     self.embedder = embedder
     self.seq2vec_encoder = seq2vec_encoder
     num_labels = vocab.get_vocab_size("labels")
     log.debug(f"Labels: {num_labels}.")
     self.classifier = torch.nn.Linear(seq2vec_encoder.get_output_dim(),
                                       num_labels)
     self.accuracy = CategoricalAccuracy()
     wbrun.watch(self.classifier, log="all")
     log.debug("Model init complete.")
Example #17

 def __init__(self,
              vocab: Vocabulary,
              embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder,
              regularizer_applicator: RegularizerApplicator = None):
     super().__init__(vocab, regularizer_applicator)
     self.embedder = embedder
     self.encoder = encoder
     num_labels = vocab.get_vocab_size("labels")
     self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
     self.accuracy = CategoricalAccuracy()
     self.auc = Auc()
     self.reg_app = regularizer_applicator
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 encoder: Seq2VecEncoder,
                 embedding: Embedding = None) -> None:
        super().__init__(vocab)
        # Dense embedding of source vocab tokens.
        self._embedding = embedding

        # Encodes the sequence of source embeddings into a sequence of hidden states.
        self._encoder = encoder

        self._sigmoid = Sigmoid()

        self._projection_layer = Linear(encoder.get_output_dim(), 1)
Example #19
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        clauses_encoder: Seq2VecEncoder,
        outer_encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        constraint_type: str = None,
        include_start_end_transitions: bool = True,
        dropout: float = None,
        loss_weights: Optional[List] = [],
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(JCC, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.clauses_encoder = clauses_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))

        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )
        self.metrics = {"accuracy": Accuracy()}

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            clauses_encoder.get_input_dim(),
            "text field embedding dim",
            "clauses encoder input dim",
        )
        check_dimensions_match(
            clauses_encoder.get_output_dim(),
            outer_encoder.get_input_dim(),
            "clauses encoder output dim",
            "outer encoder input dim",
        )
        initializer(self)
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 startphrase_encoder: Seq2VecEncoder,
                 ending_encoder: Seq2VecEncoder,
                 similarity: SimilarityFunction,
                 initializer: InitializerApplicator,
                 regularizer: RegularizerApplicator = None) -> None:
        super().__init__(vocab, regularizer)

        # validate the configuration
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               startphrase_encoder.get_input_dim(),
                               "text field embedding dim",
                               "startphrase encoder input dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               ending_encoder.get_input_dim(),
                               "text field embedding dim",
                               "ending encoder input dim")
        check_dimensions_match(startphrase_encoder.get_output_dim(),
                               ending_encoder.get_output_dim(),
                               "startphrase embedding dim",
                               "ending embedding dim")

        # bind all attributes to the instance
        self.text_field_embedder = text_field_embedder
        self.startphrase_encoder = startphrase_encoder
        self.ending_encoder = ending_encoder
        self.similarity = similarity

        # set the training and validation losses
        self.xentropy = torch.nn.CrossEntropyLoss()
        self.accuracy = CategoricalAccuracy()

        # initialize all variables
        initializer(self)
Example #21
    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        self.hidden_layer = torch.nn.Sequential(
            torch.nn.Dropout(p=0.5),
            torch.nn.utils.weight_norm(
                torch.nn.Linear(encoder.get_output_dim(), 128)),
            torch.nn.LeakyReLU(inplace=True),
        )
        self.output_layer = torch.nn.Linear(128, num_labels)

        self.accuracy = CategoricalAccuracy()
Example #22
    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        print("num labels is")
        print(vocab.get_vocab_size("labels"))

        num_labels = vocab.get_vocab_size("labels")

        print("it is probably since we have a seq2seq in reality")

        self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
        self.accuracy = CategoricalAccuracy()
        self.auc = Auc()
Example #23
    def __init__(self,
                 pooler: Seq2VecEncoder,
                 knowledge_encoder: Seq2SeqEncoder = None):
        super().__init__()
        self.pooler = pooler
        pass_thru = PassThroughEncoder(pooler.get_input_dim())

        self.knowledge_encoder = TimeDistributed(
            knowledge_encoder or pass_thru)  # TimeDistributed(context_encoder)

        self.knowledge_attn = DotProductMatrixAttention(
        )  # CosineMatrixAttention()
        # self.attn = DotProductMatrixAttention()

        self.input_dim = pooler.get_input_dim()
        self.output_dim = pooler.get_output_dim()
Example #24
 def __init__(self,
              vocab: Vocabulary,
              embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder,
              regularizer_applicator: RegularizerApplicator = None):
     super().__init__(vocab, regularizer_applicator)
     self.embedder = embedder
     self.encoder = encoder
     # The "labels" namespace was constructed automatically, i.e. even though we
     # did not explicitly do anything to it, it knows how large it should be.
     num_labels = vocab.get_vocab_size("labels")
     # Why does it need to know the label conversion, though?
     logger.info("num labels is as follows: {}".format(num_labels))
     self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
     self.accuracy = CategoricalAccuracy()
     self.auc = Auc()
     self.reg_app = regularizer_applicator
Example #25
    def __init__(self, vocabulary: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder) -> None:

        super(TextClassifier, self).__init__(vocab=vocabulary)

        self.vocabulary = vocabulary
        self.embedder = embedder
        self.encoder = encoder

        self.num_classes = self.vocabulary.get_vocab_size("labels")

        self.feedforward = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.loss = torch.nn.CrossEntropyLoss()
Example #26

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward = None,
                 loss_weights=None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        self.encoder = encoder.train()

        hidden_size = encoder.get_output_dim()
        self.classifier_feedforward = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=3,
            hidden_dims=[hidden_size, hidden_size, self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[0.2, 0.2, 0.0])

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
        }

        args = {
            "weight": torch.FloatTensor(loss_weights)
        } if loss_weights else {}
        self.loss = nn.CrossEntropyLoss(**args)

        initializer(self)
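A tiny illustration of the loss_weights path above: the list becomes a per-class weight tensor for the cross entropy loss, e.g. to up-weight a rare class (the numbers are made up):

import torch

loss = torch.nn.CrossEntropyLoss(weight=torch.FloatTensor([1.0, 5.0]))
logits = torch.randn(3, 2)
labels = torch.tensor([0, 1, 1])
weighted_loss = loss(logits, labels)  # mistakes on class 1 count five times as much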
Example #27

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 question_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder

        self.nb_classes = self.vocab.get_vocab_size("labels")
        self.question_encoder = question_encoder
        self.enc_dropout = torch.nn.Dropout(0.5)
        self.classifier_feedforward = Linear(question_encoder.get_output_dim(), self.nb_classes)
        self.ff_dropout = torch.nn.Dropout(0.5)
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
        }
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #28

    def __init__(self,
                 vocab: Vocabulary,
                 model_text_field_embedder: TextFieldEmbedder,
                 internal_text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 use_batch_norm: bool = False,
                 embedding_token_dropout: Optional[float] = None,
                 embedding_dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._model_text_field_embedder = model_text_field_embedder
        self._num_classes = self.vocab.get_vocab_size("labels")
        self._internal_text_encoder = internal_text_encoder
        self._classifier_feedforward = classifier_feedforward
        self._embedding_token_dropout = nn.Dropout(
            embedding_token_dropout) if embedding_token_dropout else None
        self._embedding_dropout = nn.Dropout(
            embedding_dropout) if embedding_dropout else None
        self._batch_norm = nn.modules.BatchNorm1d(
            num_features=internal_text_encoder.get_output_dim(
            )) if use_batch_norm else None

        if model_text_field_embedder.get_output_dim(
        ) != internal_text_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the model_text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(
                    model_text_field_embedder.get_output_dim(),
                    internal_text_encoder.get_input_dim()))

        self._metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(
                1
            )  # Assuming binary classification and we set to 1 suggestion which is what semeval task is about.
        }
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #29
    def __init__(self,
                 pooler: Seq2VecEncoder,
                 context_encoder: Seq2SeqEncoder = None,
                 kb_path: str = None,
                 kb_shape: Tuple[int, int] = None,
                 trainable_kb: bool = False,
                 projection_dim: int = None):
        super().__init__()

        kb = (torch.load(kb_path) if kb_path else torch.ones(kb_shape)).float()
        self.knowledge = nn.Parameter(kb, requires_grad=trainable_kb).float()
        self.projection_dim = projection_dim
        if projection_dim:
            self.kb_proj = nn.Linear(self.knowledge.size(0),
                                     self.projection_dim)

        self.context_encoder = context_encoder or PassThroughEncoder(
            pooler.get_input_dim())
        self.pooler = pooler
        self.output_dim = pooler.get_output_dim()
Example #30

    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder

        # input(vocab.print_statistics())
        # The label vocabulary comes from LabelField's default namespace; see label_field.py, where label_namespace: str = 'labels'.

        self.linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                      out_features=vocab.get_vocab_size('labels'))

        # input(self.linear.weight.shape)
        self.accuracy = CategoricalAccuracy()
        self.loss_function = torch.nn.CrossEntropyLoss()
        self.f1_measure = F1Measure(2)
        self.softmax = torch.nn.Softmax(
            dim=1)  # softmax over the last output dimension output=Tensor(batch_size, label_size)
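A sketch of the point made in the comment above: LabelField writes to the non-padded "labels" namespace by default, so building the vocabulary from the data sizes that namespace automatically (the label strings here are illustrative):

from allennlp.data import Instance, Vocabulary
from allennlp.data.fields import LabelField

instances = [Instance({"label": LabelField("positive")}),
             Instance({"label": LabelField("negative")})]
vocab = Vocabulary.from_instances(instances)
print(vocab.get_vocab_size("labels"))  # -> 2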
Example #31
    def __init__(self,
                 vocab: Vocabulary,
                 pivot_phrase_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 negative_sampling: bool = True,
                 num_negative_examples: int = 10) -> None:
        super().__init__(vocab, regularizer)

        self.negative_sampling = negative_sampling
        self.num_negative_examples = num_negative_examples
        self.pivot_phrase_embedder = pivot_phrase_embedder
        self.vocab_size = self.vocab.get_vocab_size("words")
        self.encoder = encoder
        self._output_projection_layer = Linear(encoder.get_output_dim(),
                                               self.vocab_size)
        self._context_words_embedder = Embedding(
            self.vocab_size, pivot_phrase_embedder.get_output_dim())

        initializer(self)