Example no. 1
    def __init__(self, 
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        # raise ValueError(self.vocab.get_vocab_size("tokens"))
        # raise ValueError(text_field_embedder.get_output_dim())

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            encoder.get_input_dim()))

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward

        self.metrics = {
                "multilabel-f1": MultiLabelF1Measure(),
                'accuracy': BooleanAccuracy()
        }
        self.pearson_r = PearsonCorrelation()
        self.loss = nn.MultiLabelSoftMarginLoss() #BCEWithLogitsLoss() 
        
        self._threshold = 0.5

        initializer(self)
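A hypothetical fragment (not from the source) showing how the stored 0.5 threshold might turn multi-label logits into per-label predictions; the tensor shape and variable names are assumptions.

import torch

logits = torch.randn(4, 16)                 # assumed (batch_size, num_labels) scores
probabilities = torch.sigmoid(logits)       # MultiLabelSoftMarginLoss pairs with sigmoid
predictions = (probabilities > 0.5).long()  # one independent 0/1 decision per label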
Example no. 2
    def __init__(self,
                 vocab: Vocabulary,
                 model_text_field_embedder: TextFieldEmbedder,
                 internal_text_encoder: Seq2VecEncoder,
                 output_layer: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Seq2VecClassifier, self).__init__(vocab, regularizer)

        self.model_text_field_embedder = model_text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.internal_text_encoder = internal_text_encoder
        self.output_layer = output_layer

        if model_text_field_embedder.get_output_dim(
        ) != internal_text_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the model_text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(
                    model_text_field_embedder.get_output_dim(),
                    internal_text_encoder.get_input_dim()))
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 3
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 SH_field_embedder: TextFieldEmbedder,
                 abstract_text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EtdClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.SH_field_embedder = SH_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.label_dict = self.vocab.get_index_to_token_vocabulary('labels')
        self.abstract_text_encoder = abstract_text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != abstract_text_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the abstract_text_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            abstract_text_encoder.get_input_dim()))

        self.metrics = {
                "roc_auc_score": RocAucScore()
        }
        self.loss = torch.nn.BCEWithLogitsLoss()

        initializer(self)
Example no. 4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 pretrained_archive=None) -> None:
        super(SentenceClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("tokens")
        self.sentence_encoder = sentence_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim(
        ) != sentence_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the sentence_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       sentence_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # if existing, load pre-trained model
        if pretrained_archive:
            archive = load_archive(pretrained_archive)
            self._initialize_weights_from_archive(archive)
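Only the call to _initialize_weights_from_archive appears above; a minimal sketch of what such a helper might do, assuming it copies archived parameters whose names and shapes match this model's parameters:

    def _initialize_weights_from_archive(self, archive) -> None:
        # Hypothetical body (an assumption, not the source implementation).
        archived_parameters = dict(archive.model.named_parameters())
        for name, parameter in self.named_parameters():
            source = archived_parameters.get(name)
            if source is not None and source.size() == parameter.size():
                parameter.data.copy_(source.data)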
Example no. 5
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EFClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        logger.info("------------------------------------")
        logger.info("num class {}".format(self.num_classes))
        logger.info("------------------------------------")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the text_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.f1 = F1Measure(positive_label=1)
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example no. 6
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PriorityCrisisClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        # self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        # The vector [1.0000, 4.6600, 6.0852, 83.3817] was obtained from
        # ./tests/models/priority_crisis_classifier_test.py and was learned from the
        # training set; a similar helper, get_weights, is declared in that test script.
        weights = torch.tensor([1.0000, 4.0000, 8.0000, 16.0000],
                               dtype=torch.float32)
        self.loss = torch.nn.CrossEntropyLoss()
        # self.loss = torch.nn.CrossEntropyLoss(weight=weights)
        initializer(self)
Example no. 7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PriorityCrisisClassifierWithLossWeight,
              self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        # self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        # corresponding to low, medium, high and critical
        weights = torch.tensor([1.0000, 6.1094, 8.3922, 16.8462],
                               dtype=torch.float32)

        self.loss = torch.nn.CrossEntropyLoss(weight=weights)
        initializer(self)
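The hard-coded weight vectors above are described as inverse label frequencies learned from the training set; a possible get_weights-style helper (an assumption, not the actual test-script code) could compute them like this:

from collections import Counter

def get_weights(labels, order=("low", "medium", "high", "critical")):
    # Inverse-frequency weights, normalised so the most frequent class gets 1.0
    # (assumes every class in `order` occurs at least once in `labels`).
    counts = Counter(labels)
    largest = max(counts[name] for name in order)
    return [largest / counts[name] for name in order]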
Example no. 8
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 title_encoder: Seq2VecEncoder,
                 abstract_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(AcademicPaperClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.title_encoder = title_encoder
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != title_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            title_encoder.get_input_dim()))
        if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the abstract_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            abstract_encoder.get_input_dim()))
        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 abstract_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SentimentClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim(
        ) != abstract_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the abstract_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       abstract_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        model = Sequential()
        model.add(
            Conv2D(64,
                   kernel_size=(3, 3),
                   input_shape=(530, 700, 3),
                   padding='VALID'))
        model.add(Conv2D(64, kernel_size=(3, 3), padding='VALID'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(
            Conv2D(128,
                   kernel_size=(3, 3),
                   strides=1,
                   activation='relu',
                   padding='VALID'))
        model.add(
            Conv2D(128,
                   kernel_size=(3, 3),
                   strides=1,
                   activation='relu',
                   padding='VALID'))
        model.add(AveragePooling2D(pool_size=(19, 19)))

        model.add(Flatten())

        model.summary()

        self.image_model = model
Example no. 10
    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        print("==> encoded input shape: {}, output shape: {}\n".format(
            encoder.get_input_dim(), encoder.get_output_dim()))
        logger.info("==> encoded input shape: {}, output shape: {}\n".format(
            encoder.get_input_dim(), encoder.get_output_dim()))
        self.classifier = torch.nn.Linear(self.encoder.get_output_dim(),
                                          num_labels)
        self.accuracy = CategoricalAccuracy()
Example no. 11
    def __init__(self,
                 pooler: Seq2VecEncoder,
                 knowledge_encoder: Seq2SeqEncoder = None):
        super().__init__()
        self.pooler = pooler
        pass_thru = PassThroughEncoder(pooler.get_input_dim())

        self.knowledge_encoder = TimeDistributed(
            knowledge_encoder or pass_thru)  # TimeDistributed(context_encoder)

        self.knowledge_attn = DotProductMatrixAttention(
        )  # CosineMatrixAttention()
        # self.attn = DotProductMatrixAttention()

        self.input_dim = pooler.get_input_dim()
        self.output_dim = pooler.get_output_dim()
Example no. 12
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 pivot_phrase_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SkipGram, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("shared_words_vocab")
        self.pivot_phrase_encoder = pivot_phrase_encoder
        self.projection_layer = Linear(
            self.pivot_phrase_encoder.get_output_dim(),
            self.num_classes,
            bias=True)

        self.loss = torch.nn.CrossEntropyLoss()

        if text_field_embedder.get_output_dim(
        ) != pivot_phrase_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the pivot_phrase_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       pivot_phrase_encoder.get_input_dim()))

        initializer(self)
Example no. 13
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 inner_encoder: Seq2VecEncoder,
                 outer_encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(HierarchicalRNN, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.inner_encoder = inner_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))
        # self.metrics = {"accuracy": FuckingAccuracy()}
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self._loss = torch.nn.CrossEntropyLoss()

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               inner_encoder.get_input_dim(),
                               'text field embedding dim',
                               'inner encoder input dim')
        check_dimensions_match(inner_encoder.get_output_dim(),
                               outer_encoder.get_input_dim(),
                               'inner encoder output dim',
                               'outer encoder input dim')
        initializer(self)
Example no. 14
    def __init__(
            self,
            vocab: Vocabulary,
            input_unit: Seq2VecEncoder,
            text_field_embedder: TextFieldEmbedder,
            # embedding_projection_dim: int = None,
            classifier_feedforward: FeedForward = None,
            max_step: int = 12,
            n_memories: int = 3,
            self_attention: bool = False,
            memory_gate: bool = False,
            dropout: int = 0.15,
            loss_weights=None,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)

        self.text_field_embedder = text_field_embedder

        self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                              input_unit.get_input_dim())
        self.input_unit = input_unit
        self.mac = MACCell(
            text_field_embedder.get_output_dim(
            ),  # input_unit.get_output_dim(),
            max_step=max_step,
            n_memories=n_memories,
            self_attention=self_attention,
            memory_gate=memory_gate,
            dropout=dropout,
            save_attns=False,
        )

        hidden_size = 2 * input_unit.get_output_dim()
        n_layers = 3
        self.classifier = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=n_layers,
            hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[dropout, dropout, 0.0])

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
            "fbeta": FBetaMeasure(average='micro')
        }

        weights = loss_weights and torch.FloatTensor(loss_weights)
        self.loss = nn.CrossEntropyLoss(weight=weights)

        initializer(self)
Example no. 15
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels') -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes 
                      for input/output projections.
        :param embedder: Used to embed the text.
        :param encoder: Encodes the sentence/text, e.g. an LSTM.
        :param feedforward: An optional feed-forward layer to apply after the
                            encoder.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the
                            regularization penalty during training.
        :param dropout: Applies dropout after each layer apart from the last
                        layer. Dropout applied to time-based data will be
                        `variational dropout`_; all else will be standard
                        dropout.
        :param label_name: Name of the label namespace.

        This is based on the LSTM model by
        `Tang et al. 2016 <https://www.aclweb.org/anthology/C16-1311.pdf>`_
        '''
        self.label_name = label_name
        self.embedder = embedder
        self.encoder = encoder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        for label_index, _label_name in label_index_name.items():
            _label_name = f'F1_{_label_name.capitalize()}'
            self.f1_metrics[_label_name] = F1Measure(label_index)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)
        check_dimensions_match(embedder.get_output_dim(),
                               encoder.get_input_dim(), 'Embedding', 'Encoder')
        if self.feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(), 'Encoder',
                                   'FeedForward')
        initializer(self)
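The docstring above distinguishes variational dropout on time-distributed data from standard dropout elsewhere; a minimal forward-pass sketch of how the two dropout modules and the projection might be applied (the `tokens` argument and shapes are assumptions):

from allennlp.nn.util import get_text_field_mask

def forward_sketch(self, tokens):
    embedded = self.embedder(tokens)                # (batch, seq_len, emb_dim)
    embedded = self._variational_dropout(embedded)  # one mask shared across time steps
    mask = get_text_field_mask(tokens)
    encoded = self.encoder(embedded, mask)          # (batch, enc_dim)
    encoded = self._naive_dropout(encoded)          # standard dropout on the pooled vector
    if self.feedforward is not None:
        encoded = self.feedforward(encoded)
    return self.label_projection(encoded)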
Example no. 16
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 entity_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 label_namespace: str = "logical_form",
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_tags = self.vocab.get_vocab_size("logical_form")
        self.encoder = encoder

        self.text_field_embedder = text_field_embedder
        self.entity_embedder = entity_embedder

        self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())

        # using crf as the estimator for sequential tags
        self.crf = ConditionalRandomField(self.num_tags,
                                          include_start_end_transitions=False)

        self.crf_for_both = ConditionalRandomField(
            self.num_tags, include_start_end_transitions=False)

        self.softmax_layer = Softmax()
        self.ce_loss = CrossEntropyLoss()

        self.matched = 0
        self.all_pred = 0

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None

        output_dim = self.encoder.get_output_dim()

        self.pred_layer = Linear(4 * output_dim, 3 * self.num_tags)
        self.load_pretrained_weights()

        self.pred_layer_both = Linear(8 * output_dim, 3 * self.num_tags)
        # if  constrain_crf_decoding and calculate_span_f1 are not
        # provided, (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.

        self.metrics = {}
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)
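A hypothetical fragment (assumed variable names and shapes) showing how the CRF declared above is typically used as the estimator for sequential tags, both for the training loss and for Viterbi decoding:

log_likelihood = self.crf(logits, tags, mask)     # logits: (batch, seq_len, num_tags)
loss = -log_likelihood                            # negative log-likelihood training loss
best_paths = self.crf.viterbi_tags(logits, mask)  # list of (tag_sequence, score) pairs
predicted_tags = [tag_sequence for tag_sequence, _ in best_paths]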
Example no. 17
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward = None,
                 loss_weights=None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        self.encoder = encoder.train()

        hidden_size = encoder.get_output_dim()
        self.classifier_feedforward = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=3,
            hidden_dims=[hidden_size, hidden_size, self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[0.2, 0.2, 0.0])

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
        }

        args = {
            "weight": torch.FloatTensor(loss_weights)
        } if loss_weights else {}
        self.loss = nn.CrossEntropyLoss(**args)

        initializer(self)
Example no. 18
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 label_weight: Dict[str, float] = None,
                 use_label_distribution: bool = False,
                 image_classification_ratio: float = 0.0,
                 decay_every_i_step=100000,
                 decay_ratio=0.8,
                 instance_count=100000,
                 max_epoch=10,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None
                 ) -> None:
        super(BasicClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.sentence_encoder = sentence_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != sentence_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            sentence_encoder.get_input_dim()))
        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "cnn_loss": Average()
        }
        if not use_label_distribution:
            self.loss = torch.nn.CrossEntropyLoss()
        else:
            self.loss = torch.nn.CrossEntropyLoss()
        self.image_classification_ratio = image_classification_ratio
        self.decay_every_i_step = decay_every_i_step
        self.decay_ratio = decay_ratio
        self.training_step = 0
        self.current_ratio = image_classification_ratio
        self.total_steps = max_epoch*instance_count//64
        self.step_every_epoch = instance_count // 64

        print("每个epoch的step数量", self.step_every_epoch)

        initializer(self)
Example no. 19
    def __init__(self,
                 vocab: Vocabulary,
                 model_text_field_embedder: TextFieldEmbedder,
                 internal_text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 use_batch_norm: bool = False,
                 embedding_token_dropout: Optional[float] = None,
                 embedding_dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._model_text_field_embedder = model_text_field_embedder
        self._num_classes = self.vocab.get_vocab_size("labels")
        self._internal_text_encoder = internal_text_encoder
        self._classifier_feedforward = classifier_feedforward
        self._embedding_token_dropout = nn.Dropout(
            embedding_token_dropout) if embedding_token_dropout else None
        self._embedding_dropout = nn.Dropout(
            embedding_dropout) if embedding_dropout else None
        self._batch_norm = nn.modules.BatchNorm1d(
            num_features=internal_text_encoder.get_output_dim(
            )) if use_batch_norm else None

        if model_text_field_embedder.get_output_dim(
        ) != internal_text_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the model_text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(
                    model_text_field_embedder.get_output_dim(),
                    internal_text_encoder.get_input_dim()))

        self._metrics = {
            "accuracy": CategoricalAccuracy(),
            # Assuming binary classification; positive label 1 is the "suggestion"
            # class, which is what the SemEval task is about.
            "f1": F1Measure(1)
        }
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 20
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 threshold: float,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(CategoryCrisisClassifierWithLossWeight,
              self).__init__(vocab, regularizer)
        self.threshold = threshold
        self.text_field_embedder = text_field_embedder
        self.title_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))

        # loss weights correspond to the order of information types as follows
        # {0: 'Sentiment', 1: 'Hashtags', 2: 'News', 3: 'Irrelevant', 4: 'MultimediaShare', 5: 'ThirdPartyObservation',
        #  6: 'FirstPartyObservation', 7: 'Factoid', 8: 'Discussion', 9: 'OriginalEvent', 10: 'Location', 11: 'Advice',
        #  12: 'ContextualInformation', 13: 'Weather', 14: 'EmergingThreats', 15: 'ServiceAvailable', 16: 'Donations',
        #  17: 'Official', 18: 'NewSubEvent', 19: 'InformationWanted', 20: 'SearchAndRescue', 21: 'MovePeople',
        #  22: 'CleanUp', 23: 'Volunteer', 24: 'GoodsServices'}

        pos_weights = torch.tensor([
            1.0000, 1.0869, 1.2438, 1.5013, 1.5451, 1.9002, 1.9796, 2.4450,
            3.3531, 3.6609, 4.7413, 4.8119, 5.1208, 5.5894, 7.0552, 8.4015,
            11.2066, 11.6695, 13.9506, 14.0638, 17.4586, 20.5034, 24.5000,
            28.9487, 32.3684
        ],
                                   dtype=torch.float32)

        self.loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weights)
        initializer(self)
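A small, self-contained illustration (not from the source) of why pos_weight is used here: it scales only the positive-target term of BCEWithLogitsLoss, so missing a rare information type such as 'GoodsServices' costs roughly 32x as much as missing 'Sentiment'.

import torch

loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0, 32.3684]))
logits = torch.tensor([[-2.0, -2.0]])   # both labels predicted negative
targets = torch.tensor([[1.0, 1.0]])    # both labels actually positive
print(loss_fn(logits, targets))         # dominated by the heavily weighted second label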
Example no. 21
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 title_encoder: Seq2VecEncoder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.title_encoder = title_encoder
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != title_encoder.get_input_dim(
        ):
            raise ConfigurationError(f"The output dimension of the text_field_"
                                     f"embedder must match the input dimension"
                                     f" of the summary_encoder. Found "
                                     f"{text_field_embedder.get_output_dim()} "
                                     f"and {title_encoder.get_input_dim()}, "
                                     f"respectively.")

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(f"The output dimension of the text_field_"
                                     f"embedder must match the input dimension"
                                     f" of the summary_encoder. Found "
                                     f"{text_field_embedder.get_output_dim()} "
                                     f"and {text_encoder.get_input_dim()}, "
                                     f"respectively.")

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example no. 22
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 abstract_encoder: Seq2VecEncoder,
                 ud_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SentimentClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward

        self.ud_predictor = biaffine_parser_universal_dependencies_todzat_2017(
        )
        self.ud_predictor._model = self.ud_predictor._model.cuda()
        self.ud_encoder = ud_encoder

        if text_field_embedder.get_output_dim(
        ) != abstract_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the abstract_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       abstract_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        self.conv1 = nn.Conv2d(3, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        self.conv3 = nn.Conv2d(16, 32, 3)
        self.conv4 = nn.Conv2d(32, 64, 3)
Example no. 23
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2VecEncoder,
                 answers_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))

        self._classifier_feedforward = classifier_feedforward

        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer

        encoding_dim = phrase_layer.get_output_dim()

        self._time_distributed_highway_layer = TimeDistributed(
            self._highway_layer)
        self._answers_encoder = TimeDistributed(answers_encoder)

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        self._mask_lstms = mask_lstms
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 24
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 title_encoder: Seq2VecEncoder,
                 text_encoder: Seq2VecEncoder,
                 regressor_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.title_encoder = title_encoder
        self.text_encoder = text_encoder
        self.regressor_feedforward = regressor_feedforward

        if text_field_embedder.get_output_dim() != title_encoder.get_input_dim(
        ):
            raise ConfigurationError(f"The output dimension of the text_field_"
                                     f"embedder must match the input dimension"
                                     f" of the summary_encoder. Found "
                                     f"{text_field_embedder.get_output_dim()} "
                                     f"and {title_encoder.get_input_dim()}, "
                                     f"respectively.")

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(f"The output dimension of the text_field_"
                                     f"embedder must match the input dimension"
                                     f" of the summary_encoder. Found "
                                     f"{text_field_embedder.get_output_dim()} "
                                     f"and {text_encoder.get_input_dim()}, "
                                     f"respectively.")

        self.metrics = {
            "MAE": MeanAbsoluteError(),
        }
        self.loss = torch.nn.BCEWithLogitsLoss()
        initializer(self)
Example no. 25
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        clauses_encoder: Seq2VecEncoder,
        outer_encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        constraint_type: str = None,
        include_start_end_transitions: bool = True,
        dropout: float = None,
        loss_weights: Optional[List] = [],
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(JCC, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.clauses_encoder = clauses_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))

        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )
        self.metrics = {"accuracy": Accuracy()}

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            clauses_encoder.get_input_dim(),
            "text field embedding dim",
            "clauses encoder input dim",
        )
        check_dimensions_match(
            clauses_encoder.get_output_dim(),
            outer_encoder.get_input_dim(),
            "clauses encoder output dim",
            "outer encoder input dim",
        )
        initializer(self)
Example no. 26
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(AcademicPaperClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward
        self.all_labels = None
        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            text_encoder.get_input_dim()))

        self.f1 = MultiLabelF1Measure()
        self.loss = torch.nn.MultiLabelSoftMarginLoss()

        initializer(self)
Example no. 27
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 startphrase_encoder: Seq2VecEncoder,
                 ending_encoder: Seq2VecEncoder,
                 similarity: SimilarityFunction,
                 initializer: InitializerApplicator,
                 regularizer: RegularizerApplicator = None) -> None:
        super().__init__(vocab, regularizer)

        # validate the configuration
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               startphrase_encoder.get_input_dim(),
                               "text field embedding dim",
                               "startphrase encoder input dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               ending_encoder.get_input_dim(),
                               "text field embedding dim",
                               "ending encoder input dim")
        check_dimensions_match(startphrase_encoder.get_output_dim(),
                               ending_encoder.get_output_dim(),
                               "startphrase embedding dim",
                               "ending embedding dim")

        # bind all attributes to the instance
        self.text_field_embedder = text_field_embedder
        self.startphrase_encoder = startphrase_encoder
        self.ending_encoder = ending_encoder
        self.similarity = similarity

        # set the training and validation losses
        self.xentropy = torch.nn.CrossEntropyLoss()
        self.accuracy = CategoricalAccuracy()

        # initialize all variables
        initializer(self)
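A hypothetical scoring fragment (not from the source; startphrase_vec and ending_vecs are assumed to be the already-encoded vectors): each candidate ending is scored by its similarity to the encoded startphrase, and the scores are trained with cross-entropy against the index of the correct ending.

# startphrase_vec: (batch, dim), ending_vecs: (batch, num_endings, dim)
scores = self.similarity(startphrase_vec.unsqueeze(1), ending_vecs)  # (batch, num_endings)
loss = self.xentropy(scores, correct_ending_index)
self.accuracy(scores, correct_ending_index)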
Example no. 28
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 few_shot_model: FewShotModel,
                 support_encoder: Seq2VecEncoder,
                 query_encoder: Seq2VecEncoder = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(FewShotRelationClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.support_encoder = support_encoder
        self.query_encoder = query_encoder or support_encoder

        if text_field_embedder.get_output_dim(
        ) != support_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the support_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       support_encoder.get_input_dim()))

        if text_field_embedder.get_output_dim(
        ) != self.query_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the query_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       self.query_encoder.get_input_dim()))

        self.few_shot_model = few_shot_model

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 29
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SequenceClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.projection_layer = Linear(self.encoder.get_output_dim(),
                                       self.num_classes)

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the sequence encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example no. 30
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 threshold: float,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(CategoryCrisisClassifier, self).__init__(vocab, regularizer)
        self.threshold = threshold
        self.text_field_embedder = text_field_embedder
        self.title_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.loss = torch.nn.BCEWithLogitsLoss()
        initializer(self)