def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        encoder.get_input_dim()))
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.classifier_feedforward = classifier_feedforward
    self.metrics = {
        "multilabel-f1": MultiLabelF1Measure(),
        "accuracy": BooleanAccuracy()
    }
    self.pearson_r = PearsonCorrelation()
    self.loss = nn.MultiLabelSoftMarginLoss()  # alternative: nn.BCEWithLogitsLoss()
    self._threshold = 0.5
    initializer(self)
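# Illustrative sketch (not from the original model): turning the logits of a
# multilabel classifier like the one above into hard predictions. `logits` and the
# multi-hot `labels` tensor are made up; the 0.5 cut-off mirrors self._threshold.
import torch
import torch.nn as nn

logits = torch.randn(4, 6)                    # (batch, num_labels)
labels = torch.randint(0, 2, (4, 6)).float()  # multi-hot gold labels

loss = nn.MultiLabelSoftMarginLoss()(logits, labels)
probabilities = torch.sigmoid(logits)         # each label is scored independently
predictions = (probabilities > 0.5).long()    # threshold, as with self._threshold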
def __init__(self, vocab: Vocabulary,
             model_text_field_embedder: TextFieldEmbedder,
             internal_text_encoder: Seq2VecEncoder,
             output_layer: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(Seq2VecClassifier, self).__init__(vocab, regularizer)
    self.model_text_field_embedder = model_text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.internal_text_encoder = internal_text_encoder
    self.output_layer = output_layer
    if model_text_field_embedder.get_output_dim() != internal_text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the model_text_field_embedder must match the "
            "input dimension of the internal_text_encoder. Found {} and {}, "
            "respectively.".format(model_text_field_embedder.get_output_dim(),
                                   internal_text_encoder.get_input_dim()))
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             SH_field_embedder: TextFieldEmbedder,
             abstract_text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(EtdClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.SH_field_embedder = SH_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.label_dict = self.vocab.get_index_to_token_vocabulary('labels')
    self.abstract_text_encoder = abstract_text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != abstract_text_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the abstract_text_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        abstract_text_encoder.get_input_dim()))
    self.metrics = {
        "roc_auc_score": RocAucScore()
    }
    self.loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             pretrained_archive=None) -> None:
    super(SentenceClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.sentence_encoder = sentence_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != sentence_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the sentence_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   sentence_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
    # if given, load weights from a pre-trained model archive
    if pretrained_archive:
        archive = load_archive(pretrained_archive)
        self._initialize_weights_from_archive(archive)
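# Illustrative sketch of the `_initialize_weights_from_archive` helper referenced
# above (an assumed implementation, not the original): copy parameters whose names
# and shapes match between the archived model and this one.
def _initialize_weights_from_archive(self, archive) -> None:
    archived_parameters = dict(archive.model.named_parameters())
    for name, parameter in self.named_parameters():
        source = archived_parameters.get(name)
        if source is not None and source.size() == parameter.size():
            # copy in place so the optimizer keeps tracking this parameter
            parameter.data.copy_(source.data)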
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(EFClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    logger.info("------------------------------------")
    logger.info("num classes: {}".format(self.num_classes))
    logger.info("------------------------------------")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim()))
    self.f1 = F1Measure(positive_label=1)
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(PriorityCrisisClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    # self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    # The vector [1.0000, 4.6600, 6.0852, 83.3817] comes from
    # ./tests/models/priority_crisis_classifier_test.py (its get_weights function)
    # and was learned from the training set. The hand-rounded weights below are
    # currently unused because the unweighted loss is kept.
    weights = torch.tensor([1.0000, 4.0000, 8.0000, 16.0000], dtype=torch.float32)
    self.loss = torch.nn.CrossEntropyLoss()
    # self.loss = torch.nn.CrossEntropyLoss(weight=weights)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(PriorityCrisisClassifierWithLossWeight, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    # self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    # class weights corresponding to low, medium, high and critical priority
    weights = torch.tensor([1.0000, 6.1094, 8.3922, 16.8462], dtype=torch.float32)
    self.loss = torch.nn.CrossEntropyLoss(weight=weights)
    initializer(self)
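# Illustrative sketch (an assumption, not the repo's get_weights): inverse-frequency
# class weights like those above can be derived from training-set label counts,
# normalised so the most frequent class gets weight 1.0. The counts are made up.
import torch

label_counts = {"low": 5400, "medium": 884, "high": 643, "critical": 321}
counts = torch.tensor([label_counts[k] for k in ("low", "medium", "high", "critical")],
                      dtype=torch.float32)
weights = counts.max() / counts  # most frequent class -> 1.0, rarer classes -> larger
loss = torch.nn.CrossEntropyLoss(weight=weights)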
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             title_encoder: Seq2VecEncoder,
             abstract_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(AcademicPaperClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.title_encoder = title_encoder
    self.abstract_encoder = abstract_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != title_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the title_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        title_encoder.get_input_dim()))
    if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the abstract_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        abstract_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             abstract_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SentimentClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.abstract_encoder = abstract_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the abstract_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   abstract_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)

    # Keras CNN used as the image feature extractor.
    model = Sequential()
    model.add(Conv2D(64, kernel_size=(3, 3), input_shape=(530, 700, 3), padding='valid'))
    model.add(Conv2D(64, kernel_size=(3, 3), padding='valid'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu', padding='valid'))
    model.add(Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu', padding='valid'))
    model.add(AveragePooling2D(pool_size=(19, 19)))
    model.add(Flatten())
    model.summary()
    self.image_model = model
def __init__(self, vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    logger.info("==> encoder input dim: {}, output dim: {}".format(
        encoder.get_input_dim(), encoder.get_output_dim()))
    self.classifier = torch.nn.Linear(self.encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
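# Illustrative sketch of the forward pass that usually accompanies a constructor
# like the one above (assumed, in the spirit of the AllenNLP tutorial classifier):
# embed the tokens, pool them with the Seq2VecEncoder, project to label logits.
import torch
from allennlp.nn.util import get_text_field_mask

def forward(self, text, label=None):
    mask = get_text_field_mask(text)        # (batch, seq_len)
    embedded = self.embedder(text)          # (batch, seq_len, emb_dim)
    encoded = self.encoder(embedded, mask)  # (batch, enc_dim)
    logits = self.classifier(encoded)       # (batch, num_labels)
    output = {"probs": torch.nn.functional.softmax(logits, dim=-1)}
    if label is not None:
        self.accuracy(logits, label)
        output["loss"] = torch.nn.functional.cross_entropy(logits, label)
    return output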
def __init__(self, pooler: Seq2VecEncoder,
             knowledge_encoder: Seq2SeqEncoder = None):
    super().__init__()
    self.pooler = pooler
    pass_thru = PassThroughEncoder(pooler.get_input_dim())
    # fall back to a pass-through encoder when no knowledge encoder is given
    self.knowledge_encoder = TimeDistributed(knowledge_encoder or pass_thru)
    self.knowledge_attn = DotProductMatrixAttention()  # alternative: CosineMatrixAttention()
    self.input_dim = pooler.get_input_dim()
    self.output_dim = pooler.get_output_dim()
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             pivot_phrase_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SkipGram, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("shared_words_vocab")
    self.pivot_phrase_encoder = pivot_phrase_encoder
    self.projection_layer = Linear(self.pivot_phrase_encoder.get_output_dim(),
                                   self.num_classes, bias=True)
    self.loss = torch.nn.CrossEntropyLoss()
    if text_field_embedder.get_output_dim() != pivot_phrase_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the pivot_phrase_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   pivot_phrase_encoder.get_input_dim()))
    initializer(self)
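# Illustrative sketch (an assumption, not the original forward): a skip-gram style
# training step for the model above -- encode the pivot phrase, project it to scores
# over the shared word vocabulary, and train against observed context word ids.
import torch

batch, emb_dim, vocab_size = 8, 64, 10000
encoded_pivot = torch.randn(batch, emb_dim)        # pivot_phrase_encoder output
projection = torch.nn.Linear(emb_dim, vocab_size)  # plays the role of self.projection_layer
context_word_ids = torch.randint(0, vocab_size, (batch,))

logits = projection(encoded_pivot)                 # (batch, vocab_size)
loss = torch.nn.CrossEntropyLoss()(logits, context_word_ids)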
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             inner_encoder: Seq2VecEncoder,
             outer_encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(HierarchicalRNN, self).__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.inner_encoder = inner_encoder
    self.outer_encoder = outer_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.label_projection_layer = TimeDistributed(
        Linear(outer_encoder.get_output_dim(), self.num_tags))
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self._loss = torch.nn.CrossEntropyLoss()
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           inner_encoder.get_input_dim(),
                           'text field embedding dim', 'inner encoder input dim')
    check_dimensions_match(inner_encoder.get_output_dim(),
                           outer_encoder.get_input_dim(),
                           'inner encoder output dim', 'outer encoder input dim')
    initializer(self)
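# Illustrative pure-torch analogue (not the original forward) of the hierarchical
# composition above: an inner encoder pools words into one vector per sentence, an
# outer encoder contextualises the sentence sequence, and a time-distributed linear
# layer produces per-sentence tag logits. Mean pooling and the GRU are stand-ins.
import torch

batch, n_sent, n_words, emb_dim, num_tags = 2, 4, 7, 32, 5
embedded = torch.randn(batch, n_sent, n_words, emb_dim)

sentence_vectors = embedded.mean(dim=2)      # inner encoder: (batch, n_sent, emb_dim)
outer = torch.nn.GRU(emb_dim, 16, batch_first=True, bidirectional=True)
document_states, _ = outer(sentence_vectors)  # outer encoder: (batch, n_sent, 32)
logits = torch.nn.Linear(32, num_tags)(document_states)  # (batch, n_sent, num_tags)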
def __init__(self,
             vocab: Vocabulary,
             input_unit: Seq2VecEncoder,
             text_field_embedder: TextFieldEmbedder,
             # embedding_projection_dim: int = None,
             classifier_feedforward: FeedForward = None,
             max_step: int = 12,
             n_memories: int = 3,
             self_attention: bool = False,
             memory_gate: bool = False,
             dropout: float = 0.15,
             loss_weights=None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)
    self.text_field_embedder = text_field_embedder
    self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                          input_unit.get_input_dim())
    self.input_unit = input_unit
    self.mac = MACCell(
        text_field_embedder.get_output_dim(),  # alternative: input_unit.get_output_dim()
        max_step=max_step,
        n_memories=n_memories,
        self_attention=self_attention,
        memory_gate=memory_gate,
        dropout=dropout,
        save_attns=False,
    )
    hidden_size = 2 * input_unit.get_output_dim()
    n_layers = 3
    self.classifier = classifier_feedforward or FeedForward(
        input_dim=hidden_size,
        num_layers=n_layers,
        hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
        activations=[Activation.by_name("relu")(),
                     Activation.by_name("relu")(),
                     Activation.by_name("linear")()],
        dropout=[dropout, dropout, 0.0])
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "weighted_f1": WeightedF1Measure(),
        "fbeta": FBetaMeasure(average='micro')
    }
    weights = loss_weights and torch.FloatTensor(loss_weights)
    self.loss = nn.CrossEntropyLoss(weight=weights)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             label_name: str = 'target-sentiment-labels') -> None:
    '''
    :param vocab: A Vocabulary, required in order to compute sizes
                  for input/output projections.
    :param embedder: Used to embed the text.
    :param encoder: Encodes the sentence/text, e.g. an LSTM.
    :param feedforward: An optional feed-forward layer to apply after the encoder.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the regularization
                        penalty during training.
    :param dropout: Dropout applied after each layer apart from the last. Dropout
                    applied to time-based data is `variational dropout`_; all else
                    is standard dropout.
    :param label_name: Name of the label namespace.

    This is based on the LSTM model by
    `Tang et al. 2016 <https://www.aclweb.org/anthology/C16-1311.pdf>`_.
    '''
    super().__init__(vocab, regularizer)
    self.label_name = label_name
    self.embedder = embedder
    self.encoder = encoder
    self.num_classes = self.vocab.get_vocab_size(self.label_name)
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.f1_metrics = {}
    # per-class F1 scores, one metric per label in the namespace
    label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name)
    for label_index, _label_name in label_index_name.items():
        _label_name = f'F1_{_label_name.capitalize()}'
        self.f1_metrics[_label_name] = F1Measure(label_index)
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)
    check_dimensions_match(embedder.get_output_dim(), encoder.get_input_dim(),
                           'Embedding', 'Encoder')
    if self.feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               'Encoder', 'FeedForward')
    initializer(self)
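# Illustrative sketch of the difference between the two dropout modules above:
# InputVariationalDropout samples one mask per sequence and reuses it at every
# timestep, while standard Dropout samples an independent mask per element.
import torch
from allennlp.modules import InputVariationalDropout

x = torch.ones(1, 4, 6)  # (batch, timesteps, features)
variational = InputVariationalDropout(0.5)
naive = torch.nn.Dropout(0.5)
variational.train()
naive.train()

v = variational(x)  # zeroed feature columns repeat across all 4 timesteps
n = naive(x)        # zeros appear independently at each timestep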
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             entity_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             label_namespace: str = "logical_form",
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.num_tags = self.vocab.get_vocab_size("logical_form")
    self.encoder = encoder
    self.text_field_embedder = text_field_embedder
    self.entity_embedder = entity_embedder
    self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    # CRFs as the estimators for the sequential tags
    self.crf = ConditionalRandomField(self.num_tags,
                                      include_start_end_transitions=False)
    self.crf_for_both = ConditionalRandomField(self.num_tags,
                                               include_start_end_transitions=False)
    self.softmax_layer = Softmax()
    self.ce_loss = CrossEntropyLoss()
    self.matched = 0
    self.all_pred = 0
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    output_dim = self.encoder.get_output_dim()
    self.pred_layer = Linear(4 * output_dim, 3 * self.num_tags)
    self.load_pretrained_weights()
    self.pred_layer_both = Linear(8 * output_dim, 3 * self.num_tags)
    self.metrics = {}
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward = None,
             loss_weights=None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder.train()
    hidden_size = encoder.get_output_dim()
    self.classifier_feedforward = classifier_feedforward or FeedForward(
        input_dim=hidden_size,
        num_layers=3,
        hidden_dims=[hidden_size, hidden_size, self.num_classes],
        activations=[Activation.by_name("relu")(),
                     Activation.by_name("relu")(),
                     Activation.by_name("linear")()],
        dropout=[0.2, 0.2, 0.0])
    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "weighted_f1": WeightedF1Measure(),
    }
    # pass class weights through to the loss when provided
    args = {"weight": torch.FloatTensor(loss_weights)} if loss_weights else {}
    self.loss = nn.CrossEntropyLoss(**args)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             label_weight: Dict[str, float] = None,
             use_label_distribution: bool = False,
             image_classification_ratio: float = 0.0,
             decay_every_i_step=100000,
             decay_ratio=0.8,
             instance_count=100000,
             max_epoch=10,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BasicClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.sentence_encoder = sentence_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != sentence_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the sentence_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        sentence_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "cnn_loss": Average()
    }
    # A label-distribution-aware loss is not implemented yet; both branches
    # currently fall back to standard cross-entropy.
    if not use_label_distribution:
        self.loss = torch.nn.CrossEntropyLoss()
    else:
        self.loss = torch.nn.CrossEntropyLoss()
    self.image_classification_ratio = image_classification_ratio
    self.decay_every_i_step = decay_every_i_step
    self.decay_ratio = decay_ratio
    self.training_step = 0
    self.current_ratio = image_classification_ratio
    self.total_steps = max_epoch * instance_count // 64
    self.step_every_epoch = instance_count // 64
    print("steps per epoch:", self.step_every_epoch)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             model_text_field_embedder: TextFieldEmbedder,
             internal_text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             use_batch_norm: bool = False,
             embedding_token_dropout: Optional[float] = None,
             embedding_dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._model_text_field_embedder = model_text_field_embedder
    self._num_classes = self.vocab.get_vocab_size("labels")
    self._internal_text_encoder = internal_text_encoder
    self._classifier_feedforward = classifier_feedforward
    self._embedding_token_dropout = nn.Dropout(
        embedding_token_dropout) if embedding_token_dropout else None
    self._embedding_dropout = nn.Dropout(
        embedding_dropout) if embedding_dropout else None
    self._batch_norm = nn.modules.BatchNorm1d(
        num_features=internal_text_encoder.get_output_dim()) if use_batch_norm else None
    if model_text_field_embedder.get_output_dim() != internal_text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the model_text_field_embedder must match the "
            "input dimension of the internal_text_encoder. Found {} and {}, "
            "respectively.".format(model_text_field_embedder.get_output_dim(),
                                   internal_text_encoder.get_input_dim()))
    self._metrics = {
        "accuracy": CategoricalAccuracy(),
        # Binary classification: label index 1 is the positive "suggestion"
        # class, matching the SemEval task.
        "f1": F1Measure(1)
    }
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             threshold: float,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CategoryCrisisClassifierWithLossWeight, self).__init__(vocab, regularizer)
    self.threshold = threshold
    self.text_field_embedder = text_field_embedder
    self.title_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim()))
    # Loss weights correspond to the information types in this order:
    # {0: 'Sentiment', 1: 'Hashtags', 2: 'News', 3: 'Irrelevant', 4: 'MultimediaShare',
    #  5: 'ThirdPartyObservation', 6: 'FirstPartyObservation', 7: 'Factoid',
    #  8: 'Discussion', 9: 'OriginalEvent', 10: 'Location', 11: 'Advice',
    #  12: 'ContextualInformation', 13: 'Weather', 14: 'EmergingThreats',
    #  15: 'ServiceAvailable', 16: 'Donations', 17: 'Official', 18: 'NewSubEvent',
    #  19: 'InformationWanted', 20: 'SearchAndRescue', 21: 'MovePeople',
    #  22: 'CleanUp', 23: 'Volunteer', 24: 'GoodsServices'}
    pos_weights = torch.tensor([
        1.0000, 1.0869, 1.2438, 1.5013, 1.5451, 1.9002, 1.9796, 2.4450, 3.3531,
        3.6609, 4.7413, 4.8119, 5.1208, 5.5894, 7.0552, 8.4015, 11.2066, 11.6695,
        13.9506, 14.0638, 17.4586, 20.5034, 24.5000, 28.9487, 32.3684
    ], dtype=torch.float32)
    self.loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weights)
    initializer(self)
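# Illustrative sketch (an assumption, not the repo's computation): per-label
# pos_weight values like those above are often the negative/positive count ratio
# per label, so rare labels weigh more. `label_matrix` is a made-up multi-hot batch.
import torch

label_matrix = torch.randint(0, 2, (1000, 25)).float()  # (instances, labels)
positives = label_matrix.sum(dim=0).clamp(min=1.0)
negatives = label_matrix.shape[0] - positives
pos_weight = negatives / positives                      # > 1 for rare labels
loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)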
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             title_encoder: Seq2VecEncoder,
             text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.title_encoder = title_encoder
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != title_encoder.get_input_dim():
        raise ConfigurationError(f"The output dimension of the text_field_embedder "
                                 f"must match the input dimension of the "
                                 f"title_encoder. Found "
                                 f"{text_field_embedder.get_output_dim()} and "
                                 f"{title_encoder.get_input_dim()}, respectively.")
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(f"The output dimension of the text_field_embedder "
                                 f"must match the input dimension of the "
                                 f"text_encoder. Found "
                                 f"{text_field_embedder.get_output_dim()} and "
                                 f"{text_encoder.get_input_dim()}, respectively.")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             abstract_encoder: Seq2VecEncoder,
             ud_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SentimentClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.abstract_encoder = abstract_encoder
    self.classifier_feedforward = classifier_feedforward
    # pre-trained biaffine universal-dependencies parser, moved to the GPU
    self.ud_predictor = biaffine_parser_universal_dependencies_todzat_2017()
    self.ud_predictor._model = self.ud_predictor._model.cuda()
    self.ud_encoder = ud_encoder
    if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the abstract_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   abstract_encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
    self.conv1 = nn.Conv2d(3, 8, 3)
    self.conv2 = nn.Conv2d(8, 16, 3)
    self.conv3 = nn.Conv2d(16, 32, 3)
    self.conv4 = nn.Conv2d(32, 64, 3)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2VecEncoder,
             answers_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._classifier_feedforward = classifier_feedforward
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    encoding_dim = phrase_layer.get_output_dim()
    self._time_distributed_highway_layer = TimeDistributed(self._highway_layer)
    self._answers_encoder = TimeDistributed(answers_encoder)
    # BiDAF has lots of layer dimensions which need to match up - these aren't
    # necessarily obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             title_encoder: Seq2VecEncoder,
             text_encoder: Seq2VecEncoder,
             regressor_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.title_encoder = title_encoder
    self.text_encoder = text_encoder
    self.regressor_feedforward = regressor_feedforward
    if text_field_embedder.get_output_dim() != title_encoder.get_input_dim():
        raise ConfigurationError(f"The output dimension of the text_field_embedder "
                                 f"must match the input dimension of the "
                                 f"title_encoder. Found "
                                 f"{text_field_embedder.get_output_dim()} and "
                                 f"{title_encoder.get_input_dim()}, respectively.")
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(f"The output dimension of the text_field_embedder "
                                 f"must match the input dimension of the "
                                 f"text_encoder. Found "
                                 f"{text_field_embedder.get_output_dim()} and "
                                 f"{text_encoder.get_input_dim()}, respectively.")
    self.metrics = {
        "MAE": MeanAbsoluteError(),
    }
    self.loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    clauses_encoder: Seq2VecEncoder,
    outer_encoder: Seq2SeqEncoder,
    label_namespace: str = "labels",
    constraint_type: str = None,
    include_start_end_transitions: bool = True,
    dropout: float = None,
    loss_weights: Optional[List] = None,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(JCC, self).__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.clauses_encoder = clauses_encoder
    self.outer_encoder = outer_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.label_projection_layer = TimeDistributed(
        Linear(outer_encoder.get_output_dim(), self.num_tags))
    labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
    constraints = allowed_transitions(constraint_type, labels)
    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions,
    )
    self.metrics = {"accuracy": Accuracy()}
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        clauses_encoder.get_input_dim(),
        "text field embedding dim",
        "clauses encoder input dim",
    )
    check_dimensions_match(
        clauses_encoder.get_output_dim(),
        outer_encoder.get_input_dim(),
        "clauses encoder output dim",
        "outer encoder input dim",
    )
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(AcademicPaperClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    self.all_labels = None
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the text_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        text_encoder.get_input_dim()))
    self.f1 = MultiLabelF1Measure()
    self.loss = torch.nn.MultiLabelSoftMarginLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             startphrase_encoder: Seq2VecEncoder,
             ending_encoder: Seq2VecEncoder,
             similarity: SimilarityFunction,
             initializer: InitializerApplicator,
             regularizer: RegularizerApplicator = None) -> None:
    super().__init__(vocab, regularizer)
    # validate the configuration
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           startphrase_encoder.get_input_dim(),
                           "text field embedding dim", "startphrase encoder input dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           ending_encoder.get_input_dim(),
                           "text field embedding dim", "ending encoder input dim")
    check_dimensions_match(startphrase_encoder.get_output_dim(),
                           ending_encoder.get_output_dim(),
                           "startphrase embedding dim", "ending embedding dim")
    # bind all attributes to the instance
    self.text_field_embedder = text_field_embedder
    self.startphrase_encoder = startphrase_encoder
    self.ending_encoder = ending_encoder
    self.similarity = similarity
    # set the training and validation losses
    self.xentropy = torch.nn.CrossEntropyLoss()
    self.accuracy = CategoricalAccuracy()
    # initialize all variables
    initializer(self)
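# Illustrative sketch (an assumption, not the original forward): multiple-choice
# scoring in the spirit of the model above -- encode the startphrase once, encode
# each candidate ending, score pairs by cosine similarity, and train with
# cross-entropy over the per-ending scores. All tensors are made up.
import torch
import torch.nn.functional as F

batch, num_endings, dim = 4, 4, 32
startphrase = torch.randn(batch, dim)           # startphrase_encoder output
endings = torch.randn(batch, num_endings, dim)  # ending_encoder output per candidate

scores = F.cosine_similarity(startphrase.unsqueeze(1), endings, dim=-1)
gold = torch.randint(0, num_endings, (batch,))
loss = torch.nn.CrossEntropyLoss()(scores, gold)
prediction = scores.argmax(dim=-1)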
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             few_shot_model: FewShotModel,
             support_encoder: Seq2VecEncoder,
             query_encoder: Seq2VecEncoder = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(FewShotRelationClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.support_encoder = support_encoder
    self.query_encoder = query_encoder or support_encoder
    if text_field_embedder.get_output_dim() != support_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the support_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   support_encoder.get_input_dim()))
    if text_field_embedder.get_output_dim() != self.query_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the query_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   self.query_encoder.get_input_dim()))
    self.few_shot_model = few_shot_model
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
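# Illustrative sketch of one concrete FewShotModel (assuming a prototypical-network
# style scorer; the actual few_shot_model above is injected, so this is a stand-in):
# average the encoded support set into class prototypes and score queries by
# negative squared euclidean distance.
import torch

n_way, k_shot, n_query, dim = 5, 3, 10, 64
support = torch.randn(n_way, k_shot, dim)  # encoded support instances per class
queries = torch.randn(n_query, dim)        # encoded query instances

prototypes = support.mean(dim=1)                   # (n_way, dim)
logits = -torch.cdist(queries, prototypes).pow(2)  # (n_query, n_way)
predictions = logits.argmax(dim=-1)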
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SequenceClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.projection_layer = Linear(self.encoder.get_output_dim(), self.num_classes)
    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the sequence encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   encoder.get_input_dim()))
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2VecEncoder,
             threshold: float,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CategoryCrisisClassifier, self).__init__(vocab, regularizer)
    self.threshold = threshold
    self.text_field_embedder = text_field_embedder
    self.title_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    if text_field_embedder.get_output_dim() != text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   text_encoder.get_input_dim()))
    self.loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)