Пример #1
0
    def from_config(
        cls,
        config: Config,
        metadata: Optional[FieldMeta] = None,
        labels: Optional[Vocabulary] = None,
    ):
        """Build the output layer whose class matches the configured loss.

        The label vocabulary is taken from ``labels`` when it is given and
        from ``metadata.vocab`` otherwise, so ``metadata`` must be provided
        whenever ``labels`` is None.

        Args:
            config: Output layer config; ``config.loss`` and
                ``config.label_weights`` are read here and the config is
                forwarded to the created layer.
            metadata: Field metadata, consulted only when ``labels`` is None.
            labels: Label vocabulary; takes precedence over ``metadata``.

        Returns:
            A ``BinaryClassificationOutputLayer``, ``MultiLabelOutputLayer``
            or ``MulticlassOutputLayer`` instance, chosen by the loss type.
        """
        if labels is not None:
            vocab = list(labels)
            vocab_dict = labels.idx
        else:
            vocab = metadata.vocab.itos
            vocab_dict = metadata.vocab.stoi

        label_weights = (
            get_label_weights(vocab_dict, config.label_weights)
            if config.label_weights
            else None
        )

        # Create the loss exactly once: the same instance drives the class
        # dispatch below and is handed to the layer, instead of building a
        # throwaway loss for the isinstance checks and a second one for use.
        loss = create_loss(config.loss, weight=label_weights)

        if isinstance(loss, BinaryCrossEntropyLoss):
            layer_cls = BinaryClassificationOutputLayer
        elif isinstance(loss, MultiLabelSoftMarginLoss):
            layer_cls = MultiLabelOutputLayer
        else:
            layer_cls = MulticlassOutputLayer

        return layer_cls(vocab, loss, config)
Пример #2
0
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble the model (encoder, decoder, output layer) from config.

        Args:
            config: Model config providing ``encoder``, ``decoder`` and
                ``output_layer`` sub-configs.
            tensorizers: Must contain ``"labels"`` and ``"tokens"``; an
                optional ``"dense"`` entry widens the decoder input.

        Returns:
            ``cls(encoder, decoder, output_layer)``.
        """
        label_vocab = tensorizers["labels"].vocab
        token_vocab = tensorizers["tokens"].vocab

        encoder = create_module(
            config.encoder,
            padding_idx=token_vocab.get_pad_index(),
            vocab_size=len(token_vocab),
        )

        # Dense features are optional; without them the decoder only sees
        # the encoder representation.
        if "dense" in tensorizers:
            dense_dim = tensorizers["dense"].dim
        else:
            dense_dim = 0

        decoder = create_module(
            config.decoder,
            in_dim=encoder.representation_dim + dense_dim,
            out_dim=len(label_vocab),
        )

        output_config = config.output_layer
        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                label_vocab.idx, output_config.label_weights
            )

        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        return cls(encoder, decoder, output_layer_cls(list(label_vocab), loss))
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble a single-encoder model with left/right dense inputs.

        Args:
            config: Model config providing ``encoder``, ``decoder`` and
                ``output_layer`` sub-configs.
            tensorizers: Must contain ``"labels"``, ``"tokens"``,
                ``"right_dense"`` and ``"left_dense"``.

        Returns:
            ``cls(encoder, decoder, output_layer)``.

        Raises:
            ValueError: If the label vocabulary is falsy.
        """
        label_vocab = tensorizers["labels"].vocab
        if not label_vocab:
            raise ValueError("Labels were not created, see preceding errors")

        token_vocab = tensorizers["tokens"].vocab
        encoder = create_module(
            config.encoder,
            padding_idx=token_vocab.get_pad_index(),
            vocab_size=len(token_vocab),
        )

        right_dense_dim = tensorizers["right_dense"].dim
        left_dense_dim = tensorizers["left_dense"].dim

        # Only the right side is combined with the encoder representation;
        # the left side is fed by its dense features alone.
        decoder = create_module(
            config.decoder,
            right_dim=encoder.representation_dim + right_dense_dim,
            left_dim=left_dense_dim,
            to_dim=len(label_vocab),
        )

        output_config = config.output_layer
        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                label_vocab.idx, output_config.label_weights
            )

        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        return cls(encoder, decoder, output_layer_cls(list(label_vocab), loss))
Пример #4
0
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble embedding, representation, decoder and output layer.

        Args:
            config: Model config providing ``representation`` and
                ``output_layer`` sub-configs; also passed to
                ``cls.create_embedding`` and ``cls.create_decoder``.
            tensorizers: Must contain ``"labels"``; the full mapping is
                forwarded to ``cls.create_embedding``.

        Returns:
            ``cls(embedding, representation, decoder, output_layer)``.
        """
        label_vocab = tensorizers["labels"].vocab

        embedding = cls.create_embedding(config, tensorizers)
        representation = create_module(
            config.representation,
            embed_dim=embedding.embedding_dim,
        )
        decoder = cls.create_decoder(
            config,
            representation.representation_dim,
            len(label_vocab),
        )

        output_config = config.output_layer
        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                label_vocab.idx, output_config.label_weights
            )
        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        return cls(
            embedding,
            representation,
            decoder,
            output_layer_cls(list(label_vocab), loss),
        )
    def from_config(
        cls,
        config: Config,
        metadata: Optional[FieldMeta] = None,
        labels: Optional[Vocabulary] = None,
    ):
        """Build the output layer whose class matches the configured loss.

        The label vocabulary and padding index come from ``labels`` when it
        is given and from ``metadata`` otherwise, so ``metadata`` must be
        provided whenever ``labels`` is None.

        Args:
            config: Output layer config; ``config.loss`` and
                ``config.label_weights`` are read here and the config is
                forwarded to the created layer.
            metadata: Field metadata, consulted only when ``labels`` is None.
            labels: Label vocabulary; takes precedence over ``metadata``.

        Returns:
            A ``BinaryClassificationOutputLayer``, ``MultiLabelOutputLayer``
            or ``MulticlassOutputLayer`` instance, chosen by the loss type.
        """
        if labels is None:
            label_names = metadata.vocab.itos
            label_to_index = metadata.vocab.stoi
            # Metadata may not carry a padding index; -1 is the fallback.
            ignore_idx = getattr(metadata, "pad_token_idx", -1)
        else:
            label_names = list(labels)
            label_to_index = labels.idx
            ignore_idx = labels.idx.get(
                labels.pad_token, Padding.DEFAULT_LABEL_PAD_IDX
            )

        class_weights = None
        if config.label_weights:
            class_weights = get_label_weights(label_to_index, config.label_weights)

        loss = create_loss(
            config.loss, weight=class_weights, ignore_index=ignore_idx
        )

        # The concrete layer class is dictated by the kind of loss created.
        layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        return layer_cls(label_names, loss, config)
Пример #6
0
 def from_config(
     cls,
     config: Config,
     metadata: Optional[FieldMeta] = None,
     labels: Optional[Vocabulary] = None,
 ):
     """Create the output layer from config plus a label vocabulary source.

     The label vocabulary and padding index come from ``labels`` when it is
     given and from ``metadata`` otherwise, so ``metadata`` must be provided
     whenever ``labels`` is None.

     Args:
         config: Output layer config; ``loss``, ``label_weights`` and
             ``ignore_pad_in_loss`` are read here.
         metadata: Field metadata, consulted only when ``labels`` is None.
         labels: Label vocabulary; takes precedence over ``metadata``.

     Returns:
         ``cls(vocab, loss)``.
     """
     if labels is None:
         label_names = metadata.vocab.itos
         pad_idx = metadata.pad_token_idx
         label_to_index = metadata.vocab.stoi
     else:
         label_names = list(labels)
         label_to_index = labels.idx
         pad_idx = labels.idx.get(
             labels.pad_token, Padding.DEFAULT_LABEL_PAD_IDX
         )

     class_weights = None
     if config.label_weights:
         class_weights = get_label_weights(label_to_index, config.label_weights)

     # The padding index is ignored by the loss only when the config asks
     # for it; otherwise -1 is passed as ignore_index.
     loss = create_loss(
         config.loss,
         weight=class_weights,
         ignore_index=(pad_idx if config.ignore_pad_in_loss else -1),
     )
     return cls(label_names, loss)
Пример #7
0
    def create_output_layer(cls, config: Config, labels: VocabConfig):
        """Create the classification output layer matching the configured loss.

        Args:
            config: Model config; only ``config.output_layer`` is read.
            labels: Label vocabulary; it is iterated for the label list and
                its ``idx`` mapping is used for label weights.

        Returns:
            A ``BinaryClassificationOutputLayer``, ``MultiLabelOutputLayer``
            or ``MulticlassOutputLayer`` instance, chosen by the loss type.
        """
        output_config = config.output_layer

        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                labels.idx, output_config.label_weights
            )
        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        return output_layer_cls(list(labels), loss)
 def from_config(cls, config: Config, labels: Vocabulary):
     """Create the output layer from config and a label vocabulary.

     Args:
         config: Output layer config; ``loss``, ``label_weights`` and
             ``ignore_pad_in_loss`` are read here.
         labels: Label vocabulary providing the label list, the ``idx``
             mapping and the pad token.

     Returns:
         ``cls(vocab, loss)``.
     """
     label_names = list(labels)
     label_to_index = labels.idx
     pad_idx = labels.idx.get(labels.pad_token, Padding.DEFAULT_LABEL_PAD_IDX)

     class_weights = None
     if config.label_weights:
         class_weights = get_label_weights(label_to_index, config.label_weights)

     # The padding index is ignored by the loss only when the config asks
     # for it; otherwise -1 is passed as ignore_index.
     loss = create_loss(
         config.loss,
         weight=class_weights,
         ignore_index=(pad_idx if config.ignore_pad_in_loss else -1),
     )
     return cls(label_names, loss)
Пример #9
0
 def from_config(
     cls,
     config,
     metadata: Optional[FieldMeta] = None,
     labels: Optional[Vocabulary] = None,
 ):
     """Create a score-based output layer from config and label vocabulary.

     ``metadata`` is accepted but not used here; ``labels`` is iterated
     unconditionally, so it must be provided in practice.

     Args:
         config: Output layer config; ``loss``, ``label_weights``,
             ``score_threshold`` and ``score_type`` are read here.
         metadata: Unused in this method.
         labels: Label vocabulary.

     Returns:
         ``cls(label_list, loss, score_threshold, score_type)``.
     """
     class_weights = None
     if config.label_weights:
         class_weights = get_label_weights(labels.idx, config.label_weights)

     # Only the cosine-based score types are accepted by this layer.
     supported_score_types = (
         OutputScore.raw_cosine,
         OutputScore.norm_cosine,
         OutputScore.sigmoid_cosine,
     )
     assert any(
         config.score_type == supported for supported in supported_score_types
     ), f"Invalid score_type {config.score_type}. See OutputScore enum."

     label_names = list(labels)
     loss = create_loss(config.loss, weight=class_weights)
     return cls(label_names, loss, config.score_threshold, config.score_type)
Пример #10
0
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble the model (encoder, decoder, output layer) from config.

        Args:
            config: Model config providing ``encoder``, ``decoder`` and
                ``output_layer`` sub-configs; the optional ``use_selfie``
                and ``r3f_options`` attributes are honoured when present.
            tensorizers: Must contain ``"labels"`` and ``"tokens"``; an
                optional ``"dense"`` entry widens the decoder input.

        Returns:
            ``cls(encoder, decoder, output_layer)``, with ``r3f_options``
            passed as a keyword argument when the config defines it.

        Raises:
            ValueError: If the label vocabulary is falsy.
        """
        label_vocab = tensorizers["labels"].vocab
        if not label_vocab:
            raise ValueError("Labels were not created, see preceding errors")

        token_vocab = tensorizers["tokens"].vocab
        encoder = create_module(
            config.encoder,
            padding_idx=token_vocab.get_pad_index(),
            vocab_size=len(token_vocab),
        )

        # No MLP fusion in SELFIE, so dense features only widen the decoder
        # input when SELFIE is off and a dense tensorizer exists.
        dense_dim = 0
        if not getattr(config, "use_selfie", False) and "dense" in tensorizers:
            dense_dim = tensorizers["dense"].dim

        decoder = create_module(
            config.decoder,
            in_dim=encoder.representation_dim + dense_dim,
            out_dim=len(label_vocab),
        )

        output_config = config.output_layer
        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                label_vocab.idx, output_config.label_weights
            )

        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        # r3f_options is forwarded only for configs that define it.
        extra_kwargs = (
            {"r3f_options": config.r3f_options}
            if hasattr(config, "r3f_options")
            else {}
        )

        output_layer = output_layer_cls(list(label_vocab), loss)
        return cls(encoder, decoder, output_layer, **extra_kwargs)
Пример #11
0
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble a two-encoder (right/left) model from config.

        Args:
            config: Model config providing ``right_encoder``,
                ``left_encoder``, ``decoder`` and ``output_layer``
                sub-configs plus the embedding-sharing options.
            tensorizers: Must contain ``"labels"``, ``"right_tokens"``,
                ``"left_tokens"``, ``"right_dense"`` and ``"left_dense"``.

        Returns:
            A ``cls`` instance built from both encoders, the decoder, the
            output layer and the sharing-related config values.

        Raises:
            ValueError: If the label vocabulary is falsy.
        """
        label_vocab = tensorizers["labels"].vocab
        if not label_vocab:
            raise ValueError("Labels were not created, see preceding errors")

        # With a shared embedding, one table is handed to both encoders;
        # otherwise each encoder receives None.
        shared_embedding = None
        if config.use_shared_embedding:
            shared_embedding = torch.nn.Embedding(
                config.vocab_size,
                config.hidden_dim,
                padding_idx=config.padding_idx,
            )

        right_vocab = tensorizers["right_tokens"].vocab
        right_encoder = create_module(
            config.right_encoder,
            token_embedding=shared_embedding,
            padding_idx=right_vocab.get_pad_index(),
            vocab_size=len(right_vocab),
        )

        left_vocab = tensorizers["left_tokens"].vocab
        left_encoder = create_module(
            config.left_encoder,
            token_embedding=shared_embedding,
            padding_idx=left_vocab.get_pad_index(),
            vocab_size=len(left_vocab),
        )

        right_dense_dim = tensorizers["right_dense"].dim
        left_dense_dim = tensorizers["left_dense"].dim
        right_dim = right_encoder.representation_dim + right_dense_dim
        left_dim = left_encoder.representation_dim + left_dense_dim

        decoder = create_module(
            config.decoder,
            right_dim=right_dim,
            left_dim=left_dim,
            to_dim=len(label_vocab),
        )

        output_config = config.output_layer
        class_weights = None
        if output_config.label_weights:
            class_weights = get_label_weights(
                label_vocab.idx, output_config.label_weights
            )

        loss = create_loss(output_config.loss, weight=class_weights)

        # The output layer class is dictated by the kind of loss created.
        output_layer_cls = (
            BinaryClassificationOutputLayer
            if isinstance(loss, BinaryCrossEntropyLoss)
            else MultiLabelOutputLayer
            if isinstance(loss, MultiLabelSoftMarginLoss)
            else MulticlassOutputLayer
        )

        output_layer = output_layer_cls(list(label_vocab), loss)
        return cls(
            right_encoder,
            left_encoder,
            decoder,
            output_layer,
            config.use_shared_encoder,
            config.use_shared_embedding,
            config.vocab_size,
            config.hidden_dim,
            config.padding_idx,
            use_dense_in_decoder=config.use_dense_in_decoder,
        )
Пример #12
0
 def test_get_label_weights(self):
     """Check the weight tensor built from a vocab and a weight mapping.

     The expected result shows that a label without a configured weight
     ("bar") gets 1, and that a weight for a label missing from the vocab
     ("foobar") does not appear in the output.
     """
     label_to_index = {"foo": 0, "bar": 1}
     configured_weights = {"foo": 3.2, "foobar": 2.1}

     actual = label.get_label_weights(label_to_index, configured_weights)
     expected = np.array([3.2, 1])

     np.testing.assert_array_almost_equal(expected, actual.detach().numpy())
Пример #13
0
 def get_meta(self):
     """Return the parent's metadata, extended with label weights if any.

     When ``get_label_weights`` yields a tensor, it is moved to CPU,
     converted to a NumPy array and stored as ``meta.label_weights``;
     when it yields None the metadata is returned unchanged.
     """
     field_meta = super().get_meta()
     weights = get_label_weights(field_meta.vocab.stoi, self.label_weights)
     if weights is None:
         return field_meta
     field_meta.label_weights = weights.cpu().numpy()
     return field_meta