Пример #1
0
 class Config(ConfigBase):
     """Configuration bundling a representation, a decoder and an output layer.

     ``representation`` accepts any of the three listed representation
     configs and defaults to ``BiLSTMDocAttention.Config()``.
     """

     representation: Union[
         PureDocAttention.Config,
         BiLSTMDocAttention.Config,
         DocNNRepresentation.Config,
     ] = BiLSTMDocAttention.Config()
     decoder: MLPDecoder.Config = MLPDecoder.Config()
     output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
Пример #2
0
    class Config(BasePairwiseClassificationModel.Config):
        """
        Attributes:
            encode_relations (bool): if `false`, return the concatenation of
                the two representations; if `true`, also concatenate their
                pairwise absolute difference and pairwise elementwise product
                (à la arXiv:1705.02364). Default: `true`.
            shared_representations (bool): whether to use the same
                representation, with tied weights, for all the input
                subrepresentations. Default: `true`.
        """

        class ModelInput(BasePairwiseClassificationModel.Config.ModelInput):
            tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
            tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
            labels: LabelTensorizer.Config = LabelTensorizer.Config()
            # Joined raw text of both columns, kept for the metric reporter.
            raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
                columns=["text1", "text2"]
            )

        inputs: ModelInput = ModelInput()
        embedding: WordEmbedding.Config = WordEmbedding.Config()
        representation: Union[
            BiLSTMDocAttention.Config,
            DocNNRepresentation.Config,
        ] = BiLSTMDocAttention.Config()
        shared_representations: bool = True
        decoder: MLPDecoder.Config = MLPDecoder.Config()
        # TODO: a contrastive loss will need a different output layer.
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
        encode_relations: bool = True
Пример #3
0
 class Config(ConfigBase):
     """Configuration with a pair representation, a decoder and an output layer."""

     representation: PairRepresentation.Config = PairRepresentation.Config()
     decoder: MLPDecoder.Config = MLPDecoder.Config()
     # TODO: a contrastive loss will need a different output layer.
     output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
Пример #4
0
 def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
     """Assemble the model from ``config`` and the given tensorizers.

     Args:
         config: model configuration; ``config.representation`` and
             ``config.output_layer.loss`` are read directly here.
         tensorizers: tensorizers keyed by name; the ``"labels"`` entry
             provides the label collection.

     Returns:
         ``cls`` built from the embedding, representation, decoder and
         output layer created below.
     """
     label_set = tensorizers["labels"].labels
     embedding_module = cls.create_embedding(config, tensorizers)
     representation_module = create_module(
         config.representation,
         embed_dim=embedding_module.embedding_dim,
     )
     decoder_module = cls.create_decoder(
         config,
         representation_module.representation_dim,
         len(label_set),
     )
     # TODO: change ClassificationOutputLayer.from_config after migrating to
     # the new design.
     label_list = list(label_set)
     loss_fn = create_loss(config.output_layer.loss)
     classification_layer = ClassificationOutputLayer(label_list, loss_fn)
     return cls(
         embedding_module,
         representation_module,
         decoder_module,
         classification_layer,
     )
Пример #5
0
    class Config(Model.Config):
        """Model configuration: inputs, embedding, representation, decoder, output layer.

        ``representation`` accepts any of the three listed representation
        configs and defaults to ``BiLSTMDocAttention.Config()``.
        """

        class ModelInput(Model.Config.ModelInput):
            """Tensorizer configs for the ``tokens`` and ``labels`` inputs."""

            tokens: TokenTensorizer.Config = TokenTensorizer.Config()
            labels: LabelTensorizer.Config = LabelTensorizer.Config()

        inputs: ModelInput = ModelInput()
        embedding: WordEmbedding.Config = WordEmbedding.Config()
        representation: Union[
            PureDocAttention.Config,
            BiLSTMDocAttention.Config,
            DocNNRepresentation.Config,
        ] = BiLSTMDocAttention.Config()
        decoder: MLPDecoder.Config = MLPDecoder.Config()
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
    class Config(BaseModel.Config):
        """Model configuration: RoBERTa token input, optional dense inputs,
        an encoder, a two-tower MLP decoder and a classification output layer.
        """

        class InputConfig(ConfigBase):
            """Tensorizer configs for the model inputs."""

            tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            # Both dense inputs default to None, so the annotation has to admit
            # None explicitly (no implicit Optional).
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()

        inputs: InputConfig = InputConfig()
        encoder: RoBERTaEncoderBase.Config = RoBERTaEncoder.Config()
        decoder: MLPDecoderTwoTower.Config = MLPDecoderTwoTower.Config()
        output_layer: ClassificationOutputLayer.Config = (
            ClassificationOutputLayer.Config())
Пример #7
0
 def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
     """Assemble the model from ``config`` and the given tensorizers.

     Args:
         config: model configuration; ``config.representation`` and
             ``config.decoder`` are passed to ``create_module``.
         tensorizers: tensorizers keyed by name; the ``"labels"`` entry
             provides the label collection.

     Returns:
         ``cls`` built from the embedding, representation, decoder and a
         ``ClassificationOutputLayer`` using ``CrossEntropyLoss(None)``.
     """
     label_set = tensorizers["labels"].labels
     embedding_module = cls.create_embedding(config, tensorizers)
     representation_module = create_module(
         config.representation,
         embed_dim=embedding_module.embedding_dim,
     )
     decoder_kwargs = {
         "in_dim": representation_module.representation_dim,
         "out_dim": len(label_set),
     }
     decoder_module = create_module(config.decoder, **decoder_kwargs)
     # The loss is fixed here rather than taken from config.output_layer.
     classification_layer = ClassificationOutputLayer(
         label_set, CrossEntropyLoss(None)
     )
     return cls(
         embedding_module,
         representation_module,
         decoder_module,
         classification_layer,
     )
Пример #8
0
    class Config(BaseModel.Config):
        """Model configuration: inputs, an encoder, an MLP decoder and an output layer.

        ``encoder`` has no default, so it has to be supplied explicitly.
        """

        class EncoderModelInput(BaseModel.Config.ModelInput):
            """Tensorizer configs for the model inputs; ``dense`` is optional."""

            tokens: Tensorizer.Config = Tensorizer.Config()
            dense: Optional[FloatListTensorizer.Config] = None
            labels: LabelTensorizer.Config = LabelTensorizer.Config()
            # Token counts, kept for the metric reporter.
            num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
                names=["tokens"],
                indexes=[2],
            )

        inputs: EncoderModelInput = EncoderModelInput()
        encoder: RepresentationBase.Config
        decoder: MLPDecoder.Config = MLPDecoder.Config()
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
    class Config(BaseModel.Config):
        """Model configuration: BERT inputs, a sentence encoder, an MLP decoder
        and a classification output layer.

        ``encoder`` defaults to ``HuggingFaceBertSentenceEncoder.Config()``.
        """

        class BertModelInput(BaseModel.Config.ModelInput):
            """Tensorizer configs for the model inputs; ``dense`` is optional."""

            tokens: BERTTensorizer.Config = BERTTensorizer.Config(max_seq_len=128)
            dense: Optional[FloatListTensorizer.Config] = None
            labels: LabelTensorizer.Config = LabelTensorizer.Config()
            # Token counts, kept for the metric reporter.
            num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
                names=["tokens"],
                indexes=[2],
            )

        inputs: BertModelInput = BertModelInput()
        encoder: TransformerSentenceEncoderBase.Config = HuggingFaceBertSentenceEncoder.Config()
        decoder: MLPDecoder.Config = MLPDecoder.Config()
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
    class Config(BaseModel.Config):
        """Model configuration with separate right/left inputs and encoders,
        a two-tower MLP decoder and a classification output layer.
        """

        class InputConfig(ConfigBase):
            """Tensorizer configs for the right and left inputs and the labels.

            Both dense inputs are optional and default to ``None``.
            """

            right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()

        inputs: InputConfig = InputConfig()
        right_encoder: RoBERTaEncoderBase.Config = RoBERTaEncoder.Config()
        left_encoder: RoBERTaEncoderBase.Config = RoBERTaEncoder.Config()
        decoder: MLPDecoderTwoTower.Config = MLPDecoderTwoTower.Config()
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
        # NOTE(review): presumably makes both sides use one encoder; the
        # consuming code is not visible here, so confirm before relying on it.
        use_shared_encoder: Optional[bool] = False
Пример #11
0
    def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
        """Assemble the model from ``config`` and the given tensorizers.

        Args:
            config: model configuration; ``config.embedding.embed_dim``,
                ``config.representation`` and ``config.decoder`` are read.
            tensorizers: tensorizers keyed by name; ``"tokens"`` provides
                the vocabulary and ``"labels"`` the label collection.

        Returns:
            ``cls`` built from a ``WordEmbedding``, the representation, the
            decoder and a ``ClassificationOutputLayer`` using
            ``CrossEntropyLoss(None)``.
        """
        token_vocab = tensorizers["tokens"].vocab
        label_set = tensorizers["labels"].labels

        # Positional arguments are passed exactly as before; the index of
        # UNK in the vocabulary is the fifth one.
        word_embedding = WordEmbedding(
            len(token_vocab),
            config.embedding.embed_dim,
            None,
            None,
            token_vocab.idx[UNK],
            [],
        )
        representation_module = create_module(
            config.representation,
            embed_dim=word_embedding.embedding_dim,
        )
        decoder_kwargs = {
            "in_dim": representation_module.representation_dim,
            "out_dim": len(label_set),
        }
        decoder_module = create_module(config.decoder, **decoder_kwargs)
        classification_layer = ClassificationOutputLayer(
            label_set, CrossEntropyLoss(None)
        )
        return cls(
            word_embedding,
            representation_module,
            decoder_module,
            classification_layer,
        )
    class Config(BaseModel.Config):
        """Model configuration with separate right/left inputs and encoders,
        a two-tower MLP decoder, a classification output layer and a few
        scalar options.
        """

        class InputConfig(ConfigBase):
            """Tensorizer configs for the right and left inputs and the labels.

            Both dense inputs are optional and default to ``None``.
            """

            right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()

        inputs: InputConfig = InputConfig()
        right_encoder: RoBERTaEncoderBase.Config = RoBERTaEncoder.Config()
        left_encoder: RoBERTaEncoderBase.Config = RoBERTaEncoder.Config()
        decoder: MLPDecoderTwoTower.Config = MLPDecoderTwoTower.Config()
        output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
        # Sharing switches; both are off by default.
        use_shared_encoder: Optional[bool] = False
        use_shared_embedding: Optional[bool] = False
        # NOTE(review): presumably the dimensions of the shared embedding;
        # the consuming code is not visible here, so confirm.
        vocab_size: Optional[int] = 250002
        hidden_dim: Optional[int] = 768
        padding_idx: Optional[int] = 1
Пример #13
0
 class Config(ConfigBase):
     """Configuration with a sequence representation, an output layer and a decoder."""

     representation: SeqRepresentation.Config = SeqRepresentation.Config()
     output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
     decoder: MLPDecoder.Config = MLPDecoder.Config()
Пример #14
0
 class Config(BaseModel.Config):
     """Configuration with a decoder, an output layer and the
     ``encode_relations`` flag (on by default).
     """

     decoder: MLPDecoder.Config = MLPDecoder.Config()
     output_layer: ClassificationOutputLayer.Config = ClassificationOutputLayer.Config()
     encode_relations: bool = True
 class Config(BaseModel.Config):
     """Configuration with a decoder, an output layer and the
     ``encode_relations`` flag (on by default).

     ``output_layer`` accepts either a classification or a pairwise cosine
     distance output layer config; the classification one is the default.
     """

     decoder: MLPDecoder.Config = MLPDecoder.Config()
     output_layer: Union[
         ClassificationOutputLayer.Config,
         PairwiseCosineDistanceOutputLayer.Config,
     ] = ClassificationOutputLayer.Config()
     encode_relations: bool = True