def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    # Although the RNN params are configurable, for DrQA we want to set
    # the following parameters for all cases.
    config.ques_rnn.dropout = config.dropout
    config.doc_rnn.dropout = config.dropout

    embedding = cls.create_embedding(config, tensorizers)
    ques_aligned_doc_attn = SequenceAlignedAttention(embedding.embedding_dim)
    ques_rnn = create_module(config.ques_rnn, input_size=embedding.embedding_dim)
    doc_rnn = create_module(config.doc_rnn, input_size=embedding.embedding_dim * 2)
    ques_self_attn = DotProductSelfAttention(ques_rnn.representation_dim)
    start_attn = MultiplicativeAttention(
        doc_rnn.representation_dim, ques_rnn.representation_dim, normalize=False
    )
    end_attn = MultiplicativeAttention(
        doc_rnn.representation_dim, ques_rnn.representation_dim, normalize=False
    )
    doc_rep_pool = SelfAttention(
        SelfAttention.Config(dropout=config.dropout),
        n_input=doc_rnn.representation_dim,
    )
    has_answer_labels = ["False", "True"]
    tensorizers["has_answer"].vocab = Vocabulary(has_answer_labels)
    has_ans_decoder = MLPDecoder(
        config=MLPDecoder.Config(),
        in_dim=doc_rnn.representation_dim,
        out_dim=len(has_answer_labels),
    )
    output_layer = create_module(
        config.output_layer, labels=has_answer_labels, is_kd=config.is_kd
    )
    return cls(
        dropout=nn.Dropout(config.dropout),
        embedding=embedding,
        ques_rnn=ques_rnn,
        doc_rnn=doc_rnn,
        ques_self_attn=ques_self_attn,
        ques_aligned_doc_attn=ques_aligned_doc_attn,
        start_attn=start_attn,
        end_attn=end_attn,
        doc_rep_pool=doc_rep_pool,
        has_ans_decoder=has_ans_decoder,
        output_layer=output_layer,
        is_kd=config.is_kd,
    )
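# Illustrative sketch (assumption): DrQA-style readers score answer start/end
# positions with a bilinear ("multiplicative") attention between document
# hidden states and a pooled question representation. This is a generic
# illustration of that idea, not the MultiplicativeAttention module itself.
import torch
import torch.nn as nn


class BilinearSpanScorer(nn.Module):
    def __init__(self, doc_dim: int, ques_dim: int) -> None:
        super().__init__()
        # Learned bilinear map between question and document spaces.
        self.project = nn.Linear(ques_dim, doc_dim, bias=False)

    def forward(self, doc_hiddens: torch.Tensor, ques_rep: torch.Tensor) -> torch.Tensor:
        # doc_hiddens: (batch, doc_len, doc_dim); ques_rep: (batch, ques_dim)
        projected = self.project(ques_rep)  # (batch, doc_dim)
        # Unnormalized score per document position (cf. normalize=False above).
        return torch.bmm(doc_hiddens, projected.unsqueeze(2)).squeeze(2)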
class Config(ConfigBase):
    representation: Union[
        PureDocAttention.Config,
        BiLSTMDocAttention.Config,
        DocNNRepresentation.Config,
    ] = BiLSTMDocAttention.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
class Config(ConfigBase):
    representation: PairRepresentation.Config = PairRepresentation.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    # TODO: will need to support different output layer for contrastive loss
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)

    # BiLSTM representation.
    padding_value = (
        float("-inf") if isinstance(config.pooling, MaxPool.Config) else 0.0
    )
    self.lstm = create_module(
        config.lstm, embed_dim=embed_dim, padding_value=padding_value
    )

    # Document attention.
    self.attention = (
        create_module(config.pooling, n_input=self.lstm.representation_dim)
        if config.pooling is not None
        else None
    )

    # Non-linear projection over attended representation.
    self.dense = None
    self.representation_dim: int = self.lstm.representation_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(
            config.mlp_decoder, in_dim=self.lstm.representation_dim
        )
        self.representation_dim = self.dense.out_dim
    log_class_usage(__class__)
class Config(BasePairwiseClassificationModel.Config):
    """
    Attributes:
        encode_relations (bool): if `false`, return the concatenation of the
            two representations; if `true`, also concatenate their pairwise
            absolute difference and pairwise elementwise product (à la
            arXiv:1705.02364). Default: `true`.
        tied_representation: whether to use the same representation, with
            tied weights, for all the input subrepresentations. Default: `true`.
    """

    class ModelInput(BasePairwiseClassificationModel.Config.ModelInput):
        tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
        tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
        labels: LabelTensorizer.Config = LabelTensorizer.Config()
        # for metric reporter
        raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
            columns=["text1", "text2"]
        )

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: Union[
        BiLSTMDocAttention.Config, DocNNRepresentation.Config
    ] = BiLSTMDocAttention.Config()
    shared_representations: bool = True
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    # TODO: will need to support different output layer for contrastive loss
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
    encode_relations: bool = True
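# Sketch (assumption) of the `encode_relations` behaviour described in the
# docstring above: concatenate the two representations together with their
# absolute difference and elementwise product, à la arXiv:1705.02364.
# The function name is illustrative, not the library's API.
import torch


def encode_relations_sketch(rep1: torch.Tensor, rep2: torch.Tensor) -> torch.Tensor:
    # rep1, rep2: (batch, dim) -> (batch, 4 * dim)
    return torch.cat([rep1, rep2, torch.abs(rep1 - rep2), rep1 * rep2], dim=-1)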
class Config(NewBertModel.Config):
    class ModelInput(BaseModel.Config.ModelInput):
        squad_input: Union[
            SquadForBERTTensorizer.Config, SquadForRoBERTaTensorizer.Config
        ] = SquadForBERTTensorizer.Config(max_seq_len=256)
        # is_impossible label
        has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
            column="has_answer"
        )

    inputs: ModelInput = ModelInput()
    encoder: TransformerSentenceEncoderBase.Config = (
        HuggingFaceBertSentenceEncoder.Config()
    )
    pos_decoder: MLPDecoder.Config = MLPDecoder.Config(out_dim=2)
    has_ans_decoder: MLPDecoder.Config = MLPDecoder.Config(out_dim=2)
    output_layer: SquadOutputLayer.Config = SquadOutputLayer.Config()
def __init__(self, config: Config, embed_dim: int) -> None: """embed_dim is the dimension of embedded_tokens """ super().__init__(config) self.dropout = nn.Dropout(config.dropout) # Document attention. self.attention = ( create_module(config.pooling, n_input=embed_dim) if config.pooling is not None else None ) # Non-linear projection over attended representation. self.dense = None if ( isinstance(config.pooling, BoundaryPool.Config) and config.pooling.boundary_type == "firstlast" ): # the dimension double because of concatenating bos and eos self.representation_dim = embed_dim * 2 else: self.representation_dim = embed_dim if config.mlp_decoder: self.dense = MLPDecoder(config.mlp_decoder, in_dim=embed_dim) self.representation_dim = self.dense.out_dim log_class_usage(__class__)
def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)

    # BiLSTM representation.
    self.lstm = create_module(config.lstm, embed_dim=embed_dim)

    # Slot attention.
    self.attention = None
    word_representation_dim = self.lstm.representation_dim
    if config.slot_attention:
        self.attention = SlotAttention(
            config.slot_attention, self.lstm.representation_dim, batch_first=True
        )
        word_representation_dim += self.lstm.representation_dim

    # Projection over attended representation.
    self.dense = None
    self.representation_dim: int = self.lstm.representation_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(
            config.mlp_decoder, in_dim=self.lstm.representation_dim
        )
        self.representation_dim = self.dense.out_dim
class Config(Model.Config):
    representation: Union[
        BiLSTMSlotAttention.Config,
        BSeqCNNRepresentation.Config,
        PassThroughRepresentation.Config,
    ] = BiLSTMSlotAttention.Config()
    output_layer: Union[
        WordTaggingOutputLayer.Config, CRFOutputLayer.Config
    ] = WordTaggingOutputLayer.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
class Config(BaseModel.Config):
    class WordTaggingInputConfig(ConfigBase):
        tokens: RoBERTaTokenLevelTensorizer.Config = (
            RoBERTaTokenLevelTensorizer.Config()
        )

    inputs: WordTaggingInputConfig = WordTaggingInputConfig()
    encoder: RoBERTaEncoderBase.Config = RoBERTaEncoderJit.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: WordTaggingOutputLayer.Config = WordTaggingOutputLayer.Config()
class Config(BaseModel.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config(
            add_bos_token=True, add_eos_token=True
        )

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: BiLSTM.Config = BiLSTM.Config(bidirectional=False)
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
    tied_weights: bool = False
    stateful: bool = False
class Config(Model.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config()
        labels: LabelTensorizer.Config = LabelTensorizer.Config()

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: Union[
        PureDocAttention.Config,
        BiLSTMDocAttention.Config,
        DocNNRepresentation.Config,
    ] = BiLSTMDocAttention.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
class Config(ConfigBase):
    class ModelInput(Model.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config()
        slots: TokenTensorizer.Config = TokenTensorizer.Config(column="slots")

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: BiLSTMSlotAttention.Config = BiLSTMSlotAttention.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: MyTaggingOutputLayer.Config = MyTaggingOutputLayer.Config()
def _create_dummy_model(self):
    return create_model(
        DocModel_Deprecated.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(
            word_feat=WordEmbedding.Config(
                embed_dim=300, save_path=self.word_embedding_path
            ),
            save_path=self.embedding_path,
        ),
        self._create_dummy_meta_data(),
    )
class Config(BaseModel.Config):
    class EncoderModelInput(BaseModel.Config.ModelInput):
        tokens: Tensorizer.Config = Tensorizer.Config()
        dense: Optional[FloatListTensorizer.Config] = None
        labels: LabelTensorizer.Config = LabelTensorizer.Config()
        # for metric reporter
        num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
            names=["tokens"], indexes=[2]
        )

    inputs: EncoderModelInput = EncoderModelInput()
    encoder: RepresentationBase.Config
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
class Config(BaseModel.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: Optional[TokenTensorizer.Config] = TokenTensorizer.Config(
            add_bos_token=True, add_eos_token=True
        )

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: Union[BiLSTM.Config, CNN.Config] = BiLSTM.Config(
        bidirectional=False
    )
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
    tied_weights: bool = False
    stateful: bool = False
    caffe2_format: ExporterType = ExporterType.PREDICTOR
class Config(BaseModel.Config):
    class InputConfig(ConfigBase):
        tokens: BERTTensorizer.Config = BERTTensorizer.Config(max_seq_len=128)

    inputs: InputConfig = InputConfig()
    encoder: TransformerSentenceEncoderBase.Config = (
        TransformerSentenceEncoder.Config()
    )
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
    # masking
    mask_prob: float = 0.15
    mask_bos: bool = False
    masking_strategy: MaskingStrategy = MaskingStrategy.RANDOM
    # tie_weights determines whether the input embedding weights are used
    # in the output vocabulary projection as well
    tie_weights: bool = True
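# Sketch (assumption) of the RANDOM masking strategy with mask_prob = 0.15:
# each position is independently replaced by the mask token with that
# probability. Identifiers here are illustrative, not the library's API.
import torch


def random_mask_sketch(token_ids: torch.Tensor, mask_idx: int, mask_prob: float = 0.15):
    # token_ids: (batch, seq_len) of vocabulary indices.
    mask = torch.rand(token_ids.shape) < mask_prob
    masked = token_ids.clone()
    masked[mask] = mask_idx
    # Return the corrupted inputs and the mask marking prediction targets.
    return masked, mask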
class Config(BasePairwiseModel.Config):
    class EncoderPairwiseModelInput(ModelInputBase):
        tokens1: Tensorizer.Config = Tensorizer.Config()
        tokens2: Tensorizer.Config = Tensorizer.Config()
        labels: LabelTensorizer.Config = LabelTensorizer.Config()
        # for metric reporter
        num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
            names=["tokens1", "tokens2"], indexes=[2, 2]
        )

    inputs: EncoderPairwiseModelInput = EncoderPairwiseModelInput()
    encoder: RepresentationBase.Config
    # Decoder is a fully connected layer that expects concatenated encodings.
    # So, if decoder is provided we will concatenate the encodings from the
    # encoders and then pass to the decoder.
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    shared_encoder: bool = True
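# Sketch (assumption) of the behaviour described in the decoder comment above:
# when a decoder is configured, the two encoder outputs are concatenated and
# passed through it. Names and shapes are illustrative.
import torch
import torch.nn as nn


def decode_pair_sketch(encoding1: torch.Tensor, encoding2: torch.Tensor, decoder: nn.Module) -> torch.Tensor:
    # encoding1, encoding2: (batch, encoder_dim) each.
    joined = torch.cat([encoding1, encoding2], dim=-1)  # (batch, 2 * encoder_dim)
    return decoder(joined)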
class Config(BaseModel.Config):
    class BertModelInput(BaseModel.Config.ModelInput):
        tokens: BERTTensorizer.Config = BERTTensorizer.Config(max_seq_len=128)
        dense: Optional[FloatListTensorizer.Config] = None
        labels: LabelTensorizer.Config = LabelTensorizer.Config()
        # for metric reporter
        num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
            names=["tokens"], indexes=[2]
        )

    inputs: BertModelInput = BertModelInput()
    encoder: TransformerSentenceEncoderBase.Config = (
        HuggingFaceBertSentenceEncoder.Config()
    )
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
class Config(Model.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config()
        labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config()

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: Union[
        BiLSTMSlotAttention.Config,  # TODO: make default when sorting solved
        BSeqCNNRepresentation.Config,
        PassThroughRepresentation.Config,
    ] = PassThroughRepresentation.Config()
    output_layer: Union[
        WordTaggingOutputLayer.Config, CRFOutputLayer.Config
    ] = WordTaggingOutputLayer.Config()
    decoder: MLPDecoder.Config = MLPDecoder.Config()
class Config(BasePairwiseModel.Config):
    class ModelInput(ModelInputBase):
        tokens1: BERTTensorizerBase.Config = BERTTensorizer.Config(
            columns=["text1"], max_seq_len=128
        )
        tokens2: BERTTensorizerBase.Config = BERTTensorizer.Config(
            columns=["text2"], max_seq_len=128
        )
        labels: LabelTensorizer.Config = LabelTensorizer.Config()
        # for metric reporter
        num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(
            names=["tokens1", "tokens2"], indexes=[2, 2]
        )

    inputs: ModelInput = ModelInput()
    encoder: TransformerSentenceEncoderBase.Config = (
        HuggingFaceBertSentenceEncoder.Config()
    )
    # Decoder is a fully connected layer that expects concatenated encodings.
    # So, if decoder is provided we will concatenate the encodings from the
    # encoders and then pass to the decoder.
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    shared_encoder: bool = True
class Config(ConfigBase): """ Configuration class for `LMLSTM`. Attributes: representation (BiLSTM.Config): Config for the BiLSTM representation. decoder (MLPDecoder.Config): Config for the MLP Decoder. output_layer (LMOutputLayer.Config): Config for the language model output layer. tied_weights (bool): If `True` use a common weights matrix between the word embeddings and the decoder. Defaults to `False`. stateful (bool): If `True`, do not reset hidden state of LSTM across batches. """ representation: BiLSTM.Config = BiLSTM.Config(bidirectional=False) decoder: MLPDecoder.Config = MLPDecoder.Config() output_layer: LMOutputLayer.Config = LMOutputLayer.Config() tied_weights: bool = False stateful: bool = False
def __init__(self, in_dim, out_dim, temp):
    super().__init__()
    self.mlp = MLPDecoder.from_config(
        MLPDecoder.Config(bias=False, temperature=temp), in_dim, out_dim
    )
def test_load_save(self):
    text_field_meta = FieldMeta()
    text_field_meta.vocab = VocabStub()
    text_field_meta.vocab_size = 4
    text_field_meta.unk_token_idx = 1
    text_field_meta.pad_token_idx = 0
    text_field_meta.pretrained_embeds_weight = None
    label_meta = FieldMeta()
    label_meta.vocab = VocabStub()
    label_meta.vocab_size = 3
    metadata = CommonMetadata()
    metadata.features = {DatasetFieldName.TEXT_FIELD: text_field_meta}
    metadata.target = label_meta

    saved_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(save_path=self.embedding_path),
        metadata,
    )
    saved_model.save_modules()

    loaded_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                load_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(load_path=self.decoder_path),
        ),
        FeatureConfig(load_path=self.embedding_path),
        metadata,
    )

    random_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(),
            decoder=MLPDecoder.Config(),
        ),
        FeatureConfig(),
        metadata,
    )

    # Loaded and saved modules should be equal. Neither should be equal to
    # a randomly initialised model.
    for p1, p2, p3 in itertools.zip_longest(
        saved_model.embedding.parameters(),
        loaded_model.embedding.parameters(),
        random_model.embedding.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))

    for p1, p2, p3 in itertools.zip_longest(
        saved_model.representation.parameters(),
        loaded_model.representation.parameters(),
        random_model.representation.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))

    for p1, p2, p3 in itertools.zip_longest(
        saved_model.decoder.parameters(),
        loaded_model.decoder.parameters(),
        random_model.decoder.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))
class Config(ConfigBase):
    representation: SeqRepresentation.Config = SeqRepresentation.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
    decoder: MLPDecoder.Config = MLPDecoder.Config()
class Config(BaseModel.Config):
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: Union[
        ClassificationOutputLayer.Config,
        PairwiseCosineDistanceOutputLayer.Config,
    ] = ClassificationOutputLayer.Config()
    encode_relations: bool = True
class Config(BaseModel.Config):
    decoder: MLPDecoder.Config = MLPDecoder.Config()
    output_layer: ClassificationOutputLayer.Config = (
        ClassificationOutputLayer.Config()
    )
    encode_relations: bool = True