def _get_model(self) -> nn.Module:
    word_embedder = TrainableWordEmbedder(
        embedding_type=self.hparams.get("emb_type"),
        datasets_manager=self.data_manager,
    )
    elmo_embedder = BowElmoEmbedder(
        datasets_manager=self.data_manager, layer_aggregation="sum"
    )
    embedder = ConcatEmbedders([word_embedder, elmo_embedder])
    lstm2seqencoder = Lstm2SeqEncoder(
        embedder=embedder,
        hidden_dim=self.hparams.get("hidden_dim"),
        bidirectional=self.hparams.get("bidirectional"),
        combine_strategy=self.hparams.get("combine_strategy"),
        rnn_bias=True,
        dropout_value=self.hparams.get("lstm2seq_dropout", 0.0),
        add_projection_layer=False,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * self.hparams.get("hidden_dim")
        if self.hparams.get("bidirectional")
        and self.hparams.get("combine_strategy") == "concat"
        else self.hparams.get("hidden_dim"),
        datasets_manager=self.data_manager,
    )
    return model
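# The encoding_dim ternary above reappears in several of the builders below.
# A minimal refactoring sketch (the helper name `crf_encoding_dim` is
# hypothetical, not part of sciwing): a bidirectional LSTM whose forward and
# backward states are concatenated emits vectors of twice the hidden size,
# otherwise the hidden size is unchanged.
def crf_encoding_dim(hidden_dim: int, bidirectional: bool, combine_strategy: str) -> int:
    if bidirectional and combine_strategy == "concat":
        return 2 * hidden_dim
    return hidden_dim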
def build_model(self):
    embedder = TrainableWordEmbedder(
        embedding_type=self.hparams.get("emb_type"),
        datasets_manager=self.data_manager,
        device=self.hparams.get("device"),
    )
    embedder = ConcatEmbedders([embedder])
    lstm2seqencoder = Lstm2SeqEncoder(
        embedder=embedder,
        dropout_value=self.hparams.get("dropout"),
        hidden_dim=self.hparams.get("hidden_dim"),
        bidirectional=self.hparams.get("bidirectional"),
        combine_strategy=self.hparams.get("combine_strategy"),
        rnn_bias=True,
        device=self.hparams.get("device"),
        num_layers=self.hparams.get("num_layers"),
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * self.hparams.get("hidden_dim")
        if self.hparams.get("bidirectional")
        and self.hparams.get("combine_strategy") == "concat"
        else self.hparams.get("hidden_dim"),
        device=self.hparams.get("device"),
        tagging_type="IOB1",
        datasets_manager=self.data_manager,
    )
    return model
def build_science_ie_model(dirname: str):
    exp_dirpath = pathlib.Path(dirname)
    data_dir = pathlib.Path(DATA_DIR)
    train_filename = data_dir.joinpath("train_science_ie_conll.txt")
    dev_filename = data_dir.joinpath("dev_science_ie_conll.txt")
    # Note: the dev split is reused as the test split here.
    test_filename = data_dir.joinpath("dev_science_ie_conll.txt")
    data_manager = CoNLLDatasetManager(
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=test_filename,
        column_names=["TASK", "PROCESS", "MATERIAL"],
    )
    word_embedder = TrainableWordEmbedder(
        embedding_type="glove_6B_100", datasets_manager=data_manager
    )
    char_embedder = CharEmbedder(
        char_embedding_dimension=20,
        hidden_dimension=25,
        datasets_manager=data_manager,
    )
    embedder = ConcatEmbedders([word_embedder, char_embedder])
    lstm2seqencoder = Lstm2SeqEncoder(
        embedder=embedder,
        hidden_dim=350,
        bidirectional=True,
        combine_strategy="concat",
        rnn_bias=True,
        device=torch.device("cpu"),
        num_layers=2,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=700,  # 2 * 350: bidirectional states are concatenated
        datasets_manager=data_manager,
        namespace_to_constraints=None,
        tagging_type="BIOUL",
    )
    infer = SequenceLabellingInference(
        model=model,
        model_filepath=str(exp_dirpath.joinpath("checkpoints", "best_model.pt")),
        datasets_manager=data_manager,
    )
    return infer
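# Hypothetical driver for the builder above; the experiment directory name is
# illustrative and must already contain checkpoints/best_model.pt from a
# previous training run.
if __name__ == "__main__":
    infer = build_science_ie_model(dirname="experiments/science_ie_bilstm_crf")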
def build_model(self):
    word_embedder = TrainableWordEmbedder(
        embedding_type=self.hparams.get("emb_type"),
        datasets_manager=self.data_manager,
    )
    char_embedder = CharEmbedder(
        char_embedding_dimension=self.hparams.get("char_emb_dim"),
        hidden_dimension=self.hparams.get("char_encoder_hidden_dim"),
        datasets_manager=self.data_manager,
    )
    elmo_embedder = BowElmoEmbedder(datasets_manager=self.data_manager)
    embedder = ConcatEmbedders([word_embedder, char_embedder, elmo_embedder])
    lstm2seqencoder = Lstm2SeqEncoder(
        embedder=embedder,
        hidden_dim=self.hparams.get("hidden_dim"),
        bidirectional=self.hparams.get("bidirectional"),
        combine_strategy=self.hparams.get("combine_strategy"),
        rnn_bias=True,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * self.hparams.get("hidden_dim")
        if self.hparams.get("bidirectional")
        and self.hparams.get("combine_strategy") == "concat"
        else self.hparams.get("hidden_dim"),
        datasets_manager=self.data_manager,
    )
    self.printer.good("Finished Loading the Model")
    return model
args = parser.parse_args()
msg_printer = wasabi.Printer()
DATA_DIR = pathlib.Path(DATA_DIR)
train_filename = DATA_DIR.joinpath("train_science_ie_conll.txt")
dev_filename = DATA_DIR.joinpath("dev_science_ie_conll.txt")
data_manager = CoNLLDatasetManager(
    train_filename=train_filename,
    dev_filename=dev_filename,
    test_filename=dev_filename,  # the dev split doubles as the test split
    column_names=["TASK", "PROCESS", "MATERIAL"],
)
embedder = TrainableWordEmbedder(
    embedding_type=args.emb_type,
    datasets_manager=data_manager,
    device=args.device,
)
char_embedder = CharEmbedder(
    char_embedding_dimension=args.char_emb_dim,
    hidden_dimension=args.char_encoder_hidden_dim,
    datasets_manager=data_manager,
    device=args.device,
)
embedder = ConcatEmbedders([embedder, char_embedder])
lstm2seqencoder = Lstm2SeqEncoder(
    embedder=embedder,
    dropout_value=args.dropout,
    hidden_dim=args.hidden_dim,
    bidirectional=args.bidirectional,
def setup_embedder(setup_parscit_dataset_manager, request):
    data_manager = setup_parscit_dataset_manager
    embedding_type = request.param
    embedder = TrainableWordEmbedder(
        datasets_manager=data_manager, embedding_type=embedding_type
    )
    return embedder, data_manager
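# A minimal sketch of how this fixture might be consumed, assuming it is
# registered with @pytest.fixture and receives its embedding type via pytest's
# indirect parametrization (the test name and the "glove_6B_50" value are
# illustrative; "glove_6B_100" appears in the snippets above).
import pytest

@pytest.mark.parametrize(
    "setup_embedder", ["glove_6B_50", "glove_6B_100"], indirect=True
)
def test_embedder_is_built(setup_embedder):
    embedder, data_manager = setup_embedder
    # The fixture hands back both the embedder and its dataset manager, so a
    # test can draw batches from one and embed them with the other.
    assert embedder is not None
    assert data_manager is not None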
lstm2seqencoder = Lstm2SeqEncoder(
    embedder=embedder,
    dropout_value=args.dropout,
    hidden_dim=args.hidden_dim,
    bidirectional=args.bidirectional,
    combine_strategy=args.combine_strategy,
    rnn_bias=True,
    device=args.device,
    num_layers=args.num_layers,
    add_projection_layer=args.add_projection_layer,
)
attn = DotProductAttention()
context_embedder = TrainableWordEmbedder(
    embedding_type="glove_6B_300", datasets_manager=data_manager, device=args.device
)
lstm2seq_attn_encoder = Lstm2SeqAttnContextEncoder(
    rnn2seqencoder=lstm2seqencoder,
    attn_module=attn,
    context_embedder=context_embedder,
    device=args.device,
)
model = RnnSeqCrfTagger(
    rnn2seqencoder=lstm2seq_attn_encoder,
    encoding_dim=600,
    device=args.device,
    tagging_type="BIOUL",
    datasets_manager=data_manager,
    # Keyword spelling ("trainsitions") is preserved from the source to match
    # the library's parameter name; renaming it here would raise a TypeError.
    include_start_end_trainsitions=False,
)
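# For reference, a generic dot-product attention of the kind DotProductAttention
# names: a minimal standalone sketch, not sciwing's implementation. Given a
# query vector per example and a bank of context vectors, it softmax-normalizes
# the query-context dot products and returns the weighted sum of the contexts.
import torch
import torch.nn.functional as F

def dot_product_attention(query: torch.Tensor, context: torch.Tensor) -> torch.Tensor:
    """query: (batch, dim); context: (batch, num_ctx, dim) -> (batch, dim)."""
    # Raw dot products between the query and each context vector.
    scores = torch.bmm(context, query.unsqueeze(-1)).squeeze(-1)  # (batch, num_ctx)
    weights = F.softmax(scores, dim=-1)                           # attention distribution
    # Weighted sum of context vectors, one attended vector per example.
    return torch.bmm(weights.unsqueeze(1), context).squeeze(1)   # (batch, dim)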