def __init__(self, nlp_toolkit: NLPToolkit):
    """Load the pretrained BiDAF model and its vocabulary from the model archive.

    Reads ``_MODEL_ARCHIVE`` via an ``ArchiveLoader``, instantiates a
    ``BidirectionalAttentionFlow`` from the archived config's ``'model'``
    section, restores its trained weights, and keeps the archive's
    vocabulary reader for later tokenization/indexing.
    """
    self.nlp_toolkit = nlp_toolkit
    loader = ArchiveLoader(_MODEL_ARCHIVE)
    # Pull the model sub-config out of the archived configuration.
    bidaf_params = loader.get_config().get('model')
    archive_vocab = loader.get_vocabulary()
    self.bidaf_model = BidirectionalAttentionFlow.from_params(
        archive_vocab, bidaf_params)
    # Restore the trained weights on top of the freshly built model.
    self.bidaf_model.load_state_dict(loader.get_model_state())
    self.vocab_reader = loader.get_vocab_reader()
def setUp(self):
    """Build the shared test fixtures: a tiny SQuAD dataset, its vocabulary,
    a default-config BiDAF model, and a deliberately small BiDAF variant.
    """
    super(BidirectionalAttentionFlowTest, self).setUp()
    # Point the embedder at a tiny sample GloVe file so tests stay fast.
    constants.GLOVE_PATH = 'tests/fixtures/glove.6B.100d.sample.txt.gz'

    reader_config = Params({
        'token_indexers': {
            'tokens': {'type': 'single_id'},
            'token_characters': {'type': 'characters'},
        },
    })
    squad_data = SquadReader.from_params(reader_config).read(
        'tests/fixtures/data/squad.json')
    self.vocab = Vocabulary.from_dataset(squad_data)
    squad_data.index_instances(self.vocab)
    self.dataset = squad_data

    self.token_indexers = {
        'tokens': SingleIdTokenIndexer(),
        'token_characters': TokenCharactersIndexer(),
    }

    # Full-size model built from all-default parameters.
    self.model = BidirectionalAttentionFlow.from_params(
        self.vocab, Params({}))

    # A much smaller configuration for quick forward/backward checks.
    small_config = Params({
        'text_field_embedder': {
            'tokens': {
                'type': 'embedding',
                'pretrained_file': constants.GLOVE_PATH,
                'trainable': False,
                'projection_dim': 4,
            },
            'token_characters': {
                'type': 'character_encoding',
                'embedding': {'embedding_dim': 8},
                'encoder': {
                    'type': 'cnn',
                    'embedding_dim': 8,
                    'num_filters': 4,
                    'ngram_filter_sizes': [5],
                },
            },
        },
        'phrase_layer': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 8,
            'hidden_size': 4,
            'num_layers': 1,
        },
        'similarity_function': {
            'type': 'linear',
            'combination': 'x,y,x*y',
            'tensor_1_dim': 8,
            'tensor_2_dim': 8,
        },
        'modeling_layer': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 32,
            'hidden_size': 4,
            'num_layers': 1,
        },
        'span_end_encoder': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 56,
            'hidden_size': 4,
            'num_layers': 1,
        },
    })
    self.small_model = BidirectionalAttentionFlow.from_params(
        self.vocab, small_config)