    def __init__(self, nlp_toolkit: NLPToolkit):
        self.nlp_toolkit = nlp_toolkit

        # Load the archived model's configuration and vocabulary.
        archive_loader = ArchiveLoader(_MODEL_ARCHIVE)
        config = archive_loader.get_config()
        model_params = config.get('model')
        vocabulary = archive_loader.get_vocabulary()

        # Rebuild the BiDAF model from its saved parameters, then restore
        # the trained weights.
        self.bidaf_model = BidirectionalAttentionFlow.from_params(
            vocabulary, model_params)
        model_state = archive_loader.get_model_state()
        self.bidaf_model.load_state_dict(model_state)

        self.vocab_reader = archive_loader.get_vocab_reader()
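
This constructor would typically run once at startup, after which the restored
model serves predictions. A minimal usage sketch follows; the class name
QAPredictor and the bare NLPToolkit() construction are assumptions for
illustration, not part of the source snippet.

    # Hypothetical usage of the constructor above (names are assumed).
    toolkit = NLPToolkit()
    predictor = QAPredictor(toolkit)   # runs the __init__ shown above
    predictor.bidaf_model.eval()       # PyTorch: disable dropout for inference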
Example #2
    def setUp(self):
        super(BidirectionalAttentionFlowTest, self).setUp()

        # Point the GloVe constant at a tiny sample so the test stays fast.
        constants.GLOVE_PATH = 'tests/fixtures/glove.6B.100d.sample.txt.gz'

        # Read a small SQuAD fixture, indexing tokens both by word id and
        # by their characters (the character CNN below needs the latter).
        reader_params = Params({
            'token_indexers': {
                'tokens': {
                    'type': 'single_id'
                },
                'token_characters': {
                    'type': 'characters'
                }
            }
        })
        dataset = SquadReader.from_params(reader_params).read(
            'tests/fixtures/data/squad.json')
        # Build a vocabulary from the fixture and index every instance with it.
        vocab = Vocabulary.from_dataset(dataset)
        self.vocab = vocab
        dataset.index_instances(vocab)
        self.dataset = dataset
        self.token_indexers = {
            'tokens': SingleIdTokenIndexer(),
            'token_characters': TokenCharactersIndexer()
        }

        # A model built entirely from the default configuration.
        self.model = BidirectionalAttentionFlow.from_params(
            self.vocab, Params({}))

        # A deliberately tiny configuration so the test runs quickly; the
        # input_size values must agree with one another (see the dimension
        # note after this example).
        small_params = Params({
            'text_field_embedder': {
                'tokens': {
                    'type': 'embedding',
                    'pretrained_file': constants.GLOVE_PATH,
                    'trainable': False,
                    'projection_dim': 4
                },
                'token_characters': {
                    'type': 'character_encoding',
                    'embedding': {
                        'embedding_dim': 8
                    },
                    'encoder': {
                        'type': 'cnn',
                        'embedding_dim': 8,
                        'num_filters': 4,
                        'ngram_filter_sizes': [5]
                    }
                }
            },
            'phrase_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 8,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'similarity_function': {
                'type': 'linear',
                'combination': 'x,y,x*y',
                'tensor_1_dim': 8,
                'tensor_2_dim': 8
            },
            'modeling_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 32,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'span_end_encoder': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 56,
                'hidden_size': 4,
                'num_layers': 1,
            },
        })
        self.small_model = BidirectionalAttentionFlow.from_params(
            self.vocab, small_params)
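
The input_size values in small_params are not arbitrary: they follow from how
BiDAF concatenates its intermediate representations. A sketch of the
bookkeeping, assuming the standard BiDAF wiring (illustrative arithmetic, not
code from the test suite):

    # Dimension bookkeeping for small_params above.
    glove_projection = 4                # token embedder 'projection_dim'
    char_cnn_output = 4                 # character CNN 'num_filters'
    phrase_input = glove_projection + char_cnn_output  # 8 = phrase_layer input_size
    phrase_output = 2 * 4               # bidirectional, hidden_size 4 -> 8
    # The 'linear' similarity compares passage and question encodings:
    similarity_dims = phrase_output     # 8 = tensor_1_dim = tensor_2_dim
    # BiDAF concatenates [h, u~, h*u~, h*h~] over the passage:
    modeling_input = 4 * phrase_output  # 32 = modeling_layer input_size
    modeling_output = 2 * 4             # 8
    # Span-end input: the merged passage plus three modeled-passage terms:
    span_end_input = modeling_input + 3 * modeling_output  # 56 = span_end_encoder input_size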