Example #1
0
    def from_config(cls, model_config, feature_config, metadata: CommonMetadata):
        """Build an RNNG parser from its model config, feature config, and metadata.

        Chooses the phrase-composition module from ``model_config.compositional_type``
        (SUM -> CompositionalSummationNN, BLSTM -> CompositionalNN), then constructs
        the parser with action/vocab indices taken from *metadata* and an embedding
        built from *feature_config*.

        Raises:
            ValueError: if the compositional type flag is not recognized.
        """
        lstm_dim = model_config.lstm.lstm_dim
        comp_type = model_config.compositional_type

        if comp_type == RNNGParser.Config.CompositionalType.SUM:
            p_compositional = CompositionalSummationNN(lstm_dim=lstm_dim)
        elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
            p_compositional = CompositionalNN(lstm_dim=lstm_dim)
        else:
            raise ValueError(
                "Cannot understand compositional flag {}".format(comp_type)
            )

        return cls(
            ablation=model_config.ablation,
            constraints=model_config.constraints,
            lstm_num_layers=model_config.lstm.num_layers,
            lstm_dim=lstm_dim,
            max_open_NT=model_config.max_open_NT,
            dropout=model_config.dropout,
            actions_vocab=metadata.actions_vocab,
            shift_idx=metadata.shift_idx,
            reduce_idx=metadata.reduce_idx,
            ignore_subNTs_roots=metadata.ignore_subNTs_roots,
            valid_NT_idxs=metadata.valid_NT_idxs,
            valid_IN_idxs=metadata.valid_IN_idxs,
            valid_SL_idxs=metadata.valid_SL_idxs,
            embedding=Model.create_embedding(feature_config, metadata=metadata),
            p_compositional=p_compositional,
        )
Example #2
0
    def test_CompositionFunction(self):
        """Both composition modules must reduce a sequence of embeddings to a
        single vector whose shape matches one input embedding."""
        lstm_dim = 100
        token_vec = torch.ones(1, lstm_dim)
        input_sequence = [token_vec] * 10

        for composer in (
            CompositionalNN(lstm_dim),
            CompositionalSummationNN(lstm_dim),
        ):
            self.assertEqual(composer(input_sequence).shape, token_vec.shape)
Example #3
0
    def from_config(
        cls,
        model_config,
        feature_config=None,
        metadata: CommonMetadata = None,
        tensorizers: Dict[str, Tensorizer] = None,
    ):
        """Build an RNNG parser from config, supporting two input pipelines.

        If *tensorizers* is given (new-style pipeline), the embedding and action
        vocab come from the tensorizers; otherwise they come from the old-style
        *feature_config*/*metadata* pair. The composition module is selected
        from ``model_config.compositional_type``.

        Raises:
            ValueError: if the compositional type flag is not recognized.
        """
        lstm_dim = model_config.lstm.lstm_dim
        comp_type = model_config.compositional_type

        if comp_type == RNNGParser.Config.CompositionalType.SUM:
            p_compositional = CompositionalSummationNN(lstm_dim=lstm_dim)
        elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
            p_compositional = CompositionalNN(lstm_dim=lstm_dim)
        else:
            raise ValueError(
                "Cannot understand compositional flag {}".format(comp_type)
            )

        if tensorizers is not None:
            # New-style: embedding module is created from the token tensorizer,
            # and the action indices live on the "actions" tensorizer.
            token_embedding = create_module(
                model_config.embedding, tensorizer=tensorizers["tokens"]
            )
            embedding = EmbeddingList([token_embedding], concat=True)
            actions_params = tensorizers["actions"]
            actions_vocab = actions_params.vocab
        else:
            # Old-style: embedding and action indices both come from metadata.
            embedding = Model.create_embedding(feature_config, metadata=metadata)
            actions_params = metadata
            actions_vocab = metadata.actions_vocab

        return cls(
            ablation=model_config.ablation,
            constraints=model_config.constraints,
            lstm_num_layers=model_config.lstm.num_layers,
            lstm_dim=lstm_dim,
            max_open_NT=model_config.max_open_NT,
            dropout=model_config.dropout,
            actions_vocab=actions_vocab,
            shift_idx=actions_params.shift_idx,
            reduce_idx=actions_params.reduce_idx,
            ignore_subNTs_roots=actions_params.ignore_subNTs_roots,
            valid_NT_idxs=actions_params.valid_NT_idxs,
            valid_IN_idxs=actions_params.valid_IN_idxs,
            valid_SL_idxs=actions_params.valid_SL_idxs,
            embedding=embedding,
            p_compositional=p_compositional,
        )
Example #4
0
    def setUp(self):
        """Create a small RNNGParser fixture in training mode."""
        # Action vocabulary: intents (IN:*), slots (SL:*), and the structural
        # SHIFT/REDUCE actions; each appears exactly once.
        action_names = [
            "IN:A",
            "IN:B",
            "IN:UNSUPPORTED",
            "REDUCE",
            "SHIFT",
            "SL:C",
            "SL:D",
        ]
        actions_vocab = Vocab(Counter(action_names), specials=[])

        self.parser = RNNGParser(
            ablation=RNNGParser.Config.AblationParams(),
            constraints=RNNGParser.Config.RNNGConstraints(),
            lstm_num_layers=2,
            lstm_dim=20,
            max_open_NT=10,
            dropout=0.2,
            beam_size=3,
            top_k=3,
            actions_vocab=actions_vocab,
            # Indices below correspond to positions in the vocab built above
            # (presumably sorted order: IN:A..IN:UNSUPPORTED, REDUCE, SHIFT,
            # SL:C, SL:D) — matches the original fixture's hard-coded values.
            shift_idx=4,
            reduce_idx=3,
            ignore_subNTs_roots=[2],
            valid_NT_idxs=[0, 1, 2, 5, 6],
            valid_IN_idxs=[0, 1, 2],
            valid_SL_idxs=[5, 6],
            embedding=EmbeddingList(
                embeddings=[
                    WordEmbedding(
                        num_embeddings=5,
                        embedding_dim=20,
                        embeddings_weight=None,
                        init_range=[-1, 1],
                        unk_token_idx=4,
                        mlp_layer_dims=[],
                    ),
                    DictEmbedding(
                        num_embeddings=4, embed_dim=10, pooling_type=PoolingType.MEAN
                    ),
                ],
                concat=True,
            ),
            p_compositional=CompositionalNN(lstm_dim=20),
        )
        self.parser.train()
Example #5
0
 def setUp(self):
     """Build a training RNNGModel and its traced inference counterpart."""
     contextual_emb_dim = 1
     emb_module = EmbeddingList(
         concat=True,
         embeddings=[
             WordEmbedding(num_embeddings=103, embedding_dim=100),
             DictEmbedding(
                 num_embeddings=59, embed_dim=10, pooling_type=PoolingType.MEAN
             ),
             ContextualTokenEmbedding(contextual_emb_dim),
         ],
     )
     # Non-terminal action indices: everything except SHIFT (0) / REDUCE (1).
     nt_idxs = list(range(2, 20))
     self.training_model = RNNGModel(
         input_for_trace=RNNGModel.get_input_for_trace(contextual_emb_dim),
         embedding=emb_module,
         ablation=RNNGParser.Config.AblationParams(),
         constraints=RNNGParser.Config.RNNGConstraints(),
         lstm_num_layers=2,
         lstm_dim=32,
         max_open_NT=10,
         dropout=0.4,
         num_actions=20,
         shift_idx=0,
         reduce_idx=1,
         ignore_subNTs_roots=[8, 15],
         valid_NT_idxs=nt_idxs,
         valid_IN_idxs=[2, 4, 7, 8, 10, 12, 13, 14, 15],
         valid_SL_idxs=[3, 5, 6, 9, 11, 16, 17, 18, 19],
         embedding_dim=emb_module.embedding_dim,
         p_compositional=CompositionalNN(lstm_dim=32, device="cpu"),
         device="cpu",
     )
     self.training_model.train()

     word_vocab = MockVocab(["<unk>", "foo", "bar"])
     dict_vocab = MockVocab(["<unk>", "a", "b"])
     actions_vocab = MockVocab(["SHIFT", "REDUCE", "IN:END_CALL", "SL:METHOD_CALL"])
     self.inference_model = RNNGInference(
         self.training_model.trace_embedding(),
         self.training_model.jit_model,
         word_vocab,
         dict_vocab,
         actions_vocab,
     )
     self.inference_model.eval()
Example #6
0
    def from_config(cls, model_config, feature_config, metadata: CommonMetadata):
        """Build a device-aware RNNG model from config, features, and metadata.

        Places the model on the current CUDA device when CUDA is enabled,
        otherwise on CPU. Selects the composition module from
        ``model_config.compositional_type`` and builds the embedding module
        from *feature_config*.

        Raises:
            ValueError: if the compositional type flag is not recognized.
        """
        if cuda.CUDA_ENABLED:
            device = "cuda:{}".format(torch.cuda.current_device())
        else:
            device = "cpu"

        lstm_dim = model_config.lstm.lstm_dim
        comp_type = model_config.compositional_type

        if comp_type == RNNGParser.Config.CompositionalType.SUM:
            p_compositional = CompositionalSummationNN(lstm_dim=lstm_dim)
        elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
            # Only the BiLSTM composer takes an explicit device argument.
            p_compositional = CompositionalNN(lstm_dim=lstm_dim, device=device)
        else:
            raise ValueError(
                "Cannot understand compositional flag {}".format(comp_type)
            )

        emb_module = Model.create_embedding(feature_config, metadata=metadata)
        contextual_emb_dim = feature_config.contextual_token_embedding.embed_dim

        return cls(
            cls.get_input_for_trace(contextual_emb_dim),
            embedding=emb_module,
            ablation=model_config.ablation,
            constraints=model_config.constraints,
            lstm_num_layers=model_config.lstm.num_layers,
            lstm_dim=lstm_dim,
            max_open_NT=model_config.max_open_NT,
            dropout=model_config.dropout,
            num_actions=len(metadata.actions_vocab),
            shift_idx=metadata.shift_idx,
            reduce_idx=metadata.reduce_idx,
            ignore_subNTs_roots=metadata.ignore_subNTs_roots,
            valid_NT_idxs=metadata.valid_NT_idxs,
            valid_IN_idxs=metadata.valid_IN_idxs,
            valid_SL_idxs=metadata.valid_SL_idxs,
            embedding_dim=emb_module.embedding_dim,
            p_compositional=p_compositional,
            device=device,
        )