def from_config(cls, model_config, feature_config, metadata: CommonMetadata):
    """Build an RNNGParser from its model/feature configs and dataset metadata.

    Chooses the composition function according to the configured
    compositional type (SUM -> CompositionalSummationNN, BLSTM ->
    CompositionalNN) and wires the action-vocabulary indices from
    ``metadata`` into the constructor.

    Raises:
        ValueError: if the compositional type is not SUM or BLSTM.
    """
    comp_type = model_config.compositional_type
    hidden_dim = model_config.lstm.lstm_dim
    if comp_type == RNNGParser.Config.CompositionalType.SUM:
        composition = CompositionalSummationNN(lstm_dim=hidden_dim)
    elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
        composition = CompositionalNN(lstm_dim=hidden_dim)
    else:
        raise ValueError(
            "Cannot understand compositional flag {}".format(comp_type)
        )
    return cls(
        ablation=model_config.ablation,
        constraints=model_config.constraints,
        lstm_num_layers=model_config.lstm.num_layers,
        lstm_dim=hidden_dim,
        max_open_NT=model_config.max_open_NT,
        dropout=model_config.dropout,
        actions_vocab=metadata.actions_vocab,
        shift_idx=metadata.shift_idx,
        reduce_idx=metadata.reduce_idx,
        ignore_subNTs_roots=metadata.ignore_subNTs_roots,
        valid_NT_idxs=metadata.valid_NT_idxs,
        valid_IN_idxs=metadata.valid_IN_idxs,
        valid_SL_idxs=metadata.valid_SL_idxs,
        embedding=Model.create_embedding(feature_config, metadata=metadata),
        p_compositional=composition,
    )
def test_CompositionFunction(self):
    """Each composition module collapses a sequence of token embeddings
    into a single embedding with the same shape as one input element."""
    lstm_dim = 100
    token = torch.ones(1, lstm_dim)
    sequence = [token] * 10
    # Check both composition variants in the same order as before:
    # BLSTM-based first, then summation-based.
    for module in (CompositionalNN(lstm_dim), CompositionalSummationNN(lstm_dim)):
        self.assertEqual(module(sequence).shape, token.shape)
def from_config(
    cls,
    model_config,
    feature_config=None,
    metadata: CommonMetadata = None,
    tensorizers: Dict[str, Tensorizer] = None,
):
    """Build the parser from config plus either tensorizers or metadata.

    Two construction paths are supported:
      * new-style: ``tensorizers`` supplies the token tensorizer (used to
        create the embedding module) and the action tensorizer (vocab and
        action indices);
      * legacy: ``feature_config`` + ``metadata`` supply the embedding and
        the action vocab/indices.

    Raises:
        ValueError: if the compositional type is not SUM or BLSTM.
    """
    comp_type = model_config.compositional_type
    hidden_dim = model_config.lstm.lstm_dim
    if comp_type == RNNGParser.Config.CompositionalType.SUM:
        composition = CompositionalSummationNN(lstm_dim=hidden_dim)
    elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
        composition = CompositionalNN(lstm_dim=hidden_dim)
    else:
        raise ValueError(
            "Cannot understand compositional flag {}".format(comp_type)
        )

    if tensorizers is None:
        # Legacy path: embedding and action indices come from metadata.
        embedding_module = Model.create_embedding(feature_config, metadata=metadata)
        action_source = metadata
        actions_vocab = metadata.actions_vocab
    else:
        token_module = create_module(
            model_config.embedding, tensorizer=tensorizers["tokens"]
        )
        embedding_module = EmbeddingList([token_module], concat=True)
        action_source = tensorizers["actions"]
        actions_vocab = action_source.vocab

    return cls(
        ablation=model_config.ablation,
        constraints=model_config.constraints,
        lstm_num_layers=model_config.lstm.num_layers,
        lstm_dim=hidden_dim,
        max_open_NT=model_config.max_open_NT,
        dropout=model_config.dropout,
        actions_vocab=actions_vocab,
        shift_idx=action_source.shift_idx,
        reduce_idx=action_source.reduce_idx,
        ignore_subNTs_roots=action_source.ignore_subNTs_roots,
        valid_NT_idxs=action_source.valid_NT_idxs,
        valid_IN_idxs=action_source.valid_IN_idxs,
        valid_SL_idxs=action_source.valid_SL_idxs,
        embedding=embedding_module,
        p_compositional=composition,
    )
def setUp(self):
    """Build a small RNNGParser fixture and put it in training mode."""
    action_names = [
        "IN:A",
        "IN:B",
        "IN:UNSUPPORTED",
        "REDUCE",
        "SHIFT",
        "SL:C",
        "SL:D",
    ]
    # Counter over the list gives each action a count of 1, matching the
    # previous per-action increment loop.
    actions_vocab = Vocab(Counter(action_names), specials=[])

    word_emb = WordEmbedding(
        num_embeddings=5,
        embedding_dim=20,
        embeddings_weight=None,
        init_range=[-1, 1],
        unk_token_idx=4,
        mlp_layer_dims=[],
    )
    dict_emb = DictEmbedding(
        num_embeddings=4, embed_dim=10, pooling_type=PoolingType.MEAN
    )

    self.parser = RNNGParser(
        ablation=RNNGParser.Config.AblationParams(),
        constraints=RNNGParser.Config.RNNGConstraints(),
        lstm_num_layers=2,
        lstm_dim=20,
        max_open_NT=10,
        dropout=0.2,
        beam_size=3,
        top_k=3,
        actions_vocab=actions_vocab,
        shift_idx=4,
        reduce_idx=3,
        ignore_subNTs_roots=[2],
        valid_NT_idxs=[0, 1, 2, 5, 6],
        valid_IN_idxs=[0, 1, 2],
        valid_SL_idxs=[5, 6],
        embedding=EmbeddingList(embeddings=[word_emb, dict_emb], concat=True),
        p_compositional=CompositionalNN(lstm_dim=20),
    )
    self.parser.train()
def setUp(self):
    """Build a training RNNGModel plus its traced RNNGInference counterpart.

    The training model is left in train() mode; the inference wrapper
    (built from the traced embedding, the jit model, and mock vocabs)
    is put in eval() mode.
    """
    ctx_dim = 1
    embeddings = EmbeddingList(
        embeddings=[
            WordEmbedding(num_embeddings=103, embedding_dim=100),
            DictEmbedding(
                num_embeddings=59, embed_dim=10, pooling_type=PoolingType.MEAN
            ),
            ContextualTokenEmbedding(ctx_dim),
        ],
        concat=True,
    )
    self.training_model = RNNGModel(
        input_for_trace=RNNGModel.get_input_for_trace(ctx_dim),
        embedding=embeddings,
        ablation=RNNGParser.Config.AblationParams(),
        constraints=RNNGParser.Config.RNNGConstraints(),
        lstm_num_layers=2,
        lstm_dim=32,
        max_open_NT=10,
        dropout=0.4,
        num_actions=20,
        shift_idx=0,
        reduce_idx=1,
        ignore_subNTs_roots=[8, 15],
        # All non-terminal indices 2..19; IN and SL partition this range.
        valid_NT_idxs=list(range(2, 20)),
        valid_IN_idxs=[2, 4, 7, 8, 10, 12, 13, 14, 15],
        valid_SL_idxs=[3, 5, 6, 9, 11, 16, 17, 18, 19],
        embedding_dim=embeddings.embedding_dim,
        p_compositional=CompositionalNN(lstm_dim=32, device="cpu"),
        device="cpu",
    )
    self.training_model.train()

    self.inference_model = RNNGInference(
        self.training_model.trace_embedding(),
        self.training_model.jit_model,
        MockVocab(["<unk>", "foo", "bar"]),
        MockVocab(["<unk>", "a", "b"]),
        MockVocab(["SHIFT", "REDUCE", "IN:END_CALL", "SL:METHOD_CALL"]),
    )
    self.inference_model.eval()
def from_config(cls, model_config, feature_config, metadata: CommonMetadata):
    """Build a traceable RNNGModel from config and dataset metadata.

    Resolves the target device (current CUDA device when enabled,
    otherwise CPU), selects the composition function by compositional
    type, and passes the trace input stub plus all vocab indices from
    ``metadata`` to the constructor.

    Raises:
        ValueError: if the compositional type is not SUM or BLSTM.
    """
    if cuda.CUDA_ENABLED:
        device = "cuda:{}".format(torch.cuda.current_device())
    else:
        device = "cpu"

    comp_type = model_config.compositional_type
    hidden_dim = model_config.lstm.lstm_dim
    if comp_type == RNNGParser.Config.CompositionalType.SUM:
        composition = CompositionalSummationNN(lstm_dim=hidden_dim)
    elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
        # Only the BLSTM composition takes a device argument.
        composition = CompositionalNN(lstm_dim=hidden_dim, device=device)
    else:
        raise ValueError(
            "Cannot understand compositional flag {}".format(comp_type)
        )

    embedding_module = Model.create_embedding(feature_config, metadata=metadata)
    ctx_dim = feature_config.contextual_token_embedding.embed_dim
    return cls(
        cls.get_input_for_trace(ctx_dim),
        embedding=embedding_module,
        ablation=model_config.ablation,
        constraints=model_config.constraints,
        lstm_num_layers=model_config.lstm.num_layers,
        lstm_dim=hidden_dim,
        max_open_NT=model_config.max_open_NT,
        dropout=model_config.dropout,
        num_actions=len(metadata.actions_vocab),
        shift_idx=metadata.shift_idx,
        reduce_idx=metadata.reduce_idx,
        ignore_subNTs_roots=metadata.ignore_subNTs_roots,
        valid_NT_idxs=metadata.valid_NT_idxs,
        valid_IN_idxs=metadata.valid_IN_idxs,
        valid_SL_idxs=metadata.valid_SL_idxs,
        embedding_dim=embedding_module.embedding_dim,
        p_compositional=composition,
        device=device,
    )