Example #1
    def __init__(self, params, vocab):
        super(Lstm, self).__init__()
        self.n_layer = params.n_layer
        self.emb_dim = params.emb_dim
        self.n_words = vocab.n_words
        self.hidden_dim = params.hidden_dim
        self.dropout = params.dropout
        self.bidirection = params.bidirection
        self.freeze_emb = params.freeze_emb
        self.emb_file = params.emb_file

        # embedding layer
        self.embedding = nn.Embedding(self.n_words,
                                      self.emb_dim,
                                      padding_idx=PAD_INDEX)
        # load embedding
        if self.emb_file.endswith("npy"):
            embedding = load_embedding_from_npy(self.emb_file)
        else:
            embedding = load_embedding(vocab, self.emb_dim, self.emb_file)
        self.embedding.weight.data.copy_(torch.FloatTensor(embedding))

        # LSTM layers
        self.lstm = nn.LSTM(self.emb_dim,
                            self.hidden_dim,
                            num_layers=self.n_layer,
                            dropout=self.dropout,
                            bidirectional=self.bidirection,
                            batch_first=True)
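
Only the constructor is shown above; the forward pass of this encoder is not part of the snippet. A minimal sketch of what it typically looks like (the return value and the absence of explicit sequence packing are assumptions, not taken from the original source):

    def forward(self, x):
        # x: (batch_size, seq_len) LongTensor of padded word indices
        embedded = self.embedding(x)          # (batch_size, seq_len, emb_dim)
        lstm_output, _ = self.lstm(embedded)  # (batch_size, seq_len, hidden_dim * num_directions)
        return lstm_output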
Example #2
    def __init__(self, params):
        super(EntityNamePredictor, self).__init__()
        self.input_dim = params.hidden_dim * 2 if params.bidirection else params.hidden_dim
        self.enc_type = params.enc_type
        if self.enc_type == "trs":
            print("using transformer to encoder entity tokens")
            self.trs_enc = TransformerEncoder(
                input_size=self.input_dim,
                hidden_size=params.trs_hidden_dim,
                num_layers=params.trs_layers,
                num_heads=params.num_heads,
                dim_key=params.dim_key,
                dim_value=params.dim_value,
                filter_size=params.filter_size)
        elif self.enc_type == "lstm":
            print("using lstm to encoder entity tokens")
            self.lstm_enc = nn.LSTM(self.input_dim,
                                    params.trs_hidden_dim // 2,
                                    num_layers=params.trs_layers,
                                    bidirectional=True,
                                    batch_first=True)
        else:
            print("simply sum the entity token features")

        self.entity_type_embs = torch.cuda.FloatTensor(
            load_embedding_from_npy(
                params.ner_entity_type_emb_file)).transpose(
                    0, 1)  # (emb_dim, num_entity)
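
The entity-type matrix is stored transposed so that encoded entity features can be projected onto every entity type with a single matrix product. A minimal sketch of that scoring step; the pooling that produces `entity_feats` and the method itself are assumptions, since the original forward is not shown:

    def score_entity_types(self, entity_feats):
        # entity_feats: (batch_size, input_dim) pooled entity token features
        # entity_type_embs: (emb_dim, num_entity); assumes input_dim matches emb_dim
        logits = torch.matmul(entity_feats, self.entity_type_embs)  # (batch_size, num_entity)
        return logits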
Example #3
def gen_example_embs_based_on_each_domain(emb_file):
    # 1. generate example embeddings for each slot
    # get vocabulary
    _, vocab = datareader()
    # get word embeddings
    embedding = load_embedding_from_npy(emb_file)

    example2embs = {}
    for slot, examples in slot2example.items():
        example_embs_list = []
        for example in examples:
            tok_list = example.split()
            emb = np.zeros(400)  # word and char level embeddings
            for token in tok_list:
                index = vocab.word2index[token]
                emb = emb + embedding[index]
            example_embs_list.append(emb)
        
        example2embs[slot] = np.stack(example_embs_list, axis=-1)

    # 2. generate example embeddings based on each domain
    example_embs_based_on_each_domain = {}
    for domain, slot_names in domain2slot.items():
        example_embs = np.zeros((len(slot_names), 400, 2))
        for i, slot in enumerate(slot_names):
            embs = example2embs[slot]
            example_embs[i] = embs
        
        example_embs_based_on_each_domain[domain] = example_embs
    
    with open("../data/snips/emb/example_embs_based_on_each_domain.dict", "wb") as f:
        pickle.dump(example_embs_based_on_each_domain, f)
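
Each domain entry in the resulting pickle is an array of shape (num_slots_in_domain, 400, 2): one 400-dimensional summed word/char embedding for each of the two example phrases per slot (the fixed last dimension of 2 implies two examples per slot in `slot2example`). A short sketch of reading the file back:

import pickle

with open("../data/snips/emb/example_embs_based_on_each_domain.dict", "rb") as f:
    example_embs_based_on_each_domain = pickle.load(f)

for domain, embs in example_embs_based_on_each_domain.items():
    print(domain, embs.shape)  # e.g. (num_slots_in_domain, 400, 2)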
Example #4
    def __init__(self, params, vocab):
        super(ConceptTagger, self).__init__()

        self.use_example = params.use_example
        if self.use_example:
            hidden_dim = params.hidden_dim * 2 if params.bidirection else params.hidden_dim
            self.w_a = nn.Parameter(torch.FloatTensor(hidden_dim))
            torch.nn.init.uniform_(self.w_a.data, -0.01, 0.01)
            self.softmax = nn.Softmax(dim=-1)

            self.example_embs = torch.cuda.FloatTensor(
                load_embedding_from_npy(
                    params.ner_example_emb_file))  # (num_entity, emb_dim, 2)

        self.lstm_encoder = Lstm(params, vocab)
        self.lstm_predictor = LstmBasedSlotPredictor(params)
        self.entity_embs = torch.cuda.FloatTensor(
            load_embedding_from_npy(
                params.ner_entity_type_emb_file))  # (num_entity, emb_dim)
        self.entity_embs_size = self.entity_embs.size()
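
A minimal sketch of how `w_a`, the softmax, and the per-slot example embeddings might combine: score each of the two stored examples per entity with `w_a`, then take an attention-weighted sum. The original forward is not shown, so the scoring below is an assumption, as is the implied match between the size of `w_a` and the 400-dimensional example embeddings:

    def attend_examples(self):
        # example_embs: (num_entity, emb_dim, 2); w_a: (hidden_dim,)
        # assumes hidden_dim equals the emb_dim of the stored examples
        scores = torch.einsum("d,ndk->nk", self.w_a, self.example_embs)  # (num_entity, 2)
        weights = self.softmax(scores).unsqueeze(1)                      # (num_entity, 1, 2)
        return (self.example_embs * weights).sum(dim=-1)                 # (num_entity, emb_dim)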