def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in the pre-trained 50-dimensional GloVe vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their embedding vectors
    word_to_index -- dictionary mapping words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained Keras Embedding layer instance
    """
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["cucumber"].shape[0]

    # Build a matrix whose row `index` holds the word vector of the word with that index.
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Construct the (non-trainable) embedding layer.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Arguments:
    word_to_vec_map -- dictionary mapping each word to its vector representation
    word_to_index -- dictionary mapping each word to its index in the vocabulary

    Returns:
    embedding_layer -- Keras Embedding layer
    """
    from keras.layers.embeddings import Embedding
    import numpy as np

    # Add 1 to the vocabulary size, as the Keras Embedding layer requires.
    vocab_len = len(word_to_index) + 1
    # Dimensionality of the embedding vectors.
    emb_dim = word_to_vec_map['崔'].shape[0]

    # Initialize the embedding matrix with shape (vocab_len, emb_dim).
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Read each word's vector representation and fill it into the embedding matrix.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Create a non-trainable Keras Embedding instance and initialize its weights.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # dimensionality of your GloVe word vectors (= 50)

    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, emb_dim).
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to be the word vector representation
    # of the "index"th word of the vocabulary.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the Keras embedding layer with the correct input/output sizes and make it
    # non-trainable by setting trainable=False.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # Build the embedding layer; this is required before setting its weights. Do not modify the "None".
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix. The layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

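# A minimal usage sketch (not from the original snippets): wiring the pretrained layer above
# into a tiny Keras classifier. `word_to_vec_map`, `word_to_index`, `max_len`, and `n_classes`
# are assumed inputs, and the model shape is illustrative only.
from keras.layers import Input, LSTM, Dense, Activation
from keras.models import Model

def build_demo_model(word_to_vec_map, word_to_index, max_len, n_classes):
    # Sentences arrive as integer word indices, padded to max_len.
    sentence_indices = Input(shape=(max_len,), dtype='int32')
    embeddings = pretrained_embedding_layer(word_to_vec_map, word_to_index)(sentence_indices)
    x = LSTM(128)(embeddings)  # encode the sentence
    x = Dense(n_classes)(x)    # class scores
    out = Activation('softmax')(x)
    return Model(inputs=sentence_indices, outputs=out)
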
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_length = len(word_to_index) + 1  # adding 1 to fit the Embedding layer (Keras requirement)
    emb_dim = word_to_vec_map['at'].shape[0]

    emb_matrix = np.zeros((vocab_length, emb_dim))
    success = 0
    fail = []
    for word, index in word_to_index.items():
        try:
            emb_matrix[index, :] = word_to_vec_map[word]
            success += 1
        except Exception:
            # Words missing from the map get a small random vector instead.
            fail.append(word)
            emb_matrix[index, :] = -np.random.randn(emb_dim) / 20

    embedding_layer = Embedding(vocab_length, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(hotel_to_vec_map, hotel_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained word2vec vectors for hotels.

    Arguments:
    hotel_to_vec_map -- dictionary mapping hotels to their vector representation.
    hotel_to_index -- dictionary mapping from hotels to their indices in the vocabulary

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(hotel_to_index) + 1
    emb_dim = hotel_to_vec_map[5101].shape[0]  # dimensionality of the word2vec vectors (= 300)

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for hotel, idx in hotel_to_index.items():
        emb_matrix[idx, :] = hotel_to_vec_map[hotel]

    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_dim, trainable=False)
    embedding_layer.build((None,))  # Do not modify the "None". This line of code is complete as-is.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map['cucumber'].shape[0]  # dimensionality of the GloVe word vectors (50, here)

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    # Note: trainable is left at its default (True), so the GloVe weights can be fine-tuned;
    # pass trainable=False to freeze them.
    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_dim)
    # Build the embedding layer; this is required before setting its weights.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1  # add 1 to the vocabulary size, as the Keras Embedding layer requires
    emb_dim = word_to_vec_map["cucumber"].shape[0]

    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, emb_dim).
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to be the word vector representation
    # of the "index"th word of the vocabulary.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the Keras embedding layer with the correct input/output sizes and make it
    # non-trainable by setting trainable=False.
    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_dim, trainable=False)

    # Build the embedding layer; this is required before setting its weights. Do not modify the "None".
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix. The layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(self, word_to_vec_map, word_to_index):
    """
    A function to insert a Keras embedding before the sentences go directly into the hidden layers.

    :param word_to_vec_map: dict. a dictionary mapping words to their GloVe vector representations
    :param word_to_index: dict. a dictionary mapping each word to its index
    :return: embedding layer. pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # dimensionality of the GloVe vectors

    # Initialize the embedding matrix with zeros of shape (vocab_len, emb_dim).
    emb_matrix = np.zeros(shape=(vocab_len, emb_dim))
    for word, index in word_to_index.items():
        try:
            emb_matrix[index, 0:len(word_to_vec_map[word])] = word_to_vec_map[word]
        except ValueError:
            print("Error inserting word into embedding matrix: {}".format(word))

    # Define the Keras embedding layer with the correct input/output sizes and keep it trainable.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=True)
    # Build the embedding layer; this is required before setting its weights.
    embedding_layer.build((None,))
    # Set the weights.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map['cucumber'].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pre_trained_embedding_layer(word_to_vector, word_to_idx):
    """
    Compute an embedding layer for the given GloVe vector embedding matrix.
    """
    vocab_len = len(word_to_idx) + 1
    emb_dim = word_to_vector["hi"].shape[0]
    emb_matrix = np.zeros(shape=(vocab_len, emb_dim))

    # Prepare the embedding matrix for each word index.
    for word, index in word_to_idx.items():
        emb_matrix[index, :] = word_to_vector[word]

    # Create an Embedding instance with the vocab size as the input dimension
    # and the embedding size as the output dimension.
    embedding = Embedding(input_dim=vocab_len, output_dim=emb_dim,
                          embeddings_initializer='glorot_uniform', trainable=False)

    # Build the embedding layer before setting the weights.
    embedding.build((None,))

    # Set the weights for the embedding layer.
    embedding.set_weights([emb_matrix])
    return embedding

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1
    emb_dim = 50

    # Initialize the embedding matrix as a numpy array of zeros of shape
    # (vocab_len, dimensions of word vectors = emb_dim).
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to be the word vector
    # representation of the "index"th word of the vocabulary.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the Keras embedding layer with the correct output/input sizes and make it
    # non-trainable. Use Embedding(...) and make sure to set trainable=False.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Reads in pretrained weights and creates the Embedding layer.

    Arguments:
    word_to_vec_map -- dictionary of word: pretrained weight vector
    word_to_index -- dictionary of word: index

    Returns:
    embedding_layer -- Keras layer
    """
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # word vector dimensionality

    # Zero-initialize the embedding matrix.
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # For each word, place its weight vector at its index position.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the (non-trainable) Keras layer.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # Assign the pretrained weights to the Keras layer.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

def pretrained_embedding_layer(events_to_vec_map):
    """
    Creates a Keras Embedding() layer.

    Arguments:
    events_to_vec_map -- dictionary mapping event indices (as strings) to their vector representations

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    NUM_EVENTS = 8  # len + 1: adding 1 to fit Keras embedding
    EMB_DIM = 5  # events map one-to-one to 7 separate embedded dimensions; -2 events not seen, -1 for Keras = 5

    # Initialize the embedding matrix.
    emb_matrix = np.zeros([NUM_EVENTS, EMB_DIM])
    for index in range(7):
        emb_matrix[index, :] = events_to_vec_map[str(index)]

    # Define the Keras embedding layer with the correct input/output sizes.
    # Note: trainable must be the boolean False; the string 'False' is truthy.
    embedding_layer = Embedding(NUM_EVENTS, EMB_DIM, trainable=False)

    # Build the embedding layer.
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def createBaseModel(word2vec, word2idx):
    global n_tags

    # Index 0 is reserved for mask_zero; index Nw+1 holds the UNK word.
    embedding_weights = np.zeros((Nw + 2, V_dim))
    for word, index in word2idx.items():
        embedding_weights[index, :] = word2vec[word]

    # Create the model.
    input = Input(shape=(Tx,), name='input', dtype=np.int32)
    embedding_layer = Embedding(input_dim=Nw + 2, output_dim=V_dim,
                                mask_zero=True, trainable=False)
    embedding_layer.build((None,))  # if you don't do this, the next step won't work
    embedding_layer.set_weights([embedding_weights])

    bi_lstm_layer = Bidirectional(LSTM(n_a, return_sequences=True),
                                  input_shape=(Tx, 2 * n_a), name='bi-lstm')

    emb_vec = embedding_layer(input)
    a = bi_lstm_layer(emb_vec)
    if use_dropout:
        a = Dropout(rate=0.5, name="post_lstm_drop")(a)
    output = Dense(n_tags, activation='softmax', name='post-lstm-dense-softmax')(a)

    model = Model(inputs=input, outputs=output)
    # model.summary()
    return model

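# A hedged usage sketch for createBaseModel above. The globals it reads (Nw, V_dim, Tx, n_a,
# n_tags, use_dropout) are assumptions given toy values here, word2vec/word2idx are tiny
# stand-ins, and the Keras layer imports (Input, Embedding, Bidirectional, LSTM, Dropout,
# Dense, Model) are assumed from the surrounding module.
import numpy as np

Nw, V_dim, Tx, n_a, n_tags, use_dropout = 2, 4, 5, 8, 3, True
word2idx = {'hello': 1, 'world': 2}
word2vec = {w: np.random.randn(V_dim) for w in word2idx}

model = createBaseModel(word2vec, word2idx)
model.compile(optimizer='adam', loss='categorical_crossentropy')
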
def read_glove_vector():
    words = set()
    coefs = {}
    with open('glove.6B.50d.txt', 'r', encoding='UTF-8') as f:
        for line in f:
            values = line.strip().split()
            # Keep only the alphabetic part of each token.
            values[0] = re.sub('[^a-zA-Z]', '', values[0])
            if len(values[0]) > 0:
                words.add(values[0])
                coefs[values[0]] = np.array(values[1:], dtype=np.float64)

    # Assign indices 1..N to the sorted vocabulary (index 0 is left free).
    i = 1
    word_index = {}
    index_word = {}
    for word in sorted(words):
        word_index[word] = i
        index_word[i] = word
        i = i + 1

    vocab_size = len(word_index) + 1
    embed_dim = coefs['word'].shape[0]

    embeddings_matrix = np.zeros((vocab_size, embed_dim))
    for word, i in word_index.items():
        embeddings_matrix[i, :] = coefs[word]

    embedding_layer = Embedding(vocab_size, embed_dim)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embeddings_matrix])
    print(embeddings_matrix.shape)
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # define dimensionality of your GloVe word vectors (= 50)

    # Step 1
    # Initialize the embedding matrix as a numpy array of zeros
    # of shape (vocab_len, emb_dim).
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Step 2
    # Set each row "idx" of the embedding matrix to be
    # the word vector representation of the idx'th word of the vocabulary.
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    # Step 3
    # Define the Keras embedding layer with the correct input and output sizes.
    # Make it non-trainable.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # Step 4 (already done for you; please do not modify)
    # Build the embedding layer; this is required before setting its weights.
    embedding_layer.build((None,))  # Do not modify the "None". This line of code is complete as-is.

    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

def embedding_layer(emb_matrix):
    # Prepare the embedding layer.
    vocab_len = emb_matrix.shape[0]
    emb_dim = emb_matrix.shape[1]
    emb_layer = Embedding(vocab_len, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer

def pretrained_embedding_layer(emb_matrix):
    vocab_len, emb_dim = emb_matrix.shape

    # Define the Keras embedding layer with the correct input/output sizes and make it
    # non-trainable by setting trainable=False.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # Build the embedding layer; this is required before setting its weights.
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

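# A minimal sketch (assumed, not from the original snippets) of building the weight matrix
# separately and handing it to the matrix-taking variant above. `word_to_vec_map` and
# `word_to_index` are assumed to follow the same conventions as the other snippets here.
import numpy as np

def build_emb_matrix(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1  # +1 to fit the Keras Embedding requirement
    emb_dim = next(iter(word_to_vec_map.values())).shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    return emb_matrix

# embedding_layer = pretrained_embedding_layer(build_emb_matrix(word_to_vec_map, word_to_index))
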
def emm_instance(labeling, word_to_vec):
    vocab = len(labeling) + 1  # len(word_to_vec.keys()) + 1
    vec_dim = word_to_vec['the'].shape[0]

    # Fill the embedding matrix row by row from the word-to-index labeling.
    emb_matrix = np.zeros((vocab, vec_dim))
    for i, j in labeling.items():
        emb_matrix[j, :] = word_to_vec[i]

    emb_layer = Embedding(vocab, vec_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["cucumber"].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def set_embedding_layer(word2index, word2vec):
    len_vocab = len(word2index) + 1
    embd_dim = 50  # because we use 50-dimensional GloVe vectors

    embd_array = np.zeros((len_vocab, embd_dim))
    for word, index in word2index.items():
        embd_array[index, :] = word2vec[word]

    embedding_layer = Embedding(len_vocab, embd_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embd_array])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    emb_matrix = np.zeros([VOCAB_SIZE, EMBEDDING_DIMS])
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_DIMS, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(word2vec_map, word_to_index, embedding_dim,
                               vocab_size, trainable=False):
    embedding_matrix = utils.get_embedding_matrix(word2vec_map, word_to_index,
                                                  embedding_dim)
    embedding_layer = Embedding(vocab_size, embedding_dim, trainable=trainable)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embedding_matrix])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    # Per the Embedding layer API, input_dim=vocab_len is the largest input index + 1.
    vocab_len = len(word_to_index) + 1
    # The values of word_to_vec_map come from the GloVe vectors, so each entry
    # represents one word as a 50-dimensional vector.
    emb_dim = word_to_vec_map["cucumber"].shape[0]

    # Initialize the embedding matrix.
    emb_matrix = np.zeros((vocab_len, emb_dim))
    # Copy the GloVe vectors into the embedding matrix.
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the embedding layer: input_dim=vocab_len is the largest input index + 1,
    # and trainable=False freezes this layer so it cannot be trained further.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(embedding_df):
    # embedding_df holds one word per column and one embedding dimension per row,
    # so input_dim is the number of columns + 1 and output_dim the number of rows.
    embedding_layer = Embedding(embedding_df.shape[1] + 1,
                                embedding_df.shape[0],
                                trainable=False)
    embedding_layer.build((None,))

    # Transpose to (num_words, emb_dim) and append a zero row for the extra index.
    embed_matrix = np.transpose(embedding_df.values)
    embed_matrix = np.concatenate(
        [embed_matrix, np.zeros([1, embed_matrix.shape[1]])], axis=0)
    embedding_layer.set_weights([embed_matrix])
    return embedding_layer

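# A small illustrative check (the DataFrame orientation is an assumption read off the code
# above, not stated in the original snippet): words as columns, embedding dimensions as rows.
import numpy as np
import pandas as pd

demo_df = pd.DataFrame(
    np.random.randn(3, 2),  # 3-dimensional embeddings for 2 words
    columns=['hello', 'world'])
layer = pretrained_embedding_layer(demo_df)
weights = layer.get_weights()[0]
assert weights.shape == (3, 3)  # (2 words + 1 zero row, 3 dimensions)
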
def pretrained_model(word_index, word_to_vec):
    dim = len(word_to_vec['the'])
    vocab_len = len(word_index) + 1

    # Words missing from word_to_vec keep their zero row.
    emb_mat = np.zeros((vocab_len, dim))
    for word, i in word_index.items():
        if word_to_vec.get(word) is not None:
            emb_mat[i, :] = word_to_vec[word]
    print(emb_mat.shape)

    embedding_layer = Embedding(vocab_len, dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_mat])
    return embedding_layer

def glove_embedding_layer(word_to_vec, word_to_index):
    vocab_size = len(word_to_index) + 1
    embedding_dim = word_to_vec['bus'].shape[0]

    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    for word, index in word_to_index.items():
        embedding_matrix[index] = word_to_vec[word]

    embedding_layer = Embedding(vocab_size, embedding_dim, trainable=True)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embedding_matrix])
    return embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["cucumber"].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))  # Do not modify the "None". This line of code is complete as-is.
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def pretrained_embedding_layer(voc_embedding, word_to_index):
    voc_len = len(word_to_index) + 1
    # Infer the embedding dimensionality from one (arbitrary) entry of the map.
    emb_dim = len(list(voc_embedding.values())[1])

    emb_matrix = np.zeros((voc_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = np.array(voc_embedding[word])

    embedding_layer = Embedding(voc_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer

def create_embedding_layer(word_to_index, word_to_vec):
    corpus_len = len(word_to_index) + 1
    embed_dim = word_to_vec['word'].shape[0]

    embed_matrix = np.zeros((corpus_len, embed_dim))
    for word, index in word_to_index.items():
        embed_matrix[index, :] = word_to_vec[word]

    embedding_layer = Embedding(corpus_len, embed_dim)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embed_matrix])
    return embedding_layer