def __call__(self, tokenized_sentences_lst):
    # Input placeholders to the biLM.
    context_character_ids = tf.placeholder(
        'int32', shape=(None, None, self.max_characters_per_token))

    # Get ops to compute the LM embeddings.
    context_embeddings_op = self.bilm(context_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
    elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

    # Now we can compute embeddings.
    context_tokens = [sentence.split() for sentence in tokenized_sentences_lst]

    with tf.Session() as sess:
        # It is necessary to initialize variables once before running inference.
        sess.run(tf.global_variables_initializer())

        # Create batches of data.
        context_ids = self.batcher.batch_sentences(context_tokens)

        # Compute ELMo representations (here for the input only, for simplicity).
        elmo_context_vecs = sess.run(
            [elmo_context_input['weighted_op']],
            feed_dict={context_character_ids: context_ids}
        )

    return elmo_context_vecs[0]  # , context_tokens, context_ids
def build_embeddings_op(self, context_ids_ph, utterances_ids_ph, context_sentence_ids_ph):
    bilm = BidirectionalLanguageModel(
        self.elmo_options_file,
        self.elmo_weight_file,
        use_character_inputs=False,
        embedding_weight_file=self.token_embedding_file)

    context_emb_op = bilm(context_ids_ph)
    utterances_emb_op = bilm(utterances_ids_ph)
    context_sentence_emb_op = bilm(context_sentence_ids_ph)

    elmo_context_input = weight_layers('input', context_emb_op, l2_coef=0.0)
    with tf.variable_scope('', reuse=True):
        elmo_utterances_input = weight_layers('input', utterances_emb_op, l2_coef=0.0)
        elmo_context_sentence_input = weight_layers(
            'input', context_sentence_emb_op, l2_coef=0.0)

    return (elmo_context_input, elmo_utterances_input, elmo_context_sentence_input)
def __init__(self):
    self.vocab_file = 'vocab_small.txt'

    # Location of pretrained LM. Here we use the test fixtures.
    datadir = os.path.join('pretrained')
    options_file = os.path.join(
        datadir, 'elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json')
    weight_file = os.path.join(
        datadir, 'elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5')

    # Dump the token embeddings to a file. Run this once for your dataset.
    token_embedding_file = 'elmo_token_embeddings.hdf5'
    dump_token_embeddings(
        self.vocab_file, options_file, weight_file, token_embedding_file)

    self.batcher = TokenBatcher(self.vocab_file)

    # Input placeholders to the biLM.
    self.context_token_ids = tf.placeholder('int32', shape=(None, None))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(
        options_file,
        weight_file,
        use_character_inputs=False,
        embedding_weight_file=token_embedding_file)

    # Get ops to compute the LM embeddings.
    context_embeddings_op = bilm(self.context_token_ids)
    self.elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
    self.elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)
def bilm_build_graph(options_file, weight_file):
    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file)

    # Get ops to compute the LM embeddings.
    # (context_elmo and question_elmo are character-id placeholders assumed to be
    # defined elsewhere in the module; see the sketch after this function.)
    context_embeddings_op = bilm(context_elmo)
    question_embeddings_op = bilm(question_elmo)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    # Our SQuAD model includes ELMo at both the input and output layers
    # of the task GRU, so we need 4x ELMo representations for the question
    # and context at each of the input and output.
    # We use the same ELMo weights for both the question and context
    # at each of the input and output.
    elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)['weighted_op']
    with tf.variable_scope('', reuse=True):
        # the reuse=True scope reuses weights from the context for the question
        elmo_question_input = weight_layers('input', question_embeddings_op, l2_coef=0.0)['weighted_op']

    """
    elmo_context_output = weight_layers(
        'output', context_embeddings_op, l2_coef=0.0
    )['weighted_op']
    with tf.variable_scope('', reuse=True):
        # the reuse=True scope reuses weights from the context for the question
        elmo_question_output = weight_layers(
            'output', question_embeddings_op, l2_coef=0.0
        )
    """

    return elmo_context_input, elmo_question_input
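# A minimal sketch (not from the original source) of the module-level placeholders
# that bilm_build_graph above assumes already exist. The 50-character shape follows
# the character-id placeholders used by the other snippets in this file;
# options_file and weight_file are assumed paths to a pretrained ELMo model.
context_elmo = tf.placeholder('int32', shape=(None, None, 50), name='context_elmo')
question_elmo = tf.placeholder('int32', shape=(None, None, 50), name='question_elmo')

elmo_context_input, elmo_question_input = bilm_build_graph(options_file, weight_file)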
def elmo_input_embedding(self, tag):
    que1_embeddings_op = self.bilm(self.que1)
    que2_embeddings_op = self.bilm(self.que2)

    elmo_que1 = weight_layers(tag, que1_embeddings_op, l2_coef=0.)['weighted_op']
    with tf.variable_scope('', reuse=True):
        elmo_que2 = weight_layers(tag, que2_embeddings_op, l2_coef=0.)['weighted_op']

    return elmo_que1, elmo_que2
def add_embeddings_op(self):
    """Defines self.word_embeddings"""
    b_size = tf.shape(self.cand_entities_ids)[0]
    cand_spans = tf.shape(self.cand_entities_ids)[1]
    cand_ents = tf.shape(self.cand_entities_ids)[2]

    entities = tf.reshape(self.cand_entities_ids, [b_size, cand_spans, cand_ents // 22, 22])
    entities = tf.reshape(entities, [-1, 22])
    zeros_count = tf.reduce_sum(tf.cast(tf.equal(entities, 0), tf.int32), axis=1)
    lengths = tf.math.maximum(0, 20 - zeros_count)

    with tf.variable_scope('bilm_1'):
        entities_embeddings_op = self.entity_bilm(entities)  # [batch_size, max_token]

    with tf.variable_scope('bilm_2'):
        words_embeddings_op = self.bilm(self.words)

    with tf.variable_scope("words"):
        self.word_embeddings = weight_layers('words', words_embeddings_op, l2_coef=0.0)['weighted_op']
        print("word_embeddings (after lookup) ", self.word_embeddings)

    with tf.variable_scope("entities"):
        from preprocessing.util import load_wikiid2nnid
        self.nentities = len(load_wikiid2nnid(extension_name=self.args.entity_extension))
        self.entity_embeddings = tf.reshape(weight_layers(
            'entities', entities_embeddings_op, l2_coef=0.0
        )['weighted_op'], [b_size, cand_spans, cand_ents // 22, 20, -1])  # [batch_size, max_token, vdim]

        #cell_fw = tf.contrib.rnn.LSTMCell(self.args.hidden_size_lstm // 2)
        #cell_bw = tf.contrib.rnn.LSTMCell(self.args.hidden_size_lstm // 2)
        #(output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
        #    cell_fw, cell_bw, output,
        #    sequence_length=lengths, dtype=tf.float32)
        #output = tf.concat([output_fw, output_bw], axis=-1)
        #output = tf.concat([output[:, 0, :], output[:, -1, :]], axis=-1)
        # coeffs = tf.nn.softmax(tf.squeeze(tf.layers.dense(output, 1)))
        # output = tf.reduce_sum(output * coeffs[..., None], 1)
        # self.entity_embeddings = tf.layers.dense(tf.reshape(output, [b_size, cand_spans, cand_ents // 22, 256]), 300)

        #mask = tf.math.logical_not(tf.equal(entities, 0)[:, 1:-1])
        #Q = tf.layers.dense(output, self.args.hidden_size_lstm)  # [batch_size, sequence_length, hidden_dim]
        #K = tf.layers.dense(output, self.args.hidden_size_lstm)  # [batch_size, sequence_length, hidden_dim]
        #V = tf.layers.dense(output, 300)  # [batch_size, sequence_length, n_classes]
        #query_value_attention_seq = tf.keras.layers.Attention()([Q, V, K], [mask, mask])
        #query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(query_value_attention_seq)
        #self.entity_embeddings = tf.reshape(query_value_attention, [b_size, cand_spans, cand_ents // 22, -1])
        # self.entity_embeddings = util.ffnn(self.entity_embeddings, 1, 300, 300, dropout=None)

        self.pure_entity_embeddings = self.entity_embeddings
        if self.args.ent_vecs_regularization.startswith("l2"):  # 'l2' or 'l2dropout'
            # not strictly necessary since normalization is also done when the entity
            # embeddings are created; kept for safety
            self.entity_embeddings = tf.nn.l2_normalize(self.entity_embeddings, dim=3)
        if self.args.ent_vecs_regularization == "dropout" or \
                self.args.ent_vecs_regularization == "l2dropout":
            self.entity_embeddings = tf.nn.dropout(self.entity_embeddings, self.dropout)
def _embed_ids(self):
    print('[launch] embed_ids, use_ELMO')
    with tf.name_scope('text_embedding_layer'):

        # Build the biLM graph.
        if self.params.USE_CHAR_ELMO:
            bilm = BidirectionalLanguageModel(
                options_file=self.data_path + self.params.ELMO_OPTIONS,
                weight_file=self.data_path + self.params.ELMO_WEIGHTS,
                max_batch_size=self.params.batch_size * self.params.MAX_SENTENCES)
        else:
            bilm = BidirectionalLanguageModel(
                options_file=self.data_path + self.params.ELMO_OPTIONS,
                weight_file=self.data_path + self.params.ELMO_WEIGHTS,
                use_character_inputs=False,
                embedding_weight_file=self.data_path + self.params.ELMO_TOKEN,
                max_batch_size=self.params.batch_size * self.params.MAX_SENTENCES)

        # question
        self.embed_q_op = bilm(self.batch_q)
        self.elmo_q_output = weight_layers('output', self.embed_q_op, l2_coef=0.0)
        self.embed_q_inter = self.elmo_q_output['weighted_op']

        '''
        self.q_len_to_pad = self.params.MAX_LENGTH_Q - tf.reduce_max( self.batch_len_q ) -1
        self.q_len_to_pad = tf.maximum(self.q_len_to_pad, 0)
        self.embed_q = tf.pad( self.embed_q_inter, [[0,0], [0, self.q_len_to_pad], [0,0]] )
        '''
        self.embed_q = self.embed_q_inter

        # sentence
        self.embed_s_op = bilm(self.batch_s)
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            self.elmo_s_output = weight_layers('output', self.embed_s_op, l2_coef=0.0)
        self.embed_s_inter = self.elmo_s_output['weighted_op']

        self.s_len_to_pad = self.params.MAX_SENTENCES - tf.reduce_max(self.batch_len_s) - 1
        self.s_len_to_pad = tf.maximum(self.s_len_to_pad, 0)
        #self.embed_s = tf.pad( self.embed_s_inter, [[0,0], [0, self.s_len_to_pad], [0,0]] )

        # [batch_size, max_len (data dependent), elmo_embedding]
        self.embed_q = self.embed_q_inter

        # [batch_size, MAX_SENTENCES, max_len (data dependent), elmo_embedding]
        self.embed_s = tf.reshape(self.embed_s_inter, [
            self.params.batch_size,
            self.params.MAX_SENTENCES,
            -1,
            self.params.DIM_WORD_EMBEDDING
        ])
def load_elmo_embeddings(directory, top=False):
    """
    :param directory: directory with an ELMo model
                      ('model.hdf5', 'options.json' and 'vocab.txt.gz')
    :param top: use only the top ELMo layer
    :return: ELMo batcher, character id placeholders, op object
    """
    vocab_file = os.path.join(directory, 'vocab.txt.gz')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, 50)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.placeholder('int32', shape=(None, None, 50))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=300)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)

    return batcher, sentence_character_ids, elmo_sentence_input
def load_elmo_embeddings(directory, top=False):
    """
    :param directory: directory with an ELMo model
                      ('model.hdf5', 'options.json' and 'vocab.txt.gz')
    :param top: use only the top ELMo layer
    :return: ELMo batcher, character id placeholders, op object
    """
    if os.path.isfile(os.path.join(directory, 'vocab.txt.gz')):
        vocab_file = os.path.join(directory, 'vocab.txt.gz')
    elif os.path.isfile(os.path.join(directory, 'vocab.txt')):
        vocab_file = os.path.join(directory, 'vocab.txt')
    else:
        raise SystemExit('Error: no vocabulary file found in the directory.')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    with open(options_file, 'r') as f:
        m_options = json.load(f)
    max_chars = m_options['char_cnn']['max_characters_per_token']

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, max_chars)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.compat.v1.placeholder('int32', shape=(None, None, max_chars))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=128)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)

    return batcher, sentence_character_ids, elmo_sentence_input
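# A minimal usage sketch (not part of the original snippets) for the
# load_elmo_embeddings helper above, following the batch-and-run pattern used
# elsewhere in this file; the model directory and sentences are placeholders
# you would replace with your own data.
batcher, sentence_character_ids, elmo_sentence_input = load_elmo_embeddings('path/to/elmo_model')

raw_sentences = [['Hello', 'world', '!'], ['Another', 'sentence', '.']]
with tf.compat.v1.Session() as sess:
    # Variables must be initialized once before running inference.
    sess.run(tf.compat.v1.global_variables_initializer())
    char_ids = batcher.batch_sentences(raw_sentences)
    elmo_vectors = sess.run(
        elmo_sentence_input['weighted_op'],
        feed_dict={sentence_character_ids: char_ids})
    # elmo_vectors has shape (batch_size, max_sentence_length, elmo_dim)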
def embed_sent_batch(self, sentences, length):
    sentences_tokenid = self._token_batcher.batch_sentences(sentences)
    # s_tokenid = s_tokenid[1:][:-1]
    tf.reset_default_graph()

    processed_sentences_tokenid = []
    length += 2  # Take into account <s> and </s>
    for s_tokenid in sentences_tokenid:
        if len(s_tokenid) >= length:
            s_tokenid = s_tokenid[:length]
        else:
            s_tokenid = np.pad(s_tokenid, (0, length - len(s_tokenid)),
                               'constant', constant_values=(0))
        #s_tokenid = np.expand_dims(s_tokenid, axis=0)
        processed_sentences_tokenid.append(s_tokenid)

    batch_size = len(processed_sentences_tokenid)
    processed_sentences_tokenid = np.array(processed_sentences_tokenid)

    # tf
    with tf.device("/cpu:0"):
        context_token_ids = tf.placeholder('int32', shape=(batch_size, length))
        context_embeddings_op = self._bilm(context_token_ids)
        elmo_context_output = weight_layers(
            'output', context_embeddings_op, l2_coef=0.0)['weighted_op']

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    print('++++++Check_point_1\n')
    with tf.Session(config=config) as sess:
        sess.run([tf.global_variables_initializer()])
        elmo_context_output_ = sess.run(
            [elmo_context_output],
            feed_dict={context_token_ids: processed_sentences_tokenid})[0]
        #print (elmo_context_output_.shape)

    return elmo_context_output_
def __init__(self, session, bilm_params):
    self.params = bilm_params

    # Create a Batcher to map text to character ids.
    self.batcher = Batcher(self.params.vocab_file, self.params.max_char_len)

    # Input placeholders to the biLM.
    self.sentence_character_ids = tf.placeholder(
        'int32', shape=(None, None, self.params.max_char_len))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(
        self.params.options_file,
        self.params.weights_file,
    )

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(self.sentence_character_ids)
    self.elmo_sentence_input = weight_layers(
        'input', sentence_embeddings_op, l2_coef=0.0, use_top_only=True)

    self.sess = session
    self.sess.run(tf.global_variables_initializer())
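# A hedged sketch (not from the original class) of how an embedding method for
# the wrapper above could look, reusing the session and ops built in __init__;
# the method name embed_batch is hypothetical.
def embed_batch(self, tokenized_sentences):
    # Map tokens to character ids and run the weighted-average ELMo op.
    char_ids = self.batcher.batch_sentences(tokenized_sentences)
    return self.sess.run(
        self.elmo_sentence_input['weighted_op'],
        feed_dict={self.sentence_character_ids: char_ids})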
def load_elmo_embeddings(directory, top=True):
    if os.path.isfile(os.path.join(directory, 'vocab.txt.gz')):
        vocab_file = os.path.join(directory, 'vocab.txt.gz')
    elif os.path.isfile(os.path.join(directory, 'vocab.txt')):
        vocab_file = os.path.join(directory, 'vocab.txt')
    else:
        raise SystemExit('Error: no vocabulary file found in the directory.')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, 50)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.placeholder('int32', shape=(None, None, 50))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=300)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    # Our model includes ELMo at both the input and output layers of the task GRU,
    # so we need 2x ELMo representations at each of the input and output.
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)

    return batcher, sentence_character_ids, elmo_sentence_input
def call(self, x, mask=None):
    context_embeddings_op = self.bilm(x)
    elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
    elmo = elmo_context_input['weighted_op']
    return elmo
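# A minimal sketch (assumed, not from the original source) of the Keras-style
# layer the call method above could belong to; options_file and weight_file are
# assumed paths to a pretrained ELMo model, and the class name ElmoLayer is
# hypothetical.
class ElmoLayer(tf.keras.layers.Layer):
    def __init__(self, options_file, weight_file, **kwargs):
        super(ElmoLayer, self).__init__(**kwargs)
        # Build the biLM graph once; call() reuses it on each input tensor.
        self.bilm = BidirectionalLanguageModel(options_file, weight_file)

    def call(self, x, mask=None):
        context_embeddings_op = self.bilm(x)
        elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
        return elmo_context_input['weighted_op']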
def add_elmo_embedding_layer(self, options_file, weight_file, output_use=False):
    """
    Adds ELMo lstm embeddings to the graph.
    1. self.elmo_context_input (batch size, max_context_len among the batch, 1024)
    2. self.elmo_question_input (batch size, max_qn_len among the batch, 1024)
    If output_use is True, the output-layer representations are added to the graph as well.

    Inputs:
      options_file: json file for the pretrained model
      weight_file: weights hdf5 file for the pretrained model
      output_use: whether to use ELMo at the output of the biRNN (default False)
    """
    # Build biLM graph
    bilm = BidirectionalLanguageModel(options_file, weight_file)
    context_embeddings_op = bilm(self.context_elmo)
    question_embeddings_op = bilm(self.qn_elmo)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    # Our SQuAD model includes ELMo at both the input and output layers
    # of the task GRU, so we need 4x ELMo representations for the question
    # and context at each of the input and output.
    # We use the same ELMo weights for both the question and context
    # at each of the input and output.

    # Compute the final ELMo representations.
    self.elmo_context_input = weight_layers(
        'input', context_embeddings_op, l2_coef=0.001
    )['weighted_op']  # (batch size, max_context_len among the batch, 1024)
    with tf.variable_scope('', reuse=True):
        # the reuse=True scope reuses weights from the context for the question
        self.elmo_question_input = weight_layers(
            'input', question_embeddings_op, l2_coef=0.001)['weighted_op']

    if output_use:
        self.elmo_context_output = weight_layers(
            'output', context_embeddings_op, l2_coef=0.001)['weighted_op']
        with tf.variable_scope('', reuse=True):
            # the reuse=True scope reuses weights from the context for the question
            self.elmo_question_output = weight_layers(
                'output', question_embeddings_op, l2_coef=0.001)['weighted_op']
def __elmo_embedding(self, inputs, masks, keep_prob=0.8):
    """Compute ELMo embeddings."""
    from bilm import weight_layers
    elmo_embeddings_op = self.elmo_bilm(inputs)
    elmo_input = weight_layers('input', elmo_embeddings_op, l2_coef=0.0)
    elmo_embeddings = elmo_input['weighted_op']  # (batch_size, sentence_length, elmo_dim)
    # masking (remove noise due to padding)
    elmo_embeddings *= masks
    return tf.nn.dropout(elmo_embeddings, keep_prob)
def word_embedding(self):
    bilm = BidirectionalLanguageModel(
        self.options_file,
        self.weight_file,
        use_character_inputs=False,
        embedding_weight_file=self.token_embedding_file)

    context_embeddings_op = bilm(self.W_P)
    question_embeddings_op = bilm(self.W_Q)

    elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
    with tf.variable_scope('', reuse=True):
        # the reuse=True scope reuses weights from the context for the question
        elmo_question_input = weight_layers('input', question_embeddings_op, l2_coef=0.0)

    self.p_embed, self.q_embed = (
        elmo_context_input['weighted_op'], elmo_question_input['weighted_op'])
def elmo_embedding(options_file, weight_file, token_a_character_ids, token_b_character_ids):
    # Input placeholders to the biLM.
    # token_a_character_ids = tf.placeholder('int32', shape=(None, None, 50))
    # token_b_character_ids = tf.placeholder('int32', shape=(None, None, 50))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file)

    # Get ops to compute the LM embeddings.
    token_a_embeddings_op = bilm(token_a_character_ids)
    token_b_embeddings_op = bilm(token_b_character_ids)

    elmo_token_a = weight_layers('input', token_a_embeddings_op, l2_coef=0.0)
    with tf.variable_scope('', reuse=True):
        # the reuse=True scope reuses weights from the context for the question
        elmo_token_b = weight_layers('input', token_b_embeddings_op, l2_coef=0.0)

    return elmo_token_a['weighted_op'], elmo_token_b['weighted_op']
def __init__(self, config):
    self.lr = config["lr"]
    self.input_dropout = config["dropout"]
    self.lstm_dim = config["lstm_dim"]
    self.layer_type = config["layer_type"]
    self.use_attention = config["attention"]
    self.num_attention_heads = config['num_attention_heads']
    self.size_per_head = config['size_per_head']
    self.num_tags = 7
    self.char_dim = 300
    self.global_step = tf.Variable(0, trainable=False)
    self.best_dev_f1 = tf.Variable(0.0, trainable=False)
    self.initializer = initializers.xavier_initializer()

    # elmo
    self.batcher = TokenBatcher(config['vocab_file'])

    # Input placeholders to the biLM.
    self.context_token_ids = tf.placeholder('int32', shape=(None, None))

    # Build the biLM graph.
    self.bilm = BidirectionalLanguageModel(
        config['options_file'],
        config['weight_file'],
        use_character_inputs=False,
        embedding_weight_file=config['token_embedding_file'])
    self.context_embeddings_op = self.bilm(self.context_token_ids)
    self.elmo_context_input = weight_layers(
        'input', self.context_embeddings_op, l2_coef=0.0)['weighted_op']

    # add placeholders for the model
    self.mask_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="ChatInputs")
    self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Targets")
    # dropout keep prob
    self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")

    used = tf.sign(tf.abs(self.mask_inputs))
    length = tf.reduce_sum(used, reduction_indices=1)
    self.lengths = tf.cast(length, tf.int32)
    self.batch_size = tf.shape(self.mask_inputs)[0]
    self.num_steps = tf.shape(self.mask_inputs)[-1]

    self.logits = self.inference(self.elmo_context_input)

    # loss of the model
    self.loss = self.loss_layer(self.logits, self.lengths)
    self.train_op = self.train(self.loss)

    # saver of the model
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
def _add_elmo_embedding(self):
    """The Elmo embedding layer"""
    embeddings_op = self.elmo_bilm(self.elmo_p)
    self.elmo_emb = weight_layers('input', embeddings_op)['weighted_op']

    if self.elmo_mode == 1:
        # concat word emb and elmo emb
        self.embedding_layer = tf.concat(
            [self.embedding_layer, self.elmo_emb], 2)
    else:
        # Default: only use Elmo
        self.embedding_layer = self.elmo_emb
def _load_embeddings(self, vocab="vocab.txt", options="elmo_options.json",
                     weights="elmo_weights.hdf5"):
    self.elmo_model = BidirectionalLanguageModel(options, weights)
    self.batcher = Batcher(vocab, 50)
    self.character_ids = tf.placeholder('int32', shape=(None, None, 50))
    context_embeddings_op = self.elmo_model(self.character_ids)
    self.elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)
    tf.global_variables_initializer().run()
def __init__(self, path=embedding_path, embedding_dim=512,
             sentence_len=max_sentence_len, pair_mode=False):
    embeddings = dict()
    self.embedding_path = path
    self.embedding_dim = embedding_dim
    self.sentence_len = sentence_len
    self.pair_mode = pair_mode
    self.embedding_dict = embeddings

    g_elmo = tf.Graph()
    vocab_file = './bilmelmo/data/vocab.txt'
    options_file = './bilmelmo/try/options.json'
    weight_file = './bilmelmo/try/weights.hdf5'
    token_embedding_file = './bilmelmo/data/vocab_embedding.hdf5'

    with g_elmo.as_default():
        self.batcher = TokenBatcher(vocab_file)
        self.context_token_ids = tf.placeholder('int32', shape=(None, None))
        self.bilm = BidirectionalLanguageModel(
            options_file,
            weight_file,
            use_character_inputs=False,
            embedding_weight_file=token_embedding_file
        )
        self.context_embeddings_op = self.bilm(self.context_token_ids)
        self.elmo_context_input = weight_layers(
            'input', self.context_embeddings_op, l2_coef=0.0)
        self.elmo_context_output = weight_layers(
            'output', self.context_embeddings_op, l2_coef=0.0
        )
        init = tf.global_variables_initializer()

    sess_elmo = tf.Session(graph=g_elmo)
    sess_elmo.run(init)
    self.sess_elmo = sess_elmo
def weight_layers(self, name, bilm_ops, l2_coef=None,
                  use_top_only=False, do_layer_norm=False):
    '''
    Weight the layers of a biLM with trainable scalar weights to compute ELMo
    representations. See more details on
    https://github.com/allenai/bilm-tf/blob/81a4b54937f4dfb93308f709c1cf34dbb37c553e/bilm/elmo.py

    Returns a dict:
    {
        'weighted_op': op to compute weighted average for output,
        'regularization_op': op to compute regularization term
    }
    '''
    return weight_layers(name, bilm_ops, l2_coef, use_top_only, do_layer_norm)
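# A hedged usage sketch (not from the original source) for the wrapper above:
# model is assumed to be an instance of the class defining it, embeddings_op the
# result of calling a BidirectionalLanguageModel on input ids, and task_loss an
# existing scalar loss tensor.
elmo = model.weight_layers('input', embeddings_op, l2_coef=0.001)
elmo_vectors = elmo['weighted_op']       # weighted average of the biLM layers
l2_penalty = elmo['regularization_op']   # add to the task loss when l2_coef is set
total_loss = task_loss + l2_penalty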
def __init__(self, config):
    super(NERModel, self).__init__(config)
    self.idx_to_tag = {
        idx: tag for tag, idx in list(self.config.vocab_tags.items())
    }

    if self.config.use_elmo:
        # self.elmo_inputs = []
        self.batcher = Batcher(self.config.filename_words, 50)
        self.bilm = BidirectionalLanguageModel(
            self.config.filename_elmo_options,
            self.config.filename_elmo_weights)
        self.elmo_token_ids = tf.placeholder('int32', shape=(None, None, 50))
        self.elmo_embeddings_op = self.bilm(self.elmo_token_ids)
        self.elmo_embeddings_input = weight_layers(
            'input', self.elmo_embeddings_op, l2_coef=0.0)
def __lambda_layer(x):
    import tensorflow as tf
    from utils.files import ProjectPath
    from bilm import BidirectionalLanguageModel, all_layers, weight_layers

    x_input = tf.cast(x, tf.int32)
    input_dir = ProjectPath.from_dict(path_dict)
    options_file: str = input_dir.join("options.json").get()
    weight_file: str = input_dir.join("weights.hdf5").get()

    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        bilm = BidirectionalLanguageModel(options_file, weight_file)
        embedding_op = bilm(x_input)
        if mode == "weighted":
            return all_layers(embedding_op)
        else:
            context_input = weight_layers('input', embedding_op, l2_coef=0.0,
                                          use_top_only=(mode == "top"))
            return context_input['weighted_op']
def get_elmo_embeddings(config):
    batcher = Batcher(config.filename_words, 50)
    token_ids = tf.placeholder('int32', shape=(None, None, 50))
    bilm = BidirectionalLanguageModel(
        config.filename_elmo_options,
        config.filename_elmo_weights,
    )
    elmo_embeddings_op = bilm(token_ids)
    elmo_context_input = weight_layers('input', elmo_embeddings_op, l2_coef=0.0)

    with tf.Session() as sess:
        # It is necessary to initialize variables once before running inference.
        sess.run(tf.global_variables_initializer())

        # Create batches of data.
        train = CoNLLDataset(config.filename_train)
        sents_train = [entry[0] for entry in train]
        sent_ids_train = batcher.batch_sentences(sents_train)

        # Compute ELMo representations (here for the input only, for simplicity).
        # Each sentence is fed with a leading batch dimension and the per-sentence
        # representations are stacked along the batch axis.
        elmo_input = sess.run(elmo_context_input['weighted_op'],
                              feed_dict={token_ids: sent_ids_train[0:1]})
        for batch in sent_ids_train[1:]:
            elmo_input_ = sess.run(elmo_context_input['weighted_op'],
                                   feed_dict={token_ids: batch[np.newaxis, ...]})
            elmo_input = np.vstack((elmo_input, elmo_input_))

        test = CoNLLDataset(config.filename_test)
        sents_test = [entry[0] for entry in test]
        sent_ids_test = batcher.batch_sentences(sents_test)
        elmo_context_output_ = sess.run(elmo_context_input['weighted_op'],
                                        feed_dict={token_ids: sent_ids_test})

    return elmo_input, elmo_context_output_
def make_elmo(chars_batched):
    bilm = BidirectionalLanguageModel(
        options_file="data/elmo_2x4096_512_2048cnn_2xhighway_options.json",
        weight_file="data/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
        max_batch_size=128)

    lm = bilm(chars_batched)
    word_representations_padded = weight_layers('scalar_mix', lm, l2_coef=0.0)['weighted_op']

    # Strip off multiplication by gamma. Our parser has gamma=1 because there is a
    # projection matrix right after.
    word_representations_padded = word_representations_padded.op.inputs[0]

    with tf.variable_scope('', reuse=True):
        elmo_scalar_mix_matrix = tf.get_variable('scalar_mix_ELMo_W')

    tf.global_variables_initializer().run()
    tf.assign(elmo_scalar_mix_matrix, [
        float(sd['elmo.scalar_mix_0.scalar_parameters.0']),
        float(sd['elmo.scalar_mix_0.scalar_parameters.1']),
        float(sd['elmo.scalar_mix_0.scalar_parameters.2'])]).eval()

    # Switch from padded to packed representation.
    valid_mask = lm['mask']
    dim_padded = tf.shape(lm['mask'])[:2]
    mask_flat = tf.reshape(lm['mask'], (-1,))
    dim_flat = tf.shape(mask_flat)[:1]
    nonpad_ids = tf.to_int32(tf.where(mask_flat)[:, 0])
    word_reps_shape = tf.shape(word_representations_padded)
    word_representations_flat = tf.reshape(
        word_representations_padded, [-1, int(word_representations_padded.shape[-1])])
    word_representations = tf.gather(word_representations_flat, nonpad_ids)

    projected_annotations = tf.matmul(
        word_representations,
        tf.constant(sd['project_elmo.weight'].numpy().transpose()))

    return projected_annotations, nonpad_ids, dim_flat, dim_padded, valid_mask, lm['lengths']
def __init__(
        self,
        request_names=['train', 'valid', 'test'],
        new_names=['train', 'valid', 'test'],
        classes_name='classes',
        op_type='vectorizer',
        op_name='elmo',
        dimension=1024,
        file_type='bin',  #TODO: ?
        options_file='./embeddingsruwiki_pp_1.0_elmo/options.json',  #TODO: ?
        weights_file='./embeddingsruwiki_pp_1.0_elmo/weights.hdf5',  #TODO: ?
        vocab_file='./embeddingsruwiki_pp_1.0_elmo/vocab.txt'  #TODO: ?
):
    super().__init__(request_names, new_names, op_type, op_name)
    self.file_type = file_type
    self.classes_name = classes_name
    self.dimension = dimension

    # Location of pretrained LM.
    self.options_file = options_file
    self.weights_file = weights_file
    self.vocab_file = vocab_file

    # Create a Batcher to map text to character ids.
    char_per_token = 50
    self.batcher = Batcher(self.vocab_file, char_per_token)

    # Input placeholders to the biLM.
    self.character_ids = tf.placeholder('int32', shape=(None, None, char_per_token))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(self.options_file, self.weights_file)

    # Get ops to compute the LM embeddings.
    embeddings_op = bilm(self.character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    self.elmo_output = weight_layers('elmo_output', embeddings_op, l2_coef=0.0)
def embedding_layer(self, char_inputs, elmo_model, name=None):
    """
    :param char_inputs: one-hot encoding of sentence
    :param seg_inputs: segmentation feature
    :param config: whether to use segmentation feature
    :return: [1, num_steps, embedding size]
    """
    # embedding = []
    # with tf.variable_scope("char_embedding" if not name else name), tf.device('/cpu:0'):
    #     self.char_lookup = tf.get_variable(
    #         name="char_embedding",
    #         shape=[self.num_chars, self.char_dim],
    #         initializer=self.initializer)
    #     embedding.append(tf.nn.embedding_lookup(self.char_lookup, char_inputs))
    #     embed = tf.concat(embedding, axis=-1)

    # load ELMo embedding
    ops = self.elmo(self.ids)
    elmo_context_input = weight_layers('input', ops, l2_coef=0.0)
    elmo_embedding = elmo_context_input['weighted_op']
    return elmo_embedding
# Input placeholders to the biLM.
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))
question_character_ids = tf.placeholder('int32', shape=(None, None, 50))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(options_file, weight_file)

# Get ops to compute the LM embeddings.
context_embeddings_op = bilm(context_character_ids)
question_embeddings_op = bilm(question_character_ids)

# Get an op to compute ELMo (weighted average of the internal biLM layers).
# Our SQuAD model includes ELMo at both the input and output layers
# of the task GRU, so we need 4x ELMo representations for the question
# and context at each of the input and output.
# We use the same ELMo weights for both the question and context
# at each of the input and output.
elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
with tf.variable_scope('', reuse=True):
    # the reuse=True scope reuses weights from the context for the question
    elmo_question_input = weight_layers(
        'input', question_embeddings_op, l2_coef=0.0
    )

elmo_context_output = weight_layers(
    'output', context_embeddings_op, l2_coef=0.0
)
with tf.variable_scope('', reuse=True):
    # the reuse=True scope reuses weights from the context for the question
    elmo_question_output = weight_layers(
        'output', question_embeddings_op, l2_coef=0.0
    )
# We will use "${args.exptdir}/alltrain.epitope.elmo" as the model directory
model_dir = join(args.exptdir, 'alltrain.epitope.elmo', 'best_model')
vocab_file = join(args.exptdir, 'alltrain.epitope.vocab')
options_file = join(model_dir, 'pred.options.json')
weight_file = join(model_dir, 'weights.h5')

# Create a Batcher to map text to character ids.
batcher = Batcher(vocab_file, 50)

# Input placeholders to the biLM.
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))

bilm = BidirectionalLanguageModel(options_file, weight_file)
context_embeddings_op = bilm(context_character_ids)
elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

with tf.Session() as sess:
    # It is necessary to initialize variables once before running inference.
    sess.run(tf.global_variables_initializer())

    for trial in range(trial_num):
        t_topdir = join(
            args.datadir, 'trial' + str(trial + 1)) if args.trial_num >= 1 else args.datadir
        for dtype in dtypes:
# Input placeholders to the biLM
token_ids = tf.placeholder('int32', shape=(None, None))
y_label = tf.placeholder('float32', shape=(None, None, 17))

# Build the biLM graph
bilm = BidirectionalLanguageModel(options_file, weight_file,
                                  use_character_inputs=False,
                                  embedding_weight_file=token_embedding_file)

# Get ops to compute the LM embeddings
embeddings_op = bilm(token_ids)

# Get an op to compute ELMo (weighted average of the internal biLM layers)
elmo_input = weight_layers('input', embeddings_op, l2_coef=0.0)

hidden_dim = 512
dropout = 0.5

# Bidirectional layers
fw_cell = tf.contrib.rnn.BasicLSTMCell(hidden_dim, state_is_tuple=True)
bw_cell = tf.contrib.rnn.BasicLSTMCell(hidden_dim, state_is_tuple=True)
fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, 1 - dropout)
bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, 1 - dropout)

# shape (batch_num, length, hs_dim)
(outputs, (fw_st, bw_st)) = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, elmo_input['weighted_op'], dtype=tf.float32,