Example #1
    def make_ELMo(self):
        # Location of pretrained BiLM for the specified language
        # TBD check if ELMo language resources are present
        description = self._get_description('elmo-en')
        if description is not None:
            self.lang = description["lang"]
            vocab_file = description["path-vocab"]
            options_file = description["path-config"]
            weight_file = description["path_weights"]

            print('init ELMo')

            # Create a Batcher to map text to character ids (max 50 chars per token)
            self.batcher = Batcher(vocab_file, 50)

            # Build the biLM graph.
            self.bilm = BidirectionalLanguageModel(options_file, weight_file)

            # Input placeholder to the biLM: (batch_size, max_tokens, max_chars_per_token)
            self.character_ids = tf.placeholder('int32',
                                                shape=(None, None, 50))
            self.embeddings_op = self.bilm(self.character_ids)

            with tf.variable_scope('', reuse=tf.AUTO_REUSE):
                # AUTO_REUSE shares the ELMo mixing weights across all calls in this scope
                self.elmo_input = weight_layers('input',
                                                self.embeddings_op,
                                                l2_coef=0.0)
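
For context, here is a minimal sketch of how the graph built by make_ELMo is typically driven; the instance name `embeddings` and the toy sentences are assumptions, and the Batcher/weight_layers helpers come from the TF 1.x allenai/bilm-tf package.

import tensorflow as tf

# `embeddings` is a hypothetical instance of the class defining make_ELMo above
sentences = [['the', 'cat', 'sat'], ['dogs', 'bark']]
char_ids = embeddings.batcher.batch_sentences(sentences)  # tokens -> char ids

with tf.Session() as sess:
    # variables must be initialized once before running inference
    sess.run(tf.global_variables_initializer())
    vectors = sess.run(embeddings.elmo_input['weighted_op'],
                       feed_dict={embeddings.character_ids: char_ids})
# vectors: (batch_size, max_tokens, elmo_dim) weighted ELMo representations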
Example #2
    def get_sentence_vector_ELMo_with_token_dump(self, token_list):
        if not self.use_ELMo:
            print(
                "Warning: ELMo embeddings requested, but the embeddings object "
                "was not initialised with ELMo support"
            )
            return

        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            # AUTO_REUSE shares the ELMo mixing weights across all calls in this scope
            self.elmo_input_token_dump = weight_layers(
                'input', self.embeddings_op_token_dump, l2_coef=0.0)

        # Create batches of data
        local_token_ids = self.batcher_token_dump.batch_sentences(token_list)

        with tf.Session() as sess:
            # oddly, CPU is faster than GPU for this step (even on a 1080Ti)
            with tf.device("/cpu:0"):
                # It is necessary to initialize variables once before running inference
                sess.run(tf.global_variables_initializer())

                # Compute ELMo representations
                elmo_result = sess.run(
                    self.elmo_input_token_dump['weighted_op'],
                    feed_dict={self.token_ids: local_token_ids})
        return elmo_result
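
A hedged usage sketch for the method above; the instance name `emb` and the token lists are assumptions, and the *_token_dump batcher and embeddings op are expected to have been created elsewhere in the class.

# `emb` is a hypothetical, already-initialised embeddings object with use_ELMo=True
tokens = [['The', 'cat', 'sat', '.'], ['Dogs', 'bark', '.']]
elmo_vectors = emb.get_sentence_vector_ELMo_with_token_dump(tokens)
print(elmo_vectors.shape)  # e.g. (2, max_tokens, elmo_dim)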
Example #3
import tensorflow as tf
from bilm import weight_layers


def get_elmo_embedding(character_ids, elmo, sent_len):
    embeddings_op = elmo.bilm(character_ids)

    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        # AUTO_REUSE shares the ELMo mixing weights across all calls in this scope
        elmo_input = weight_layers('input', embeddings_op, l2_coef=0.0)
        elmo_embed = elmo_input['weighted_op']

    return elmo_embed
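
Continuing the example, a minimal sketch of calling get_elmo_embedding; the wrapper class, file paths, and the max word length of 50 are assumptions, with Batcher and BidirectionalLanguageModel taken from the TF 1.x allenai/bilm-tf package.

from bilm import Batcher, BidirectionalLanguageModel


class ElmoWrapper:
    # minimal stand-in exposing the .bilm attribute that get_elmo_embedding expects
    def __init__(self, options_file, weight_file):
        self.bilm = BidirectionalLanguageModel(options_file, weight_file)


elmo = ElmoWrapper('options.json', 'weights.hdf5')   # hypothetical paths
character_ids = tf.placeholder('int32', shape=(None, None, 50))
elmo_embed = get_elmo_embedding(character_ids, elmo, sent_len=None)

batcher = Batcher('vocab.txt', 50)                   # hypothetical vocab file
char_ids = batcher.batch_sentences([['hello', 'world']])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(elmo_embed, feed_dict={character_ids: char_ids})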