def __init__(self, sequence_length, num_hidden_layers, vocab_size, feature_dim_size, batch_size, num_heads, ff_hidden_size, initialization, num_sampled, num_neighbors, use_pos):
        # Placeholders for input, output
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.int32, [batch_size*sequence_length*num_neighbors, 1], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding layer
        with tf.name_scope("input_feature"):
            if initialization != []:
                self.input_feature = tf.get_variable(name="input_feature_1", initializer=initialization, trainable=False)
            else:
                self.input_feature = tf.get_variable(name="input_feature_2", shape=[vocab_size, feature_dim_size], initializer=tf.contrib.layers.xavier_initializer(seed=1234))
        # Inputs for the Transformer encoder
        self.inputTransfG = tf.nn.embedding_lookup(self.input_feature, self.input_x)
        self.inputTransfG = tf.expand_dims(self.inputTransfG, axis=-1)
        self.inputTransfG = squash(self.inputTransfG)
        self.inputTransfG = tf.reshape(self.inputTransfG, [batch_size, sequence_length, 1, feature_dim_size])

        self.hparams = transformer.transformer_base()
        self.hparams.hidden_size = feature_dim_size
        self.hparams.batch_size = batch_size * sequence_length
        self.hparams.max_length = sequence_length
        self.hparams.num_hidden_layers = num_hidden_layers
        self.hparams.num_heads = num_heads
        self.hparams.filter_size = ff_hidden_size
        self.hparams.use_target_space_embedding = False
        # No positional embedding
        if use_pos == 0:
            self.hparams.pos = None

        #Transformer Encoder
        self.encoder = transformer.TransformerEncoder(self.hparams, mode=tf.estimator.ModeKeys.TRAIN)
        self.outputEncoder = self.encoder({"inputs": self.inputTransfG, "targets": 0, "target_space_id": 0})[0]

        self.outputEncoder = tf.reshape(self.outputEncoder, [batch_size, sequence_length, feature_dim_size, 1])
        self.outputEncoder = squash(self.outputEncoder)

        self.outputEncoder = tf.squeeze(self.outputEncoder)

        self.outputEncoderInd = tf.reshape(self.outputEncoder, [batch_size*sequence_length, feature_dim_size])

        # Repeat each node representation num_neighbors times so that the rows
        # line up with the flattened neighbor labels in input_y.
        self.outputEncoder = tf.tile(self.outputEncoder, [1, 1, num_neighbors])
        self.outputEncoder = tf.reshape(self.outputEncoder, [batch_size*sequence_length*num_neighbors, feature_dim_size])

        self.outputEncoder = tf.nn.dropout(self.outputEncoder, self.dropout_keep_prob)

        with tf.name_scope("embedding"):
            self.embedding_matrix = tf.get_variable(
                    "W", shape=[vocab_size, feature_dim_size],
                    initializer=tf.contrib.layers.xavier_initializer(seed=1234))

            self.softmax_biases = tf.Variable(tf.zeros([vocab_size]))

        self.total_loss = tf.reduce_mean(
            tf.nn.sampled_softmax_loss(weights=self.embedding_matrix, biases=self.softmax_biases, inputs=self.outputEncoder,
                                       labels=self.input_y, num_sampled=num_sampled, num_classes=vocab_size))

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)
        tf.logging.info('Setting up the main structure')
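
# --- Hedged sketch (not from the original source) ---
# A self-contained, minimal illustration of the sampled-softmax objective that
# the constructor above ends with. The function name and all sizes here are
# assumptions chosen for the sketch; only the tf.nn.sampled_softmax_loss wiring
# mirrors the code above (TensorFlow 1.x).
import tensorflow as tf

def _sampled_softmax_sketch(vocab_size=1000, feature_dim_size=128,
                            num_rows=256, num_sampled=32):
    """Toy stand-in for the loss block above; sizes are placeholders."""
    # Encoder outputs, one row per (node, neighbor) pair.
    hidden = tf.placeholder(tf.float32, [num_rows, feature_dim_size])
    # Neighbor ids to predict, shaped [num_rows, 1] as sampled_softmax_loss expects.
    labels = tf.placeholder(tf.int64, [num_rows, 1])
    weights = tf.get_variable("W_sketch", shape=[vocab_size, feature_dim_size],
                              initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.Variable(tf.zeros([vocab_size]))
    return tf.reduce_mean(
        tf.nn.sampled_softmax_loss(weights=weights, biases=biases,
                                   labels=labels, inputs=hidden,
                                   num_sampled=num_sampled,
                                   num_classes=vocab_size))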
def transformer_encoding(node_seq_input, num_nodes, params, mode):
    """Construct a node-level encoder based on the transformer module.

    Args:
      node_seq_input: tf.Tensor. A tensor with 3 dimensions.
      num_nodes: tf.Tensor. Number of nodes per instance.
      params: dict. A parameter dictionary.
      mode: tf.estimator.ModeKeys object.

    Returns:
      node_seq_output: tf.Tensor. A tensor with 3 dimensions.
    """
    node_weights = tf.sequence_mask(num_nodes)

    hparams = transformer.transformer_tiny()
    hparams.hidden_size = params["transformer_hidden_unit"]
    hparams.num_heads = params["transformer_head"]
    hparams.num_hidden_layers = params["transformer_hidden_layer"]

    if hparams.hidden_size % hparams.num_heads != 0:
        raise ValueError(
            "hidden_size must be divisible by the number of attention heads "
            "(transformer_head).")

    transformer_encoder = transformer.TransformerEncoder(hparams, mode=mode)
    # The encoder expects inputs of shape [batch_size, sequence_length, 1, hidden_size].
    node_seq_input = tf.layers.dense(node_seq_input, hparams.hidden_size)
    node_seq_input_reshape = tf.expand_dims(node_seq_input, 2)
    # "targets" and "target_space_id" are required by the transformer decoder;
    # both are set to 0 since only the encoder is used here.
    node_seq_output = transformer_encoder(
        {
            "inputs": node_seq_input_reshape,
            "targets": 0,
            "target_space_id": 0,
        },
        nonpadding=node_weights)
    node_seq_output = tf.squeeze(node_seq_output[0], 2)
    # Residual connection: add the encoder input back to its output.
    node_seq_output = tf.add(node_seq_input, node_seq_output)
    return node_seq_output
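
# --- Hedged usage sketch (not from the original source) ---
# One possible way to call transformer_encoding above. The batch size, node
# count, feature width, and params values are assumptions for illustration;
# only the params keys match those read inside the function.
def _transformer_encoding_example():
    batch_size, max_nodes, input_dim = 4, 10, 32
    node_features = tf.placeholder(tf.float32, [batch_size, max_nodes, input_dim])
    num_nodes = tf.placeholder(tf.int32, [batch_size])  # valid nodes per instance
    params = {
        "transformer_hidden_unit": 64,  # must be divisible by transformer_head
        "transformer_head": 4,
        "transformer_hidden_layer": 2,
    }
    # Returns a [batch_size, max_nodes, transformer_hidden_unit] tensor.
    return transformer_encoding(node_features, num_nodes, params,
                                mode=tf.estimator.ModeKeys.TRAIN)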
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed to the placeholders
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # get hparams from tensor2tensor.models.transformer
            hparams = transformer.transformer_small()
            hparams.batch_size = self.config.batch_size
            hparams.learning_rate_decay_steps = 10000
            hparams.learning_rate_minimum = 3e-5

            # learning rate
            lr = learning_rate.learning_rate_schedule(hparams)
            self.learning_rate = lr

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # transformer layer
        with tf.variable_scope("transformer"):
            queries_expanded = tf.expand_dims(queries_embedded,
                                              axis=2,
                                              name="queries_expanded")
            replies_expanded = tf.expand_dims(replies_embedded,
                                              axis=2,
                                              name="replies_expanded")

            hparams = transformer.transformer_small()
            hparams.set_hparam("batch_size", self.config.batch_size)
            hparams.set_hparam("hidden_size", self.config.embed_dim)
            encoder = transformer.TransformerEncoder(hparams, mode=self.mode)

            self.queries_encoded = encoder({
                "inputs": queries_expanded,
                "targets": queries_expanded
            })[0]
            self.replies_encoded = encoder({
                "inputs": replies_expanded,
                "targets": replies_expanded
            })[0]

            self.queries_encoded = tf.squeeze(
                tf.reduce_sum(self.queries_encoded, axis=1, keep_dims=True))
            self.replies_encoded = tf.squeeze(
                tf.reduce_sum(self.replies_encoded, axis=1, keep_dims=True))

        with tf.variable_scope("sampling"):
            positive_mask = tf.eye(cur_batch_length)
            negative_mask = make_negative_mask(
                tf.zeros([cur_batch_length, cur_batch_length]),
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples)
            negative_queries_indices, negative_replies_indices = tf.split(
                tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

            self.distances = tf.matmul(self.queries_encoded,
                                       self.replies_encoded,
                                       transpose_b=True)
            self.distances_flattened = tf.reshape(self.distances, [-1])

            self.positive_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.reshape(positive_mask, [-1])))
            self.negative_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.reshape(negative_mask, [-1])))

            self.negative_queries_indices = tf.squeeze(
                negative_queries_indices)
            self.negative_replies_indices = tf.squeeze(
                negative_replies_indices)

            self.positive_inputs = tf.concat([
                self.queries_encoded, self.positive_distances,
                self.replies_encoded
            ], 1)
            self.negative_inputs = tf.reshape(
                tf.concat([
                    tf.nn.embedding_lookup(self.queries_encoded,
                                           self.negative_queries_indices),
                    self.negative_distances,
                    tf.nn.embedding_lookup(self.replies_encoded,
                                           self.negative_replies_indices)
                ], 1), [
                    tf.shape(negative_queries_indices)[0],
                    self.config.embed_dim * 2 + 1
                ])

        with tf.variable_scope("prediction"):
            self.hidden_outputs = tf.layers.dense(tf.concat(
                [self.positive_inputs, self.negative_inputs], 0),
                                                  256,
                                                  tf.nn.relu,
                                                  name="hidden_layer")
            self.logits = tf.layers.dense(self.hidden_outputs,
                                          2,
                                          tf.nn.relu,
                                          name="output_layer")
            labels = tf.concat([
                tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
                tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
            ], 0)

            self.labels = tf.one_hot(tf.to_int32(labels), 2)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.argmax(self.probs, 1)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels,
                                                           logits=self.logits))
            self.train_step = optimize.optimize(self.loss,
                                                lr,
                                                hparams,
                                                use_tpu=False)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed to the placeholders
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # get hparams from tensor2tensor.models.transformer
            hparams = transformer.transformer_small()
            hparams.batch_size = self.config.batch_size

            # learning rate
            lr = learning_rate.learning_rate_schedule(hparams)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # transformer layer
        with tf.variable_scope("transformer"):
            queries_expanded = tf.expand_dims(queries_embedded,
                                              axis=2,
                                              name="queries_expanded")
            replies_expanded = tf.expand_dims(replies_embedded,
                                              axis=2,
                                              name="replies_expanded")

            hparams = transformer.transformer_small()
            hparams.set_hparam("batch_size", self.config.batch_size)
            hparams.set_hparam("hidden_size", self.config.embed_dim)
            encoder = transformer.TransformerEncoder(hparams, mode=self.mode)

            self.queries_encoded = encoder({
                "inputs": queries_expanded,
                "targets": queries_expanded
            })[0]
            self.replies_encoded = encoder({
                "inputs": replies_expanded,
                "targets": replies_expanded
            })[0]

            self.queries_pooled = tf.nn.max_pool(
                self.queries_encoded,
                ksize=[1, self.config.max_length, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="queries_pooled")
            self.replies_pooled = tf.nn.max_pool(
                self.replies_encoded,
                ksize=[1, self.config.max_length, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="replies_pooled")

            self.queries_flattened = tf.reshape(self.queries_pooled,
                                                [cur_batch_length, -1])
            self.replies_flattened = tf.reshape(self.replies_pooled,
                                                [cur_batch_length, -1])

        # build dense layer
        with tf.variable_scope("dense_layer"):
            M = tf.get_variable(
                "M",
                shape=[self.config.embed_dim, self.config.embed_dim],
                initializer=tf.initializers.truncated_normal())
            M = tf.nn.dropout(M, self.dense_dropout_keep_prob)
            self.queries_transformed = tf.matmul(self.queries_flattened, M)

        with tf.variable_scope("sampling"):
            self.distances = tf.matmul(self.queries_transformed,
                                       self.replies_flattened,
                                       transpose_b=True)
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = tf.reshape(
                make_negative_mask(
                    self.distances,
                    method=self.config.negative_sampling,
                    num_negative_samples=self.num_negative_samples), [-1])

        with tf.variable_scope("prediction"):
            distances_flattened = tf.reshape(self.distances, [-1])
            self.positive_logits = tf.gather(distances_flattened,
                                             tf.where(positive_mask), 1)
            self.negative_logits = tf.gather(distances_flattened,
                                             tf.where(negative_mask), 1)

            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], axis=0)
            self.labels = tf.concat([
                tf.ones_like(self.positive_logits),
                tf.zeros_like(self.negative_logits)
            ],
                                    axis=0)

            self.positive_probs = tf.sigmoid(self.positive_logits)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                        logits=self.logits))
            self.train_step = optimize.optimize(self.loss,
                                                lr,
                                                hparams,
                                                use_tpu=False)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.to_int32(self.labels))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")