Example #1
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]
            query_max_length = tf.shape(self.input_queries)[1]
            reply_max_length = tf.shape(self.input_replies)[1]

            # learning rate and optimizer
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=50000,
                decay_rate=0.96)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")

            # word-level dropout on the embedding matrix (drops whole rows);
            # derive the row count from the matrix instead of hard-coding it
            embeddings = tf.nn.dropout(embeddings,
                                       keep_prob=self.embed_dropout_keep_prob,
                                       noise_shape=[tf.shape(embeddings)[0], 1])

            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

        # build LSTM layer
        with tf.variable_scope("query_lstm_layer") as vs:
            lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                   forget_bias=2.0,
                                                   use_peepholes=True,
                                                   state_is_tuple=True)
            lstm_cell_fw = tf.contrib.rnn.DropoutWrapper(
                lstm_cell_fw, input_keep_prob=self.lstm_dropout_keep_prob)

            lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                   forget_bias=2.0,
                                                   use_peepholes=True,
                                                   state_is_tuple=True)
            lstm_cell_bw = tf.contrib.rnn.DropoutWrapper(
                lstm_cell_bw, input_keep_prob=self.lstm_dropout_keep_prob)

            _, queries_encoded = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell_fw,
                cell_bw=lstm_cell_bw,
                inputs=queries_embedded,
                sequence_length=self.query_lengths,
                dtype=tf.float32)

            self.queries_encoded = tf.cast(
                tf.concat([queries_encoded[0].h, queries_encoded[1].h], 1),
                tf.float64)

        with tf.variable_scope("reply_lstm_layer") as vs:
            lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                   forget_bias=2.0,
                                                   use_peepholes=True,
                                                   state_is_tuple=True,
                                                   reuse=tf.AUTO_REUSE)
            lstm_cell_fw = tf.contrib.rnn.DropoutWrapper(
                lstm_cell_fw, input_keep_prob=self.lstm_dropout_keep_prob)

            lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                   forget_bias=2.0,
                                                   use_peepholes=True,
                                                   state_is_tuple=True,
                                                   reuse=tf.AUTO_REUSE)
            lstm_cell_bw = tf.contrib.rnn.DropoutWrapper(
                lstm_cell_bw, input_keep_prob=self.lstm_dropout_keep_prob)

            _, replies_encoded = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell_fw,
                cell_bw=lstm_cell_bw,
                inputs=replies_embedded,
                sequence_length=self.reply_lengths,
                dtype=tf.float32)

            self.replies_encoded = tf.cast(
                tf.concat([replies_encoded[0].h, replies_encoded[1].h], 1),
                tf.float64)

        # build dense layer
        with tf.variable_scope("dense_layer"):
            M = tf.get_variable(
                "M",
                shape=[self.config.lstm_dim * 2, self.config.lstm_dim * 2],
                initializer=tf.contrib.layers.xavier_initializer())
            # use the placeholder (not the static config value) so dropout can
            # be disabled at evaluation time; M and the placeholder are both
            # float32, so the dtypes match
            M = tf.nn.dropout(M, keep_prob=self.dense_dropout_keep_prob)
            self.queries_transformed = tf.matmul(self.queries_encoded,
                                                 tf.cast(M, tf.float64))

        with tf.variable_scope("sampling"):
            # use the dense-transformed queries so the M matrix above
            # actually participates in the graph
            self.distances = tf.matmul(self.queries_transformed,
                                       self.replies_encoded,
                                       transpose_b=True)
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = tf.reshape(
                make_negative_mask(
                    self.distances,
                    method=self.config.negative_sampling,
                    num_negative_samples=self.num_negative_samples), [-1])

        with tf.variable_scope("prediction"):
            distances_flattened = tf.reshape(self.distances, [-1])
            self.positive_logits = tf.gather(distances_flattened,
                                             tf.where(positive_mask), 1)
            self.negative_logits = tf.gather(distances_flattened,
                                             tf.where(negative_mask), 1)

            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], axis=0)
            self.labels = tf.concat([
                tf.ones_like(self.positive_logits),
                tf.zeros_like(self.negative_logits)
            ],
                                    axis=0)

            self.positive_probs = tf.sigmoid(self.positive_logits)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                        logits=self.logits))
            # gvs = self.optimizer.compute_gradients(self.loss)
            # capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
            # self.train_step = self.optimizer.apply_gradients(capped_gvs)
            self.train_step = self.optimizer.minimize(self.loss)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.to_int32(self.labels))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #2
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")
            self.weak_distances = tf.placeholder(tf.float32, [None, None],
                                                 name="weak_distances")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float32, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")
            self.add_echo = tf.placeholder(tf.bool, name="add_echo")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]
            query_max_length = tf.shape(self.input_queries)[1]
            reply_max_length = tf.shape(self.input_replies)[1]

            # learning rate and optimizer
            # self.optimizer = tf.train.GradientDescentOptimizer(self.config.learning_rate) # delstm1024_nsrandom9_ws_sgd_lr1e-1
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=50000,
                decay_rate=0.96)
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate)  # delstm1024_nsrandom9_ws_adam_lr1e-3

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

        # build LSTM layer
        with tf.variable_scope("lstm_layer") as vs:
            query_lstm_cell = tf.nn.rnn_cell.LSTMCell(
                self.config.lstm_dim,
                forget_bias=2.0,
                use_peepholes=True,
                state_is_tuple=True,
                # initializer=tf.orthogonal_initializer(),
            )
            query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(
                self.config.lstm_dim,
                forget_bias=2.0,
                use_peepholes=True,
                state_is_tuple=True,
                # initializer=tf.orthogonal_initializer(),
                reuse=True)
            reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            _, queries_encoded = tf.nn.dynamic_rnn(
                cell=query_lstm_cell,
                inputs=queries_embedded,
                sequence_length=tf.cast(self.query_lengths, tf.int32),
                dtype=tf.float32,
            )
            _, replies_encoded = tf.nn.dynamic_rnn(
                cell=reply_lstm_cell,
                inputs=replies_embedded,
                sequence_length=tf.cast(self.reply_lengths, tf.int32),
                dtype=tf.float32,
            )

            self.queries_encoded = tf.cast(queries_encoded.h, tf.float64)
            self.replies_encoded = tf.cast(replies_encoded.h, tf.float64)

        # build dense layer
        with tf.variable_scope("dense_layer"):
            M = tf.get_variable(
                "M",
                shape=[self.config.lstm_dim, self.config.lstm_dim],
                initializer=tf.initializers.truncated_normal())
            self.queries_transformed = tf.matmul(self.queries_encoded,
                                                 tf.cast(M, tf.float64))

        with tf.variable_scope("sampling"):
            self.distances = tf.matmul(self.queries_transformed,
                                       self.replies_encoded,
                                       transpose_b=True)
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = tf.reshape(
                make_negative_mask(
                    self.distances,
                    method=self.config.negative_sampling,
                    num_negative_samples=self.num_negative_samples), [-1])
            candidates_mask = positive_mask + negative_mask

        with tf.variable_scope("weak_supervision"):
            self.weak_positives = tf.gather(
                tf.reshape(self.weak_distances, [-1]), tf.where(positive_mask),
                1)
            self.weak_positives_tiled = tf.tile(self.weak_positives,
                                                [1, cur_batch_length])
            self.weak_distances_normalized = tf.maximum(
                0.,
                self.weak_distances * tf.reciprocal(self.weak_positives_tiled)
                - 1)

        with tf.variable_scope("prediction"):
            distances_flattened = tf.reshape(self.distances, [-1])
            self.positive_logits = tf.gather(distances_flattened,
                                             tf.where(positive_mask), 1)
            self.negative_logits = tf.gather(distances_flattened,
                                             tf.where(negative_mask), 1)
            self.positive_logits_tiled = tf.transpose(
                tf.tile(self.positive_logits, [1, cur_batch_length]))
            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], axis=0)
            self.labels = tf.concat([
                tf.ones_like(self.positive_logits),
                tf.zeros_like(self.negative_logits)
            ],
                                    axis=0)

        with tf.variable_scope("loss"):
            self.supervised_distances = tf.maximum(
                0.,
                tf.to_float(self.distances) -
                tf.to_float(self.positive_logits_tiled) +
                self.weak_distances_normalized)
            self.loss = tf.reduce_sum(
                tf.gather(tf.reshape(self.supervised_distances, [-1]),
                          tf.where(candidates_mask), 1))

            #gvs = self.optimizer.compute_gradients(self.loss)
            #capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
            #self.train_step = self.optimizer.apply_gradients(capped_gvs)
            self.train_step = self.optimizer.minimize(self.loss)

        with tf.variable_scope("score"):
            self.positive_probs = tf.sigmoid(self.positive_logits)
            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.to_int32(self.probs > 0.5)
            correct_predictions = tf.equal(self.predictions,
                                           tf.to_int32(self.labels))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #3
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float32, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")
            self.add_echo = tf.placeholder(tf.bool, name="add_echo")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # learning rate and optimizer
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=100000,
                decay_rate=0.9)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            queries_embedded = tf.expand_dims(
                tf.to_float(
                    tf.nn.embedding_lookup(embeddings,
                                           self.input_queries,
                                           name="queries_embedded")), -1)
            replies_embedded = tf.expand_dims(
                tf.to_float(
                    tf.nn.embedding_lookup(embeddings,
                                           self.input_replies,
                                           name="replies_embedded")), -1)

        # build CNN layer
        with tf.variable_scope("convolution_layer"):
            queries_pooled_outputs = list()
            replies_pooled_outputs = list()
            for i, filter_size in enumerate(self.filter_sizes):
                filter_shape = [
                    filter_size, self.config.embed_dim, 1,
                    self.config.num_filters
                ]
                with tf.name_scope(
                        "conv-maxpool-query-{}".format(filter_size)):
                    W = tf.Variable(tf.truncated_normal(filter_shape,
                                                        stddev=0.1),
                                    name="W")
                    b = tf.Variable(tf.constant(
                        0.1, shape=[self.config.num_filters]),
                                    name="b")
                    conv = tf.nn.conv2d(queries_embedded,
                                        W,
                                        strides=[1, 1, 1, 1],
                                        padding="VALID",
                                        name="conv")
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    pooled = tf.nn.max_pool(h,
                                            ksize=[
                                                1, self.config.max_length -
                                                filter_size + 1, 1, 1
                                            ],
                                            strides=[1, 1, 1, 1],
                                            padding="VALID",
                                            name="pool")
                    queries_pooled_outputs.append(pooled)

                with tf.name_scope(
                        "conv-maxpool-reply-{}".format(filter_size)):
                    W = tf.Variable(tf.truncated_normal(filter_shape,
                                                        stddev=0.1),
                                    name="W")
                    b = tf.Variable(tf.constant(
                        0.1, shape=[self.config.num_filters]),
                                    name="b")
                    conv = tf.nn.conv2d(
                        replies_embedded,
                        W,
                        strides=[1, 1, 1, 1],
                        padding="VALID",
                        name="conv",
                        # reuse=True,
                    )
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    pooled = tf.nn.max_pool(h,
                                            ksize=[
                                                1, self.config.max_length -
                                                filter_size + 1, 1, 1
                                            ],
                                            strides=[1, 1, 1, 1],
                                            padding="VALID",
                                            name="pool")
                    replies_pooled_outputs.append(pooled)

                    # conv_echo = tf.nn.conv2d(queries_embedded,
                    #                     W,
                    #                     strides=[1, 1, 1, 1],
                    #                     padding="VALID",
                    #                     name="conv",
                    #                     reuse=True)
                    # h_echo = tf.nn.relu(tf.nn.bias_add(conv_echo, b), name="relu_echo")
                    # pooled_echo = tf.nn.max_pool(h_echo,
                    #                         ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                    #                         strides=[1, 1, 1, 1],
                    #                         padding="VALID",
                    #                         name="pool_echo")
                    # echo_pooled_outputs.append(pooled_echo)

        # combine all pooled outputs
        num_filters_total = self.config.num_filters * len(self.filter_sizes)
        self.queries_encoded = tf.reshape(tf.concat(queries_pooled_outputs, 3),
                                          [-1, num_filters_total],
                                          name="queries_encoded")
        self.replies_encoded = tf.reshape(tf.concat(replies_pooled_outputs, 3),
                                          [-1, num_filters_total],
                                          name="replies_encoded")

        with tf.variable_scope("dense_layer"):
            M = tf.get_variable(
                "M",
                shape=[num_filters_total, num_filters_total],
                initializer=tf.contrib.layers.xavier_initializer())
            self.queries_transformed = tf.matmul(self.queries_encoded, M)

        with tf.variable_scope("sampling"):
            self.distances = tf.matmul(self.queries_transformed,
                                       self.replies_encoded,
                                       transpose_b=True)
            # self.echo_distances = tf.matmul(self.queries_transformed, self.echo_encoded, transpose_b=True)
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = tf.reshape(
                make_negative_mask(
                    self.distances,
                    method=self.config.negative_sampling,
                    num_negative_samples=self.num_negative_samples), [-1])

        with tf.variable_scope("prediction"):
            distances_flattened = tf.reshape(self.distances, [-1])
            # echo_distances_flattened = tf.reshape(self.echo_distances, [-1])
            self.positive_logits = tf.gather(distances_flattened,
                                             tf.where(positive_mask), 1)
            self.negative_logits = tf.gather(distances_flattened,
                                             tf.where(negative_mask), 1)

            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], axis=0)
            self.labels = tf.concat([
                tf.ones_like(self.positive_logits),
                tf.zeros_like(self.negative_logits)
            ],
                                    axis=0)

            # self.echo_logits = tf.gather(echo_distances_flattened, tf.where(positive_mask), 1)

            # self.logits = tf.cond(self.add_echo,
            #                       lambda: tf.concat([self.positive_logits,
            #                                          self.negative_logits,
            #                                          self.echo_logits], axis=0),
            #                       lambda: tf.concat([self.positive_logits,
            #                                          self.negative_logits], axis=0))
            # self.labels = tf.cond(self.add_echo,
            #                       lambda: tf.concat([tf.ones_like(self.positive_logits),
            #                                          tf.zeros_like(self.negative_logits),
            #                                          tf.zeros_like(self.echo_logits)], axis=0),
            #                       lambda: tf.concat([tf.ones_like(self.positive_logits),
            #                                          tf.zeros_like(self.negative_logits)], axis=0))

            self.positive_probs = tf.sigmoid(self.positive_logits)
            #  self.echo_probs = tf.sigmoid(self.echo_logits)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                        logits=self.logits))
            gvs = self.optimizer.compute_gradients(self.loss)
            capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
            self.train_step = self.optimizer.apply_gradients(capped_gvs)
            # self.train_step = self.optimizer.minimize(self.loss)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.to_int32(self.labels))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #4
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # learning rate and optimizer
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=100000,
                decay_rate=0.96)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

        # gru layer
        with tf.variable_scope("gru_layer"):
            sentence_gru_cell = tf.nn.rnn_cell.GRUCell(
                self.config.lstm_dim,
                kernel_initializer=tf.initializers.orthogonal(),
                reuse=tf.AUTO_REUSE)
            sentence_gru_cell = tf.contrib.rnn.DropoutWrapper(
                sentence_gru_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            self.query_rnn_outputs, _ = tf.nn.dynamic_rnn(
                sentence_gru_cell,
                queries_embedded,
                sequence_length=self.query_lengths,
                dtype=tf.float32,
                scope="sentence_gru")
            self.reply_rnn_outputs, _ = tf.nn.dynamic_rnn(
                sentence_gru_cell,
                replies_embedded,
                sequence_length=self.reply_lengths,
                dtype=tf.float32,
                scope="sentence_gru")

        # negative sampling
        with tf.variable_scope("negative_sampling"):
            negative_mask = make_negative_mask(
                tf.zeros([cur_batch_length, cur_batch_length]),
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples)
            negative_queries_indices, negative_replies_indices = tf.split(
                tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

            self.negative_queries_indices = tf.squeeze(
                negative_queries_indices)
            self.negative_replies_indices = tf.squeeze(
                negative_replies_indices)
            self.num_negatives = tf.shape(self.negative_replies_indices)[0]

            queries_embedded_neg = tf.nn.embedding_lookup(
                queries_embedded, self.negative_queries_indices)
            replies_embedded_neg = tf.nn.embedding_lookup(
                replies_embedded, self.negative_replies_indices)

            self.query_rnn_outputs_neg = tf.reshape(
                tf.nn.embedding_lookup(self.query_rnn_outputs,
                                       self.negative_queries_indices),
                [
                    self.num_negatives, self.config.max_length,
                    self.config.lstm_dim
                ])
            self.reply_rnn_outputs_neg = tf.reshape(
                tf.nn.embedding_lookup(self.reply_rnn_outputs,
                                       self.negative_replies_indices),
                [
                    self.num_negatives, self.config.max_length,
                    self.config.lstm_dim
                ])

        # build matrix for convolution
        with tf.variable_scope("matrix"):
            A_matrix = tf.get_variable(
                "A_matrix_v",
                shape=(self.config.lstm_dim, self.config.lstm_dim),
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)

            replies_embedded_transposed = tf.transpose(replies_embedded,
                                                       [0, 2, 1])
            reply_rnn_outputs_transposed = tf.transpose(
                self.reply_rnn_outputs, [0, 2, 1])
            replies_embedded_neg_transposed = tf.transpose(
                replies_embedded_neg, [0, 2, 1])
            reply_rnn_outputs_neg_transposed = tf.transpose(
                self.reply_rnn_outputs_neg, [0, 2, 1])

            embed_matrix = tf.matmul(queries_embedded,
                                     replies_embedded_transposed)

            rnn_outputs = tf.einsum("aij,jk->aik", self.query_rnn_outputs,
                                    A_matrix)
            rnn_outputs = tf.matmul(rnn_outputs, reply_rnn_outputs_transposed)
            self.matrix_stacked = tf.stack([embed_matrix, rnn_outputs],
                                           axis=3,
                                           name="matrix_stacked")

        # build negative matrix for convolution
        with tf.variable_scope("matrix", reuse=True):
            A_matrix_neg = tf.get_variable(
                "A_matrix_v",
                shape=(self.config.lstm_dim, self.config.lstm_dim),
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)

            embed_matrix_neg = tf.matmul(queries_embedded_neg,
                                         replies_embedded_neg_transposed)

            rnn_outputs_neg = tf.einsum("aij,jk->aik",
                                        self.query_rnn_outputs_neg,
                                        A_matrix_neg)
            rnn_outputs_neg = tf.matmul(rnn_outputs_neg,
                                        reply_rnn_outputs_neg_transposed)
            self.matrix_stacked_neg = tf.stack(
                [embed_matrix_neg, rnn_outputs_neg],
                axis=3,
                name="matrix_stacked_neg")

        # cnn layer
        with tf.variable_scope("convolution_layer"):
            conv = tf.layers.conv2d(
                self.matrix_stacked,
                filters=8,
                kernel_size=(3, 3),
                padding="VALID",
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.nn.relu,
                reuse=None,
                name="conv")
            pooled = tf.layers.max_pooling2d(conv, (3, 3),
                                             strides=(3, 3),
                                             padding="VALID",
                                             name="max_pooling")
            self.hidden_outputs = tf.expand_dims(
                tf.layers.dense(
                    tf.contrib.layers.flatten(pooled),
                    50,
                    kernel_initializer=tf.contrib.layers.xavier_initializer()),
                -1)

        # cnn layer
        with tf.variable_scope("convolution_layer", reuse=True):
            conv_neg = tf.layers.conv2d(
                self.matrix_stacked_neg,
                filters=8,
                kernel_size=(3, 3),
                padding="VALID",
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.nn.relu,
                reuse=True,
                name="conv")
            pooled_neg = tf.layers.max_pooling2d(conv_neg, (3, 3),
                                                 strides=(3, 3),
                                                 padding="VALID",
                                                 name="max_pooling_neg")
            self.hidden_outputs_neg = tf.expand_dims(
                tf.layers.dense(
                    tf.contrib.layers.flatten(pooled_neg),
                    50,
                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                    reuse=True), -1)

        # matching gru layer
        with tf.variable_scope("matching_gru_layer"):
            matching_gru_cell = tf.nn.rnn_cell.GRUCell(
                self.config.lstm_dim,
                kernel_initializer=tf.initializers.orthogonal(),
                name="gru_cell",
                reuse=tf.AUTO_REUSE)

            _, positive_state = tf.nn.dynamic_rnn(matching_gru_cell,
                                                  self.hidden_outputs,
                                                  dtype=tf.float32,
                                                  scope="matching_gru")

            _, negative_state = tf.nn.dynamic_rnn(matching_gru_cell,
                                                  self.hidden_outputs_neg,
                                                  dtype=tf.float32,
                                                  scope="matching_gru")

            self.positive_logits = tf.layers.dense(
                positive_state,
                2,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name="predict")

            self.negative_logits = tf.layers.dense(
                negative_state,
                2,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name="predict",
                reuse=True)

        # build loss
        with tf.variable_scope("loss"):
            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], 0)
            self.positive_probs = tf.nn.softmax(self.positive_logits)
            self.probs = tf.nn.softmax(self.logits)
            # softmax cross-entropy needs valid one-hot rows: class 1 for
            # positives, class 0 for negatives (ones_like/zeros_like would
            # produce invalid [1, 1] and [0, 0] label distributions)
            self.labels = tf.concat([
                tf.one_hot(
                    tf.ones([tf.shape(self.positive_logits)[0]], tf.int32), 2),
                tf.one_hot(
                    tf.zeros([tf.shape(self.negative_logits)[0]], tf.int32), 2)
            ], 0)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.labels, logits=self.logits)
            self.loss = tf.reduce_mean(losses)
            self.train_step = self.optimizer.minimize(self.loss)

        with tf.variable_scope("score"):
            self.predictions = tf.argmax(self.probs, 1)
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #5
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # get hparams from tensor2tensor.models.transformer
            hparams = transformer.transformer_small()
            hparams.batch_size = self.config.batch_size
            hparams.learning_rate_decay_steps = 10000
            hparams.learning_rate_minimum = 3e-5

            # learning rate
            lr = learning_rate.learning_rate_schedule(hparams)
            self.learning_rate = lr

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # transformer layer
        with tf.variable_scope("transformer"):
            queries_expanded = tf.expand_dims(queries_embedded,
                                              axis=2,
                                              name="queries_expanded")
            replies_expanded = tf.expand_dims(replies_embedded,
                                              axis=2,
                                              name="replies_expanded")

            hparams = transformer.transformer_small()
            hparams.set_hparam("batch_size", self.config.batch_size)
            hparams.set_hparam("hidden_size", self.config.embed_dim)
            encoder = transformer.TransformerEncoder(hparams, mode=self.mode)

            self.queries_encoded = encoder({
                "inputs": queries_expanded,
                "targets": queries_expanded
            })[0]
            self.replies_encoded = encoder({
                "inputs": replies_expanded,
                "targets": replies_expanded
            })[0]

            self.queries_encoded = tf.squeeze(
                tf.reduce_sum(self.queries_encoded, axis=1, keep_dims=True))
            self.replies_encoded = tf.squeeze(
                tf.reduce_sum(self.replies_encoded, axis=1, keep_dims=True))

        with tf.variable_scope("sampling"):
            positive_mask = tf.eye(cur_batch_length)
            negative_mask = make_negative_mask(
                tf.zeros([cur_batch_length, cur_batch_length]),
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples)
            negative_queries_indices, negative_replies_indices = tf.split(
                tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

            self.distances = tf.matmul(self.queries_encoded,
                                       self.replies_encoded,
                                       transpose_b=True)
            self.distances_flattened = tf.reshape(self.distances, [-1])

            self.positive_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.reshape(positive_mask, [-1])))
            self.negative_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.reshape(negative_mask, [-1])))

            self.negative_queries_indices = tf.squeeze(
                negative_queries_indices)
            self.negative_replies_indices = tf.squeeze(
                negative_replies_indices)

            self.positive_inputs = tf.concat([
                self.queries_encoded, self.positive_distances,
                self.replies_encoded
            ], 1)
            self.negative_inputs = tf.reshape(
                tf.concat([
                    tf.nn.embedding_lookup(self.queries_encoded,
                                           self.negative_queries_indices),
                    self.negative_distances,
                    tf.nn.embedding_lookup(self.replies_encoded,
                                           self.negative_replies_indices)
                ], 1), [
                    tf.shape(negative_queries_indices)[0],
                    self.config.embed_dim * 2 + 1
                ])

        with tf.variable_scope("prediction"):
            self.hidden_outputs = tf.layers.dense(tf.concat(
                [self.positive_inputs, self.negative_inputs], 0),
                                                  256,
                                                  tf.nn.relu,
                                                  name="hidden_layer")
            self.logits = tf.layers.dense(self.hidden_outputs,
                                          2,
                                          tf.nn.relu,
                                          name="output_layer")
            labels = tf.concat([
                tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
                tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
            ], 0)

            self.labels = tf.one_hot(tf.to_int32(labels), 2)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.argmax(self.probs, 1)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels,
                                                           logits=self.logits))
            self.train_step = optimize.optimize(self.loss,
                                                lr,
                                                hparams,
                                                use_tpu=False)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #6
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get the next batch when no data is fed in
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]
            query_max_length = tf.shape(self.input_queries)[1]
            reply_max_length = tf.shape(self.input_replies)[1]

            # learning rate and optimizer
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=20000,
                decay_rate=0.96)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            # word-level dropout on the embedding matrix (drops whole rows);
            # derive the row count from the matrix instead of hard-coding it
            embeddings = tf.nn.dropout(embeddings,
                                       keep_prob=self.embed_dropout_keep_prob,
                                       noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))
            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # build LSTM layer
        with tf.variable_scope("lstm_layer") as vs:
            query_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                      forget_bias=2.0,
                                                      use_peepholes=True,
                                                      state_is_tuple=True)
            query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                      forget_bias=2.0,
                                                      use_peepholes=True,
                                                      state_is_tuple=True,
                                                      reuse=True)
            reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            queries_encoded, queries_state = tf.nn.dynamic_rnn(
                cell=query_lstm_cell,
                inputs=queries_embedded,
                sequence_length=tf.cast(self.query_lengths, tf.int32),
                dtype=tf.float32,
            )
            replies_encoded, replies_state = tf.nn.dynamic_rnn(
                cell=reply_lstm_cell,
                inputs=replies_embedded,
                sequence_length=tf.cast(self.reply_lengths, tf.int32),
                dtype=tf.float32,
            )

            self.queries_encoded = tf.expand_dims(queries_encoded, -1)
            self.replies_encoded = tf.expand_dims(replies_encoded, -1)

        # Create a convolution + maxpool layer for each filter size
        queries_pooled_outputs = list()
        replies_pooled_outputs = list()

        for i, filter_size in enumerate([1, 2, 3, 4, 5]):
            filter_shape = [filter_size, self.config.lstm_dim, 1, 128]

            # queries
            with tf.name_scope("conv-maxpool-query-%s" % filter_size):
                # Convolution Layer
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[128]), name="bias")
                conv = tf.nn.conv2d(self.queries_encoded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")

                queries_pooled_outputs.append(pooled)

            # replies
            with tf.name_scope("conv-maxpool-reply-%s" % filter_size):
                # Convolution Layer
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[128]), name="bias")
                conv = tf.nn.conv2d(self.replies_encoded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")

                replies_pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = 128 * 5

        self.queries_conv_output = tf.reshape(
            tf.concat(queries_pooled_outputs, 3), [-1, num_filters_total])
        self.replies_conv_output = tf.reshape(
            tf.concat(replies_pooled_outputs, 3), [-1, num_filters_total])
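        # queries_conv_output / replies_conv_output have shape
        # [batch, num_filters_total]: one max-pooled value per filter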

        with tf.variable_scope("sampling"):
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = make_negative_mask(
                tf.zeros([cur_batch_length, cur_batch_length]),
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples)
            negative_queries_indices, negative_replies_indices = tf.split(
                tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

            self.negative_queries_indices = tf.squeeze(
                negative_queries_indices)
            self.negative_replies_indices = tf.squeeze(
                negative_replies_indices)

            self.distances = tf.matmul(queries_state.h,
                                       replies_state.h,
                                       transpose_b=True)
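            # distances[i][j] is the dot product of the final LSTM states of
            # query i and reply j; the diagonal holds the true-pair scores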
            self.distances_flattened = tf.reshape(self.distances, [-1])
            # tf.where needs a bool condition, so compare the float masks
            # against zero (the stray trailing "1" was the deprecated
            # validate_indices argument)
            self.positive_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.not_equal(positive_mask, 0)))
            self.negative_distances = tf.gather(
                self.distances_flattened,
                tf.where(tf.not_equal(tf.reshape(negative_mask, [-1]), 0)))

            self.positive_inputs = tf.concat([
                self.queries_conv_output, self.positive_distances,
                self.replies_conv_output
            ], 1)
            self.negative_inputs = tf.reshape(
                tf.concat([
                    tf.nn.embedding_lookup(self.queries_conv_output,
                                           self.negative_queries_indices),
                    self.negative_distances,
                    tf.nn.embedding_lookup(self.replies_conv_output,
                                           self.negative_replies_indices)
                ], 1), [
                    tf.shape(negative_queries_indices)[0],
                    num_filters_total * 2 + 1
                ])
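            # each negative row is [query features | distance | reply
            # features], mirroring the layout of positive_inputs above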

            self.num_positives = tf.shape(self.positive_inputs)[0]
            self.num_negatives = tf.shape(self.negative_inputs)[0]

        # hidden layer
        with tf.name_scope("hidden"):
            W = tf.get_variable(
                "W_hidden",
                shape=[2 * num_filters_total + 1, 100],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[100]), name="bias")
            self.hidden_output = tf.nn.relu(
                tf.nn.xw_plus_b(tf.concat(
                    [self.positive_inputs, self.negative_inputs], 0),
                                W,
                                b,
                                name="hidden_output"))

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.hidden_output,
                                        self.dense_dropout_keep_prob,
                                        name="hidden_output_drop")

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W_output",
                shape=[100, 1],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[1]), name="bias")
            self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")

            self.positive_logits, self.negative_logits = tf.split(
                self.logits, [self.num_positives, self.num_negatives])
            self.probs = tf.sigmoid(self.logits)
            # squeeze so predictions are [N]; comparing the raw [N, 1] probs
            # against the [N] labels below would broadcast to [N, N]
            self.predictions = tf.to_int32(tf.squeeze(self.probs, [1]) > 0.5,
                                           name="predictions")

            labels = tf.concat([
                tf.ones([self.num_positives], tf.float64),
                tf.zeros([self.num_negatives], tf.float64)
            ], 0)

            self.labels = tf.to_int32(labels)

        with tf.variable_scope("loss"):
            self.positive_scores = tf.expand_dims(self.positive_logits, 1)
            self.negative_scores = self.negative_logits
            self.ranking_loss = tf.reduce_sum(
                tf.maximum(
                    0.0, self.config.hinge_loss - self.positive_scores +
                    self.negative_scores))
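            # broadcasting the [P, 1, 1] positive scores against the [N, 1]
            # negative scores forms every positive/negative pair, i.e. a
            # pairwise hinge loss with margin config.hinge_loss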
            l2_vars = [
                v for v in tf.trainable_variables()
                if 'bias' not in v.name and 'embedding' not in v.name
            ]
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in l2_vars])

            self.loss = self.ranking_loss + l2_loss
            self.train_step = self.optimizer.minimize(
                self.loss, global_step=self.global_step_tensor)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions, self.labels)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example No. 7
    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get next batch if there is no feeded data
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # learning rate and optimizer
            learning_rate = tf.train.exponential_decay(
                self.config.learning_rate,
                self.global_step_tensor,
                decay_steps=100000,
                decay_rate=0.96)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            # noise_shape must track the actual vocabulary size; the
            # hard-coded [90000, 1] only worked for one particular vocab
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))
            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # build LSTM layer
        with tf.variable_scope("lstm_layer") as vs:
            query_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                      forget_bias=2.0,
                                                      use_peepholes=True,
                                                      state_is_tuple=True)
            query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim,
                                                      forget_bias=2.0,
                                                      use_peepholes=True,
                                                      state_is_tuple=True,
                                                      reuse=True)
            reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
            _, queries_encoded = tf.nn.dynamic_rnn(
                cell=query_lstm_cell,
                inputs=queries_embedded,
                sequence_length=tf.cast(self.query_lengths, tf.int32),
                dtype=tf.float32,
            )
            _, replies_encoded = tf.nn.dynamic_rnn(
                cell=reply_lstm_cell,
                inputs=replies_embedded,
                sequence_length=tf.cast(self.reply_lengths, tf.int32),
                dtype=tf.float32,
            )

            self.queries_encoded = tf.cast(queries_encoded.h, tf.float64)
            self.replies_encoded = tf.cast(replies_encoded.h, tf.float64)
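            # the final hidden state (state.h) serves as a fixed-length
            # sequence encoding; the float64 cast matches the dtype of the
            # dense layers and labels below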

        with tf.variable_scope("sampling"):
            negative_mask = make_negative_mask(
                tf.zeros([cur_batch_length, cur_batch_length]),
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples)
            negative_queries_indices, negative_replies_indices = tf.split(
                tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

            # self.distances = tf.matmul(self.queries_encoded, self.replies_encoded, transpose_b=True)
            # self.distances_flattened = tf.reshape(self.distances, [-1])

            # self.positive_distances = tf.gather(self.distances_flattened, tf.where(tf.reshape(tf.eye(cur_batch_length), -1)))
            # self.negative_distances = tf.gather(self.distances_flattened, tf.where(tf.reshape(negative_mask, -1)))

            self.negative_queries_indices = tf.squeeze(
                negative_queries_indices)
            self.negative_replies_indices = tf.squeeze(
                negative_replies_indices)

            self.positive_inputs = tf.concat(
                [self.queries_encoded, self.replies_encoded], 1)
            self.negative_inputs = tf.reshape(
                tf.concat([
                    tf.nn.embedding_lookup(self.queries_encoded,
                                           self.negative_queries_indices),
                    tf.nn.embedding_lookup(self.replies_encoded,
                                           self.negative_replies_indices)
                ], 1), [
                    tf.shape(negative_queries_indices)[0],
                    self.config.lstm_dim * 2
                ])
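            # negative_inputs: [num_negatives, lstm_dim * 2], pairing each
            # sampled query encoding with a mismatched reply encoding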

        with tf.variable_scope("prediction"):
            self.hidden_outputs = tf.layers.dense(tf.concat(
                [self.positive_inputs, self.negative_inputs], 0),
                                                  256,
                                                  tf.nn.relu,
                                                  name="hidden_layer")
            # keep the output layer linear; a ReLU here would zero out
            # negative logits before the softmax cross-entropy below
            self.logits = tf.layers.dense(self.hidden_outputs,
                                          2,
                                          name="output_layer")
            labels = tf.concat([
                tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
                tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
            ], 0)

            # labels must match the float64 logits for the cross-entropy op
            self.labels = tf.one_hot(tf.to_int32(labels), 2, dtype=tf.float64)
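            # one_hot(1) marks a true query-reply pair and one_hot(0) a
            # sampled negative; the positives occupy the first
            # cur_batch_length rows, which positive_probs slices out below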

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.argmax(self.probs, 1)
            self.positive_probs = tf.slice(self.probs, [0, 0],
                                           [cur_batch_length, -1])

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels,
                                                           logits=self.logits))
            self.train_step = self.optimizer.minimize(
                self.loss, global_step=self.global_step_tensor)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")

    def build_model(self):
        # build index table
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list,
            num_oov_buckets=0,
            default_value=0)

        # get data iterator
        self.data_iterator = self.data.get_data_iterator(index_table,
                                                         mode=self.mode)

        # get inputs
        with tf.variable_scope("inputs"):
            # get next batch if there is no feeded data
            next_batch = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(
                next_batch["input_queries"], [None, self.config.max_length],
                name="input_queries")
            self.input_replies = tf.placeholder_with_default(
                next_batch["input_replies"], [None, self.config.max_length],
                name="input_replies")
            self.query_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["query_lengths"]), [None],
                name="query_lengths")
            self.reply_lengths = tf.placeholder_with_default(
                tf.squeeze(next_batch["reply_lengths"]), [None],
                name="reply_lengths")

            # get hyperparams
            self.embed_dropout_keep_prob = tf.placeholder(
                tf.float64, name="embed_dropout_keep_prob")
            self.lstm_dropout_keep_prob = tf.placeholder(
                tf.float32, name="lstm_dropout_keep_prob")
            self.dense_dropout_keep_prob = tf.placeholder(
                tf.float32, name="dense_dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(
                tf.int32, name="num_negative_samples")

        with tf.variable_scope("properties"):
            # length properties
            cur_batch_length = tf.shape(self.input_queries)[0]

            # get hparms from tensor2tensor.models.transformer
            hparams = transformer.transformer_small()
            hparams.batch_size = self.config.batch_size

            # learning rate
            lr = learning_rate.learning_rate_schedule(hparams)

        # embedding layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(
                self.config.vocab_list, self.config.pretrained_embed_dir,
                self.config.vocab_size, self.config.embed_dim),
                                     trainable=True,
                                     name="embeddings")
            embeddings = tf.nn.dropout(
                embeddings,
                keep_prob=self.embed_dropout_keep_prob,
                noise_shape=[tf.shape(embeddings)[0], 1])
            queries_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_queries,
                                       name="queries_embedded"))
            replies_embedded = tf.to_float(
                tf.nn.embedding_lookup(embeddings,
                                       self.input_replies,
                                       name="replies_embedded"))

            self.queries_embedded = queries_embedded
            self.replies_embedded = replies_embedded

        # transformer layer
        with tf.variable_scope("transformer"):
            queries_expanded = tf.expand_dims(queries_embedded,
                                              axis=2,
                                              name="queries_expanded")
            replies_expanded = tf.expand_dims(replies_embedded,
                                              axis=2,
                                              name="replies_expanded")

            hparams = transformer.transformer_small()
            hparams.set_hparam("batch_size", self.config.batch_size)
            hparams.set_hparam("hidden_size", self.config.embed_dim)
            encoder = transformer.TransformerEncoder(hparams, mode=self.mode)

            self.queries_encoded = encoder({
                "inputs": queries_expanded,
                "targets": queries_expanded
            })[0]
            self.replies_encoded = encoder({
                "inputs": replies_expanded,
                "targets": replies_expanded
            })[0]
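            # encoder(...) returns an (outputs, extras) pair; [0] keeps the
            # encoded [batch, max_length, 1, hidden_size] tensor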

            self.queries_pooled = tf.nn.max_pool(
                self.queries_encoded,
                ksize=[1, self.config.max_length, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="queries_pooled")
            self.replies_pooled = tf.nn.max_pool(
                self.replies_encoded,
                ksize=[1, self.config.max_length, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="replies_pooled")

            self.queries_flattened = tf.reshape(self.queries_pooled,
                                                [cur_batch_length, -1])
            self.replies_flattened = tf.reshape(self.replies_pooled,
                                                [cur_batch_length, -1])

        # build dense layer
        with tf.variable_scope("dense_layer"):
            M = tf.get_variable(
                "M",
                shape=[self.config.embed_dim, self.config.embed_dim],
                initializer=tf.initializers.truncated_normal())
            M = tf.nn.dropout(M, self.dense_dropout_keep_prob)
            self.queries_transformed = tf.matmul(self.queries_flattened, M)
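            # M is a learned bilinear map, as in dual-encoder retrieval
            # models: queries are projected into reply space before the
            # dot-product scoring below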

        with tf.variable_scope("sampling"):
            self.distances = tf.matmul(self.queries_transformed,
                                       self.replies_flattened,
                                       transpose_b=True)
            positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
            negative_mask = tf.reshape(
                make_negative_mask(
                    self.distances,
                    method=self.config.negative_sampling,
                    num_negative_samples=self.num_negative_samples), [-1])
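            # positive_mask flags the diagonal (true pairs); negative_mask
            # flags the sampled off-diagonal (mismatched) pairs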

        with tf.variable_scope("prediction"):
            distances_flattened = tf.reshape(self.distances, [-1])
            # tf.where needs a bool condition, so compare the float masks
            # against zero
            self.positive_logits = tf.gather(
                distances_flattened, tf.where(tf.not_equal(positive_mask, 0)))
            self.negative_logits = tf.gather(
                distances_flattened, tf.where(tf.not_equal(negative_mask, 0)))

            self.logits = tf.concat(
                [self.positive_logits, self.negative_logits], axis=0)
            self.labels = tf.concat([
                tf.ones_like(self.positive_logits),
                tf.zeros_like(self.negative_logits)
            ],
                                    axis=0)

            self.positive_probs = tf.sigmoid(self.positive_logits)

            self.probs = tf.sigmoid(self.logits)
            self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                        logits=self.logits))
            self.train_step = optimize.optimize(self.loss,
                                                lr,
                                                hparams,
                                                use_tpu=False)

        with tf.variable_scope("score"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.to_int32(self.labels))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")