Example #1
    def __init__(self, config, sess):
        self._data_file = config.data_file
        self._margin = 4
        self._batch_size = config.batch_size
        self._vocab_size = config.nwords
        self._rel_size = config.nrels
        self._ent_size = config.nents
        self._sentence_size = config.query_size
        self._embedding_size = config.edim
        self._path_size = config.path_size
        self._memory_size = config.nrels

        self._hops = config.nhop
        self._max_grad_norm = config.max_grad_norm
        self._init = tf.contrib.layers.xavier_initializer()
        #self._init = tf.random_normal_initializer(stddev=config.init_std)

        self._opt = tf.train.AdamOptimizer()
        self._name = "IRN"
        self._checkpoint_dir = config.checkpoint_dir + '/' + self._name

        if not os.path.exists(self._checkpoint_dir):
            os.makedirs(self._checkpoint_dir)

        self._build_inputs()
        self._build_vars()
        self._saver = tf.train.Saver(max_to_keep=1)

        self._encoding = tf.constant(position_encoding(self._sentence_size,
                                                       self._embedding_size),
                                     name="encoding")

        KB_batch_loss = self._pretranse()
        KB_loss_op = tf.reduce_sum(KB_batch_loss, name="KB_loss_op")
        KB_grads_and_vars = self._opt.compute_gradients(
            KB_loss_op, [self.EE, self.RE, self.Mse])
        KB_nil_grads_and_vars = []
        for g, v in KB_grads_and_vars:
            if v.name in self._nil_vars:
                KB_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                KB_nil_grads_and_vars.append((g, v))
        print "KB_grads_and_vars"
        for g, v in KB_nil_grads_and_vars:
            print g, v.name
        KB_train_op = self._opt.apply_gradients(KB_grads_and_vars,
                                                name="KB_train_op")

        #cross entropy as loss for QA:
        batch_loss, p = self._inference()  # (b,1), (batch_size, 5)
        QA_loss_op = tf.reduce_sum(batch_loss, name="QA_loss_op")

        QA_params = [self.QE, self.Mrq, self.Mrs]
        QA_grads_and_vars = self._opt.compute_gradients(QA_loss_op, QA_params)

        QA_grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                             for g, v in QA_grads_and_vars if g is not None]
        QA_grads_and_vars = [(add_gradient_noise(g), v)
                             for g, v in QA_grads_and_vars]
        QA_nil_grads_and_vars = []
        for g, v in QA_grads_and_vars:
            if v.name in self._nil_vars:
                QA_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                QA_nil_grads_and_vars.append((g, v))

        print "QA_grads_and_vars"
        for g, v in QA_nil_grads_and_vars:
            print g, v.name
        #grads_and_vars = [(tf.Print(g, [v.name,str(g.get_shape()),g], summarize=1e1/2), v) for g, v in grads_and_vars]

        QA_train_op = self._opt.apply_gradients(QA_nil_grads_and_vars,
                                                name="QA_train_op")

        # predict ops
        QA_predict_op = p

        # assign ops
        self.KB_loss_op = KB_loss_op
        self.KB_train_op = KB_train_op
        self.QA_loss_op = QA_loss_op
        self.QA_predict_op = QA_predict_op
        self.QA_train_op = QA_train_op

        init_op = tf.global_variables_initializer()
        self._sess = sess
        self._sess.run(init_op)
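The gradient pipeline above calls two helpers, zero_nil_slot and add_gradient_noise, that are not part of the snippet. A minimal sketch of what they typically look like in MemN2N-style TensorFlow 1.x code is below; this is an assumption about the project's helpers, not its verified implementation.

import tensorflow as tf

def zero_nil_slot(t, name=None):
    # Zero out the gradient for row 0 (the nil/padding embedding) so it is never updated.
    with tf.name_scope(name, "zero_nil_slot", [t]) as name:
        t = tf.convert_to_tensor(t, name="t")
        s = tf.shape(t)[1]
        z = tf.zeros(tf.stack([1, s]))
        return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], 0, name=name)

def add_gradient_noise(t, stddev=1e-3, name=None):
    # Add Gaussian noise to a gradient tensor, which can help optimization in some setups.
    with tf.name_scope(name, "add_gradient_noise", [t]) as name:
        t = tf.convert_to_tensor(t, name="t")
        gn = tf.random_normal(tf.shape(t), stddev=stddev)
        return tf.add(t, gn, name=name)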
Example #2
    def build_model(self):

        # init ph, weights and dropout rate
        self.input_feature_ph_dict = dict()
        # build the feature-weight and dropout-placeholder dictionaries
        self.weight_dropout_ph_dict = dict()
        self.feature_weight_dict = dict()
        self.nil_vars = set()
        self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
        # label ph
        self.input_label_ph = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')
        # read in the features and build the feature structure
        for feature_name in self.feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self.sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights; randomly initialize the ones not specified
            if feature_name not in self.feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self.feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weigth_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self.feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate; default to 0 for the ones not specified
            if feature_name not in self.feature_weight_dropout_dict:
                self.feature_weight_dropout_dict[feature_name] = 0.

        # init embeddings
        # embed each feature, then concatenate them
        self.embedding_features = []
        for feature_name in self.feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1.-self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)

        # concat all features
        # concatenate the per-feature embeddings into the sentence representation
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')

        # cnn

        cnn_output = self.IDCNN_layer(input_features)

        # # bi-lstm
        #
        # if self.rnn_unit == 'lstm':
        #     fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        #     bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        # elif self.rnn_unit == 'gru':
        #     fw_cell = rnn.GRUCell(self.nb_hidden)
        #     bw_cell = rnn.GRUCell(self.nb_hidden)
        # else:
        #     raise ValueError('rnn_unit must in (lstm, gru)!')
        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding id)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self.feature_names[0]])
        # # print(input_features)
        # rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        #     fw_cell, bw_cell, input_features, scope='bi-lstm',
        #     dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # # shape = [batch_size, max_len, nb_hidden*2]
        # # after dropout the input/output dimensions stay unchanged
        # lstm_output = tf.nn.dropout(
        #     tf.concat(rnn_outputs, axis=2, name='lstm_output'),
        #     keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')
        #
        # # softmax
        # # reshape the outputs for the softmax layer
        # self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden*2], name='outputs')

        self.softmax_w = tf.get_variable('softmax_w', [self.cnn_output_width, self.nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
        self.logits = tf.reshape(
            tf.matmul(cnn_output, self.softmax_w) + self.softmax_b,
            shape=[-1, self.sequence_length, self.nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self.clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be made configurable
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
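uniform_tensor and get_sequence_actual_length are project helpers that this snippet uses but does not define. A plausible minimal sketch is below, assuming a uniform init range of ±0.01 and that id 0 is the padding value (both assumptions).

import tensorflow as tf

def uniform_tensor(shape, name=None, dtype=tf.float32):
    # Uniformly initialized weight tensor; the range is an assumed default.
    return tf.random_uniform(shape, minval=-0.01, maxval=0.01, dtype=dtype, name=name)

def get_sequence_actual_length(ids, dim=1):
    # Number of non-padding tokens per sentence, assuming padding id 0.
    used = tf.sign(tf.abs(ids))
    return tf.cast(tf.reduce_sum(used, axis=dim), tf.int32)

With these definitions, sequence_actual_length has shape [batch_size], which is the shape bidirectional_dynamic_rnn expects for its sequence_length argument.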
Example #3
    def __init__(self, config, sess):
        self._data_file = config.data_file
        self._margin = 2
        self._batch_size = config.batch_size
        self._vocab_size = config.nwords
        self._rel_size = config.nrels
        self._ent_size = config.nents
        self._sentence_size = config.query_size
        self._embedding_size = config.edim
        self._path_size = config.path_size
        self._memory_size = config.nrels

        self._hops = config.nhop
        self._max_grad_norm = config.max_grad_norm
        self._init = tf.contrib.layers.xavier_initializer()
        #self._init = tf.random_normal_initializer(stddev=config.init_std)

        self._opt = tf.train.AdamOptimizer()
        self._name = "IRN_C"
        self._checkpoint_dir = config.checkpoint_dir + '/' + self._name

        if not os.path.exists(self._checkpoint_dir):
            os.makedirs(self._checkpoint_dir)

        self._build_inputs()
        self._build_vars()
        self._saver = tf.train.Saver(max_to_keep=10)

        self._encoding = tf.constant(position_encoding(self._sentence_size,
                                                       self._embedding_size),
                                     name="encoding")

        KB_batch_loss = self._pretranse()
        KB_loss_op = tf.reduce_sum(KB_batch_loss, name="KB_loss_op")
        KB_grads_and_vars = self._opt.compute_gradients(
            KB_loss_op, [self.EE, self.RE, self.Mse])
        KB_nil_grads_and_vars = []
        for g, v in KB_grads_and_vars:
            if v.name in self._nil_vars:
                KB_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                KB_nil_grads_and_vars.append((g, v))
        print "KB_grads_and_vars"
        for g, v in KB_nil_grads_and_vars:
            print g, v.name
        KB_train_op = self._opt.apply_gradients(KB_grads_and_vars,
                                                name="KB_train_op")
        KBE_norm_op = tf.nn.l2_normalize(self.EE, 1)
        KBR_norm_op = tf.nn.l2_normalize(self.RE, 1)

        #cross entropy as loss for QA:
        batch_loss_1, p_1, ans_1 = self._inference(self._paths[:, 0, :])
        batch_loss_2, p_2, ans_2 = self._inference(self._paths[:, 1, :])
        QA_loss_op = tf.reduce_sum(batch_loss_1 + batch_loss_2,
                                   name="QA_loss_op")

        # gradient pipeline (seems to have little effect)
        QA_grads_and_vars = self._opt.compute_gradients(QA_loss_op)

        QA_grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                             for g, v in QA_grads_and_vars if g is not None]

        QA_grads_and_vars = [(add_gradient_noise(g), v)
                             for g, v in QA_grads_and_vars]
        QA_nil_grads_and_vars = []
        for g, v in QA_grads_and_vars:
            if v.name in self._nil_vars:
                QA_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                QA_nil_grads_and_vars.append((g, v))

        print "QA_grads_and_vars"
        for g, v in QA_nil_grads_and_vars:
            print g, v.name
        #grads_and_vars = [(tf.Print(g, [v.name,str(g.get_shape()),g], summarize=1e1/2), v) for g, v in grads_and_vars]

        QA_train_op = self._opt.apply_gradients(QA_nil_grads_and_vars,
                                                name="QA_train_op")
        fans = ans_1 + ans_2
        final_ans = tf.reshape(tf.cast(tf.argmax(fans, 1), tf.int32), [-1, 1])

        # predict ops
        QA_predict_op = tf.concat(axis=1, values=[p_1, p_2,
                                                  final_ans])  #(none,11)

        # assign ops
        self.KB_loss_op = KB_loss_op
        self.KB_train_op = KB_train_op
        self.KBE_norm_op = KBE_norm_op
        self.KBR_norm_op = KBR_norm_op
        self.QA_loss_op = QA_loss_op
        self.QA_predict_op = QA_predict_op
        self.QA_train_op = QA_train_op

        init_op = tf.global_variables_initializer()
        self._sess = sess
        self._sess.run(init_op)
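Both IRN variants wrap position_encoding(...) in a tf.constant named "encoding", but the helper itself is not shown. The sketch below is the standard position-encoding matrix from End-To-End Memory Networks, which this code appears to follow; the repository's exact version may differ.

import numpy as np

def position_encoding(sentence_size, embedding_size):
    # (sentence_size, embedding_size) position-encoding matrix (Sukhbaatar et al., 2015).
    encoding = np.ones((embedding_size, sentence_size), dtype=np.float32)
    ls = sentence_size + 1
    le = embedding_size + 1
    for i in range(1, le):
        for j in range(1, ls):
            encoding[i - 1, j - 1] = (i - (embedding_size + 1) / 2) * (j - (sentence_size + 1) / 2)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size
    return np.transpose(encoding)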
Example #4
    def build_model(self):
        for feature_name in self._feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self._sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights; randomly initialize the ones not specified
            if feature_name not in self._feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self._feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weigth_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self._feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
                self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate; default to 0 for the ones not specified
            if feature_name not in self._feature_weight_dropout_dict:
                self._feature_weight_dropout_dict[feature_name] = 0.
        # char feature
        if self._use_char_feature:
            # char feature weights
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict['char'], name='f_w_%s' % 'char')
            self.feature_weight_dict['char'] = tf.Variable(
                initial_value=feature_weight, name='feature_weigth_%s' % 'char')
            self.nil_vars.add(self.feature_weight_dict['char'].name)
            self.input_feature_ph_dict['char'] = tf.placeholder(
                dtype=tf.int32, shape=[None, self._sequence_length, self._word_length],
                name='input_feature_ph_%s' % 'char')

        # init embeddings
        self.embedding_features = []
        for feature_name in self._feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1.-self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)
        # char embedding
        if self._use_char_feature:
            char_embedding_feature = tf.nn.embedding_lookup(
                self.feature_weight_dict['char'],
                ids=self.input_feature_ph_dict['char'],
                name='embedding_feature_%s' % 'char')
            # conv
            couv_feature_char = MultiConvolutional3D(
                char_embedding_feature, filter_length_list=self._conv_filter_len_list,
                nb_filter_list=self._conv_filter_size_list).output
            couv_feature_char = tf.nn.dropout(
                couv_feature_char, keep_prob=1-self.cnn_dropout_rate_ph)

        # concat all features
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')
        if self._use_char_feature:
            input_features = tf.concat([input_features, couv_feature_char], axis=-1)

        # multi bi-lstm layer
        _fw_cells = []
        _bw_cells = []
        for _ in range(self._num_layers):
            fw, bw = self._get_rnn_unit(self._rnn_unit)
            _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=1-self.rnn_dropout_rate_ph))
            _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=1-self.rnn_dropout_rate_ph))
        fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding id)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self._feature_names[0]])
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, input_features, scope='bi-lstm',
            dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # shape = [batch_size, max_len, nb_hidden*2]
        lstm_output = tf.nn.dropout(
            tf.concat(rnn_outputs, axis=2, name='lstm_output'),
            keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')

        # softmax
        hidden_size = int(lstm_output.shape[-1])
        self.outputs = tf.reshape(lstm_output, [-1, hidden_size], name='outputs')
        self.softmax_w = tf.get_variable('softmax_w', [hidden_size, self._nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self._nb_classes])
        self.logits = tf.reshape(
            tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
            shape=[-1, self._sequence_length, self._nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self._l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self._clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be made configurable
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
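self._get_rnn_unit(self._rnn_unit) is called above but not shown. Given the commented-out lstm/gru branch in Example #2 and the live one in Example #7, a plausible sketch of the method is below; treating self._nb_hidden as the model's hidden-size attribute is an assumption.

from tensorflow.contrib import rnn

def _get_rnn_unit(self, rnn_unit):
    # Build one forward and one backward cell of the requested type.
    if rnn_unit == 'lstm':
        fw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
        bw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
    elif rnn_unit == 'gru':
        fw_cell = rnn.GRUCell(self._nb_hidden)
        bw_cell = rnn.GRUCell(self._nb_hidden)
    else:
        raise ValueError('rnn_unit must be in (lstm, gru)!')
    return fw_cell, bw_cell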
Example #5
    def build_model(self):
        for feature_name in self._feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self._sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights; randomly initialize the ones not specified
            if feature_name not in self._feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self._feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight,
                    name='feature_weigth_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self._feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
                self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate; default to 0 for the ones not specified
            if feature_name not in self._feature_weight_dropout_dict:
                self._feature_weight_dropout_dict[feature_name] = 0.
        # char feature
        if self._use_char_feature:
            # char feature weights
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict['char'],
                name='f_w_%s' % 'char')
            self.feature_weight_dict['char'] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weigth_%s' % 'char')
            self.nil_vars.add(self.feature_weight_dict['char'].name)
            self.input_feature_ph_dict['char'] = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self._sequence_length, self._word_length],
                name='input_feature_ph_%s' % 'char')

        # init embeddings
        self.embedding_features = []
        for feature_name in self._feature_names:
            print(self.input_feature_ph_dict[feature_name].shape)
            embedding_feature = tf.nn.dropout(
                tf.nn.embedding_lookup(
                    self.feature_weight_dict[feature_name],
                    ids=self.input_feature_ph_dict[feature_name],
                    name='embedding_feature_%s' % feature_name),
                keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)
            print(embedding_feature.shape)
        # char embedding
        if self._use_char_feature:
            char_embedding_feature = tf.nn.embedding_lookup(
                self.feature_weight_dict['char'],
                ids=self.input_feature_ph_dict['char'],
                name='embedding_feature_%s' % 'char')
            # conv
            couv_feature_char = MultiConvolutional3D(
                char_embedding_feature,
                filter_length_list=self._conv_filter_len_list,
                nb_filter_list=self._conv_filter_size_list).output
            couv_feature_char = tf.nn.dropout(couv_feature_char,
                                              keep_prob=1 -
                                              self.cnn_dropout_rate_ph)

        # concat all features
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=len(self._feature_names), name='input_features')
        print('input features shape', input_features.shape)

        if self._use_char_feature:
            input_features = tf.concat([input_features, couv_feature_char],
                                       axis=-1)

        # multi bi-lstm layer
        _fw_cells = []
        _bw_cells = []
        for _ in range(self._num_layers):
            fw, bw = self._get_rnn_unit(self._rnn_unit)
            _fw_cells.append(
                tf.nn.rnn_cell.DropoutWrapper(fw,
                                              output_keep_prob=1 -
                                              self.rnn_dropout_rate_ph))
            _bw_cells.append(
                tf.nn.rnn_cell.DropoutWrapper(bw,
                                              output_keep_prob=1 -
                                              self.rnn_dropout_rate_ph))
        fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding id)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self._feature_names[0]],
            dim=1)

        print(self.sequence_actual_length.shape)
        input_size = input_features.shape[-1]
        print('input_features shape ', input_features.shape)
        rnn_inputs = tf.reshape(input_features,
                                [-1, self._sequence_length, input_size])
        print('rnn inputs shape ', rnn_inputs.shape)
        rnn_lengths = tf.reshape(self.sequence_actual_length, [-1])

        # todo: add encoder output
        rnn_outputs, rnn_state = tf.nn.bidirectional_dynamic_rnn(
            fw_cell,
            bw_cell,
            rnn_inputs,
            scope='bi-lstm',
            dtype=tf.float32,
            sequence_length=rnn_lengths)

        # shape = [batch_size, max_len, nb_hidden*2]
        rnn_outputs = tf.concat(rnn_outputs, axis=2, name='lstm_output')
        rnn_outputs = tf.nn.dropout(rnn_outputs,
                                    keep_prob=1. - self.dropout_rate_ph,
                                    name='lstm_output_dropout')
        rnn_hidden = self.merge_bi_rnn_state(rnn_state).h
        #
        batch_size = tf.shape(input_features)[0]
        #
        print('rnn outputs shape', rnn_outputs.shape)
        print('rnn hidden shape', rnn_hidden.shape)
        #
        # rnn_outputs = tf.reshape(rnn_outputs,
        #                          [batch_size, turn_size, self._sequence_length, self._nb_hidden * 2])
        #
        # rnn_hidden = tf.reshape(rnn_hidden, [batch_size, turn_size, self._nb_hidden * 2])
        # # rnn_hidden = tf.nn.dropout(rnn_hidden, keep_prob=1. - self.dropout_rate_ph)
        # print('rnn outputs shape', rnn_outputs.shape)
        # print('rnn hidden shape', rnn_hidden.shape)
        #
        # # context rnn
        # ctx_cell = rnn.BasicLSTMCell(self._nb_hidden * 2, forget_bias=1., state_is_tuple=True)
        # ctx_lengths = get_sequence_actual_length(self.input_feature_ph_dict[self._feature_names[0]], dim=[1, 2])
        # print("ctx inputs shape", rnn_hidden.shape)
        # print('ctx lengths shape', ctx_lengths.shape)
        #
        # ctx_outputs, _ = tf.nn.dynamic_rnn(cell=ctx_cell,
        #                                    inputs=rnn_hidden,
        #                                    sequence_length=ctx_lengths,
        #                                    dtype=tf.float32)
        # # predict intents
        intent_logits = tf.layers.dense(rnn_hidden, 24, activation=None)  # !!!! hard-coded intent class count
        label_intents = tf.reshape(self.input_label_intent, [-1])
        print('intent_logits shape', intent_logits.shape)
        print('input_label_intent shape', self.input_label_intent.shape)
        intent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_intents, logits=intent_logits)
        intent_mask = tf.reshape(tf.sign(self.sequence_actual_length), [-1])
        intent_mask = tf.cast(intent_mask, dtype=tf.float32)
        print('intent_mask shape', intent_mask.shape)
        self.intent_loss = tf.reduce_sum(
            intent_loss * intent_mask) / tf.reduce_sum(intent_mask)

        pred_intents = tf.argmax(intent_logits, axis=1)
        self.pred_intents = tf.reshape(pred_intents, [-1])
        print('pred_intents shape', self.pred_intents.shape)
        correct_preds = tf.equal(tf.cast(pred_intents, dtype=tf.int32),
                                 tf.cast(label_intents, dtype=tf.int32))

        self.intent_accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32) * intent_mask) \
                               / tf.reduce_sum(intent_mask)

        self.intent_count = tf.cast(tf.reduce_sum(intent_mask), tf.int32)

        self.intent_logits = intent_logits

        # predict slots
        # batch * turn * hidden
        # print('ctx outputs shape', ctx_outputs.shape)
        # ctx_outputs = tf.reshape(ctx_outputs, [batch_size, turn_size, self._nb_hidden * 2])
        # rnn_intent_outputs = [ctx_outputs for _ in range(self._sequence_length)]
        # rnn_intent_outputs = tf.stack(rnn_intent_outputs, axis=2)
        # print('rnn intent outputs', rnn_intent_outputs.shape)
        #
        # ctx_h = tf.reshape(ctx_outputs[:, :, :self._nb_hidden], [-1, self._nb_hidden])
        # ctx_c = tf.reshape(ctx_outputs[:, :, self._nb_hidden:], [-1, self._nb_hidden])
        #
        # init_fw_hidden = []
        # init_bw_hidden = []
        # for _ in range(self._num_layers):
        #     lstm_hidden = rnn.LSTMStateTuple(h=ctx_h, c=ctx_c)
        #     init_fw_hidden += [lstm_hidden]
        #     init_bw_hidden += [lstm_hidden]
        # init_fw_hidden = tuple(init_fw_hidden)
        # init_bw_hidden = tuple(init_bw_hidden)

        # slot_outputs = tf.concat([rnn_outputs, rnn_intent_outputs], axis=3)
        # slot_outputs = tf.reshape(slot_outputs, [-1, self._nb_hidden * 4])

        # run the rnn again with init state

        # slot_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        #     fw_cell, bw_cell, rnn_inputs,
        #     scope='bi-lstm',
        #     initial_state_fw=init_fw_hidden,
        #     initial_state_bw=init_bw_hidden,
        #     dtype=tf.float32,
        #     sequence_length=rnn_lengths)

        # slot_outputs = tf.concat(slot_outputs, axis=2, name='slot_output')

        slot_logits = tf.layers.dense(rnn_outputs,
                                      self._nb_classes,
                                      activation=None)
        self.slot_logits = tf.reshape(
            slot_logits, [batch_size, self._sequence_length, self._nb_classes])
        print('slot logits shape', self.slot_logits.shape)
        slot_labels = tf.reshape(self.input_label_ph,
                                 [-1, self._sequence_length])
        slot_logits = tf.reshape(self.slot_logits,
                                 [-1, self._sequence_length, self._nb_classes])
        slot_lengths = tf.reshape(self.sequence_actual_length, [-1])
        log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
            slot_logits, slot_labels, slot_lengths)
        print('transition params shape', self.transition_params.shape)
        print('log likelihood loss', log_likelihood.shape)
        self.slot_loss = tf.reduce_sum(
            -log_likelihood * intent_mask) / tf.reduce_sum(intent_mask)
        self.total_loss = self.intent_loss + self.slot_loss
        self.train_loss = self.slot_loss + self.intent_loss * self.intent_weight_ph

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
        self.train_op = optimizer.minimize(self.train_loss)
        grads_and_vars = optimizer.compute_gradients(self.train_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self._clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
            self.train_op = optimizer.apply_gradients(zip(
                gradients, variables),
                                                      name='train_op',
                                                      global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(nil_grads_and_vars,
                                                      name='train_op',
                                                      global_step=global_step)

        # TODO: sess and visible_device_list still need to be made configurable
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
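The slot head above is trained with crf_log_likelihood, but decoding is not shown. At inference time the learned transition_params are normally combined with the unary slot_logits via Viterbi decoding; the sketch below uses TensorFlow 1.x's tf.contrib.crf.crf_decode, and wiring it into this particular model is an assumption.

import tensorflow as tf

def crf_decode_op(slot_logits, transition_params, sequence_lengths):
    # slot_logits: [batch, max_len, nb_classes] unary potentials (self.slot_logits above)
    # transition_params: [nb_classes, nb_classes] learned CRF transition matrix
    # sequence_lengths: [batch] actual sentence lengths
    decoded_tags, best_scores = tf.contrib.crf.crf_decode(
        slot_logits, transition_params, sequence_lengths)
    return decoded_tags  # [batch, max_len] int32 tag ids

The returned tag ids can then be evaluated against input_label_ph, masking out padding positions with sequence_actual_length as the intent metrics above do.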
Example #6
    def __init__(self, config, multi_kb: MultiKnowledgeBase, sess):
        self._margin = 4
        self._dataset = config.dataset
        self._batch_size = config.batch_size
        self._sentence_size = config.sentence_size
        self._embedding_size = config.embedding_dimension
        self._max_grad_norm = config.max_grad_norm
        self._multi_kb = multi_kb
        self._vocab_size = config.question_words
        self._is_direct_align = config.direct_align
        self._is_dual_matrices = config.dual_matrices
        self._hops = config.hops
        self._steps = config.steps
        self._lan_labels = config.lan_labels
        self._lan_que = config.lan_que
        self._rel_size_1 = self._multi_kb.kb1.n_relations
        self._rel_size_2 = self._multi_kb.kb2.n_relations
        self._ent_size_1 = self._multi_kb.kb1.n_entities
        self._ent_size_2 = self._multi_kb.kb2.n_entities
        self._init = tf.contrib.layers.xavier_initializer()
        self._orthogonal_init = orthogonal_initializer()
        self._opt = tf.train.AdamOptimizer(learning_rate=config.lr,
                                           epsilon=config.epsilon,
                                           name="opt")
        # self._AM_opt = tf.train.AdamOptimizer(learning_rate=config.lr * config.ar, epsilon=config.epsilon,
        #                                       name="AM_opt")
        self._name = "M_IRN"
        self._checkpoint_dir = config.checkpoint_dir + '/' + config.kb_dir + '/' + self._name
        if not self._is_direct_align:
            self.this_k_1_2 = config.this_k_1_2
            self.this_k_2_1 = config.this_k_2_1

        if not os.path.exists(self._checkpoint_dir):
            os.makedirs(self._checkpoint_dir)

        self._build_inputs()
        self._build_vars()
        self._saver = tf.train.Saver(max_to_keep=1)

        # kg1 train and loss
        kg1_batch_loss = self._kg1_to_train()
        kg1_loss_op = tf.reduce_sum(kg1_batch_loss, name="KG1_loss_op")
        kg1_grads_and_vars = self._opt.compute_gradients(
            kg1_loss_op, [self._kg1_ent_emb, self._kg1_rel_emb, self._kg1_Mse])
        kg1_nil_grads_and_vars = []
        for g, v in kg1_grads_and_vars:
            if v.name in self._nil_vars:  # not _kg1_Mse
                kg1_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                kg1_nil_grads_and_vars.append((g, v))
        print("Gradients and Variables for KG1:")
        for g, v in kg1_nil_grads_and_vars:
            print(g, v.name)
        kg1_train_op = self._opt.apply_gradients(kg1_grads_and_vars,
                                                 name="kg1_train_op")

        # kg2 train and loss
        kg2_batch_loss = self._kg2_to_train()
        kg2_loss_op = tf.reduce_sum(kg2_batch_loss, name="kg2_loss_op")
        kg2_grads_and_vars = self._opt.compute_gradients(
            kg2_loss_op, [self._kg2_ent_emb, self._kg2_rel_emb, self._kg2_Mse])
        kg2_nil_grads_and_vars = []
        for g, v in kg2_grads_and_vars:
            if v.name in self._nil_vars:  # not _kg2_Mse
                kg2_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                kg2_nil_grads_and_vars.append((g, v))
        print("Gradients and Variables for KG2:")
        for g, v in kg2_nil_grads_and_vars:
            print(g, v.name)
        kg2_train_op = self._opt.apply_gradients(kg2_grads_and_vars,
                                                 name="kg2_train_op")

        # # alignment train and loss
        # alignment_batch_loss = self._align_to_train()
        # alignment_loss_op = tf.reduce_sum(alignment_batch_loss, name="alignment_loss_op")
        # alignment_train_op = self._AM_opt.minimize(alignment_loss_op)
        # ali_res_1_op, ali_res_2_op = self._align_kNN()

        # cross entropy as loss for inference:
        batch_loss, inference_path = self._inference(
        )  # (batch_size, 1), (batch_size, 6)
        inference_loss_op = tf.reduce_sum(batch_loss, name="inference_loss_op")
        inference_params = [
            self._que_emb, self._kg1_Mrq, self._kg1_Mrs, self._kg2_Mrq,
            self._kg2_Mrs
        ]
        inference_grads_and_vars = self._opt.compute_gradients(
            inference_loss_op, inference_params)
        inference_grads_and_vars = [(tf.clip_by_norm(g,
                                                     self._max_grad_norm), v)
                                    for g, v in inference_grads_and_vars
                                    if g is not None]
        inference_grads_and_vars = [(add_gradient_noise(g), v)
                                    for g, v in inference_grads_and_vars]
        inference_nil_grads_and_vars = []
        for g, v in inference_grads_and_vars:
            if v.name in self._nil_vars:
                inference_nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                inference_nil_grads_and_vars.append((g, v))
        print("Gradients and variables for inference:")
        for g, v in inference_nil_grads_and_vars:
            print(g, v.name)

        inference_train_op = self._opt.apply_gradients(
            inference_nil_grads_and_vars, name="inference_train_op")

        # batch_predict ops
        inference_predict_op = inference_path

        # assign ops
        self.kg1_loss_op = kg1_loss_op
        self.kg1_train_op = kg1_train_op
        self.kg2_loss_op = kg2_loss_op
        self.kg2_train_op = kg2_train_op
        # self.alignment_loss_op = alignment_loss_op
        # self.alignment_train_op = alignment_train_op
        self.inference_loss_op = inference_loss_op
        self.inference_predict_op = inference_predict_op
        self.inference_train_op = inference_train_op
        # self.ali_res_1 = ali_res_1_op
        # self.ali_res_2 = ali_res_2_op

        init_op = tf.global_variables_initializer()
        table_op = tf.tables_initializer()
        self._sess = sess
        self._sess.run(init_op)
        self._sess.run(table_op)
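orthogonal_initializer() is another project-level helper that is not shown. TensorFlow 1.x ships an orthogonal initializer, so a minimal stand-in could simply wrap it; the repository's real helper may be a custom (for example SVD-based) implementation, so treat this as an assumption.

import tensorflow as tf

def orthogonal_initializer(gain=1.0):
    # Thin wrapper around TensorFlow's built-in orthogonal initializer.
    return tf.orthogonal_initializer(gain=gain)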
Example #7
    def build_model(self):

        # init ph, weights and dropout rate
        self.input_feature_ph_dict = dict()

        self.input_char_ph_dict = dict()
        self.char_weight_dict = dict()
        # build the feature-weight and dropout-placeholder dictionaries
        self.weight_dropout_ph_dict = dict()
        self.weight_dropout_ph_dict['char'] = tf.placeholder(tf.float32, name='dropout_char')
        self.feature_weight_dict = dict()
        self.nil_vars = set()
        self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
        # label ph
        self.input_label_ph = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')
        # read in the features and build their placeholders
        for feature_name in self.feature_names:
            # input ph: one placeholder per feature
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self.sequence_length],
                name='input_feature_ph_%s' % feature_name)
            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)
            # init feature weights; randomly initialize the ones not specified
            if feature_name not in self.feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self.feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weigth_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self.feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
            # init dropout rate; default to 0 for the ones not specified
            if feature_name not in self.feature_weight_dropout_dict:
                self.feature_weight_dropout_dict[feature_name] = 0.

        # initialize the character embedding space
        self.input_char_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length, self.word_length],
            name='char_input')
        self.input_char_flat = tf.reshape(
            self.input_char_ph_dict['char'], [-1, self.word_length * self.sequence_length],
            name='input_x_char_flat')
        self.char_weight_dict['char'] = tf.Variable(
            initial_value=self.char_init_weight_dict['char'], name='char_init')

        self.char_embedding_init = tf.nn.dropout(tf.nn.embedding_lookup(
            self.char_weight_dict['char'],
            ids=self.input_char_flat,
            name='char_embedding'),
            keep_prob=1. - self.weight_dropout_ph_dict['char'],
            name='char_dropout')
        with tf.name_scope('char_conv'):
            self.filter_shape = [self.filter_size, self.char_embedding_size, self.num_filter]
            self.W_conv = tf.Variable(tf.truncated_normal(self.filter_shape, stddev=0.1), name='W_conv')
            self.b_conv = tf.Variable(tf.constant(0.1, shape=[self.num_filter]), name='b_conv')
            self.conv = tf.nn.conv1d(self.char_embedding_init, self.W_conv, stride=1, padding='SAME', name='conv')
            self.h_expand = tf.expand_dims(self.conv, -1)
            self.pooled = tf.nn.max_pool(self.h_expand, ksize=[1, self.sequence_length * self.word_length, 1, 1],
                                         strides=[1, self.word_length, 1, 1], padding='SAME', name='pooled')
            self.char_pool_flat = tf.reshape(self.pooled, [-1, self.sequence_length, self.num_filter],
                                             name='char_pool_flat')
        # init embeddings
        # embed each feature, then concatenate them
        self.embedding_features = []
        for feature_name in self.feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)

        # concat all features
        # concatenate the per-feature embeddings into the sentence representation
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')
        input_features = tf.concat([input_features, self.char_pool_flat], axis=2)
        # bi-lstm

        if self.rnn_unit == 'lstm':
            fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
            bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        elif self.rnn_unit == 'gru':
            fw_cell = rnn.GRUCell(self.nb_hidden)
            bw_cell = rnn.GRUCell(self.nb_hidden)
        else:
            raise ValueError('rnn_unit must in (lstm, gru)!')
        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding id)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self.feature_names[0]])
        # print(input_features)
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, input_features, scope='bi-lstm',
            dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # shape = [batch_size, max_len, nb_hidden*2]
        # after dropout the input/output dimensions stay unchanged
        lstm_output = tf.nn.dropout(
            tf.concat(rnn_outputs, axis=2, name='lstm_output'),
            keep_prob=1. - self.dropout_rate_ph, name='lstm_output_dropout')

        # softmax
        # reshape the outputs for the softmax layer
        self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden * 2], name='outputs')
        self.softmax_w = tf.get_variable('softmax_w', [self.nb_hidden * 2, self.nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
        self.logits = tf.reshape(
            tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
            shape=[-1, self.sequence_length, self.nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self.clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be made configurable
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
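Several of the build_model variants above call self.compute_loss(), which none of the snippets define. For the non-CRF models a plausible sketch is a length-masked sparse softmax cross-entropy over self.logits and self.input_label_ph; this is an assumption, and the repository may instead use a CRF likelihood here.

import tensorflow as tf

def compute_loss(self):
    # Per-token cross-entropy, masked so that padding positions do not contribute.
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.input_label_ph, logits=self.logits)        # [batch, max_len]
    mask = tf.sequence_mask(self.sequence_actual_length,
                            maxlen=self.sequence_length,
                            dtype=tf.float32)                   # 0 at padding positions
    return tf.reduce_sum(losses * mask) / tf.reduce_sum(mask)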