Example No. 1
def bi_sru_recurrent_network(
        rep_tensor, rep_mask, is_train=None, keep_prob=1., wd=0.,
        scope=None):
    """

    :param rep_tensor: [Tensor/tf.float32] rank is 3 with shape [batch_size/bs, max_sent_len/sl, vec]
    :param rep_mask: [Tensor/tf.bool]rank is 2 with shape [bs,sl]
    :param is_train: [Scalar Tensor/tf.bool]scalar tensor to indicate whether the mode is training or not
    :param keep_prob: [float] dropout keep probability in the range of (0,1)
    :param wd: [float]for L2 regularization, if !=0, add tensors to tf collection "reg_vars"
    :param scope: [str]variable scope name
    :return: [Tensor/tf.float32] with shape [bs, sl, 2vec] for forward and backward
    """
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape().as_list()[2]
    with tf.variable_scope(scope or 'bi_sru_recurrent_network'):
        U_d = bn_dense_layer([rep_tensor], 6 * ivec, False, 0., 'get_frc', 'linear',
                             False, wd, keep_prob, is_train)  # bs, sl, 6vec
        U_d_fw, U_d_bw = tf.split(U_d, 2, 2)  # forward / backward projections, each [bs, sl, 3vec]
        with tf.variable_scope('forward'):
            U_fw = tf.concat([rep_tensor, U_d_fw], -1)
            fw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            fw_output, _ = dynamic_rnn(
                fw_SRUCell, U_fw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='forward_sru')  # bs, sl, vec

        with tf.variable_scope('backward'):
            U_bw = tf.concat([rep_tensor, U_d_bw], -1)
            bw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            bw_output, _ = bw_dynamic_rnn(
                bw_SRUCell, U_bw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='backward_sru')  # bs, sl, vec

        all_output = tf.concat([fw_output, bw_output], -1)  # bs, sl, 2vec
        return all_output
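Both SRU examples wrap the recurrent cell in SwitchableDropoutWrapper, which is defined elsewhere in the project. A minimal sketch of such a wrapper, assuming it only needs to switch the dropout keep probability off at inference time (the project's actual wrapper may implement this differently), could look like:

import tensorflow as tf

class SwitchableDropoutWrapper(tf.contrib.rnn.DropoutWrapper):
    """Sketch (assumption): apply input/output dropout only while training."""

    def __init__(self, cell, is_train, keep_prob):
        # Collapse the keep probability to 1.0 when is_train is False so the
        # wrapped cell behaves deterministically at inference time.
        keep = tf.cond(is_train,
                       lambda: tf.constant(keep_prob, tf.float32),
                       lambda: tf.constant(1., tf.float32))
        super(SwitchableDropoutWrapper, self).__init__(
            cell, input_keep_prob=keep, output_keep_prob=keep)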
Example No. 2
def one_direction_rnn(tensor_rep,
                      mask_rep,
                      hn,
                      cell_type,
                      only_final=False,
                      wd=0.,
                      keep_prob=1.,
                      is_train=None,
                      is_forward=True,
                      scope=None):
    assert not is_forward  # TODO: the forward direction is not implemented yet
    with tf.variable_scope(scope or '%s_rnn' %
                           ('forward' if is_forward else 'backward')):
        reuse = None if not tf.get_variable_scope().reuse else True
        # print(reuse)
        if cell_type == 'gru':
            cell = tf.contrib.rnn.GRUCell(hn, reuse=reuse)
        elif cell_type == 'lstm':
            cell = tf.contrib.rnn.LSTMCell(hn, reuse=reuse)
        elif cell_type == 'basic_lstm':
            cell = tf.contrib.rnn.BasicLSTMCell(hn, reuse=reuse)
        elif cell_type == 'basic_rnn':
            cell = tf.contrib.rnn.BasicRNNCell(hn, reuse=reuse)
        else:
            raise AttributeError('no cell type \'%s\'' % cell_type)
        cell_dp = SwitchableDropoutWrapper(cell, is_train, keep_prob)

        tensor_len = tf.reduce_sum(tf.cast(mask_rep, tf.int32), -1)  # [bs]

        rnn_outputs, _ = dynamic_rnn(cell_dp,
                                     tensor_rep,
                                     tensor_len,
                                     dtype=tf.float32)

        if wd > 0:
            add_reg_without_bias()
        if not only_final:
            return rnn_outputs  # [..., sl, hn]
        else:
            return get_last_state(rnn_outputs, mask_rep)  # [..., hn]
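The only_final branch calls get_last_state, which is not shown in this example. A minimal sketch, assuming it simply gathers the RNN output at the last unmasked timestep of each sequence, could be:

import tensorflow as tf

def get_last_state(rnn_outputs, mask):
    # Sketch (assumption): rnn_outputs is [bs, sl, hn], mask is a boolean
    # [bs, sl] tensor; return the output at each sequence's last valid step.
    lengths = tf.reduce_sum(tf.cast(mask, tf.int32), -1)   # [bs]
    last_idx = tf.maximum(lengths - 1, 0)                   # [bs], guard all-padding rows
    batch_idx = tf.range(tf.shape(rnn_outputs)[0])          # [bs]
    return tf.gather_nd(rnn_outputs, tf.stack([batch_idx, last_idx], axis=1))  # [bs, hn]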
Example No. 3
def bi_sru_recurrent_network(rep_tensor,
                             rep_mask,
                             is_train=None,
                             keep_prob=1.,
                             wd=0.,
                             scope=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape().as_list()[2]
    with tf.variable_scope(scope or 'bi_sru_recurrent_network'):
        U_d = bn_dense_layer([rep_tensor], 6 * ivec, True, 0., 'get_frc',
                             'linear', False, wd, keep_prob,
                             is_train)  # bs, sl, 6vec
        U_d_fw, U_d_bw = tf.split(U_d, 2, 2)
        with tf.variable_scope('forward'):
            U_fw = tf.concat([rep_tensor, U_d_fw], -1)
            fw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh),
                                                  is_train, keep_prob)
            fw_output, _ = dynamic_rnn(fw_SRUCell,
                                       U_fw,
                                       tf.reduce_sum(
                                           tf.cast(rep_mask, tf.int32), -1),
                                       dtype=tf.float32,
                                       scope='forward_sru')  # bs, sl, vec

        with tf.variable_scope('backward'):
            U_bw = tf.concat([rep_tensor, U_d_bw], -1)
            bw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh),
                                                  is_train, keep_prob)
            bw_output, _ = bw_dynamic_rnn(bw_SRUCell,
                                          U_bw,
                                          tf.reduce_sum(
                                              tf.cast(rep_mask, tf.int32), -1),
                                          dtype=tf.float32,
                                          scope='backward_sru')  # bs, sl, vec

        all_output = tf.concat([fw_output, bw_output], -1)  # bs, sl, 2vec
        return all_output
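Both bi_sru_recurrent_network variants delegate the per-step computation to SRUCell, which is defined elsewhere. As a rough sketch of the Simple Recurrent Unit recurrence (Lei et al., 2017), and under the assumption that the per-step input is the concatenation [x_t, x̃_t, f-projection, r-projection] prepared by bn_dense_layer outside the loop, the cell could look like the following; the project's real SRUCell may arrange its input and gates differently:

import tensorflow as tf

class SRUCell(tf.contrib.rnn.RNNCell):
    """Sketch: input is assumed to be the concatenation [x, x_tilde, f_proj, r_proj]."""

    def __init__(self, num_units, activation=tf.nn.tanh):
        super(SRUCell, self).__init__()
        self._num_units = num_units
        self._activation = activation

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        x, x_tilde, f_proj, r_proj = tf.split(inputs, 4, axis=-1)
        f = tf.sigmoid(f_proj)                         # forget gate
        r = tf.sigmoid(r_proj)                         # reset / highway gate
        c = f * state + (1. - f) * x_tilde             # internal state c_t
        h = r * self._activation(c) + (1. - r) * x     # highway output h_t
        return h, c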
Example No. 4
    def build_network(self):

        with tf.name_scope('code_embeddings'):
            if self.model_type == 'raw':
                # init_code_embed = tf.random_uniform([self.vocabulary_size, self.embedding_size], -1.0, 1.0)
                # code_embeddings = tf.Variable(init_code_embed)
                init_code_embed = tf.one_hot(self.inputs, self.vocabulary_size, on_value=1.0, off_value=0.0, axis=-1)
                inputs_embed = bn_dense_layer(init_code_embed, self.embedding_size, True, 0.,
                                        'bn_dense_map_linear', 'linear',
                                        False, wd=0., keep_prob=1.,
                                        is_train=True)
            elif self.model_type in ('tesa', 'delta', 'sa', 'normal', 'cbow', 'sg'):
                # These model types all obtain their initial embedding matrix from tesan_trans.
                init_code_embed = tesan_trans(self.model_type)
                code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
            elif self.model_type == 'mce':
                init_code_embed = mce_trans()
                # code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
            elif self.model_type == 'glove':
                init_code_embed = glove_trans()
                # code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
            else:
                init_code_embed = med2vec_trans()
                # code_embeddings = tf.constant(init_code_embed, dtype=tf.float32)
                code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
                inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)

        with tf.name_scope('visit_embedding'):
            # bs, max_visits, max_len_visit, embed_size
            inputs_masked = mask_for_high_rank(inputs_embed, self.inputs_mask)
            inputs_reduced = tf.reduce_mean(inputs_masked, 2)  # batch_size, max_visits, embed_size

        with tf.name_scope('visit_masking'):
            visit_mask = tf.reduce_sum(tf.cast(self.inputs_mask, tf.int32), -1)  # [bs,max_visits]
            visit_mask = tf.cast(visit_mask, tf.bool)
            tensor_len = tf.reduce_sum(tf.cast(visit_mask, tf.int32), -1)  # [bs]

        with tf.name_scope('RNN_computation'):
            reuse = None if not tf.get_variable_scope().reuse else True
            if cfg.cell_type == 'gru':
                cell = tf.contrib.rnn.GRUCell(cfg.hn, reuse=reuse)
            elif cfg.cell_type == 'lstm':
                cell = tf.contrib.rnn.LSTMCell(cfg.hn, reuse=reuse)
            elif cfg.cell_type == 'basic_lstm':
                cell = tf.contrib.rnn.BasicLSTMCell(cfg.hn, reuse=reuse)
            elif cfg.cell_type == 'basic_rnn':
                cell = tf.contrib.rnn.BasicRNNCell(cfg.hn, reuse=reuse)
            else:
                raise AttributeError('no cell type \'%s\'' % cfg.cell_type)

            outputs, final_state = dynamic_rnn(cell, inputs_reduced, tensor_len, dtype=tf.float32)
        return outputs, final_state, tensor_len
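build_network relies on mask_for_high_rank to zero out the embeddings of padded codes before averaging them into visit vectors. It is not defined in this snippet; a minimal sketch, assuming it simply broadcasts the boolean mask over the embedding dimension, is:

import tensorflow as tf

def mask_for_high_rank(val, mask):
    # Sketch (assumption): val is [bs, max_visits, max_len_visit, embed_size]
    # and mask is the boolean tensor with one fewer dimension; padded positions
    # are zeroed so they do not dominate the subsequent reduce_mean.
    return val * tf.cast(tf.expand_dims(mask, -1), tf.float32)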