# Example #1
# 0
def cnn_model(num_steps, num_context, num_event, keep_rate_input, dae_weight,
              phase_indicator, autoencoder_length, filter_num, filter_size,
              autoencoder_initializer):
    """Build the graph of the CNN classifier with an optional autoencoder loss.

    The function creates the input placeholders, passes the context input
    through ``autoencoder.denoising_autoencoder``, hands the result (together
    with the event and time inputs) to ``_cnn_detail`` and finally combines a
    sigmoid cross-entropy loss with a weighted autoencoder loss.

    :param num_steps: size of the time axis of the placeholders
    :param num_context: feature width of the context placeholder
    :param num_event: feature width of the event placeholder
    :param keep_rate_input: forwarded to ``autoencoder.denoising_autoencoder``
    :param dae_weight: multiplier applied to the autoencoder loss term
    :param phase_indicator: forwarded to ``autoencoder.denoising_autoencoder``
        and returned unchanged
    :param autoencoder_length: forwarded to
        ``autoencoder.denoising_autoencoder``; the autoencoder loss is only
        added when this value is greater than 0
    :param filter_num: forwarded to ``_cnn_detail`` as ``filter_num``
    :param filter_size: forwarded to ``_cnn_detail`` as ``filter_size_list``
    :param autoencoder_initializer: forwarded to
        ``autoencoder.denoising_autoencoder``
    :return: tuple ``(loss, predictions, event_placeholder,
        context_placeholder, y_placeholder, batch_size, phase_indicator,
        time_list)``
    """
    with tf.name_scope('data_source'):
        # Event/context inputs use the TBD layout (time, batch, depth).
        batch_size_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                       name='batch_size')
        event_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[num_steps, None, num_event],
                                  name='event_placeholder')
        context_ph = tf.placeholder(dtype=tf.float32,
                                    shape=[num_steps, None, num_context],
                                    name='context_placeholder')
        time_ph = tf.placeholder(dtype=tf.int32, shape=[None, num_steps],
                                 name='time_list')
        label_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                                  name='y_placeholder')

    with tf.name_scope('autoencoder'):
        # The raw context placeholder is kept around because it is the
        # reconstruction target of the autoencoder loss further down.
        encoded_context, ae_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_ph, keep_rate_input,
            autoencoder_length, autoencoder_initializer)

    with tf.name_scope('cnn'):
        predictions, logits = _cnn_detail(
            processed_input=encoded_context,
            event_list=event_ph,
            time_list=time_ph,
            filter_num=filter_num,
            filter_size_list=filter_size,
            scope='cnn')

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            prediction_loss = tf.losses.sigmoid_cross_entropy(
                multi_class_labels=label_ph, logits=logits)

        with tf.name_scope('dae_loss'):
            # Without an autoencoder the reconstruction term is the constant 0.
            reconstruction_loss = 0
            if autoencoder_length > 0:
                reconstruction_loss = autoencoder.autoencoder_loss(
                    embedding=encoded_context,
                    origin_input=context_ph,
                    weight=ae_weight)

        with tf.name_scope('loss_sum'):
            weighted_reconstruction = reconstruction_loss * dae_weight
            total_loss = prediction_loss + weighted_reconstruction

    return (total_loss, predictions, event_ph, context_ph, label_ph,
            batch_size_ph, phase_indicator, time_ph)
def vanilla_rnn_model(cell,
                      num_steps,
                      num_hidden,
                      num_context,
                      num_event,
                      keep_rate_input,
                      dae_weight,
                      phase_indicator,
                      auto_encoder_value,
                      auto_encoder_initializer=tf.initializers.orthogonal()):
    """Build the graph of a plain RNN classifier with an optional autoencoder.

    :param cell: RNN cell handed to ``__vanilla_rnn`` as ``rnn_cell``
    :param num_steps: size of the time axis of the placeholders
    :param num_hidden: number of rows of the output-layer weight matrix; it
        is multiplied with the last element of the RNN output list
    :param num_context: feature width of the context placeholder (per the
        original documentation, all context variables are expected to be
        binarized)
    :param num_event: feature width of the event placeholder
    :param keep_rate_input: forwarded to ``autoencoder.denoising_autoencoder``
    :param dae_weight: multiplier applied to the autoencoder loss term
    :param phase_indicator: forwarded to ``autoencoder.denoising_autoencoder``
        and returned unchanged (per the original documentation, > 0 means
        test phase and <= 0 means training phase)
    :param auto_encoder_value: forwarded to
        ``autoencoder.denoising_autoencoder``; the autoencoder loss is only
        added when this value is greater than 0 (per the original
        documentation, it is the dimensionality the input is reduced to)
    :param auto_encoder_initializer: forwarded to
        ``autoencoder.denoising_autoencoder``
    :return: tuple ``(loss, prediction, event_placeholder,
        context_placeholder, y_placeholder, batch_size, phase_indicator)``
    """
    with tf.name_scope('vanilla_rnn'):
        with tf.name_scope('data_source'):
            batch_size_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                           name='batch_size')
            # Inputs use the BTD layout (batch, time, depth).
            event_ph = tf.placeholder(dtype=tf.float32,
                                      shape=[None, num_steps, num_event],
                                      name='event_placeholder')
            context_ph = tf.placeholder(dtype=tf.float32,
                                        shape=[None, num_steps, num_context],
                                        name='context_placeholder')
            label_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                                      name='y_placeholder')

            # NOTE(review): the autoencoder and RNN ops are created inside
            # the 'data_source' name scope; moving them out would change
            # their op names.
            # ``raw_input`` is the reconstruction target of the autoencoder
            # loss computed below.
            autoencoder_result = autoencoder.denoising_autoencoder(
                phase_indicator, context_ph, event_ph,
                keep_rate_input, auto_encoder_value, auto_encoder_initializer)
            encoded_input, raw_input, ae_weight = autoencoder_result

            rnn_outputs = __vanilla_rnn(batch_size=batch_size_ph,
                                        rnn_cell=cell,
                                        input_x=encoded_input)

    with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
        projection = tf.get_variable(
            name="weight", shape=[num_hidden, 1],
            initializer=tf.initializers.orthogonal())
        offset = tf.get_variable(name='bias', shape=[])

    with tf.name_scope('loss'):
        # Only the last element of the RNN output list drives the prediction.
        logits = tf.matmul(rnn_outputs[-1], projection) + offset
        prediction_loss = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=label_ph, logits=logits)

        # Without an autoencoder the reconstruction term is the constant 0.
        reconstruction_loss = 0
        if auto_encoder_value > 0:
            reconstruction_loss = autoencoder.autoencoder_loss(
                embedding=encoded_input,
                origin_input=raw_input,
                weight=ae_weight)

        weighted_reconstruction = reconstruction_loss * dae_weight
        total_loss = prediction_loss + weighted_reconstruction

    with tf.name_scope('prediction'):
        probability = tf.sigmoid(logits)

    return (total_loss, probability, event_ph, context_ph, label_ph,
            batch_size_ph, phase_indicator)
# Example #3
# 0
def hawkes_rnn_model(cell,
                     num_steps,
                     num_hidden,
                     num_context,
                     num_event,
                     keep_rate_input,
                     dae_weight,
                     phase_indicator,
                     autoencoder_length,
                     autoencoder_initializer=tf.initializers.orthogonal()):
    """Build the graph of the Hawkes RNN classifier.

    The context input goes through ``autoencoder.denoising_autoencoder``, is
    concatenated with the event input along the last axis and fed to
    ``_hawkes_dynamic_rnn``. The (possibly trimmed) final state is projected
    to a single logit that drives both the prediction and the loss.

    :param cell: RNN cell; must provide ``get_initial_state(batch_size)``
    :param num_steps: size of the time axis of the placeholders
    :param num_hidden: number of rows of the output weight matrix, which is
        multiplied with the final state
    :param num_context: feature width of the context placeholder (per the
        original documentation, all context variables are expected to be
        binarized)
    :param num_event: feature width of the event placeholder and side length
        of the intensity placeholders
    :param keep_rate_input: forwarded to ``autoencoder.denoising_autoencoder``
    :param dae_weight: multiplier applied to the autoencoder loss term
    :param phase_indicator: forwarded to ``autoencoder.denoising_autoencoder``
        and returned unchanged (per the original documentation, > 0 means
        test phase and <= 0 means training phase)
    :param autoencoder_length: forwarded to
        ``autoencoder.denoising_autoencoder``; the autoencoder loss is only
        added when this value is greater than 0 (per the original
        documentation, it is the dimensionality the input is reduced to)
    :param autoencoder_initializer: forwarded to
        ``autoencoder.denoising_autoencoder``
    :return: tuple ``(loss, prediction, event_placeholder,
        context_placeholder, y_placeholder, batch_size, phase_indicator,
        base_intensity, mutual_intensity, time_list, task_index,
        sequence_length, final_state)``
    :raises ValueError: if the final state width is neither equal to nor
        twice the RNN output width
    """
    with tf.name_scope('data_source'):
        # Event/context inputs use the TBD layout (time, batch, depth).
        batch_size_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                       name='batch_size')
        event_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[num_steps, None, num_event],
                                  name='event_placeholder')
        context_ph = tf.placeholder(dtype=tf.float32,
                                    shape=[num_steps, None, num_context],
                                    name='context_placeholder')
        base_intensity_ph = tf.placeholder(dtype=tf.float32,
                                           shape=[num_event, 1],
                                           name='base_intensity')
        mutual_intensity_ph = tf.placeholder(dtype=tf.float32,
                                             shape=[num_event, num_event],
                                             name='mutual_intensity')
        time_ph = tf.placeholder(dtype=tf.int32, shape=[None, num_steps],
                                 name='time_list')
        task_index_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                       name='task_index')
        sequence_length_ph = tf.placeholder(dtype=tf.int32, shape=[None],
                                            name='sequence_length')
        label_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                                  name='y_placeholder')

        start_state = cell.get_initial_state(batch_size_ph)

    with tf.name_scope('autoencoder'):
        encoded_context, ae_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_ph, keep_rate_input,
            autoencoder_length, autoencoder_initializer)

    with tf.name_scope('hawkes_rnn'):
        rnn_input = tf.concat([encoded_context, event_ph], axis=2)
        rnn_outputs, last_state = _hawkes_dynamic_rnn(
            cell,
            rnn_input,
            sequence_length_ph,
            start_state,
            base_intensity=base_intensity_ph,
            task_index=task_index_ph,
            mutual_intensity=mutual_intensity_ph,
            time_list=time_ph,
            event_list=event_ph)
        # An LSTM-style state packs the hidden state and the cell state
        # together, so it is twice as wide as the output and has to be split.
        # This is admittedly inelegant, but no better approach was found.
        # NOTE(review): the original comment says only "the latter" of the
        # two is needed, while the code keeps the first half of the split;
        # confirm the ordering against the cell implementation.
        output_width = rnn_outputs.shape[2].value
        state_width = last_state.shape[1].value
        if output_width != state_width:
            if output_width * 2 != state_width:
                raise ValueError('Invalid Size')
            last_state, _ = tf.split(last_state, num_or_size_splits=2, axis=1)

    with tf.variable_scope('output_para'):
        projection = tf.get_variable(
            name="weight", shape=[num_hidden, 1],
            initializer=tf.initializers.orthogonal())
        offset = tf.get_variable(name='bias', shape=[])

    with tf.name_scope('prediction'):
        logits = tf.matmul(last_state, projection) + offset
        probability = tf.sigmoid(logits)

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            prediction_loss = tf.losses.sigmoid_cross_entropy(
                multi_class_labels=label_ph, logits=logits)

        with tf.name_scope('dae_loss'):
            # Without an autoencoder the reconstruction term is the constant 0.
            reconstruction_loss = 0
            if autoencoder_length > 0:
                reconstruction_loss = autoencoder.autoencoder_loss(
                    embedding=encoded_context,
                    origin_input=context_ph,
                    weight=ae_weight)

        with tf.name_scope('loss_sum'):
            weighted_reconstruction = reconstruction_loss * dae_weight
            total_loss = prediction_loss + weighted_reconstruction

    return (total_loss, probability, event_ph, context_ph, label_ph,
            batch_size_ph, phase_indicator, base_intensity_ph,
            mutual_intensity_ph, time_ph, task_index_ph, sequence_length_ph,
            last_state)
# Example #4
# 0
def concat_hawkes_model(cell_context,
                        cell_event,
                        num_steps,
                        num_hidden,
                        num_context,
                        num_event,
                        keep_rate_input,
                        dae_weight,
                        phase_indicator,
                        autoencoder_length,
                        autoencoder_initializer=tf.initializers.orthogonal()):
    """Build the graph of the two-branch (context + event) Hawkes RNN model.

    One ``_hawkes_dynamic_rnn`` runs over the autoencoded context input and a
    second one over the raw event input. Their (possibly trimmed) final
    states are concatenated and projected to a single logit that drives both
    the prediction and the loss.

    :param cell_context: RNN cell of the context branch; must provide
        ``get_initial_state(batch_size)``
    :param cell_event: RNN cell of the event branch; must provide
        ``get_initial_state(batch_size)``
    :param num_steps: size of the time axis of the placeholders
    :param num_hidden: the output weight matrix has ``num_hidden * 2`` rows
        and is multiplied with the concatenated final states
    :param num_context: feature width of the context placeholder
    :param num_event: feature width of the event placeholder and side length
        of the intensity placeholders
    :param keep_rate_input: forwarded to ``autoencoder.denoising_autoencoder``
    :param dae_weight: multiplier applied to the autoencoder loss term
    :param phase_indicator: forwarded to ``autoencoder.denoising_autoencoder``
        and returned unchanged
    :param autoencoder_length: forwarded to
        ``autoencoder.denoising_autoencoder``; the autoencoder loss is only
        added when this value is greater than 0
    :param autoencoder_initializer: forwarded to
        ``autoencoder.denoising_autoencoder``
    :return: tuple ``(loss, prediction, event_placeholder,
        context_placeholder, y_placeholder, batch_size, phase_indicator,
        base_intensity, mutual_intensity, time_list, task_index,
        sequence_length, concat_final_state)``
    :raises ValueError: if a branch's final state width is neither equal to
        nor twice its RNN output width
    """

    def _state_for_output(rnn_outputs, rnn_state):
        # Return the state unchanged when it is as wide as the output, or
        # the first half of it when it is exactly twice as wide.
        output_width = rnn_outputs.shape[2].value
        state_width = rnn_state.shape[1].value
        if output_width == state_width:
            return rnn_state
        if output_width * 2 == state_width:
            return tf.split(rnn_state, 2, axis=1)[0]
        raise ValueError('Invalid Size')

    with tf.name_scope('data_source'):
        # Event/context inputs use the TBD layout (time, batch, depth).
        batch_size_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                       name='batch_size')
        event_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[num_steps, None, num_event],
                                  name='event_placeholder')
        context_ph = tf.placeholder(dtype=tf.float32,
                                    shape=[num_steps, None, num_context],
                                    name='context_placeholder')
        base_intensity_ph = tf.placeholder(dtype=tf.float32,
                                           shape=[num_event, 1],
                                           name='base_intensity')
        mutual_intensity_ph = tf.placeholder(dtype=tf.float32,
                                             shape=[num_event, num_event],
                                             name='mutual_intensity')
        time_ph = tf.placeholder(dtype=tf.int32, shape=[None, num_steps],
                                 name='time_list')
        task_index_ph = tf.placeholder(dtype=tf.int32, shape=[],
                                       name='task_index')
        sequence_length_ph = tf.placeholder(dtype=tf.int32, shape=[None],
                                            name='sequence_length')
        label_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                                  name='y_placeholder')

        event_start_state = cell_event.get_initial_state(batch_size_ph)
        context_start_state = cell_context.get_initial_state(batch_size_ph)

    with tf.name_scope('autoencoder'):
        # The raw context placeholder is kept around because it is the
        # reconstruction target of the autoencoder loss further down.
        encoded_context, ae_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_ph, keep_rate_input,
            autoencoder_length, autoencoder_initializer)

    with tf.name_scope('context_hawkes_rnn'):
        context_outputs, context_state = _hawkes_dynamic_rnn(
            cell_context,
            encoded_context,
            sequence_length_ph,
            context_start_state,
            base_intensity=base_intensity_ph,
            task_index=task_index_ph,
            mutual_intensity=mutual_intensity_ph,
            time_list=time_ph,
            event_list=event_ph,
            scope='context_hawkes')
        context_final_state = _state_for_output(context_outputs,
                                                context_state)

    with tf.name_scope('event_hawkes_rnn'):
        event_outputs, event_state = _hawkes_dynamic_rnn(
            cell_event,
            event_ph,
            sequence_length_ph,
            event_start_state,
            base_intensity=base_intensity_ph,
            task_index=task_index_ph,
            mutual_intensity=mutual_intensity_ph,
            time_list=time_ph,
            event_list=event_ph,
            scope='event_hawkes')
        event_final_state = _state_for_output(event_outputs, event_state)

    with tf.name_scope('state_concat'):
        # Event state first, context state second.
        merged_state = tf.concat([event_final_state, context_final_state],
                                 axis=1)

    with tf.variable_scope('output_para'):
        projection = tf.get_variable(
            name="weight", shape=[num_hidden * 2, 1],
            initializer=tf.initializers.orthogonal())
        offset = tf.get_variable(name='bias', shape=[])

    with tf.name_scope('prediction'):
        logits = tf.matmul(merged_state, projection) + offset
        probability = tf.sigmoid(logits)

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            prediction_loss = tf.losses.sigmoid_cross_entropy(
                multi_class_labels=label_ph, logits=logits)

        with tf.name_scope('dae_loss'):
            # Without an autoencoder the reconstruction term is the constant 0.
            reconstruction_loss = 0
            if autoencoder_length > 0:
                reconstruction_loss = autoencoder.autoencoder_loss(
                    embedding=encoded_context,
                    origin_input=context_ph,
                    weight=ae_weight)

        with tf.name_scope('loss_sum'):
            weighted_reconstruction = reconstruction_loss * dae_weight
            total_loss = prediction_loss + weighted_reconstruction

    return (total_loss, probability, event_ph, context_ph, label_ph,
            batch_size_ph, phase_indicator, base_intensity_ph,
            mutual_intensity_ph, time_ph, task_index_ph, sequence_length_ph,
            merged_state)