def cnn_model(num_steps, num_context, num_event, keep_rate_input, dae_weight, phase_indicator,
              autoencoder_length, filter_num, filter_size, autoencoder_initializer):
    with tf.name_scope('data_source'):
        # the canonical input layout is time-major (TBD): [num_steps, batch_size, dim]
        batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        event_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_event], name='event_placeholder')
        context_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_context], name='context_placeholder')
        time_list = tf.placeholder(tf.int32, [None, num_steps], name='time_list')
        y_placeholder = tf.placeholder(tf.float32, [None, 1], name='y_placeholder')

    with tf.name_scope('autoencoder'):
        # processed_input is later compared with context_placeholder to compute the reconstruction (DAE) loss
        processed_input, autoencoder_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_placeholder, keep_rate_input, autoencoder_length, autoencoder_initializer)

    with tf.name_scope('cnn'):
        predictions, scores = _cnn_detail(processed_input=processed_input, event_list=event_placeholder,
                                          time_list=time_list, filter_num=filter_num,
                                          filter_size_list=filter_size, scope='cnn')

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            loss_pred = tf.losses.sigmoid_cross_entropy(logits=scores, multi_class_labels=y_placeholder)
        with tf.name_scope('dae_loss'):
            if autoencoder_length > 0:
                loss_dae = autoencoder.autoencoder_loss(embedding=processed_input, origin_input=context_placeholder,
                                                        weight=autoencoder_weight)
            else:
                loss_dae = 0
        with tf.name_scope('loss_sum'):
            loss = loss_pred + loss_dae * dae_weight

    return loss, predictions, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator, \
        time_list
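# A minimal usage sketch for cnn_model (illustrative, not part of the original pipeline): it wires
# the returned tensors into an Adam optimizer and runs one training step on random time-major data.
# The hyper-parameter values, the list passed as filter_size, and the int32 dtype of the scalar
# phase_indicator placeholder are all assumptions made for the example.
def _cnn_model_usage_sketch():
    import numpy as np
    num_steps, num_context, num_event, batch = 10, 25, 11, 4
    # phase_indicator > 0 means the test phase, <= 0 the training phase
    phase = tf.placeholder(tf.int32, [], name='phase_indicator')
    loss, predictions, event_ph, context_ph, y_ph, batch_size_ph, phase_ph, time_ph = cnn_model(
        num_steps=num_steps, num_context=num_context, num_event=num_event, keep_rate_input=0.8,
        dae_weight=0.1, phase_indicator=phase, autoencoder_length=15, filter_num=16,
        filter_size=[2, 3], autoencoder_initializer=tf.initializers.orthogonal())
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {event_ph: np.random.randint(0, 2, [num_steps, batch, num_event]).astype(np.float32),
                context_ph: np.random.randint(0, 2, [num_steps, batch, num_context]).astype(np.float32),
                time_ph: np.random.randint(0, 100, [batch, num_steps]),
                y_ph: np.random.randint(0, 2, [batch, 1]).astype(np.float32),
                batch_size_ph: batch, phase_ph: -1}
        sess.run([train_op, loss], feed_dict=feed)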
def vanilla_rnn_model(cell, num_steps, num_hidden, num_context, num_event, keep_rate_input, dae_weight,
                      phase_indicator, auto_encoder_value, auto_encoder_initializer=tf.initializers.orthogonal()):
    """
    :param cell:
    :param num_steps:
    :param num_hidden:
    :param num_context: all context variables are required to be binary
    :param num_event:
    :param dae_weight:
    :param keep_rate_input:
    :param phase_indicator:
    :param auto_encoder_value: when greater than 0, a denoising autoencoder is applied to the input;
        the value is the target (reduced) dimensionality
    :param auto_encoder_initializer:
    :return: loss, prediction, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator,
        where phase_indicator > 0 denotes the test phase and <= 0 the training phase
    """
    with tf.name_scope('vanilla_rnn'):
        with tf.name_scope('data_source'):
            batch_size = tf.placeholder(tf.int32, [], name='batch_size')
            # the canonical input layout here is batch-major (BTD): [batch_size, num_steps, dim]
            event_placeholder = tf.placeholder(tf.float32, [None, num_steps, num_event], name='event_placeholder')
            context_placeholder = tf.placeholder(tf.float32, [None, num_steps, num_context],
                                                 name='context_placeholder')
            y_placeholder = tf.placeholder(tf.float32, [None, 1], name='y_placeholder')

        # input_x is used to compute the error produced when reconstructing the original vector
        processed_input, input_x, autoencoder_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_placeholder, event_placeholder, keep_rate_input, auto_encoder_value,
            auto_encoder_initializer)

        output_list = __vanilla_rnn(batch_size=batch_size, rnn_cell=cell, input_x=processed_input)

        with tf.variable_scope('output_layer', reuse=tf.AUTO_REUSE):
            output_weight = tf.get_variable("weight", [num_hidden, 1], initializer=tf.initializers.orthogonal())
            bias = tf.get_variable('bias', [])

        with tf.name_scope('loss'):
            unnormalized_prediction = tf.matmul(output_list[-1], output_weight) + bias
            loss_pred = tf.losses.sigmoid_cross_entropy(logits=unnormalized_prediction,
                                                        multi_class_labels=y_placeholder)
            if auto_encoder_value > 0:
                loss_dae = autoencoder.autoencoder_loss(embedding=processed_input, origin_input=input_x,
                                                        weight=autoencoder_weight)
            else:
                loss_dae = 0
            loss = loss_pred + loss_dae * dae_weight

        with tf.name_scope('prediction'):
            prediction = tf.sigmoid(unnormalized_prediction)

    return loss, prediction, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator
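# Layout note: vanilla_rnn_model expects batch-major input (BTD, [batch, num_steps, dim]), whereas the
# other models in this module expect time-major input (TBD, [num_steps, batch, dim]). A hypothetical
# helper (not in the original code) to convert a time-major numpy array before feeding it here:
def _tbd_to_btd(array_tbd):
    import numpy as np
    # swap the time and batch axes: [num_steps, batch, dim] -> [batch, num_steps, dim]
    return np.transpose(array_tbd, axes=(1, 0, 2))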
def hawkes_rnn_model(cell, num_steps, num_hidden, num_context, num_event, keep_rate_input, dae_weight,
                     phase_indicator, autoencoder_length, autoencoder_initializer=tf.initializers.orthogonal()):
    """
    :param cell:
    :param num_steps:
    :param num_hidden:
    :param num_context: all context variables are required to be binary
    :param num_event:
    :param dae_weight:
    :param keep_rate_input:
    :param phase_indicator:
    :param autoencoder_length: when greater than 0, a denoising autoencoder is applied to the input;
        the value is the target (reduced) dimensionality
    :param autoencoder_initializer:
    :return: loss, prediction, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator,
        base_intensity, mutual_intensity, time_list, task_index, sequence_length, final_state,
        where phase_indicator > 0 denotes the test phase and <= 0 the training phase
    """
    with tf.name_scope('data_source'):
        # the canonical input layout is time-major (TBD): [num_steps, batch_size, dim]
        batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        event_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_event], name='event_placeholder')
        context_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_context], name='context_placeholder')
        base_intensity = tf.placeholder(tf.float32, [num_event, 1], name='base_intensity')
        mutual_intensity = tf.placeholder(tf.float32, [num_event, num_event], name='mutual_intensity')
        time_list = tf.placeholder(tf.int32, [None, num_steps], name='time_list')
        task_index = tf.placeholder(tf.int32, [], name='task_index')
        sequence_length = tf.placeholder(tf.int32, [None], name='sequence_length')
        y_placeholder = tf.placeholder(tf.float32, [None, 1], name='y_placeholder')
        initial_state = cell.get_initial_state(batch_size)

    with tf.name_scope('autoencoder'):
        processed_input, autoencoder_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_placeholder, keep_rate_input, autoencoder_length, autoencoder_initializer)

    with tf.name_scope('hawkes_rnn'):
        input_ = tf.concat([processed_input, event_placeholder], axis=2)
        outputs, final_state = _hawkes_dynamic_rnn(
            cell, input_, sequence_length, initial_state, base_intensity=base_intensity, task_index=task_index,
            mutual_intensity=mutual_intensity, time_list=time_list, event_list=event_placeholder)

    # LSTM cells are awkward here because their state contains both the hidden state and the cell state,
    # and only one of the two is needed as the output, so an extra split is required. This is inelegant,
    # but nothing better came to mind. When splitting, pay close attention to which component comes
    # first inside the state.
    output_length = outputs.shape[2].value
    state_length = final_state.shape[1].value
    if output_length == state_length:
        # nothing needs to be done
        pass
    elif output_length * 2 == state_length:
        final_state = tf.split(final_state, 2, axis=1)[0]
    else:
        raise ValueError('Invalid Size')

    with tf.variable_scope('output_para'):
        output_weight = tf.get_variable("weight", [num_hidden, 1], initializer=tf.initializers.orthogonal())
        bias = tf.get_variable('bias', [])

    with tf.name_scope('prediction'):
        unnormalized_prediction = tf.matmul(final_state, output_weight) + bias
        prediction = tf.sigmoid(unnormalized_prediction)

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            loss_pred = tf.losses.sigmoid_cross_entropy(logits=unnormalized_prediction,
                                                        multi_class_labels=y_placeholder)
        with tf.name_scope('dae_loss'):
            if autoencoder_length > 0:
                loss_dae = autoencoder.autoencoder_loss(embedding=processed_input, origin_input=context_placeholder,
                                                        weight=autoencoder_weight)
            else:
                loss_dae = 0
        with tf.name_scope('loss_sum'):
            loss = loss_pred + loss_dae * dae_weight

    return loss, prediction, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator, \
        base_intensity, mutual_intensity, time_list, task_index, sequence_length, final_state
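# The output/state size check above, factored into a standalone sketch for clarity (a hypothetical
# refactor, not part of the original code). The same logic recurs twice in concat_hawkes_model below:
# GRU-style cells return a state whose width matches the output, while LSTM-style cells concatenate
# two components of that width along axis 1, so the first half must be split off.
def _extract_hidden_state(outputs, final_state):
    output_length = outputs.shape[2].value
    state_length = final_state.shape[1].value
    if output_length == state_length:
        # GRU-style cell: the state already has the output width
        return final_state
    elif output_length * 2 == state_length:
        # LSTM-style cell: keep the first half of the concatenated state
        return tf.split(final_state, 2, axis=1)[0]
    raise ValueError('Invalid Size')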
def concat_hawkes_model(cell_context, cell_event, num_steps, num_hidden, num_context, num_event, keep_rate_input,
                        dae_weight, phase_indicator, autoencoder_length,
                        autoencoder_initializer=tf.initializers.orthogonal()):
    with tf.name_scope('data_source'):
        # the canonical input layout is time-major (TBD): [num_steps, batch_size, dim]
        batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        event_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_event], name='event_placeholder')
        context_placeholder = tf.placeholder(tf.float32, [num_steps, None, num_context], name='context_placeholder')
        base_intensity = tf.placeholder(tf.float32, [num_event, 1], name='base_intensity')
        mutual_intensity = tf.placeholder(tf.float32, [num_event, num_event], name='mutual_intensity')
        time_list = tf.placeholder(tf.int32, [None, num_steps], name='time_list')
        task_index = tf.placeholder(tf.int32, [], name='task_index')
        sequence_length = tf.placeholder(tf.int32, [None], name='sequence_length')
        y_placeholder = tf.placeholder(tf.float32, [None, 1], name='y_placeholder')
        event_initial_state = cell_event.get_initial_state(batch_size)
        context_initial_state = cell_context.get_initial_state(batch_size)

    with tf.name_scope('autoencoder'):
        # processed_input is later compared with context_placeholder to compute the reconstruction (DAE) loss
        processed_input, autoencoder_weight = autoencoder.denoising_autoencoder(
            phase_indicator, context_placeholder, keep_rate_input, autoencoder_length, autoencoder_initializer)

    with tf.name_scope('context_hawkes_rnn'):
        outputs, final_state = _hawkes_dynamic_rnn(
            cell_context, processed_input, sequence_length, context_initial_state, base_intensity=base_intensity,
            task_index=task_index, mutual_intensity=mutual_intensity, time_list=time_list,
            event_list=event_placeholder, scope='context_hawkes')
        output_length = outputs.shape[2].value
        state_length = final_state.shape[1].value
        if output_length == state_length:
            # nothing needs to be done
            context_final_state = final_state
        elif output_length * 2 == state_length:
            context_final_state = tf.split(final_state, 2, axis=1)[0]
        else:
            raise ValueError('Invalid Size')

    with tf.name_scope('event_hawkes_rnn'):
        outputs, final_state = _hawkes_dynamic_rnn(
            cell_event, event_placeholder, sequence_length, event_initial_state, base_intensity=base_intensity,
            task_index=task_index, mutual_intensity=mutual_intensity, time_list=time_list,
            event_list=event_placeholder, scope='event_hawkes')
        output_length = outputs.shape[2].value
        state_length = final_state.shape[1].value
        if output_length == state_length:
            # nothing needs to be done
            event_final_state = final_state
        elif output_length * 2 == state_length:
            event_final_state = tf.split(final_state, 2, axis=1)[0]
        else:
            raise ValueError('Invalid Size')

    with tf.name_scope('state_concat'):
        concat_final_state = tf.concat([event_final_state, context_final_state], axis=1)

    with tf.variable_scope('output_para'):
        output_weight = tf.get_variable("weight", [num_hidden * 2, 1], initializer=tf.initializers.orthogonal())
        bias = tf.get_variable('bias', [])

    with tf.name_scope('prediction'):
        unnormalized_prediction = tf.matmul(concat_final_state, output_weight) + bias
        prediction = tf.sigmoid(unnormalized_prediction)

    with tf.name_scope('loss'):
        with tf.name_scope('pred_loss'):
            loss_pred = tf.losses.sigmoid_cross_entropy(logits=unnormalized_prediction,
                                                        multi_class_labels=y_placeholder)
        with tf.name_scope('dae_loss'):
            if autoencoder_length > 0:
                loss_dae = autoencoder.autoencoder_loss(embedding=processed_input, origin_input=context_placeholder,
                                                        weight=autoencoder_weight)
            else:
                loss_dae = 0
        with tf.name_scope('loss_sum'):
            loss = loss_pred + loss_dae * dae_weight

    return loss, prediction, event_placeholder, context_placeholder, y_placeholder, batch_size, phase_indicator, \
        base_intensity, mutual_intensity, time_list, task_index, sequence_length, concat_final_state
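# A minimal feed-construction sketch for the Hawkes models above (illustrative assumption; the real
# pipeline presumably derives the base/mutual intensity matrices from a fitted Hawkes process rather
# than sampling them). The dictionary keys are just labels; the values only demonstrate the shapes
# and dtypes each placeholder expects.
def _hawkes_feed_sketch(num_steps, num_event, num_context, batch):
    import numpy as np
    return {
        'event': np.random.randint(0, 2, [num_steps, batch, num_event]).astype(np.float32),
        'context': np.random.randint(0, 2, [num_steps, batch, num_context]).astype(np.float32),
        'base_intensity': np.random.rand(num_event, 1).astype(np.float32),
        'mutual_intensity': np.random.rand(num_event, num_event).astype(np.float32),
        # visit times per patient, sorted so they are non-decreasing along the step axis
        'time_list': np.sort(np.random.randint(0, 100, [batch, num_steps]), axis=1),
        'task_index': 0,  # scalar index of the target event/task
        'sequence_length': np.full([batch], num_steps, dtype=np.int32),
        'y': np.random.randint(0, 2, [batch, 1]).astype(np.float32),
    }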