def predictor(x_, o_dim_, o_type_, num_layers_=1, h_dim_=100, activation_fn=tf.nn.relu, keep_prob_=1.0, w_reg_=None):
    '''
    INPUT
        x_            : (2D-tensor) input
        o_dim_        : (int) output dimension
        o_type_       : (string) output type; one of {'continuous', 'categorical', 'binary'}
        num_layers_   : (int) # of hidden layers
        activation_fn : tf activation function
    OUTPUT
        o_type_ tensor
    '''
    if o_type_ == 'continuous':
        out_fn = None
    elif o_type_ == 'categorical':
        out_fn = tf.nn.softmax  # for classification task
    elif o_type_ == 'binary':
        out_fn = tf.nn.sigmoid
    else:
        raise ValueError('Wrong output type: {}'.format(o_type_))

    if num_layers_ == 1:
        out = FC_Net(inputs=x_, num_outputs=o_dim_, activation_fn=out_fn,
                     weights_regularizer=w_reg_, scope='out')
    else:  # num_layers > 1
        for tmp_layer in range(num_layers_ - 1):
            if tmp_layer == 0:
                net = x_
            net = FC_Net(inputs=net, num_outputs=h_dim_, activation_fn=activation_fn,
                         weights_regularizer=w_reg_, scope='layer_' + str(tmp_layer))
            net = tf.nn.dropout(net, keep_prob=keep_prob_)
        out = FC_Net(inputs=net, num_outputs=o_dim_, activation_fn=out_fn,
                     weights_regularizer=w_reg_, scope='out')
    return out
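# --- Minimal usage sketch (not part of the repo). Assumes TF 1.x and that FC_Net is an
# --- alias for tf.contrib.layers.fully_connected; the input width (10), hidden size (32),
# --- and 3-class head are hypothetical values chosen only for illustration.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as FC_Net  # assumed alias

x = tf.placeholder(tf.float32, shape=[None, 10], name='x')
with tf.variable_scope('toy_predictor'):
    # num_layers_=3 -> two hidden layers of 32 ReLU units, then a softmax output head
    probs = predictor(x, o_dim_=3, o_type_='categorical',
                      num_layers_=3, h_dim_=32, keep_prob_=0.8)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(probs, feed_dict={x: np.random.randn(5, 10)})
    print(out.shape)  # (5, 3)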
def _build_net(self):
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        ## Placeholders
        self.mb_size   = tf.placeholder(tf.int32, [], name='batch_size')          # Batch size
        self.lr_rate   = tf.placeholder(tf.float32, [], name='learning_rate')     # Learning rate
        self.keep_prob = tf.placeholder(tf.float32, [], name='keep_probability')  # Keep rate: 1 - dropout rate
        self.x  = tf.placeholder(tf.float32, shape=[None, self.x_dim], name='inputs')          # Covariates
        self.k  = tf.placeholder(tf.float32, shape=[None, 1], name='labels')                   # Event/censoring label (censoring: 0)
        self.t  = tf.placeholder(tf.float32, shape=[None, 1], name='timetoevents')             # Time until the event occurs
        self.y1 = tf.placeholder(tf.float32, shape=[None, self.num_evalTime], name='pseudo1')  # Pseudo values for CIF of cause 1
        self.y2 = tf.placeholder(tf.float32, shape=[None, self.num_evalTime], name='pseudo2')  # Pseudo values for CIF of cause 2

        ## Output from the shared network
        shared_out = util_net.create_FCNet(self.x, self.num_layers_shared, self.num_units_shared,
                                           self.activation_fn, self.num_units_shared, self.activation_fn,
                                           self.initial_W, self.keep_prob, self.reg_W)

        ## Output for cause 1 from the cause-specific network, fed with the shared-network output
        cs_out1 = util_net.create_FCNet(shared_out, self.num_layers_CS, self.num_units_shared,
                                        self.activation_fn, self.num_units_CS, self.activation_fn,
                                        self.initial_W, self.keep_prob, self.reg_W)
        self.out1 = FC_Net(cs_out1, self.num_evalTime, activation_fn=tf.nn.selu,
                           weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out,
                           scope="Output1")

        ## Output for cause 2 from the cause-specific network, fed with the shared-network output
        cs_out2 = util_net.create_FCNet(shared_out, self.num_layers_CS, self.num_units_shared,
                                        self.activation_fn, self.num_units_CS, self.activation_fn,
                                        self.initial_W, self.keep_prob, self.reg_W)
        self.out2 = FC_Net(cs_out2, self.num_evalTime, activation_fn=tf.nn.selu,
                           weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out,
                           scope="Output2")

        ## Stack the outputs for cause 1 and cause 2
        out = tf.stack((self.out1, self.out2), axis=1)

        ## Reshape outputs
        self.output = tf.reshape(out, [-1, self.num_Event, self.num_evalTime])

        ## Loss functions
        self.loss_mse_1()
        self.loss_mse_2()

        ## Optimization
        self.LOSS_TOTAL = self.LOSS1 + self.LOSS2
        self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
def stochastic_encoder(x_, o_dim_, num_layers_=1, h_dim_=100, activation_fn=tf.nn.relu, keep_prob_=1.0, w_reg_=None):
    '''
    INPUT
        x_            : (2D-tensor) input
        o_dim_        : (int) output dimension
        num_layers_   : (int) # of hidden layers
        activation_fn : tf activation function
    OUTPUT
        [mu, sigma] tensor
    '''
    if num_layers_ == 1:
        out = FC_Net(inputs=x_, num_outputs=o_dim_, activation_fn=None,
                     weights_regularizer=w_reg_, scope='out')
    else:  # num_layers > 1
        for tmp_layer in range(num_layers_ - 1):
            if tmp_layer == 0:
                net = x_
            net = FC_Net(inputs=net, num_outputs=h_dim_, activation_fn=activation_fn,
                         weights_regularizer=w_reg_, scope='layer_' + str(tmp_layer))
            net = tf.nn.dropout(net, keep_prob=keep_prob_)
        out = FC_Net(inputs=net, num_outputs=o_dim_, activation_fn=None,
                     weights_regularizer=w_reg_, scope='out')
    return out
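# --- Sketch of one plausible way the encoder above could be used (not from the repo):
# --- the docstring describes a "[mu, sigma] tensor", so here o_dim_ is set to 2*z_dim and
# --- the linear output is split into mean and log-std for the reparameterization trick.
# --- z_dim, hidden sizes, and scope names below are hypothetical.
x = tf.placeholder(tf.float32, shape=[None, 16])
z_dim = 8
with tf.variable_scope('encoder'):
    params = stochastic_encoder(x, o_dim_=2 * z_dim, num_layers_=2, h_dim_=32)
mu, log_sigma = tf.split(params, 2, axis=1)          # split into mean and log-std
z = mu + tf.exp(log_sigma) * tf.random_normal(tf.shape(mu))  # sample z ~ N(mu, sigma^2)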
def create_FCNet(inputs, num_layers, h_dim, h_fn, o_dim, o_fn, w_init, keep_prob, regularizer=None):
    '''
    GOAL: create an FC network with different specifications
        inputs (tensor) : input tensor
        num_layers      : number of layers in the FCNet
        h_dim (int)     : number of hidden units
        h_fn            : activation function for hidden layers (default: tf.nn.relu)
        o_dim (int)     : number of output units
        o_fn            : activation function for the output layer (default: None)
        w_init          : initializer for the weight matrix (default: Xavier)
        keep_prob       : keep probability in [0, 1] (if None, dropout is not employed)
    '''
    # default activation functions (hidden: relu, output: None)
    if h_fn is None:
        h_fn = tf.nn.relu
    if o_fn is None:
        o_fn = None

    # default weight initializer (Xavier); biases default to zero
    if w_init is None:
        w_init = tf.contrib.layers.xavier_initializer()

    for layer in range(num_layers):
        if num_layers == 1:
            out = FC_Net(inputs, o_dim, activation_fn=o_fn, weights_initializer=w_init,
                         weights_regularizer=regularizer, biases_regularizer=regularizer)
        else:
            if layer == 0:
                h = FC_Net(inputs, h_dim, activation_fn=h_fn, weights_initializer=w_init,
                           weights_regularizer=regularizer, biases_regularizer=regularizer)
                if keep_prob is not None:
                    h = tf.nn.dropout(h, keep_prob=keep_prob)
            elif layer > 0 and layer != (num_layers - 1):  # intermediate hidden layers
                h = FC_Net(h, h_dim, activation_fn=h_fn, weights_initializer=w_init,
                           weights_regularizer=regularizer, biases_regularizer=regularizer)
                if keep_prob is not None:
                    h = tf.nn.dropout(h, keep_prob=keep_prob)
            else:  # layer == num_layers - 1 (the last layer)
                out = FC_Net(h, o_dim, activation_fn=o_fn, weights_initializer=w_init,
                             weights_regularizer=regularizer, biases_regularizer=regularizer)
    return out
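# --- Minimal call sketch for create_FCNet (hypothetical dimensions, not from the repo;
# --- assumes TF 1.x and FC_Net = tf.contrib.layers.fully_connected as above).
x = tf.placeholder(tf.float32, shape=[None, 20])
# num_layers=3 -> two hidden layers of 50 ReLU units with dropout, then a 10-unit linear output
net = create_FCNet(inputs=x, num_layers=3, h_dim=50, h_fn=tf.nn.relu,
                   o_dim=10, o_fn=None, w_init=None, keep_prob=0.7)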
def _build_net(self):
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        ## Placeholders
        self.mb_size   = tf.placeholder(tf.int32, [], name='batch_size')          # Batch size
        self.lr_rate   = tf.placeholder(tf.float32, [], name='learning_rate')     # Learning rate
        self.keep_prob = tf.placeholder(tf.float32, [], name='keep_probability')  # Keep rate: 1 - dropout rate
        self.x = tf.placeholder(tf.float32, shape=[None, self.x_dim], name='inputs')  # Covariates
        self.k = tf.placeholder(tf.float32, shape=[None, 1], name='labels')           # Event/censoring label (censoring: 0)
        self.t = tf.placeholder(tf.float32, shape=[None, 1], name='timetoevents')     # Time until the event occurs
        self.y = tf.placeholder(tf.float32, shape=[None, self.num_Event, self.num_evalTime],
                                name='pseudo')                                        # Pseudo values for CIF

        ## Output from the final hidden layer
        out_fc = util_net.create_FCNet(self.x, self.num_layers, self.num_units,
                                       self.activation_fn, self.num_units, self.activation_fn,
                                       self.initial_W, self.keep_prob, self.reg_W)

        ## Output layer (uses the final hidden layer as input)
        out = FC_Net(out_fc, self.num_Event * self.num_evalTime, activation_fn=tf.nn.selu,
                     weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out,
                     scope="Output")

        ## Reshape outputs
        self.output = tf.reshape(out, [-1, self.num_Event, self.num_evalTime])

        ## Loss function
        self.loss_mse()

        ## Optimization
        self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS)
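# --- Hedged sketch of one training step against the placeholders defined above.
# --- Assumes `model` is an instance of the class whose _build_net appears here, `sess` is an
# --- initialized tf.Session, and x_mb / y_mb are numpy minibatch arrays; the learning rate and
# --- keep rate are illustrative values. Depending on how loss_mse() is defined, the label
# --- placeholders (model.k, model.t, model.mb_size) may also need to be fed.
_, loss = sess.run([model.solver, model.LOSS],
                   feed_dict={model.x: x_mb,          # [mb_size, x_dim] covariates
                              model.y: y_mb,          # [mb_size, num_Event, num_evalTime] pseudo values
                              model.lr_rate: 1e-4,
                              model.keep_prob: 0.6})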
def _build_net(self):
    with tf.variable_scope(self.name):
        #### PLACEHOLDER DECLARATION
        self.mb_size   = tf.placeholder(tf.int32, [], name='batch_size')
        self.lr_rate   = tf.placeholder(tf.float32, [], name='learning_rate')
        self.keep_prob = tf.placeholder(tf.float32, [], name='keep_probability')  # keep rate
        self.a = tf.placeholder(tf.float32, [], name='alpha')
        self.b = tf.placeholder(tf.float32, [], name='beta')
        self.c = tf.placeholder(tf.float32, [], name='gamma')
        self.x = tf.placeholder(tf.float32, shape=[None, self.x_dim], name='inputs')
        self.k = tf.placeholder(tf.float32, shape=[None, 1], name='labels')  # event/censoring label (censoring: 0)
        self.t = tf.placeholder(tf.float32, shape=[None, 1], name='timetoevents')
        self.fc_mask1 = tf.placeholder(tf.float32, shape=[None, self.num_Event, self.num_Category],
                                       name='mask1')  # for Loss 1
        self.fc_mask2 = tf.placeholder(tf.float32, shape=[None, self.num_Category],
                                       name='mask2')  # for Loss 2 / Loss 3

        ##### SHARED SUBNETWORK w/ FCNETS
        shared_out = utils.create_FCNet(self.x, self.num_layers_shared, self.h_dim_shared,
                                        self.active_fn, self.h_dim_shared, self.active_fn,
                                        self.initial_W, self.keep_prob, self.reg_W)
        last_x = self.x  # for residual connection
        h = tf.concat([last_x, shared_out], axis=1)

        # (num_layers_CS) layers for each cause-specific subnetwork (num_Event subnets)
        out = []
        for _ in range(self.num_Event):
            cs_out = utils.create_FCNet(h, self.num_layers_CS, self.h_dim_CS, self.active_fn,
                                        self.h_dim_CS, self.active_fn, self.initial_W,
                                        self.keep_prob, self.reg_W)
            out.append(cs_out)
        out = tf.stack(out, axis=1)  # stack referenced on subject
        out = tf.reshape(out, [-1, self.num_Event * self.h_dim_CS])
        out = tf.nn.dropout(out, keep_prob=self.keep_prob)

        out = FC_Net(out, self.num_Event * self.num_Category, activation_fn=tf.nn.softmax,
                     weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out,
                     scope="Output")
        self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

        ##### GET LOSS FUNCTIONS
        self.loss_Log_Likelihood()  # loss 1: log-likelihood loss
        self.loss_Ranking()         # loss 2: ranking loss
        self.loss_Calibration()     # loss 3: calibration loss

        self.LOSS_TOTAL = self.a * self.LOSS_1 + self.b * self.LOSS_2 + self.c * self.LOSS_3
        self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
def _build_net(self):
    with tf.variable_scope(self.name):
        #### PLACEHOLDER DECLARATION
        self.lr_rate   = tf.placeholder(tf.float32)
        self.keep_prob = tf.placeholder(tf.float32)  # keep rate
        self.a = tf.placeholder(tf.float32)
        self.b = tf.placeholder(tf.float32)
        self.c = tf.placeholder(tf.float32, shape=[self.num_Event])
        self.sigma1 = tf.placeholder(tf.float32)  # sigma hyperparameter
        self.x = tf.placeholder(tf.float32, shape=[None, self.x_dim])
        self.k = tf.placeholder(tf.float32, shape=[None, 1])  # event/censoring label (censoring: 0)
        self.t = tf.placeholder(tf.float32, shape=[None, 1])
        self.fc_mask1 = tf.placeholder(tf.float32, shape=[None, self.num_Event, self.num_Category])  # for Loss 1
        self.fc_mask2 = tf.placeholder(tf.float32, shape=[None, self.num_Category])                  # for Loss 2

        ##### SHARED SUBNETWORK w/ FCNETS
        n_inputs = self.x_dim_lst[0]
        if self.version == "standard":
            shared_inputs = self.x[:, 0:n_inputs]
            shared_out = utils.create_FCNet(shared_inputs, int(self.num_layers_shared),
                                            int(self.h_dim_shared), self.active_fn,
                                            int(self.h_dim_shared), self.active_fn,
                                            self.initial_W, self.keep_prob)
        elif self.version == "sparse":
            shared_o2o_weights = tf.Variable(self.initial_W([n_inputs]), name="shared_o2o_weights")
            shared_inputs = tf.multiply(self.x[:, 0:n_inputs], shared_o2o_weights)
            shared_o2o_regularizer = tf.contrib.layers.l1_regularizer(scale=tf.reduce_mean(self.c), scope=None)
            tf.contrib.layers.apply_regularization(shared_o2o_regularizer,
                                                   weights_list=[shared_o2o_weights])
            shared_out = utils.create_FCNet(shared_inputs, int(self.num_layers_shared),
                                            int(self.h_dim_shared), self.active_fn,
                                            int(self.h_dim_shared), self.active_fn,
                                            self.initial_W, self.keep_prob)
        elif self.version == "attentive":
            att_shared_inputs = self.x[:, 0:n_inputs]
            att_output_dim = sum(self.x_dim_lst[1:])
            att_out = utils.create_FCNet(att_shared_inputs, int(self.num_layers_shared),
                                         att_output_dim, self.active_fn, att_output_dim, None,
                                         self.initial_W, self.keep_prob)
            att_out = tf.reshape(att_out, [-1, self.num_Event, n_inputs])
            self.att_out = tf.nn.softmax(att_out, axis=-1)

        # num_layers_FC layers for each cause-specific subnetwork
        out = []
        for _event in range(self.num_Event):
            start = sum(self.x_dim_lst[0:(_event + 1)])
            end = sum(self.x_dim_lst[0:(_event + 2)])
            important_x = self.x[:, start:end]  # for residual connection

            if self.version == "standard":
                inputs = tf.concat([important_x, shared_out], axis=1)
                n_inputs_event = self.x_dim_lst[_event + 1] + self.h_dim_shared
                cs_out = utils.create_FCNet(inputs, int(self.num_layers_FC[_event]),
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            self.initial_W, self.keep_prob)
            elif self.version == "sparse":
                inputs = tf.concat([important_x, shared_out], axis=1)
                n_inputs_event = self.x_dim_lst[_event + 1] + self.h_dim_shared
                specific_o2o_weights = tf.Variable(self.initial_W([int(n_inputs_event)]),
                                                   name="specific_o2o_weights_" + str(_event + 1))
                specific_inputs = tf.multiply(inputs, specific_o2o_weights)
                specific_o2o_regularizer = tf.contrib.layers.l1_regularizer(scale=self.c[_event], scope=None)
                tf.contrib.layers.apply_regularization(specific_o2o_regularizer,
                                                       weights_list=[specific_o2o_weights])
                cs_out = utils.create_FCNet(specific_inputs, int(self.num_layers_FC[_event]),
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            self.initial_W, self.keep_prob)
            elif self.version == "attentive":
                att_inputs = tf.multiply(important_x, self.att_out[:, _event, :])
                regularizer = tf.contrib.layers.l2_regularizer(scale=self.c[_event], scope=None)
                cs_out = utils.create_FCNet(att_inputs, int(self.num_layers_FC[_event]),
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            int(self.h_dim_FC[_event]), self.active_fn,
                                            self.initial_W, self.keep_prob,
                                            regularizer=regularizer)
            out.append(cs_out)

        # out = tf.stack(out, axis=1)  # stack referenced on subject
        # out = tf.reshape(out, [-1, sum(self.h_dim_FC)])
        out = tf.concat(out, axis=1)
        out = tf.nn.dropout(out, keep_prob=self.keep_prob)

        if self.version in ["standard", "sparse"]:
            out = FC_Net(out, self.num_Event * self.num_Category, activation_fn=tf.nn.softmax,
                         weights_initializer=self.initial_W, scope="Output")
        elif self.version == "attentive":
            regularizer = tf.contrib.layers.l2_regularizer(scale=tf.reduce_mean(self.c), scope=None)
            out = FC_Net(out, self.num_Event * self.num_Category, activation_fn=tf.nn.softmax,
                         weights_initializer=self.initial_W, scope="Output",
                         weights_regularizer=regularizer, biases_regularizer=regularizer)
        self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

        ##### GET LOSS FUNCTIONS
        self.loss_Log_Likelihood()  # loss 1: log-likelihood loss
        self.loss_Ranking()         # loss 2: ranking loss

        self.LOSS_TOTAL = (self.a * self.LOSS_1 + self.b * self.LOSS_2
                           + tf.losses.get_regularization_loss())
        self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
def _build_net(self):
    with tf.variable_scope(self.name):
        #### PLACEHOLDER DECLARATION
        self.mb_size   = tf.placeholder(tf.int32, [], name='batch_size')
        self.lr_rate   = tf.placeholder(tf.float32)
        self.keep_prob = tf.placeholder(tf.float32)  # keep rate
        self.a = tf.placeholder(tf.float32)
        self.b = tf.placeholder(tf.float32)
        self.c = tf.placeholder(tf.float32)

        self.x = tf.placeholder(tf.float32, shape=[None, self.max_length, self.x_dim])
        # missing indicator (for both cont. & binary covariates; includes delta)
        self.x_mi = tf.placeholder(tf.float32, shape=[None, self.max_length, self.x_dim])
        self.k = tf.placeholder(tf.float32, shape=[None, 1])  # event/censoring label (censoring: 0)
        self.t = tf.placeholder(tf.float32, shape=[None, 1])

        self.fc_mask1 = tf.placeholder(tf.float32, shape=[None, self.num_Event, self.num_Category])  # for denominator
        self.fc_mask2 = tf.placeholder(tf.float32, shape=[None, self.num_Event, self.num_Category])  # for Loss 1
        self.fc_mask3 = tf.placeholder(tf.float32, shape=[None, self.num_Category])                  # for Loss 2

        seq_length = get_seq_length(self.x)
        tmp_range = tf.expand_dims(tf.range(0, self.max_length, 1), axis=0)

        self.rnn_mask1 = tf.cast(tf.less_equal(tmp_range, tf.expand_dims(seq_length - 1, axis=1)), tf.float32)
        self.rnn_mask2 = tf.cast(tf.equal(tmp_range, tf.expand_dims(seq_length - 1, axis=1)), tf.float32)

        ### DEFINE LOOP FUNCTION FOR RAW_RNN w/ TEMPORAL ATTENTION
        def loop_fn_att(time, cell_output, cell_state, loop_state):
            emit_output = cell_output

            if cell_output is None:  # time == 0
                next_cell_state = cell.zero_state(self.mb_size, tf.float32)
                next_loop_state = loop_state_ta
            else:
                next_cell_state = cell_state
                tmp_h = utils.create_concat_state(next_cell_state, self.num_layers_RNN, self.RNN_type)

                e = utils.create_FCNet(tf.concat([tmp_h, all_last], axis=1), self.num_layers_ATT,
                                       self.h_dim2, tf.nn.tanh, 1, None, self.initial_W,
                                       keep_prob=self.keep_prob)
                e = tf.exp(e)

                next_loop_state = (loop_state[0].write(time - 1, e),      # save attention score (e_{j})
                                   loop_state[1].write(time - 1, tmp_h))  # save all the hidden states

            # elements_finished = (time >= seq_length)
            elements_finished = (time >= self.max_length - 1)  # break point (no more recurrence after max_length)

            finished = tf.reduce_all(elements_finished)
            next_input = tf.cond(
                finished,
                lambda: tf.zeros([self.mb_size, 2 * self.x_dim], dtype=tf.float32),  # [x_hist, mi_hist]
                lambda: inputs_ta.read(time))

            return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)

        # divide into the last x and previous x's
        x_last = tf.slice(self.x, [0, (self.max_length - 1), 1], [-1, -1, -1])  # current measurement
        x_last = tf.reshape(x_last, [-1, (self.x_dim_cont + self.x_dim_bin)])   # remove the delta of the last measurement
        x_last = tf.reduce_sum(tf.tile(tf.expand_dims(self.rnn_mask2, axis=2), [1, 1, self.x_dim]) * self.x,
                               reduction_indices=1)  # sum over time since all other time stamps are 0
        x_last = tf.slice(x_last, [0, 1], [-1, -1])  # remove the delta of the last measurement

        # since all other time stamps are 0 and measurements are 0-padded
        x_hist = self.x * (1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2), [1, 1, self.x_dim]))
        x_hist = tf.slice(x_hist, [0, 0, 0], [-1, (self.max_length - 1), -1])

        # do the same thing for the missing indicator
        mi_last = tf.slice(self.x_mi, [0, (self.max_length - 1), 1], [-1, -1, -1])  # current measurement
        mi_last = tf.reshape(mi_last, [-1, (self.x_dim_cont + self.x_dim_bin)])     # remove the delta of the last measurement
        mi_last = tf.reduce_sum(tf.tile(tf.expand_dims(self.rnn_mask2, axis=2), [1, 1, self.x_dim]) * self.x_mi,
                                reduction_indices=1)  # sum over time since all other time stamps are 0
        mi_last = tf.slice(mi_last, [0, 1], [-1, -1])  # remove the delta of the last measurement

        mi_hist = self.x_mi * (1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2), [1, 1, self.x_dim]))
        mi_hist = tf.slice(mi_hist, [0, 0, 0], [-1, (self.max_length - 1), -1])

        all_hist = tf.concat([x_hist, mi_hist], axis=2)
        all_last = tf.concat([x_last, mi_last], axis=1)

        # extract inputs for the temporal attention: mask (to incorporate only the measured times) and x_{M}
        seq_length = get_seq_length(x_hist)
        rnn_mask_att = tf.cast(tf.not_equal(tf.reduce_sum(x_hist, reduction_indices=2), 0),
                               dtype=tf.float32)  # [mb_size, max_length-1]; 1: measured, 0: not measured

        ##### SHARED SUBNETWORK: RNN w/ TEMPORAL ATTENTION
        # change the input tensor to TensorArray format with [max_length, mb_size, x_dim]
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=self.max_length - 1).unstack(
            _transpose_batch_time(all_hist), name='Shared_Input')

        # create a cell with RNN hyper-parameters (RNN type, #layers, #nodes, activation function, keep probability)
        cell = utils.create_rnn_cell(self.h_dim1, self.num_layers_RNN, self.keep_prob,
                                     self.RNN_type, self.RNN_active_fn)

        # define the loop_state TensorArrays for information from RNN time steps
        loop_state_ta = (tf.TensorArray(size=self.max_length - 1, dtype=tf.float32),  # e values (e_{j})
                         tf.TensorArray(size=self.max_length - 1, dtype=tf.float32))  # hidden states (h_{j})

        rnn_outputs_ta, self.rnn_final_state, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn_att)
        # rnn_outputs_ta  : TensorArray
        # rnn_final_state : Tensor
        # loop_state_ta   : (TensorArray, TensorArray)

        rnn_outputs = _transpose_batch_time(rnn_outputs_ta.stack())
        # rnn_outputs = tf.reshape(rnn_outputs, [-1, self.max_length-1, self.h_dim1])
        rnn_states = _transpose_batch_time(loop_state_ta[1].stack())

        att_weight = _transpose_batch_time(loop_state_ta[0].stack())  # e_{j}
        att_weight = tf.reshape(att_weight, [-1, self.max_length - 1]) * rnn_mask_att  # mask out unmeasured e_{j}

        # get a_{j} = e_{j} / sum_{l=1}^{M-1} e_{l}
        self.att_weight = div(att_weight,
                              (tf.reduce_sum(att_weight, axis=1, keepdims=True) + _EPSILON))  # softmax (tf.exp applied earlier)

        # 1) expand att_weight to the hidden-state dimension, 2) c = \sum_{j=1}^{M} a_{j} * h_{j}
        self.context_vec = tf.reduce_sum(
            tf.tile(tf.reshape(self.att_weight, [-1, self.max_length - 1, 1]),
                    [1, 1, self.num_layers_RNN * self.h_dim1]) * rnn_states,
            axis=1)

        self.z_mean = FC_Net(rnn_outputs, self.x_dim, activation_fn=None,
                             weights_initializer=self.initial_W, scope="RNN_out_mean1")
        self.z_std = tf.exp(FC_Net(rnn_outputs, self.x_dim, activation_fn=None,
                                   weights_initializer=self.initial_W, scope="RNN_out_std1"))

        epsilon = tf.random_normal([self.mb_size, self.max_length - 1, self.x_dim],
                                   mean=0.0, stddev=1.0, dtype=tf.float32)
        self.z = self.z_mean + self.z_std * epsilon

        ##### CS-SPECIFIC SUBNETWORK w/ FCNETS
        inputs = tf.concat([x_last, self.context_vec], axis=1)

        # 1 layer for combining inputs
        h = FC_Net(inputs, self.h_dim2, activation_fn=self.FC_active_fn,
                   weights_initializer=self.initial_W, scope="Layer1")
        h = tf.nn.dropout(h, keep_prob=self.keep_prob)

        # (num_layers_CS-1) layers for each cause-specific subnetwork (num_Event subnets)
        out = []
        for _ in range(self.num_Event):
            cs_out = utils.create_FCNet(h, self.num_layers_CS, self.h_dim2, self.FC_active_fn,
                                        self.h_dim2, self.FC_active_fn, self.initial_W,
                                        self.reg_W, self.keep_prob)
            out.append(cs_out)
        out = tf.stack(out, axis=1)  # stack referenced on subject
        out = tf.reshape(out, [-1, self.num_Event * self.h_dim2])
        out = tf.nn.dropout(out, keep_prob=self.keep_prob)

        out = FC_Net(out, self.num_Event * self.num_Category, activation_fn=tf.nn.softmax,
                     weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out,
                     scope="Output")
        self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

        ##### GET LOSS FUNCTIONS
        self.loss_Log_Likelihood()  # loss 1: log-likelihood loss
        self.loss_Ranking()         # loss 2: ranking loss
        self.loss_RNN_Prediction()  # loss 3: RNN prediction loss

        self.LOSS_TOTAL = (self.a * self.LOSS_1 + self.b * self.LOSS_2 + self.c * self.LOSS_3
                           + tf.losses.get_regularization_loss())
        self.LOSS_BURNIN = self.LOSS_3 + tf.losses.get_regularization_loss()

        self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
        self.solver_burn_in = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_BURNIN)
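# --- Hedged sketch of how the two optimizers defined above could be scheduled (the iteration
# --- counts and the `feed` dict are hypothetical; `model` is an instance of the class whose
# --- _build_net appears above and `sess` is an initialized tf.Session). The burn-in solver
# --- minimizes only the RNN prediction loss (LOSS_3) before switching to the full weighted loss.
for _ in range(burn_in_iters):   # burn-in phase: RNN prediction loss only
    sess.run(model.solver_burn_in, feed_dict=feed)
for _ in range(main_iters):      # main phase: a*LOSS_1 + b*LOSS_2 + c*LOSS_3 + regularization
    sess.run(model.solver, feed_dict=feed)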