Example #1
def predictor(x_, o_dim_, o_type_, num_layers_=1, h_dim_=100, activation_fn=tf.nn.relu, keep_prob_=1.0, w_reg_=None):
    '''
        INPUT
            x_            : (2D-tensor) input
            o_dim_        : (int) output dimension
            o_type_       : (string) output type one of {'continuous', 'categorical', 'binary'}
            num_layers_   : (int) # of hidden layers
            h_dim_        : (int) # of hidden units per layer
            activation_fn : tf activation function
            keep_prob_    : (float) dropout keep probability
            w_reg_        : weight regularizer (default: None)
        
        OUTPUT
            o_type_ tensor
    '''
    if o_type_ == 'continuous':
        out_fn = None
    elif o_type_ == 'categorical':
        out_fn = tf.nn.softmax #for classification task
    elif o_type_ == 'binary':
        out_fn = tf.nn.sigmoid
    else:
        raise ValueError('Unknown output type: {}'.format(o_type_))

    if num_layers_ == 1:
        out =  FC_Net(inputs=x_, num_outputs=o_dim_, activation_fn=out_fn, weights_regularizer=w_reg_, scope='out')
    else: #num_layers > 1
        for tmp_layer in range(num_layers_-1):
            if tmp_layer == 0:
                net = x_
            net = FC_Net(inputs=net, num_outputs=h_dim_, activation_fn=activation_fn, weights_regularizer=w_reg_, scope='layer_'+str(tmp_layer))
            net = tf.nn.dropout(net, keep_prob=keep_prob_)
        out =  FC_Net(inputs=net, num_outputs=o_dim_, activation_fn=out_fn, weights_regularizer=w_reg_, scope='out')  
    return out
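
The snippets on this page call FC_Net without defining it; in DeepHit-style code it is usually an alias for tf.contrib.layers.fully_connected (TF 1.x). A minimal usage sketch for predictor under that assumption (sizes are made up):

import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as FC_Net  # assumed alias

x = tf.placeholder(tf.float32, shape=[None, 20], name='x')
# two hidden layers of 50 ReLU units, 3-class softmax output
probs = predictor(x, o_dim_=3, o_type_='categorical', num_layers_=2, h_dim_=50, keep_prob_=0.8)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(probs, feed_dict={x: np.random.randn(4, 20).astype(np.float32)}).shape)  # (4, 3)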
Example #2
    def _build_net(self):
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            
            ## Placeholder
            self.mb_size    = tf.placeholder(tf.int32, [], name='batch_size')          #Batch Size
            self.lr_rate    = tf.placeholder(tf.float32, [], name='learning_rate')     #Learning Rate
            self.keep_prob  = tf.placeholder(tf.float32, [], name='keep_probability')  #Keep probability: 1 - dropout rate

            self.x          = tf.placeholder(tf.float32, shape=[None, self.x_dim], name='inputs') #Covariates
            self.k          = tf.placeholder(tf.float32, shape=[None, 1], name='labels')  #Event/censoring label (censoring:0)
            self.t          = tf.placeholder(tf.float32, shape=[None, 1], name='timetoevents') #Time to event
            self.y1         = tf.placeholder(tf.float32, shape=[None, self.num_evalTime], name='pseudo1') #Pseudo values for CIF for cause 1
            self.y2         = tf.placeholder(tf.float32, shape=[None, self.num_evalTime], name='pseudo2')  #Pseudo values for CIF for cause 2

            ## Get output from shared network
            shared_out = util_net.create_FCNet(self.x, self.num_layers_shared, self.num_units_shared, self.activation_fn, self.num_units_shared, self.activation_fn, self.initial_W, self.keep_prob, self.reg_W)
            
            ## Get the cause-1 output from the cause-specific network, fed with the shared-network output
            cs_out1 = util_net.create_FCNet(shared_out, self.num_layers_CS, self.num_units_shared, self.activation_fn, self.num_units_CS, self.activation_fn, self.initial_W, self.keep_prob, self.reg_W)
            self.out1 = FC_Net(cs_out1, self.num_evalTime, activation_fn=tf.nn.selu,
                         weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out, scope="Output1")
            
            
            ## Get the cause-2 output from the cause-specific network, fed with the shared-network output
            cs_out2 = util_net.create_FCNet(shared_out, self.num_layers_CS, self.num_units_shared, self.activation_fn, self.num_units_CS, self.activation_fn, self.initial_W, self.keep_prob, self.reg_W)
            self.out2 = FC_Net(cs_out2, self.num_evalTime, activation_fn=tf.nn.selu,
                         weights_initializer=self.initial_W, weights_regularizer=self.reg_W_out, scope="Output2")
           
            ## Stack the cause-1 and cause-2 outputs
            out = tf.stack((self.out1, self.out2), axis=1)
           
            ## Reshape outputs
            self.output = tf.reshape(out, [-1, self.num_Event, self.num_evalTime])
            
            ## Get loss function
            self.loss_mse_1() 
            self.loss_mse_2()
                 
            ## Optimization
            self.LOSS_TOTAL = self.LOSS1 + self.LOSS2
            self.solver = tf.train.AdamOptimizer(learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
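
A small, self-contained shape check for the stack-and-reshape step above (made-up sizes, assuming num_Event == 2 and num_evalTime == 5):

import tensorflow as tf

out1 = tf.zeros([3, 5])               # cause-1 output: [batch, num_evalTime]
out2 = tf.ones([3, 5])                # cause-2 output: [batch, num_evalTime]
out = tf.stack((out1, out2), axis=1)  # -> [batch, 2, num_evalTime]
output = tf.reshape(out, [-1, 2, 5])  # same shape; the reshape is a no-op here

with tf.Session() as sess:
    print(sess.run(tf.shape(output)))  # [3 2 5]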
Example #3
def stochastic_encoder(x_, o_dim_, num_layers_=1, h_dim_=100, activation_fn=tf.nn.relu, keep_prob_=1.0, w_reg_=None):
    '''
        INPUT
            x_            : (2D-tensor) input
            o_dim_        : (int) output dimension
            num_layers_   : (int) # of hidden layers
            h_dim_        : (int) # of hidden units per layer
            activation_fn : tf activation function
            keep_prob_    : (float) dropout keep probability
            w_reg_        : weight regularizer (default: None)
        
        OUTPUT
            [mu,sigma] tensor
    '''
    if num_layers_ == 1:
        out =  FC_Net(inputs=x_, num_outputs=o_dim_, activation_fn=None, weights_regularizer=w_reg_, scope='out')
    else: #num_layers > 1
        for tmp_layer in range(num_layers_-1):
            if tmp_layer == 0:
                net = x_
            net = FC_Net(inputs=net, num_outputs=h_dim_, activation_fn=activation_fn, weights_regularizer=w_reg_, scope='layer_'+str(tmp_layer))
            net = tf.nn.dropout(net, keep_prob=keep_prob_)
        out =  FC_Net(inputs=net, num_outputs=o_dim_, activation_fn=None, weights_regularizer=w_reg_, scope='out')  
    return out
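
The [mu, sigma] output of stochastic_encoder comes back as a single tensor; one common way to consume it is to split it in half and reparameterize. This is only a sketch, assuming o_dim_ is twice the latent dimension and the second half parameterizes log-sigma (FC_Net as the alias noted under Example #1):

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as FC_Net  # assumed alias

z_dim = 8
x = tf.placeholder(tf.float32, shape=[None, 20])
enc = stochastic_encoder(x, o_dim_=2 * z_dim, num_layers_=2, h_dim_=50)

mu, log_sigma = tf.split(enc, 2, axis=1)  # first half: mean, second half: log std (assumption)
sigma = tf.exp(log_sigma)
eps = tf.random_normal(tf.shape(mu))
z = mu + sigma * eps                      # reparameterization trick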
Example #4
def create_FCNet(inputs, num_layers, h_dim, h_fn, o_dim, o_fn, w_init, keep_prob, regularizer=None):
    '''
        GOAL             : Create FC network with different specifications
        inputs (tensor)  : input tensor
        num_layers       : number of layers in FCNet
        h_dim  (int)     : number of hidden units
        h_fn             : activation function for hidden layers (default: tf.nn.relu)
        o_dim  (int)     : number of output units
        o_fn             : activation function for output layers (default: None)
        w_init           : initialization for weight matrix (default: Xavier)
        keep_prob        : keep probability in [0, 1] (if None, dropout is not employed)
        regularizer      : weight/bias regularizer (default: None)
    '''
    # default activation functions (hidden: relu, out: None)
    if h_fn is None:
        h_fn = tf.nn.relu
    if o_fn is None:
        o_fn = None  # no-op: the output layer stays linear by default

    # default initialization functions (weight: Xavier, bias: None)
    if w_init is None:
        w_init = tf.contrib.layers.xavier_initializer() # Xavier initialization

    for layer in range(num_layers):
        if num_layers == 1:
            out = FC_Net(inputs, o_dim, activation_fn=o_fn, weights_initializer=w_init, weights_regularizer=regularizer, biases_regularizer=regularizer)
        else:
            if layer == 0:
                h = FC_Net(inputs, h_dim, activation_fn=h_fn, weights_initializer=w_init, weights_regularizer=regularizer, biases_regularizer=regularizer)
                if keep_prob is not None:
                    h = tf.nn.dropout(h, keep_prob=keep_prob)

            elif layer > 0 and layer != (num_layers-1):  # intermediate hidden layers
                h = FC_Net(h, h_dim, activation_fn=h_fn, weights_initializer=w_init, weights_regularizer=regularizer, biases_regularizer=regularizer)
                if keep_prob is not None:
                    h = tf.nn.dropout(h, keep_prob=keep_prob)

            else: # layer == num_layers-1 (the last layer)
                out = FC_Net(h, o_dim, activation_fn=o_fn, weights_initializer=w_init, weights_regularizer=regularizer, biases_regularizer=regularizer)

    return out
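
The positional calls to create_FCNet in the _build_net examples on this page map onto (inputs, num_layers, h_dim, h_fn, o_dim, o_fn, w_init, keep_prob, regularizer). A minimal call sketch (sizes made up; FC_Net again assumed to be tf.contrib.layers.fully_connected):

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as FC_Net  # assumed alias

x = tf.placeholder(tf.float32, shape=[None, 20])
# 3 layers in total: 2 hidden ReLU layers of 64 units plus a 10-unit linear output;
# w_init=None falls back to Xavier, keep_prob=0.7 enables dropout, no regularizer
h = create_FCNet(x, 3, 64, tf.nn.relu, 10, None, None, 0.7)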
Example #5
    def _build_net(self):
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):

            ## Placeholder
            self.mb_size = tf.placeholder(tf.int32, [],
                                          name='batch_size')  #Batch Size
            self.lr_rate = tf.placeholder(tf.float32, [],
                                          name='learning_rate')  #Learning Rate
            self.keep_prob = tf.placeholder(
                tf.float32, [],
                name='keep_probability')  #Keep probability: 1 - dropout rate

            self.x = tf.placeholder(tf.float32,
                                    shape=[None, self.x_dim],
                                    name='inputs')  #Covariates
            self.k = tf.placeholder(
                tf.float32, shape=[None, 1],
                name='labels')  #Event/censoring label (censoring:0)
            self.t = tf.placeholder(
                tf.float32, shape=[None, 1],
                name='timetoevents')  #Time to event
            self.y = tf.placeholder(
                tf.float32,
                shape=[None, self.num_Event, self.num_evalTime],
                name='pseudo')  #Pseudo values for CIF

            ## Get output from final hidden layer
            out_fc = util_net.create_FCNet(self.x, self.num_layers,
                                           self.num_units, self.activation_fn,
                                           self.num_units, self.activation_fn,
                                           self.initial_W, self.keep_prob,
                                           self.reg_W)

            ## Output Layer (Use output of final hidden layer as Input)
            out = FC_Net(out_fc,
                         self.num_Event * self.num_evalTime,
                         activation_fn=tf.nn.selu,
                         weights_initializer=self.initial_W,
                         weights_regularizer=self.reg_W_out,
                         scope="Output")

            ## Reshape outputs
            self.output = tf.reshape(out,
                                     [-1, self.num_Event, self.num_evalTime])

            ## Get loss function
            self.loss_mse()

            ## Optimization
            self.solver = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS)
Example #6
    def _build_net(self):
        with tf.variable_scope(self.name):
            #### PLACEHOLDER DECLARATION
            self.mb_size = tf.placeholder(tf.int32, [], name='batch_size')
            self.lr_rate = tf.placeholder(tf.float32, [], name='learning_rate')
            self.keep_prob = tf.placeholder(
                tf.float32, [], name='keep_probability')  #keeping rate
            self.a = tf.placeholder(tf.float32, [], name='alpha')
            self.b = tf.placeholder(tf.float32, [], name='beta')
            self.c = tf.placeholder(tf.float32, [], name='gamma')

            self.x = tf.placeholder(tf.float32,
                                    shape=[None, self.x_dim],
                                    name='inputs')
            self.k = tf.placeholder(
                tf.float32, shape=[None, 1],
                name='labels')  #event/censoring label (censoring:0)
            self.t = tf.placeholder(tf.float32,
                                    shape=[None, 1],
                                    name='timetoevents')

            self.fc_mask1 = tf.placeholder(
                tf.float32,
                shape=[None, self.num_Event, self.num_Category],
                name='mask1')  #for Loss 1
            self.fc_mask2 = tf.placeholder(tf.float32,
                                           shape=[None, self.num_Category],
                                           name='mask2')  #for Loss 2 / Loss 3

            ##### SHARED SUBNETWORK w/ FCNETS
            shared_out = utils.create_FCNet(self.x, self.num_layers_shared,
                                            self.h_dim_shared, self.active_fn,
                                            self.h_dim_shared, self.active_fn,
                                            self.initial_W, self.keep_prob,
                                            self.reg_W)
            last_x = self.x  #for residual connection

            h = tf.concat([last_x, shared_out], axis=1)

            #(num_layers_CS) layers for cause-specific (num_Event subNets)
            out = []
            for _ in range(self.num_Event):
                cs_out = utils.create_FCNet(h, (self.num_layers_CS),
                                            self.h_dim_CS, self.active_fn,
                                            self.h_dim_CS, self.active_fn,
                                            self.initial_W, self.keep_prob,
                                            self.reg_W)
                out.append(cs_out)
            out = tf.stack(out, axis=1)  # stack the cause-specific outputs along the event axis (per subject)
            out = tf.reshape(out, [-1, self.num_Event * self.h_dim_CS])
            out = tf.nn.dropout(out, keep_prob=self.keep_prob)

            out = FC_Net(out,
                         self.num_Event * self.num_Category,
                         activation_fn=tf.nn.softmax,
                         weights_initializer=self.initial_W,
                         weights_regularizer=self.reg_W_out,
                         scope="Output")
            self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

            ##### GET LOSS FUNCTIONS
            self.loss_Log_Likelihood()  #get loss1: Log-Likelihood loss
            self.loss_Ranking()  #get loss2: Ranking loss
            self.loss_Calibration()  #get loss3: Calibration loss

            self.LOSS_TOTAL = self.a * self.LOSS_1 + self.b * self.LOSS_2 + self.c * self.LOSS_3
            self.solver = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
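
The output layer here is a single softmax over num_Event * num_Category bins that is reshaped afterwards, so each row of self.out is a joint distribution over (event, time) pairs. A self-contained check with made-up sizes (num_Event = 3, num_Category = 10):

import numpy as np
import tensorflow as tf

logits = tf.constant(np.random.randn(4, 3 * 10).astype(np.float32))  # [batch, num_Event*num_Category]
p = tf.nn.softmax(logits)           # one softmax over all (event, time) bins
p = tf.reshape(p, [-1, 3, 10])      # [batch, num_Event, num_Category]

with tf.Session() as sess:
    print(sess.run(tf.reduce_sum(p, axis=[1, 2])))  # ~[1. 1. 1. 1.]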
Example #7
    def _build_net(self):
        with tf.variable_scope(self.name):
            #### PLACEHOLDER DECLARATION
            self.lr_rate = tf.placeholder(tf.float32)
            self.keep_prob = tf.placeholder(tf.float32)  #keeping rate
            self.a = tf.placeholder(tf.float32)
            self.b = tf.placeholder(tf.float32)
            self.c = tf.placeholder(tf.float32, shape=[self.num_Event])
            self.sigma1 = tf.placeholder(tf.float32)  # sigma hyperparameter

            self.x = tf.placeholder(tf.float32, shape=[None, self.x_dim])
            self.k = tf.placeholder(
                tf.float32, shape=[None,
                                   1])  #event/censoring label (censoring:0)
            self.t = tf.placeholder(tf.float32, shape=[None, 1])

            self.fc_mask1 = tf.placeholder(
                tf.float32, shape=[None, self.num_Event,
                                   self.num_Category])  #for Loss 1
            self.fc_mask2 = tf.placeholder(tf.float32,
                                           shape=[None, self.num_Category
                                                  ])  #for Loss 2

            ##### SHARED SUBNETWORK w/ FCNETS
            n_inputs = self.x_dim_lst[0]

            if self.version == "standard":
                shared_inputs = self.x[:, 0:n_inputs]
                shared_out = utils.create_FCNet(shared_inputs,
                                                int(self.num_layers_shared),
                                                int(self.h_dim_shared),
                                                self.active_fn,
                                                int(self.h_dim_shared),
                                                self.active_fn, self.initial_W,
                                                self.keep_prob)

            elif self.version == "sparse":
                shared_o2o_weights = tf.Variable(self.initial_W([n_inputs]),
                                                 name="shared_o2o_weights")
                shared_inputs = tf.multiply(self.x[:, 0:n_inputs],
                                            shared_o2o_weights)

                shared_o2o_regularizer = tf.contrib.layers.l1_regularizer(
                    scale=tf.reduce_mean(self.c), scope=None)
                tf.contrib.layers.apply_regularization(
                    shared_o2o_regularizer, weights_list=[shared_o2o_weights])

                shared_out = utils.create_FCNet(shared_inputs,
                                                int(self.num_layers_shared),
                                                int(self.h_dim_shared),
                                                self.active_fn,
                                                int(self.h_dim_shared),
                                                self.active_fn, self.initial_W,
                                                self.keep_prob)

            elif self.version == "attentive":
                att_shared_inputs = self.x[:, 0:n_inputs]
                att_output_dim = sum(self.x_dim_lst[1:])
                att_out = utils.create_FCNet(att_shared_inputs,
                                             int(self.num_layers_shared),
                                             att_output_dim, self.active_fn,
                                             att_output_dim, None,
                                             self.initial_W, self.keep_prob)
                att_out = tf.reshape(att_out, [-1, self.num_Event, n_inputs])
                self.att_out = tf.nn.softmax(att_out, axis=-1)

            # num_layers_FC layers for cause-specific subnetwork
            out = []
            for _event in range(self.num_Event):
                start = sum(self.x_dim_lst[0:(_event + 1)])
                end = sum(self.x_dim_lst[0:(_event + 2)])
                important_x = self.x[:, start:end]  #for residual connection

                if self.version == "standard":
                    inputs = tf.concat([important_x, shared_out], axis=1)
                    n_inputs_event = self.x_dim_lst[_event +
                                                    1] + self.h_dim_shared
                    cs_out = utils.create_FCNet(
                        inputs, int(self.num_layers_FC[_event]),
                        int(self.h_dim_FC[_event]), self.active_fn,
                        int(self.h_dim_FC[_event]), self.active_fn,
                        self.initial_W, self.keep_prob)

                elif self.version == "sparse":
                    inputs = tf.concat([important_x, shared_out], axis=1)
                    n_inputs_event = self.x_dim_lst[_event +
                                                    1] + self.h_dim_shared

                    specific_o2o_weights = tf.Variable(
                        self.initial_W([int(n_inputs_event)]),
                        name="specific_o2o_weights_" + str(_event + 1))
                    specific_inputs = tf.multiply(inputs, specific_o2o_weights)

                    specific_o2o_regularizer = tf.contrib.layers.l1_regularizer(
                        scale=self.c[_event], scope=None)
                    tf.contrib.layers.apply_regularization(
                        specific_o2o_regularizer,
                        weights_list=[specific_o2o_weights])

                    cs_out = utils.create_FCNet(
                        specific_inputs, int(self.num_layers_FC[_event]),
                        int(self.h_dim_FC[_event]), self.active_fn,
                        int(self.h_dim_FC[_event]), self.active_fn,
                        self.initial_W, self.keep_prob)

                elif self.version == "attentive":
                    att_inputs = tf.multiply(important_x,
                                             self.att_out[:, _event, :])
                    regularizer = tf.contrib.layers.l2_regularizer(
                        scale=self.c[_event], scope=None)

                    cs_out = utils.create_FCNet(
                        att_inputs,
                        int(self.num_layers_FC[_event]),
                        int(self.h_dim_FC[_event]),
                        self.active_fn,
                        int(self.h_dim_FC[_event]),
                        self.active_fn,
                        self.initial_W,
                        self.keep_prob,
                        regularizer=regularizer)

                out.append(cs_out)

            # out = tf.stack(out, axis=1) # stack referenced on subject
            # out = tf.reshape(out, [-1, sum(self.h_dim_FC)])
            out = tf.concat(out, axis=1)

            out = tf.nn.dropout(out, keep_prob=self.keep_prob)

            if self.version in ["standard", "sparse"]:
                out = FC_Net(out,
                             self.num_Event * self.num_Category,
                             activation_fn=tf.nn.softmax,
                             weights_initializer=self.initial_W,
                             scope="Output")
            elif self.version == "attentive":
                regularizer = tf.contrib.layers.l2_regularizer(
                    scale=tf.reduce_mean(self.c), scope=None)
                out = FC_Net(out,
                             self.num_Event * self.num_Category,
                             activation_fn=tf.nn.softmax,
                             weights_initializer=self.initial_W,
                             scope="Output",
                             weights_regularizer=regularizer,
                             biases_regularizer=regularizer)

            self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

            ##### GET LOSS FUNCTIONS
            self.loss_Log_Likelihood()  #get loss1: Log-Likelihood loss
            self.loss_Ranking()  #get loss2: Ranking loss

            self.LOSS_TOTAL = self.a * self.LOSS_1 + self.b * self.LOSS_2 + tf.losses.get_regularization_loss(
            )
            self.solver = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
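
The sparse and attentive branches rely on tf.contrib.layers.apply_regularization adding penalties to the graph's regularization-loss collection, which tf.losses.get_regularization_loss() then folds into LOSS_TOTAL. A tiny self-contained sketch of that mechanism (made-up weights and scale):

import tensorflow as tf

w = tf.Variable([1.0, -2.0, 3.0])
l1 = tf.contrib.layers.l1_regularizer(scale=0.1)
tf.contrib.layers.apply_regularization(l1, weights_list=[w])  # adds 0.1 * sum(|w|) to the collection

reg = tf.losses.get_regularization_loss()                     # sums everything in that collection
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(reg))                                      # 0.6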
Example #8
    def _build_net(self):
        with tf.variable_scope(self.name):
            #### PLACEHOLDER DECLARATION
            self.mb_size = tf.placeholder(tf.int32, [], name='batch_size')

            self.lr_rate = tf.placeholder(tf.float32)
            self.keep_prob = tf.placeholder(tf.float32)  #keeping rate
            self.a = tf.placeholder(tf.float32)
            self.b = tf.placeholder(tf.float32)
            self.c = tf.placeholder(tf.float32)

            self.x = tf.placeholder(tf.float32,
                                    shape=[None, self.max_length, self.x_dim])
            self.x_mi = tf.placeholder(
                tf.float32, shape=[None, self.max_length, self.x_dim]
            )  #missing indicator (for both continuous & binary covariates; includes delta)
            self.k = tf.placeholder(
                tf.float32, shape=[None,
                                   1])  #event/censoring label (censoring:0)
            self.t = tf.placeholder(tf.float32, shape=[None, 1])

            self.fc_mask1 = tf.placeholder(
                tf.float32, shape=[None, self.num_Event,
                                   self.num_Category])  #for denominator
            self.fc_mask2 = tf.placeholder(
                tf.float32, shape=[None, self.num_Event,
                                   self.num_Category])  #for Loss 1
            self.fc_mask3 = tf.placeholder(tf.float32,
                                           shape=[None, self.num_Category
                                                  ])  #for Loss 2

            seq_length = get_seq_length(self.x)
            tmp_range = tf.expand_dims(tf.range(0, self.max_length, 1), axis=0)

            self.rnn_mask1 = tf.cast(
                tf.less_equal(tmp_range, tf.expand_dims(seq_length - 1,
                                                        axis=1)), tf.float32)
            self.rnn_mask2 = tf.cast(
                tf.equal(tmp_range, tf.expand_dims(seq_length - 1, axis=1)),
                tf.float32)

            ### DEFINE LOOP FUNCTION FOR RAW_RNN w/ TEMPORAL ATTENTION
            def loop_fn_att(time, cell_output, cell_state, loop_state):

                emit_output = cell_output

                if cell_output is None:  # time == 0
                    next_cell_state = cell.zero_state(self.mb_size, tf.float32)
                    next_loop_state = loop_state_ta
                else:
                    next_cell_state = cell_state
                    tmp_h = utils.create_concat_state(next_cell_state,
                                                      self.num_layers_RNN,
                                                      self.RNN_type)

                    e = utils.create_FCNet(tf.concat([tmp_h, all_last],
                                                     axis=1),
                                           self.num_layers_ATT,
                                           self.h_dim2,
                                           tf.nn.tanh,
                                           1,
                                           None,
                                           self.initial_W,
                                           keep_prob=self.keep_prob)
                    e = tf.exp(e)

                    next_loop_state = (
                        loop_state[0].write(time - 1,
                                            e),  # save att power (e_{j})
                        loop_state[1].write(time - 1, tmp_h)
                    )  # save all the hidden states

                # elements_finished = (time >= seq_length)
                elements_finished = (time >= self.max_length - 1)

                #this gives the break-point (no more recurrence after the max_length)
                finished = tf.reduce_all(elements_finished)
                next_input = tf.cond(
                    finished,
                    lambda: tf.zeros([self.mb_size, 2 * self.x_dim],
                                     dtype=tf.float32),  # [x_hist, mi_hist]
                    lambda: inputs_ta.read(time))

                return (elements_finished, next_input, next_cell_state,
                        emit_output, next_loop_state)

            # divide into the last x and previous x's
            x_last = tf.slice(self.x, [0, (self.max_length - 1), 1],
                              [-1, -1, -1])  #current measurement
            x_last = tf.reshape(x_last,
                                [-1, (self.x_dim_cont + self.x_dim_bin)
                                 ])  #remove the delta of the last measurement

            x_last = tf.reduce_sum(
                tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                        [1, 1, self.x_dim]) * self.x,
                axis=1
            )  #sum over time, since all other time steps are 0
            x_last = tf.slice(
                x_last, [0, 1],
                [-1, -1])  #remove the delta of the last measurement
            x_hist = self.x * (
                1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                             [1, 1, self.x_dim])
            )  #since all other time steps are 0 and measurements are 0-padded
            x_hist = tf.slice(x_hist, [0, 0, 0],
                              [-1, (self.max_length - 1), -1])

            # do same thing for missing indicator
            mi_last = tf.slice(self.x_mi, [0, (self.max_length - 1), 1],
                               [-1, -1, -1])  #current measurement
            mi_last = tf.reshape(mi_last,
                                 [-1, (self.x_dim_cont + self.x_dim_bin)
                                  ])  #remove the delta of the last measurement

            mi_last = tf.reduce_sum(
                tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                        [1, 1, self.x_dim]) * self.x_mi,
                axis=1
            )  #sum over time, since all other time steps are 0
            mi_last = tf.slice(
                mi_last, [0, 1],
                [-1, -1])  #remove the delta of the last measurement
            mi_hist = self.x_mi * (
                1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                             [1, 1, self.x_dim])
            )  #since all other time steps are 0 and measurements are 0-padded
            mi_hist = tf.slice(mi_hist, [0, 0, 0],
                               [-1, (self.max_length - 1), -1])

            all_hist = tf.concat([x_hist, mi_hist], axis=2)
            all_last = tf.concat([x_last, mi_last], axis=1)

            #extract inputs for the temporal attention: mask (to incorporate only the measured time) and x_{M}
            seq_length = get_seq_length(x_hist)
            rnn_mask_att = tf.cast(
                tf.not_equal(tf.reduce_sum(x_hist, axis=2), 0),
                dtype=tf.float32
            )  #[mb_size, max_length-1], 1:measurements 0:no measurements

            ##### SHARED SUBNETWORK: RNN w/ TEMPORAL ATTENTION
            #change the input tensor to TensorArray format with [max_length, mb_size, x_dim]
            inputs_ta = tf.TensorArray(dtype=tf.float32,
                                       size=self.max_length - 1).unstack(
                                           _transpose_batch_time(all_hist),
                                           name='Shared_Input')

            #create a cell with RNN hyper-parameters (RNN type, #layers, #nodes, activation functions, keep probability)
            cell = utils.create_rnn_cell(self.h_dim1, self.num_layers_RNN,
                                         self.keep_prob, self.RNN_type,
                                         self.RNN_active_fn)

            #define the loop_state TensorArray for information from rnn time steps
            loop_state_ta = (
                tf.TensorArray(size=self.max_length - 1,
                               dtype=tf.float32),  #e values (e_{j})
                tf.TensorArray(size=self.max_length - 1,
                               dtype=tf.float32))  #hidden states (h_{j})

            rnn_outputs_ta, self.rnn_final_state, loop_state_ta = tf.nn.raw_rnn(
                cell, loop_fn_att)
            #rnn_outputs_ta  : TensorArray
            #rnn_final_state : Tensor
            #rnn_states_ta   : (TensorArray, TensorArray)

            rnn_outputs = _transpose_batch_time(rnn_outputs_ta.stack())
            # rnn_outputs =  tf.reshape(rnn_outputs, [-1, self.max_length-1, self.h_dim1])

            rnn_states = _transpose_batch_time(loop_state_ta[1].stack())

            att_weight = _transpose_batch_time(
                loop_state_ta[0].stack())  #e_{j}
            att_weight = tf.reshape(att_weight, [
                -1, self.max_length - 1
            ]) * rnn_mask_att  # masking to set 0 for the unmeasured e_{j}

            #get a_{j} = e_{j}/sum_{l=1}^{M-1}e_{l}
            self.att_weight = div(
                att_weight, (tf.reduce_sum(att_weight, axis=1, keepdims=True) +
                             _EPSILON))  #softmax normalization (tf.exp was already applied in loop_fn_att)

            # 1) expand att_weight to hidden state dimension, 2) c = \sum_{j=1}^{M} a_{j} x h_{j}
            self.context_vec = tf.reduce_sum(tf.tile(
                tf.reshape(self.att_weight, [-1, self.max_length - 1, 1]),
                [1, 1, self.num_layers_RNN * self.h_dim1]) * rnn_states,
                                             axis=1)

            self.z_mean = FC_Net(rnn_outputs,
                                 self.x_dim,
                                 activation_fn=None,
                                 weights_initializer=self.initial_W,
                                 scope="RNN_out_mean1")
            self.z_std = tf.exp(
                FC_Net(rnn_outputs,
                       self.x_dim,
                       activation_fn=None,
                       weights_initializer=self.initial_W,
                       scope="RNN_out_std1"))

            epsilon = tf.random_normal(
                [self.mb_size, self.max_length - 1, self.x_dim],
                mean=0.0,
                stddev=1.0,
                dtype=tf.float32)
            self.z = self.z_mean + self.z_std * epsilon

            ##### CS-SPECIFIC SUBNETWORK w/ FCNETS
            inputs = tf.concat([x_last, self.context_vec], axis=1)

            #1 layer for combining inputs
            h = FC_Net(inputs,
                       self.h_dim2,
                       activation_fn=self.FC_active_fn,
                       weights_initializer=self.initial_W,
                       scope="Layer1")
            h = tf.nn.dropout(h, keep_prob=self.keep_prob)

            # (num_layers_CS-1) layers for cause-specific (num_Event subNets)
            out = []
            for _ in range(self.num_Event):
                cs_out = utils.create_FCNet(h, (self.num_layers_CS),
                                            self.h_dim2, self.FC_active_fn,
                                            self.h_dim2, self.FC_active_fn,
                                            self.initial_W, self.reg_W,
                                            self.keep_prob)
                out.append(cs_out)
            out = tf.stack(out, axis=1)  # stack the cause-specific outputs along the event axis (per subject)
            out = tf.reshape(out, [-1, self.num_Event * self.h_dim2])
            out = tf.nn.dropout(out, keep_prob=self.keep_prob)

            out = FC_Net(out,
                         self.num_Event * self.num_Category,
                         activation_fn=tf.nn.softmax,
                         weights_initializer=self.initial_W,
                         weights_regularizer=self.reg_W_out,
                         scope="Output")
            self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

            ##### GET LOSS FUNCTIONS
            self.loss_Log_Likelihood()  #get loss1: Log-Likelihood loss
            self.loss_Ranking()  #get loss2: Ranking loss
            self.loss_RNN_Prediction()  #get loss3: RNN prediction loss

            self.LOSS_TOTAL = self.a * self.LOSS_1 + self.b * self.LOSS_2 + self.c * self.LOSS_3 + tf.losses.get_regularization_loss(
            )
            self.LOSS_BURNIN = self.LOSS_3 + tf.losses.get_regularization_loss(
            )

            self.solver = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
            self.solver_burn_in = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_BURNIN)
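
The attention weights a_{j} computed near the end of the shared RNN block above are a masked softmax: the exponentiated scores e_{j} are zeroed at unmeasured time steps and then normalized by their sum. A self-contained sketch with made-up scores (the epsilon-guarded division mirrors the div()/_EPSILON used above):

import numpy as np
import tensorflow as tf

_EPSILON = 1e-08
e = tf.exp(tf.constant(np.random.randn(2, 4).astype(np.float32)))  # e_{j} as in loop_fn_att
mask = tf.constant([[1., 1., 0., 0.],
                    [1., 1., 1., 0.]])                              # 1 where a measurement exists
e = e * mask                                                        # drop unmeasured steps
a = e / (tf.reduce_sum(e, axis=1, keepdims=True) + _EPSILON)        # a_{j} = e_{j} / sum_l e_{l}

with tf.Session() as sess:
    print(sess.run(tf.reduce_sum(a, axis=1)))                       # ~[1. 1.]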