Example #1
    def build_model(self):
        # declare the placeholders for our extracted image feature vectors, our captions, and our mask
        # (the mask describes how long each caption is with an array of 0/1 values of length `maxlen`)
        img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
        self.img = img
        caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
        self.caption_placeholder = caption_placeholder
        mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
        self.mask = mask
        self.output_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])

        network_weights = self._initialize_weights()
        
        # getting an initial LSTM embedding from our image_embedding
        image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias
        
        flat_caption_placeholder = tf.reshape(caption_placeholder, [-1])

        # leverage one-hot sparsity to look up embeddings fast
        embedded_input, KLD_loss = self._get_word_embedding(
            [network_weights['variational_encoding'],
             network_weights['biases_variational_encoding']],
            network_weights['input_meaning'],
            flat_caption_placeholder,
            logit=True)
        KLD_loss = tf.multiply(KLD_loss, tf.reshape(mask, [-1, 1]))
        KLD_loss = tf.reduce_sum(KLD_loss) * 0  # multiplied by zero: the KLD term is currently disabled
        word_embeddings = tf.matmul(embedded_input, self.word_embedding) + self.embedding_bias
        word_embeddings = tf.reshape(word_embeddings, [self.batch_size, self.n_lstm_steps, -1])
        # initialize the LSTM state
        state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
        rnn_output=[]
        with tf.variable_scope("RNN"):
            # unroll lstm
            for i in range(self.n_lstm_steps): 
                if i > 0:
                    # after the first iteration, feed the word embedding corresponding
                    # to the (i-1)th word in our caption
                    current_embedding = word_embeddings[:, i-1, :]
                else:
                    # on the first iteration, use the embedded image as the input
                    current_embedding = image_embedding
                if i > 0: 
                    # allows us to reuse the LSTM tensor variable on each iteration
                    tf.get_variable_scope().reuse_variables()

                out, state = self.lstm(current_embedding, state)
                # if i>0:
                rnn_output.append(tf.expand_dims(out,1))
        # perform classification of the output
        rnn_output = tf.concat(rnn_output, axis=1)
        rnn_output = tf.reshape(rnn_output, [self.batch_size * self.n_lstm_steps, -1])
        encoded_output = tf.matmul(rnn_output, self.word_encoding) + self.word_encoding_bias

        encoded_output = tf.reshape(tf.square(encoded_output), [self.batch_size * self.n_lstm_steps, -1])[:, 1:]

        #get loss

        # normed_embedding= tf.nn.l2_normalize(encoded_output, dim=-1)
        # normed_target=tf.nn.l2_normalize(embedded_input,dim=-1)
        # cos_sim=tf.multiply(normed_embedding,normed_target)[:,1:]
        # cos_sim=(tf.reduce_sum(cos_sim,axis=-1))
        # cos_sim=tf.reshape(cos_sim,[self.batch_size,-1])
        # cos_sim=tf.reduce_sum(cos_sim[:,1:]*mask[:,1:])
        # cos_sim=cos_sim/tf.reduce_sum(mask[:,1:])
        # self.exp_loss=tf.reduce_sum((-cos_sim))
        # # self.exp_loss=tf.reduce_sum(xentropy)/float(self.batch_size)
        # total_loss = tf.reduce_sum(-(cos_sim))
        # mse=tf.reduce_sum(tf.reshape(tf.square(encoded_output-embedded_input),[self.batch_size*self.n_lstm_steps,1]),axis=-1)[:,1:]*(mask[:,1:])
        # mse=tf.reduce_sum(mse)/tf.reduce_sum(mask[:,1:])
        with tf.variable_scope('D', reuse=True):
            # total_loss is first defined here because the earlier loss variants above are commented out
            total_loss = tf.reduce_mean(-tf.log(self.discriminator.discriminate(encoded_output, train=False)))
            self.D2 = self.discriminator.discriminate(tf.stop_gradient(encoded_output), train=True)

        #average over timeseries length

        # total_loss=tf.reduce_sum(masked_xentropy)/tf.reduce_sum(mask[:,1:])
        self.print_loss = total_loss
        total_loss += KLD_loss / tf.reduce_sum(mask)
        return total_loss, img,  caption_placeholder, mask
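
A minimal, self-contained sketch (illustrative only, not part of the model above) of the tf.stop_gradient call that the discriminator term relies on: gradients flow through the squared branch, while the stopped branch is treated as a constant.

import tensorflow as tf

x = tf.placeholder(tf.float32, [])
y = tf.square(x) + tf.stop_gradient(3.0 * x)   # second term is constant w.r.t. x
grad = tf.gradients(y, x)[0]                   # dy/dx = 2*x only

with tf.Session() as sess:
    print(sess.run(grad, feed_dict={x: 2.0}))  # prints 4.0, not 7.0
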
Example #2
        def metastep_graph(inp):
            meta_train_x, meta_train_y, meta_val_x, meta_val_y = inp
            meta_train_loss_list = []
            meta_val_loss_list = []

            weights = self._weights
            meta_train_output = self._contruct_forward(meta_train_x,
                                                       weights,
                                                       reuse=False,
                                                       norm=norm,
                                                       is_train=self._is_train)
            # Meta train loss: Calculate gradient
            meta_train_loss = self._loss_fn(meta_train_y, meta_train_output)
            meta_train_loss = tf.reduce_mean(meta_train_loss)
            meta_train_loss_list.append(meta_train_loss)
            grads = dict(
                zip(weights.keys(),
                    tf.gradients(meta_train_loss, list(weights.values()))))
            new_weights = dict(
                zip(weights.keys(), [
                    weights[key] - self._alpha * grads[key]
                    for key in weights.keys()
                ]))
            if self._avoid_second_derivative:
                # tf.stop_gradient works on tensors, so detach each weight individually
                new_weights = {key: tf.stop_gradient(value)
                               for key, value in new_weights.items()}
            meta_val_output = self._contruct_forward(meta_val_x,
                                                     new_weights,
                                                     reuse=True,
                                                     norm=norm,
                                                     is_train=self._is_train)
            # Meta val loss: Calculate loss (meta step)
            meta_val_loss = self._loss_fn(meta_val_y, meta_val_output)
            meta_val_loss = tf.reduce_mean(meta_val_loss)
            meta_val_loss_list.append(meta_val_loss)
            # If performing multiple updates
            for _ in range(self._num_updates - 1):
                meta_train_output = self._contruct_forward(
                    meta_train_x,
                    new_weights,
                    reuse=True,
                    norm=norm,
                    is_train=self._is_train)
                meta_train_loss = self._loss_fn(meta_train_y,
                                                meta_train_output)
                meta_train_loss = tf.reduce_mean(meta_train_loss)
                meta_train_loss_list.append(meta_train_loss)
                grads = dict(
                    zip(
                        new_weights.keys(),
                        tf.gradients(meta_train_loss,
                                     list(new_weights.values()))))
                new_weights = dict(
                    zip(new_weights.keys(), [
                        new_weights[key] - self._alpha * grads[key]
                        for key in new_weights.keys()
                    ]))
                if self._avoid_second_derivative:
                    new_weights = {key: tf.stop_gradient(value)
                                   for key, value in new_weights.items()}
                meta_val_output = self._contruct_forward(
                    meta_val_x,
                    new_weights,
                    reuse=True,
                    norm=norm,
                    is_train=self._is_train)
                meta_val_loss = self._loss_fn(meta_val_y, meta_val_output)
                meta_val_loss = tf.reduce_mean(meta_val_loss)
                meta_val_loss_list.append(meta_val_loss)

            return [
                meta_train_loss_list, meta_val_loss_list, meta_train_output,
                meta_val_output
            ]
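
Since tf.stop_gradient operates on individual tensors rather than Python dicts, a small helper along these lines (a sketch, not part of the original class) captures the per-key pattern used in the _avoid_second_derivative branches above.

import tensorflow as tf

def stop_gradients_dict(weights):
    """Return a copy of a {name: tensor} dict with gradients blocked for every entry."""
    return {key: tf.stop_gradient(value) for key, value in weights.items()}

# usage sketch with dummy weights
w = {'w1': tf.Variable(tf.zeros([3, 3])), 'b1': tf.Variable(tf.zeros([3]))}
w_detached = stop_gradients_dict(w)
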
Example #3
def get_standard_loss(inputs, 
                      outputs,
                      is_chief=True,
                      verbose=False,
                      loss_base_name="net",
                      epsilon=1e-8,
                      num_cells=None,
                      **kwargs):
    """ Compute the loss function for standard object detection (final stage).
    
    Args:
        inputs: A dictionnary of inputs
        outputs: A dictionnary of outputs
        is_chief: Adds additional summaries iff is_chief is True
        verbose: verbosity level
        base_name: Prefix for all the loss colelctions to be added in the graph
        epsilon: for avoiding overflow error
        num_cells: 2D array number of cells in teh grid, used to normalize the centers distance
        
    Kwargs:    
        centers_localization_loss_weight: Weights for the localization losses of centers. defaults to 1
        scales_localization_loss_weight: Weights for the localization losses of log scales. defaults to 1
        confidence_loss_weight: Weights for the confidence loss. defaults to 5
        noobj_confidence_loss_weight: Weights for the confidence loss  (empty cells). defautls to 1
        classification_loss_weight: weights for the counting loss. defaults to 1
    """     
    (centers_localization_loss_weight, scales_localization_loss_weight, 
     confidence_loss_weight, noobj_confidence_loss_weight) = get_defaults(kwargs, [
        'centers_localization_loss_weight', 'scales_localization_loss_weight', 
        'confidence_loss_weight', 'noobj_confidence_loss_weight'], verbose=verbose)
    assert num_cells is not None
    
    # obj_i_mask: (batch, num_cells, num_cells, 1, num_gt), indicates presence of a box in a cell
    obj_i_mask = inputs['obj_i_mask_bbs']
        
    ## Split coordinates
    # pred_bbs: 4 * (batch, num_cells, num_cells, num_preds, 1)
    # true_bbs: 4 * (batch, 1, 1, 1, num_gt)
    with tf.name_scope('coordinates'):
        pred_bbs = tf.split(outputs['bounding_boxes'], 4, axis=-1)
        true_bbs = tf.split(tf.expand_dims(tf.expand_dims(
            tf.transpose(inputs['bounding_boxes'], (0, 2, 1)), axis=1), axis=1), 4, axis=-2)        
        
    ## Compute target value for the assignment reward and the confidences
    # target_confs: (batch, num_cells, num_cells, num_preds, num_gt)
    with tf.name_scope('compute_target_confidence'): 
        target_confs = get_iou(true_bbs, pred_bbs, epsilon=epsilon)
        target_confs = tf.stop_gradient(target_confs)
    
    # assignment_rewards: (batch, num_cells, num_cells, num_preds, num_gt)
    with tf.name_scope('compute_assignment_reward'):        
        # TODO rename
        assignment_rewards = tf.stop_gradient(target_confs)    
    
    ## Create obj mask mapping ground-truth to predictors
    # obj_ij_mask: (batch, num_cells, num_cells, num_preds, num_gt, 1)
    with tf.name_scope('assign_predictors'):        
        best_reward = tf.reduce_max(assignment_rewards, axis=-2, keepdims=True)
        obj_ij_mask = tf.to_float(tf.greater_equal(assignment_rewards, best_reward))
        obj_ij_mask *= obj_i_mask
        obj_ij_mask = tf.expand_dims(obj_ij_mask, axis=-1) 
        obj_ij_mask = tf.stop_gradient(obj_ij_mask)    
    
    ## Localization loss
    with tf.name_scope('localization_loss'):
        # true_mins, true_maxs: (batch, num_cells, num_cells, num_preds, num_gt, 2)
        true_mins = tf.stack(true_bbs[:2], axis=-1)
        true_maxs = tf.stack(true_bbs[2:], axis=-1)
        # centers
        with tf.name_scope('xy_loss'):
            centers_diffs = tf.expand_dims(outputs['shifted_centers'], axis=-2) - num_cells * (true_mins + true_maxs) / 2
            centers_localization_loss = tf.losses.compute_weighted_loss(
                centers_diffs**2,
                weights=centers_localization_loss_weight * obj_ij_mask,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)        
        # scales
        with tf.name_scope('wh_loss'):
            scales_diff = tf.expand_dims(outputs['log_scales'], axis=-2) - tf.log(tf.maximum(epsilon, true_maxs - true_mins))
            scales_localization_loss = tf.losses.compute_weighted_loss(
                scales_diff**2,
                weights=scales_localization_loss_weight * obj_ij_mask,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
        
    ## Confidence loss
    with tf.name_scope('conf_loss'):  
        # Best predictor in non-empty cells
        with tf.name_scope('non_empty'):
            # confs_diffs: (batch, num_cells, num_cells, num_preds, num_gt)
            obj_mask = tf.squeeze(obj_ij_mask, axis=-1)
            confs_diffs = target_confs - outputs["confidence_scores"]
            confidence_loss_obj = tf.losses.compute_weighted_loss(
                confs_diffs**2,
                weights=confidence_loss_weight * obj_mask,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)        
        # Predictors in empty cells
        with tf.name_scope('empty'):
            # noobj_mask: (batch, num_cells, num_cells, 1, 1)
            noobj_mask = 1. - tf.minimum(1., tf.reduce_sum(obj_i_mask, axis=-1, keepdims=True))
            confidence_loss_noobj = tf.losses.compute_weighted_loss(
                outputs["confidence_scores"]**2,
                weights=noobj_confidence_loss_weight * noobj_mask,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
        
    ## Classification loss
    if 'classification_probs' in outputs:        
        assert 'class_labels' in inputs        
        with tf.name_scope('classification_loss'):
            classification_loss_weight = get_defaults(kwargs, ['classification_loss_weight'], verbose=verbose)[0]
            # labels: (batch, 1, 1, 1, num_gt, num_classes)
            labels = inputs['class_labels'] # (batch, num_gt, num_classes)
            labels = tf.expand_dims(labels, axis=1)
            labels = tf.expand_dims(labels, axis=1)
            labels = tf.stop_gradient(tf.to_float(tf.expand_dims(labels, axis=1)))
            # logits: (batch, num_cells, num_cells, num_preds, 1, num_classes)
            logits = outputs['classification_probs']
            logits = tf.expand_dims(logits, axis=4)
            # classification loss
            class_diffs = labels - logits
            classification_loss = tf.losses.compute_weighted_loss(
                class_diffs**2,
                weights=classification_loss_weight * obj_ij_mask,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
    else:
        classification_loss = 0.        
    
    ## Add informative summaries
    if is_chief:
        is_assigned_predictor = tf.to_float(tf.reduce_sum(obj_ij_mask, axis=-2) > 0.)
        outputs["target_bounding_boxes"] = outputs["bounding_boxes"] * is_assigned_predictor
        
    return [('%s_centers_localization_loss' % loss_base_name, centers_localization_loss),
            ('%s_scales_localization_loss' % loss_base_name, scales_localization_loss),
            ('%s_confidence_obj_loss' % loss_base_name, confidence_loss_obj),
            ('%s_confidence_noobj_loss' % loss_base_name, confidence_loss_noobj),
            ('%s_classification_loss' % loss_base_name, classification_loss)]
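
For reference, a short sketch (with made-up numbers, not taken from the function above) of how tf.losses.compute_weighted_loss behaves under the SUM_BY_NONZERO_WEIGHTS reduction used throughout: the weighted losses are summed and divided by the number of non-zero weights, so the assignment masks also act as normalizers.

import tensorflow as tf

losses = tf.constant([1.0, 2.0, 3.0, 4.0])
weights = tf.constant([1.0, 0.0, 1.0, 0.0])  # only two entries contribute
loss = tf.losses.compute_weighted_loss(
    losses, weights=weights,
    reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)

with tf.Session() as sess:
    print(sess.run(loss))  # (1.0 + 3.0) / 2 = 2.0
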
Example #4
    def _build_graph(self, dim_input, dim_output, norm):
        def model_summary():
            model_vars = tf.trainable_variables()
            slim.model_analyzer.analyze_vars(model_vars, print_info=True)

        learning_x, learning_y, meta_x, meta_y = [
            self._learning_x, self._learning_y, self._meta_x, self._meta_y
        ]
        learning_loss_list = []
        meta_loss_list = []

        weights = self._weights
        learning_output = self.construct_forward(learning_x,
                                                 weights,
                                                 reuse=False,
                                                 norm=norm,
                                                 is_train=self._is_train)

        # Meta train loss: Calculate gradient
        learning_loss = self._loss_fn(learning_y, learning_output)
        learning_loss = tf.reduce_mean(learning_loss)
        learning_loss_list.append(learning_loss)
        grads = dict(
            zip(weights.keys(),
                tf.gradients(learning_loss, list(weights.values()))))
        # learning rate
        self.learning_rate_op = tf.maximum(
            self._min_alpha,
            tf.train.exponential_decay(self._alpha,
                                       self.alpha_step,
                                       self._alpha_decay_step,
                                       self._alpha_decay_rate,
                                       staircase=True))
        self.learning_train_op = tf.train.AdamOptimizer(
            self.learning_rate_op).minimize(learning_loss)
        if self.dic_agent_conf['GRADIENT_CLIP']:
            for key in grads.keys():
                grads[key] = tf.clip_by_value(
                    grads[key], -1 * self.dic_agent_conf['CLIP_SIZE'],
                    self.dic_agent_conf['CLIP_SIZE'])

        self._learning_grads = grads
        new_weights = dict(
            zip(weights.keys(), [
                weights[key] - self.learning_rate_op * grads[key]
                for key in weights.keys()
            ]))

        if self._avoid_second_derivative:
            # tf.stop_gradient works on tensors, so detach each weight individually
            new_weights = {key: tf.stop_gradient(value)
                           for key, value in new_weights.items()}
        meta_output = self.construct_forward(meta_x,
                                             new_weights,
                                             reuse=True,
                                             norm=norm,
                                             is_train=self._is_train)
        # Meta val loss: Calculate loss (meta step)
        meta_loss = self._loss_fn(meta_y, meta_output)
        meta_loss = tf.reduce_mean(meta_loss)
        meta_loss_list.append(meta_loss)
        # If performing multiple updates

        for _ in range(self._num_updates - 1):
            learning_output = self.construct_forward(learning_x,
                                                     new_weights,
                                                     reuse=True,
                                                     norm=norm,
                                                     is_train=self._is_train)
            learning_loss = self._loss_fn(learning_y, learning_output)
            learning_loss = tf.reduce_mean(learning_loss)
            learning_loss_list.append(learning_loss)
            grads = dict(
                zip(new_weights.keys(),
                    tf.gradients(learning_loss, list(new_weights.values()))))
            new_weights = dict(
                zip(new_weights.keys(), [
                    new_weights[key] - self.learning_rate_op * grads[key]
                    for key in new_weights.keys()
                ]))
            if self._avoid_second_derivative:
                new_weights = {key: tf.stop_gradient(value)
                               for key, value in new_weights.items()}
            meta_output = self.construct_forward(meta_x,
                                                 new_weights,
                                                 reuse=True,
                                                 norm=norm,
                                                 is_train=self._is_train)
            meta_loss = self._loss_fn(meta_y, meta_output)
            meta_loss = tf.reduce_mean(meta_loss)
            meta_loss_list.append(meta_loss)

        self._new_weights = new_weights

        # output
        self._learning_output = learning_output
        self._meta_output = meta_output

        # Loss
        learning_loss = tf.reduce_mean(learning_loss_list[-1])
        meta_loss = tf.reduce_mean(meta_loss_list[-1])

        self._learning_loss = learning_loss
        self._meta_loss = meta_loss
        model_summary()
Example #5
    next_state, reward, done = tf_env_step(env, action)

    next_state.set_shape(initial_state_shape)

    next_value = critic(next_state)

    return next_state, done, action_probs, value, next_value, reward


def compute_loss(action_probs: tf.Tensor, value: tf.Tensor, gamma: float,
                 next_value: tf.Tensor, reward: tf.Tensor,
                 done: tf.Tensor) -> tf.Tensor:
    # TD advantage; stop_gradient keeps the bootstrap target out of the actor update
    advs = tf.stop_gradient(reward + tf.cast(gamma, tf.float32) *
                            next_value * (1 - done) - value)
    action_log_probs = tf.math.log(action_probs)
    loss = -tf.math.reduce_sum(action_log_probs * advs)
    return loss

huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)


class Policy(tf.keras.Model):
    def __init__(self, n_actions: int):
        super(Policy, self).__init__(name='Policy')
        self.fc1 = Dense(6, activation='relu')
        self.fc2 = Dense(n_actions)

    def call(self, inputs: tf.Tensor):
        x = self.fc1(inputs)
        action_logits = self.fc2(x)
        return action_logits
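
An illustrative usage sketch (the 4-dimensional observation and the categorical sampling below are assumptions, not part of the original snippet, and Dense is assumed to be tf.keras.layers.Dense) showing how the Policy network's logits would typically be turned into an action and into the probabilities passed to compute_loss.

import tensorflow as tf

policy = Policy(n_actions=2)
obs = tf.constant([[0.1, -0.2, 0.3, 0.05]], dtype=tf.float32)  # hypothetical observation
action_logits = policy(obs)
action = tf.random.categorical(action_logits, num_samples=1)[0, 0]
action_probs = tf.nn.softmax(action_logits)
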
Example #6
def compute_infinite_nature_loss(generated_rgbd, gt_rgbd, discriminator,
                                 mu_logvar):
    """Computes loss between a generated RGBD sequence and the ground truth.

  Lambda terms are the default values used during the original submission.

  Args:
    generated_rgbd: [B, T, H, W, 4] A batch of T-length RGBD sequences
      produced by the generator. Ranges from (0, 1)
    gt_rgbd: [B, T, H, W, 4] The ground truth sequence from a video.
      Ranges from (0, 1)
    discriminator: a discriminator which accepts a [B, H, W, D] tensor
      and runs a discriminator on multiple scales and returns
      a list of (features, logit) for each scale.
    mu_logvar: ([B, 128], [B, 128]) A tuple of mu, log-variance features
      parameterizing the Gaussian used to sample the variational noise.

  Returns:
    A dictionary of losses. total_loss refers to the final
      loss used to optimize the generator and total_disc_loss refers to the
      loss used by the discriminator.
  """
    _, _, height, width, _ = generated_rgbd.get_shape().as_list()
    gen_flatten = tf.reshape(generated_rgbd, [-1, height, width, 4])
    gt_flatten = tf.reshape(gt_rgbd, [-1, height, width, 4])

    # discriminator returns:
    # [(scale_1_feats, scale_1_logits), (scale_2_feats, scale_2_logits), ...]
    disc_on_generated = discriminator(gen_flatten)
    generated_features = [f[0] for f in disc_on_generated]
    generated_logits = [f[1] for f in disc_on_generated]
    disc_on_real = discriminator(gt_flatten)
    real_features = [f[0] for f in disc_on_real]
    real_logits = [f[1] for f in disc_on_real]

    disc_loss, _, _ = compute_discriminator_loss(
        real_logits, [tf.stop_gradient(logit) for logit in generated_logits])
    fool_d_loss = compute_adversarial_loss(generated_logits)

    feature_matching_loss = compute_feature_matching(real_features,
                                                     generated_features)
    kld_loss = compute_kld_loss(mu_logvar[0], mu_logvar[1])

    rgbd_loss = tf.reduce_mean(tf.abs(generated_rgbd - gt_rgbd))
    perceptual_loss = compute_perceptual_loss(generated_rgbd * 255.,
                                              gt_rgbd * 255.)

    loss_dict = {}
    loss_dict["disc_loss"] = disc_loss
    loss_dict["adversarial_loss"] = fool_d_loss
    loss_dict["feature_matching_loss"] = feature_matching_loss
    loss_dict["kld_loss"] = kld_loss
    loss_dict["perceptual_loss"] = perceptual_loss
    loss_dict["reconstruction_loss"] = rgbd_loss

    total_loss = (1e-2 * perceptual_loss + 10.0 * feature_matching_loss +
                  0.05 * kld_loss + 1.5 * fool_d_loss + 0.5 * rgbd_loss)
    total_disc_loss = 1.5 * disc_loss
    loss_dict["total_loss"] = total_loss
    loss_dict["total_disc_loss"] = total_disc_loss
    return loss_dict
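
A short aside (illustrative only) on the shape call above: tf.shape(x) returns a runtime int32 tensor with no .as_list() method, whereas x.get_shape().as_list() yields values known at graph-construction time, which is what the reshape to [-1, height, width, 4] needs.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 8, 32, 32, 4])  # [B, T, H, W, 4]
static_shape = x.get_shape().as_list()                # [None, 8, 32, 32, 4]
dynamic_shape = tf.shape(x)                           # 1-D int32 tensor, evaluated at run time
flat = tf.reshape(x, [-1] + static_shape[2:])         # collapse the batch and time dimensions
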
Example #7
    def _build_model(self):
        param_dict = self._params
        stack = self.conv2D_layer(self._X,
                                  filter_size=self._filters,
                                  n_filters=48,
                                  relu=self._relu,
                                  elu=self._elu,
                                  batch_norm=True,
                                  layer_name='ConvLayer',
                                  param_dict=param_dict)
        combine_layers = []
        layer_sizes = []
        for i, depth in enumerate(self._layer_depths):
            dense_block = self._dense_block(
                stack,
                depth=depth,
                growth=self._growth,
                param_dict=param_dict,
                block_name='DenseBlock{}'.format(i),
                filter_size=self._filters)
            print(dense_block.shape)
            layer_sizes.append(int(dense_block.shape[2]))
            if self._short_residual:
                bypass = self.conv2D_layer(stack,
                                           filter_size=1,
                                           n_filters=int(
                                               dense_block.shape[-1]),
                                           relu=False,
                                           batch_norm=True,
                                           dropout=False,
                                           layer_name='Bypass{}'.format(i),
                                           param_dict=param_dict,
                                           strides=(1, 1),
                                           padding='SAME')
                combine = dense_block + bypass
            else:
                combine = tf.concat([stack, dense_block], axis=-1)
            combine_layers.append(combine)
            stack = self._transition_down(
                combine,
                param_dict=param_dict,
                block_name='TransitionDown{}'.format(i))
            print(stack.shape)
        combine_layers = combine_layers[::-1]
        layer_sizes = layer_sizes[::-1]
        layer_depths = self._layer_depths[::-1]

        stack = self._dense_block(stack,
                                  depth=self._bottleneck,
                                  growth=16,
                                  param_dict=param_dict,
                                  block_name='BottleNeck',
                                  filter_size=self._filters)
        print(stack.shape)
        # volumes = self._regression_block(stack,
        # param_dict=param_dict,
        # n_reg=self._n_classes,
        # block_name='RegeressionBlock')

        for i, (combine, layer_size, depth) in enumerate(
                zip(combine_layers, layer_sizes, layer_depths)):
            padding = 'SAME' if 2 * int(
                stack.shape[2]) == layer_size else 'VALID'
            transition_up = self._transition_up(
                stack,
                output_shape=(layer_size, layer_size),
                param_dict=param_dict,
                batch_size=self._batch_size,
                block_name='TransitionUp{}'.format(i),
                padding=padding)
            print(transition_up.shape)
            if self._long_residual:
                bypass = self.conv2D_layer(combine,
                                           filter_size=1,
                                           n_filters=int(
                                               transition_up.shape[-1]),
                                           relu=False,
                                           batch_norm=True,
                                           dropout=False,
                                           layer_name='BypassUp{}'.format(i),
                                           param_dict=param_dict,
                                           strides=(1, 1),
                                           padding='SAME')
                combine_up = bypass + transition_up
            else:
                combine_up = tf.concat([combine, transition_up], axis=-1)
            stack = self._dense_block(combine_up,
                                      depth=depth,
                                      growth=self._growth,
                                      param_dict=param_dict,
                                      block_name='DenseBlockUp{}'.format(i),
                                      filter_size=self._filters)
            print(stack.shape)

        self._logits = self.conv2D_layer(stack,
                                         filter_size=self._output_filters,
                                         n_filters=self._n_classes,
                                         dropout=True,
                                         relu=self._relu,
                                         elu=self._elu,
                                         batch_norm=False,
                                         layer_name='OutputConv',
                                         param_dict=param_dict)

        self._soft_prob = tf.nn.softmax(self._logits, dim=-1)
        # self._dice_loss = self.soft_dice_loss(self._logits,
        #                                       self._Y,
        #                                       index=-1,
        #                                       weights=self._class_weights)
        output_vis = self.visualize_tensor(self._soft_prob)
        gt_vis = self.visualize_tensor(self._Y)
        self._entropy_loss = self.weighted_softmax_cross_entropy_with_logits(
            self._logits, self._Y, self._class_weights, axis=-1)
        learning_rate = tf.train.exponential_decay(5e-4,
                                                   self._params['global_step'],
                                                   1000,
                                                   0.90,
                                                   staircase=True)
        self._cost = self._entropy_loss
        self._cost_summary = tf.summary.scalar('Cost', self._cost)

        self._train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=0.99,
            beta2=0.995).minimize(self._cost,
                                  global_step=self._params['global_step'])
        self._predictions = tf.stop_gradient(tf.argmax(self._logits, -1))
        self._output_summary = tf.summary.image('Output', output_vis)
        self._input_summary = tf.summary.image('Ground Truth', gt_vis)
        self._image_summary = tf.summary.merge(
            [self._output_summary, self._input_summary])
        folder_dir = os.path.split(self._saved)[0]
        folder_dir = os.path.split(folder_dir)[0]
        self.log_dir = os.path.join(folder_dir, 'logs')
        if os.path.exists(self.log_dir):
            shutil.rmtree(self.log_dir)
            os.makedirs(self.log_dir)
        else:
            os.makedirs(self.log_dir)
        self._summary_writer = tf.summary.FileWriter(
            self.log_dir, graph=tf.get_default_graph())
        ################
        #Reinforcement Learning nodes

        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                                beta1=0.99,
                                                beta2=0.995)
        self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self._logits, labels=self._predictions)
        self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
        self.gradients = self.optimizer.compute_gradients(self.pg_loss)
        self.discounted_rewards = tf.stop_gradient(
            self.soft_dice_loss(logits=self._logits, labels=self._Y) -
            self._threshold)
        for i, (grad, var) in enumerate(self.gradients):
            if grad is not None:
                self.gradients[i] = (grad * self.discounted_rewards, var)
        self._reinforce_op = self.optimizer.apply_gradients(self.gradients)
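
A compact, runnable sketch (dummy variables only, not the class above) of the REINFORCE-style update used at the end of _build_model: every gradient returned by compute_gradients is scaled by a stop-gradient'ed reward signal before apply_gradients.

import tensorflow as tf

w = tf.Variable([1.0, 2.0])
pg_loss = tf.reduce_mean(tf.square(w))        # stands in for the policy-gradient loss
reward = tf.stop_gradient(tf.constant(0.3))   # stands in for (dice score - threshold)

optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(pg_loss)
scaled = [(grad * reward, var) for grad, var in grads_and_vars if grad is not None]
train_op = optimizer.apply_gradients(scaled)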