Example #1
def symbols_to_logits(ids):
    # `probabilities` is a [decode_length, vocab_size] table defined in the
    # enclosing scope; at current length `pos` this reads row pos - 1.
    pos = tf.shape(ids)[1]
    logits = tf.to_float(tf.log(probabilities[pos - 1, :]))
    return logits
Example #2
def symbols_to_logits(ids, _, states):
    # Stateful variant: also threads a `states` dict through decoding and
    # increments a step counter on every call.
    pos = tf.shape(ids)[1] - 1
    logits = tf.to_float(tf.log(probabilities[pos, :]))
    states["state"] += 1
    return logits, states
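
These callbacks look up rows of a predefined `probabilities` table instead of running a model, which is the usual pattern for exercising a beam-search decoder in isolation. Below is a minimal sketch that drives the stateful callback by hand with a toy table; the table values, the `ids` tensor, and the initial state are illustrative assumptions, not from the original source.

# Minimal sketch (TF 1.x): call the stateful callback directly on toy data.
import tensorflow as tf

probabilities = tf.constant(
    [[0.1, 0.6, 0.3],   # distribution used at decoding step 0
     [0.2, 0.2, 0.6],   # step 1
     [0.7, 0.2, 0.1]])  # step 2

def symbols_to_logits(ids, _, states):  # same shape as Example #2
    pos = tf.shape(ids)[1] - 1
    logits = tf.to_float(tf.log(probabilities[pos, :]))
    states["state"] += 1
    return logits, states

ids = tf.constant([[0, 2]])             # a partial hypothesis of length 2
states = {"state": tf.constant(0)}
logits, states = symbols_to_logits(ids, None, states)

with tf.Session() as sess:
    print(sess.run([logits, states["state"]]))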
Example #3
# Assumes x, y_label, W1, EMBEDDING_DIM, vocab_size and the one-hot training
# data (x_train, y_train) are defined earlier in the script.
b1 = tf.Variable(tf.random_normal([EMBEDDING_DIM]))  # hidden-layer bias
hidden_representation = tf.add(tf.matmul(x, W1), b1)

W2 = tf.Variable(tf.random_normal([EMBEDDING_DIM, vocab_size]))
b2 = tf.Variable(tf.random_normal([vocab_size]))
prediction = tf.nn.softmax(tf.add(tf.matmul(hidden_representation, W2), b2))

# TRAIN THE MODEL

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)  # make sure you do this!

# define the loss function:
cross_entropy_loss = tf.reduce_mean(
    -tf.reduce_sum(y_label * tf.log(prediction), reduction_indices=[1]))

# define the training step:
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy_loss)
n_iters = 10000

# train for n_iters iterations
for _ in range(n_iters):
    sess.run(train_step, feed_dict={x: x_train, y_label: y_train})
    print(
        'loss is:',
        sess.run(cross_entropy_loss, feed_dict={
            x: x_train,
            y_label: y_train
        }))
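
The snippet above begins mid-script; the placeholders, the first weight matrix, and the training data are assumed to exist already. A hedged sketch of those missing definitions, with toy shapes and toy one-hot data chosen purely for illustration:

# Minimal sketch of the setup the snippet above presumes (TF 1.x).
import numpy as np
import tensorflow as tf

vocab_size = 7       # toy vocabulary size (assumption)
EMBEDDING_DIM = 5    # hidden-layer width (assumption)

# one-hot input words and one-hot target words
x = tf.placeholder(tf.float32, shape=(None, vocab_size))
y_label = tf.placeholder(tf.float32, shape=(None, vocab_size))

# input-to-hidden weights; the hidden activations act as word embeddings
W1 = tf.Variable(tf.random_normal([vocab_size, EMBEDDING_DIM]))

# toy (center word -> context word) pairs, already one-hot encoded
x_train = np.eye(vocab_size, dtype=np.float32)[[0, 1, 2]]
y_train = np.eye(vocab_size, dtype=np.float32)[[1, 2, 3]]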
Example #4
# Assumes x_, y_, the layer weights/biases (Pesos1, Bias1, Pesos2, Bias2) and
# the training data (letras_x, letras_y) are defined earlier in the script.
histogram_bias1 = tf.summary.histogram('Bias1', Bias1)
histogram_pesos2 = tf.summary.histogram('Pesos2', Pesos2)
histogram_bias2 = tf.summary.histogram('Bias2', Bias2)

#scalar_pesos1 = tf.summary.scalar('Pesos1', Pesos1)
#scalar_bias1 = tf.summary.scalar('Bias1', Bias1)
#scalar_pesos2 = tf.summary.scalar('Pesos2', Pesos2)
#scalar_bias2 = tf.summary.scalar('Bias2', Bias2)

A = tf.sigmoid(tf.matmul(x_, Pesos1) + Bias1)
Salida = tf.sigmoid(tf.matmul(A, Pesos2) + Bias2)

#histogram_salida = tf.summary.histogram('Salida', Salida)
#scalar_salida = tf.summary.scalar('salida',Salida)
#Costo=tf.reduce_mean(abs(y_-Salida))
Costo=tf.reduce_mean((y_*tf.log(Salida)+((1 - y_)* tf.log(1.0-Salida)))*-1)

#histogram_costo = tf.summary.histogram('Costo', Costo)

train_step = tf.train.GradientDescentOptimizer(.9).minimize(Costo)


init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

#t_start = time.clock()
writer = tf.summary.FileWriter('./graphs', sess.graph)
for i in range(1000):
    #writer = tf.summary.FileWriter('./graphs', sess.graph)
    sess.run(train_step, feed_dict={x_: letras_x, y_: letras_y})
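
The histogram summaries above are created but never evaluated or written, so the FileWriter only records the graph. A minimal sketch of a loop that also writes them, assuming the same session and feed dict; the merge_all/add_summary calls are the standard tf.summary API, and flushing every step is an arbitrary choice:

# Minimal sketch: evaluate and write the summaries to ./graphs (TF 1.x).
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('./graphs', sess.graph)
for i in range(1000):
    _, summary = sess.run([train_step, merged],
                          feed_dict={x_: letras_x, y_: letras_y})
    writer.add_summary(summary, global_step=i)
writer.close()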
Example #5
def processData(samples, iii, federated, tot_devices, fraction_training,
                neighbors_number, EPOCH_THRESHOLD):
    # Assumes module-level globals (args, batch_size, training_epochs, classes,
    # input_data1/input_data2, filter, number, multip, stride, compression,
    # randomized, training_set_per_device) and the helpers conv2d_f and
    # CFA_process are defined elsewhere in the script.
    # eng = matlab.engine.start_matlab()
    eng = 0  # MATLAB engine disabled in this version
    global learning_rate
    learning_rate_local = learning_rate
    np.random.seed(1)
    tf.set_random_seed(1)  # common initialization tf 1.13
    # tf.random.set_seed(1)

    # database = sio.loadmat('dati_mimoradar/data_mmwave_900.mat')
    database = sio.loadmat(args.input_data)
    # database = sio.loadmat('dati_mimoradar/data_mmwave_450.mat')
    x_train = database['mmwave_data_train']
    y_train = database['label_train']
    y_train_t = to_categorical(y_train)
    x_train = (
        x_train.astype('float32').clip(0)
    ) / 1000  # DATA PREPARATION (NORMALIZATION AND SCALING OF FFT MEASUREMENTS)
    # DATA PARTITION: note the MATLAB-style "- 1" upper bound drops the last
    # sample of each device's slice.
    x_train2 = x_train[iii * samples:((iii + 1) * samples - 1), :, :]
    y_train2 = y_train_t[iii * samples:((iii + 1) * samples - 1), :]

    x_test = database['mmwave_data_test']
    y_test = database['label_test']
    x_test = (x_test.astype('float32').clip(0)) / 1000
    y_test_t = to_categorical(y_test)

    total_batch2 = int(fraction_training / batch_size)
    # tf Graph Input
    x = tf.placeholder(
        tf.float32,
        [None, input_data1, input_data2])  # 512 POINT FFT RANGE MEASUREMENTS
    y = tf.placeholder(tf.float32,
                       [None, classes])  # 0-7 HR distances (safe - unsafe)

    W_ext_l1 = tf.placeholder(tf.float32, [filter, filter, 1, number])
    b_ext_l1 = tf.placeholder(tf.float32, [number])
    W_ext_l2 = tf.placeholder(tf.float32, [multip * number, classes])
    b_ext_l2 = tf.placeholder(tf.float32, [classes])

    W2_ext_l1 = tf.placeholder(tf.float32, [filter, filter, 1, number])
    b2_ext_l1 = tf.placeholder(tf.float32, [number])
    W2_ext_l2 = tf.placeholder(tf.float32, [multip * number, classes])
    b2_ext_l2 = tf.placeholder(tf.float32, [classes])

    # Set model weights
    W_l1 = tf.Variable(tf.random_normal([filter, filter, 1, number]))
    b_l1 = tf.Variable(tf.random_normal([number]))
    W_l2 = tf.Variable(tf.zeros([multip * number, classes]))
    b_l2 = tf.Variable(tf.zeros([classes]))

    # Construct model Layer #1 CNN 1d, Layer #2 FC
    hidden0 = conv2d_f(x, W_ext_l1, b_ext_l1)
    hidden01 = tf.layers.max_pooling2d(hidden0,
                                       pool_size=stride,
                                       strides=stride,
                                       padding='SAME')
    # print(hidden01) # check hidden01 size
    # hidden01 = tf.nn.max_pool1d(hidden0, ksize=stride, strides=stride, padding='SAME')
    fc01 = tf.reshape(hidden01, [-1, multip * number])
    pred = tf.nn.softmax(tf.matmul(fc01, W_ext_l2) +
                         b_ext_l2)  # example 2 layers

    hidden2 = conv2d_f(x, W2_ext_l1, b2_ext_l1)
    hidden02 = tf.layers.max_pooling2d(hidden2,
                                       pool_size=stride,
                                       strides=stride,
                                       padding='SAME')
    fc02 = tf.reshape(hidden02, [-1, multip * number])
    pred2 = tf.nn.softmax(tf.matmul(fc02, W2_ext_l2) +
                          b2_ext_l2)  # example 2 layers

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(
        y * tf.log(tf.clip_by_value(pred, 1e-15, 0.99)), reduction_indices=1))
    cost2 = tf.reduce_mean(-tf.reduce_sum(
        y * tf.log(tf.clip_by_value(pred2, 1e-15, 0.99)), reduction_indices=1))

    # gradients per layer
    grad_W_l1, grad_b_l1, grad_W_l2, grad_b_l2 = tf.gradients(
        xs=[W_ext_l1, b_ext_l1, W_ext_l2, b_ext_l2], ys=cost)

    new_W_l1 = W_l1.assign(W_ext_l1 - learning_rate * grad_W_l1)
    new_b_l1 = b_l1.assign(b_ext_l1 - learning_rate * grad_b_l1)

    new_W_l2 = W_l2.assign(W_ext_l2 - learning_rate * grad_W_l2)
    new_b_l2 = b_l2.assign(b_ext_l2 - learning_rate * grad_b_l2)

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    # Initialize CFA
    consensus_p = CFA_process(federated, tot_devices, iii, neighbors_number,
                              args.graph, compression, args.consensus_mode)
    neighbor_vector = consensus_p.getMobileNetwork_connectivity(
        iii, neighbors_number, tot_devices, args.graph - 1)
    # print(neighbor_vector.size)

    #    Start training
    with tf.Session() as sess:
        sess.run(init)
        total_batch = int(samples / batch_size)
        # PRINTS THE TOTAL NUMBER OF MINI BATCHES
        # print(total_batch)

        # Training cycle
        val_loss = np.zeros(training_epochs)
        param_vector = np.ones(training_epochs)
        timings = np.ones(training_epochs)
        sgd_computational_time = np.ones(training_epochs)
        compression_computational_time = np.ones(training_epochs)
        for epoch in range(training_epochs):
            # changing neighbors on every round if randomized = true
            if randomized:
                neighbor_vector = consensus_p.getMobileNetwork_connectivity(
                    iii, neighbors_number, tot_devices, args.graph - 1)
            for current_neighbor in range(neighbor_vector.size + 1):
                avg_cost = 0.
                avg_cost_test = 0.
                ######## sgd on local data
                start_time = time.time()
                ################
                for i in range(total_batch):
                    batch_xs = x_train2[i * batch_size:((i + 1) * batch_size -
                                                        1), :, :]
                    batch_ys = y_train2[i * batch_size:((i + 1) * batch_size -
                                                        1), :]
                    if (i == 0) and (epoch == 0):  # initialization
                        # W_val_l1 = np.zeros([512, 32])
                        W_val_l1 = np.random.normal(
                            0.0, 1.0, (filter, filter, 1, number))
                        # b_val_l1 = np.zeros([32])
                        b_val_l1 = np.random.normal(0.0, 1.0, number)
                        W_val_l2 = np.zeros([multip * number, classes])
                        b_val_l2 = np.zeros([classes])
                    elif (i > 0):
                        W_val_l1 = n_W_l1  # modify for minibatch updates
                        b_val_l1 = n_b_l1
                        W_val_l2 = n_W_l2  # modify for minibatch updates
                        b_val_l2 = n_b_l2
                    # Fit training using batch data
                    n_W_l1, n_b_l1, n_W_l2, n_b_l2, c, g_W_l1, g_b_l1, g_W_l2, g_b_l2 = sess.run(
                        [
                            new_W_l1, new_b_l1, new_W_l2, new_b_l2, cost,
                            grad_W_l1, grad_b_l1, grad_W_l2, grad_b_l2
                        ],
                        feed_dict={
                            x: batch_xs,
                            y: batch_ys,
                            W_ext_l1: W_val_l1,
                            b_ext_l1: b_val_l1,
                            W_ext_l2: W_val_l2,
                            b_ext_l2: b_val_l2
                        })
                    avg_cost += c / total_batch  # Training loss
                    #################
                sgd_computational_time[epoch] = sgd_computational_time[
                    epoch] + time.time() - start_time
                ###################
                # validation
                with tf.Session() as sess2:
                    sess2.run(init)
                    for i in range(total_batch2):
                        # Construct model
                        batch_xs = x_test[i *
                                          batch_size:((i + 1) * batch_size -
                                                      1), :, :]
                        batch_ys = y_test_t[i *
                                            batch_size:((i + 1) * batch_size -
                                                        1), :]
                        c = sess2.run(cost2,
                                      feed_dict={
                                          x: batch_xs,
                                          y: batch_ys,
                                          W2_ext_l1: n_W_l1,
                                          b2_ext_l1: n_b_l1,
                                          W2_ext_l2: n_W_l2,
                                          b2_ext_l2: n_b_l2
                                      })
                        avg_cost_test += c / total_batch2

                val_loss[epoch] = avg_cost_test
                if epoch == 0:
                    param_vector[epoch] = multip * number * classes
                else:
                    param_vector[epoch] = counter_param

                print(
                    'Test Device: ' + str(iii) + ' Neighbor counter: ' +
                    str(current_neighbor) + " Epoch:", '%04d' % (epoch + 1),
                    "loss=", "{:.9f}".format(avg_cost_test))

                ###########################################################
                # CFA: weights exchange (no gradients)
                # start_time = time.time()
                if args.consensus_mode == 0:
                    # combine one at a time and run sgd after every combination
                    if current_neighbor < neighbor_vector.size:
                        stop_consensus = False
                        W_val_l1, b_val_l1, W_val_l2, b_val_l2, counter_param, time_info, compression_time = consensus_p.getFederatedWeight(
                            n_W_l1, n_W_l2, n_b_l1, n_b_l2, epoch, val_loss,
                            args.eps, neighbor_vector[current_neighbor],
                            stop_consensus)
                        timings[epoch] = timings[epoch] + time_info
                    else:  # transmission of model parameters
                        stop_consensus = True
                        W_val_l1, b_val_l1, W_val_l2, b_val_l2, counter_param, time_info, compression_time = consensus_p.getFederatedWeight(
                            n_W_l1, n_W_l2, n_b_l1, n_b_l2, epoch, val_loss,
                            args.eps, [], stop_consensus)
                elif args.consensus_mode == 1:
                    # sets an alternative implementation, combine all and run one SGD
                    if current_neighbor == 0:
                        stop_consensus = False
                        W_val_l1, b_val_l1, W_val_l2, b_val_l2, counter_param, time_info, compression_time = consensus_p.getFederatedWeight(
                            n_W_l1, n_W_l2, n_b_l1, n_b_l2, epoch, val_loss,
                            args.eps, neighbor_vector, stop_consensus)
                        timings[epoch] = timings[epoch] + time_info
                    else:
                        stop_consensus = True  # enable transmission of model only, use as neighbors an empty
                        W_val_l1, b_val_l1, W_val_l2, b_val_l2, counter_param, time_info, compression_time = consensus_p.getFederatedWeight(
                            n_W_l1, n_W_l2, n_b_l1, n_b_l2, epoch, val_loss,
                            args.eps, [], stop_consensus)
                        break
                ###############################################################
            compression_computational_time[epoch] = compression_time
        ###########################################################

        print("Optimization Finished!")
        # DUMP RESULTS %Y-%m-%d-%H-%M-%S
        sio.savemat(
            'results/dump_loss_g{}_n{}_c{}_m{}_con{}_rand{}_{}.mat'.format(
                args.graph, iii, compression, neighbors_number,
                args.consensus_mode, args.rand,
                time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())), {
                    "val_acc": val_loss,
                    "device": iii,
                    "T_epochs": training_epochs,
                    "T_set_per_device": training_set_per_device,
                    "samples": samples,
                    "param_vector": param_vector,
                    "compression_method": compression,
                    "execution_time": timings,
                    "compression_computational_time":
                    compression_computational_time,
                    "sgd_computational_time": sgd_computational_time
                })
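
The core pattern in processData is a hand-rolled SGD step: the current weights are fed in through placeholders, tf.gradients differentiates the loss with respect to those placeholders, and assign writes the updated values back into variables. A stripped-down sketch of that pattern on a toy scalar problem; the objective, learning rate, and all names are illustrative assumptions:

# Minimal sketch of the feed-in / tf.gradients / assign update loop (TF 1.x).
import tensorflow as tf

learning_rate = 0.1
w_ext = tf.placeholder(tf.float32, [])        # current weight, fed from Python
w = tf.Variable(0.0)                          # variable receiving the update
loss = tf.square(w_ext - 3.0)                 # toy objective, minimum at 3
grad_w = tf.gradients(ys=loss, xs=[w_ext])[0]
new_w = w.assign(w_ext - learning_rate * grad_w)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    w_val = 0.0
    for _ in range(50):
        w_val = sess.run(new_w, feed_dict={w_ext: w_val})
    print(w_val)  # approaches 3.0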
Example #6
def mask(config: configure_pretraining.PretrainingConfig,
         inputs: pretrain_data.Inputs,
         mask_prob,
         proposal_distribution=1.0,
         disallow_from_mask=None,
         already_masked=None):
    """Implementation of dynamic masking. The optional arguments aren't needed for
  BERT/ELECTRA and are from early experiments in "strategically" masking out
  tokens instead of uniformly at random.

  Args:
    config: configure_pretraining.PretrainingConfig
    inputs: pretrain_data.Inputs containing input input_ids/input_mask
    mask_prob: percent of tokens to mask
    proposal_distribution: for non-uniform masking can be a [B, L] tensor
                           of scores for masking each position.
    disallow_from_mask: a boolean tensor of [B, L] of positions that should
                        not be masked out
    already_masked: a boolean tensor of [B, N] of already masked-out tokens
                    for multiple rounds of masking
  Returns: a pretrain_data.Inputs with masking added
  """
    # Get the batch size, sequence length, and max masked-out tokens
    N = config.max_predictions_per_seq
    B, L = modeling.get_shape_list(inputs.input_ids)

    # Find indices where masking out a token is allowed
    vocab = tokenization.FullTokenizer(
        config.vocab_file, do_lower_case=config.do_lower_case).vocab
    candidates_mask = _get_candidates_mask(inputs, vocab, disallow_from_mask)

    # Set the number of tokens to mask out per example
    num_tokens = tf.cast(tf.reduce_sum(inputs.input_mask, -1), tf.float32)
    num_to_predict = tf.maximum(
        1, tf.minimum(N, tf.cast(tf.round(num_tokens * mask_prob), tf.int32)))
    masked_lm_weights = tf.cast(tf.sequence_mask(num_to_predict, N),
                                tf.float32)
    if already_masked is not None:
        masked_lm_weights *= (1 - already_masked)

    # Get a probability of masking each position in the sequence
    candidate_mask_float = tf.cast(candidates_mask, tf.float32)
    sample_prob = (proposal_distribution * candidate_mask_float)
    sample_prob /= tf.reduce_sum(sample_prob, axis=-1, keepdims=True)

    # Sample the positions to mask out
    sample_prob = tf.stop_gradient(sample_prob)
    sample_logits = tf.log(sample_prob)
    masked_lm_positions = tf.random.categorical(sample_logits,
                                                N,
                                                dtype=tf.int32)
    masked_lm_positions *= tf.cast(masked_lm_weights, tf.int32)

    # Get the ids of the masked-out tokens
    shift = tf.expand_dims(L * tf.range(B), -1)  # row offsets for the flattened gather below
    flat_positions = tf.reshape(masked_lm_positions + shift, [-1, 1])
    masked_lm_ids = tf.gather_nd(tf.reshape(inputs.input_ids, [-1]),
                                 flat_positions)
    masked_lm_ids = tf.reshape(masked_lm_ids, [B, -1])
    masked_lm_ids *= tf.cast(masked_lm_weights, tf.int32)

    # Update the input ids
    replace_with_mask_positions = masked_lm_positions * tf.cast(
        tf.less(tf.random.uniform([B, N]), 1 - mask_prob), tf.int32)
    inputs_ids, _ = scatter_update(inputs.input_ids,
                                   tf.fill([B, N], vocab["[MASK]"]),
                                   replace_with_mask_positions)

    return pretrain_data.get_updated_inputs(
        inputs,
        input_ids=tf.stop_gradient(inputs_ids),
        masked_lm_positions=masked_lm_positions,
        masked_lm_ids=masked_lm_ids,
        masked_lm_weights=masked_lm_weights)
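
The position sampling above hinges on tf.random.categorical applied to per-position log-probabilities. A toy sketch of that single step in isolation, assuming one example with three candidate positions; the probabilities and N are illustrative numbers, not from the original configuration:

# Minimal sketch (TF 1.x): sample mask positions from per-position probabilities.
import tensorflow as tf

sample_prob = tf.constant([[0.5, 0.25, 0.25]])  # [B=1, L=3] masking probabilities
sample_logits = tf.log(sample_prob)
N = 2                                           # max masked-out tokens
positions = tf.random.categorical(sample_logits, N, dtype=tf.int32)

with tf.Session() as sess:
    print(sess.run(positions))  # e.g. [[0, 2]]: two sampled positions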
Example #7
    def _build_net(self):

        with tf.variable_scope("Actor" + self.suffix):

            with tf.name_scope('inputs' + self.suffix):
                self.tf_obs = tf.placeholder(tf.float32,
                                             [None, self.n_features],
                                             name='observation' + self.suffix)
                self.tf_acts = tf.placeholder(tf.int32, [
                    None,
                ],
                                              name='actions_num' + self.suffix)
                self.tf_vt = tf.placeholder(tf.float32, [
                    None,
                ],
                                            name='actions_value' + self.suffix)
                self.tf_safe = tf.placeholder(tf.float32, [
                    None,
                ],
                                              name='safety_value' +
                                              self.suffix)
                self.entropy_weight = tf.placeholder(
                    tf.float32,
                    shape=(),
                    name='entropy_weight_clustering' + self.suffix)

                ##### PPO change #####
                # NOTE: this placeholder is overwritten by the ratio tensor
                # built in the 'ppoloss' scope below.
                self.ppo_ratio = tf.placeholder(tf.float32, [
                    None,
                ],
                                                name='ppo_ratio' + self.suffix)
                ##### PPO change #####

            layer = tf.layers.dense(
                inputs=self.tf_obs,
                units=128,
                activation=tf.nn.tanh,
                # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
                kernel_initializer=tf.orthogonal_initializer(
                    gain=np.sqrt(2.)),  # ppo default initialization
                bias_initializer=tf.constant_initializer(0.1),
                name='fc1' + self.suffix)

            all_act = tf.layers.dense(
                inputs=layer,
                units=self.n_actions,
                activation=None,
                # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
                kernel_initializer=tf.orthogonal_initializer(
                    gain=np.sqrt(2.)),  # ppo default initialization
                bias_initializer=tf.constant_initializer(0.1),
                name='fc2' + self.suffix)

            self.trainable_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor' + self.suffix)
            self.trainable_variables_shapes = [
                var.get_shape().as_list() for var in self.trainable_variables
            ]

            # sampling
            self.all_act_prob = tf.nn.softmax(all_act,
                                              name='act_prob' + self.suffix)
            self.all_act_prob = tf.clip_by_value(self.all_act_prob, 1e-20, 1.0)

            with tf.name_scope('loss' + self.suffix):
                neg_log_prob = tf.reduce_sum(
                    -tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                    tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
                    axis=1)
                loss = tf.reduce_mean(neg_log_prob * self.tf_vt)
                loss += self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                self.entro = self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                self.loss = loss
            with tf.name_scope('train' + self.suffix):
                self.train_op = tf.train.AdamOptimizer(
                    self.pg_lr).minimize(loss)

            # safety loss
            """
            * -1?
            """
            self.chosen_action_log_probs = tf.reduce_sum(
                tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
                axis=1)
            ##### PPO CHANGE #####
            self.ppo_old_chosen_action_log_probs = tf.placeholder(
                tf.float32, [None])
            ##### PPO CHANGE #####
            self.old_chosen_action_log_probs = tf.stop_gradient(
                tf.placeholder(tf.float32, [None]))
            # self.each_safety_loss = tf.exp(self.chosen_action_log_probs - self.old_chosen_action_log_probs) * self.tf_safe
            self.each_safety_loss = (
                tf.exp(self.chosen_action_log_probs) -
                tf.exp(self.old_chosen_action_log_probs)) * self.tf_safe
            self.average_safety_loss = tf.reduce_mean(
                self.each_safety_loss)  #/ self.n_episodes tf.reduce_sum
            # self.average_safety_loss +=self.entro

            # KL D
            self.old_all_act_prob = tf.stop_gradient(
                tf.placeholder(tf.float32, [None, self.n_actions]))

            def kl(x, y):
                EPS = 1e-10
                x = tf.where(tf.abs(x) < EPS, EPS * tf.ones_like(x), x)
                y = tf.where(tf.abs(y) < EPS, EPS * tf.ones_like(y), y)
                X = tf.distributions.Categorical(probs=x + EPS)
                Y = tf.distributions.Categorical(probs=y + EPS)
                return tf.distributions.kl_divergence(X,
                                                      Y,
                                                      allow_nan_stats=False)

            self.each_kl_divergence = kl(
                self.all_act_prob, self.old_all_act_prob
            )  # tf.reduce_sum(kl(self.all_act_prob, self.old_all_act_prob), axis=1)
            self.average_kl_divergence = tf.reduce_mean(
                self.each_kl_divergence)
            # self.kl_gradients = tf.gradients(self.average_kl_divergence, self.trainable_variables)  # useless

            self.desired_kl = desired_kl
            # self.metrics = [self.loss, self.average_kl_divergence, self.average_safety_loss, self.entro] # Luping
            self.metrics = [
                self.loss, self.loss, self.average_safety_loss, self.entro
            ]  # Luping

            # Flatten parameters
            self.flat_params_op = get_flat_params(self.trainable_variables)
            """not use tensorflow default function, here we calculate the gradient by self:
            (1) loss: g
            (2) kl: directional_gradients (math, fisher)
            (3) safe: b 
            """
            ##### PPO change #####
            #### PPO Suyi's Change ####
            with tf.name_scope('ppoloss' + self.suffix):
                self.ppo_ratio = tf.exp(self.chosen_action_log_probs -
                                        self.ppo_old_chosen_action_log_probs)
                # self.ppo_ratio = tf.Print(self.ppo_ratio, [self.ppo_ratio], "self.ppo_ratio: ")

                surr = self.ppo_ratio * self.tf_vt
                self.ppoloss = -tf.reduce_mean(
                    tf.minimum(
                        surr,
                        tf.clip_by_value(self.ppo_ratio, 1. - self.clip_eps,
                                         1. + self.clip_eps) * self.tf_vt))

                self.ppoloss += self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                # self.ppoloss += 0.01 * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))

            with tf.variable_scope('ppotrain'):
                # self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(self.ppoloss)
                self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(
                    self.ppoloss)
            #### PPO Suyi's Change ####

            self.ppoloss_flat_gradients_op = get_flat_gradients(
                self.ppoloss, self.trainable_variables)
            ##### PPO change #####

            self.loss_flat_gradients_op = get_flat_gradients(
                self.loss, self.trainable_variables)
            self.kl_flat_gradients_op = get_flat_gradients(
                self.average_kl_divergence, self.trainable_variables)
            self.constraint_flat_gradients_op = get_flat_gradients(
                self.average_safety_loss, self.trainable_variables)

            self.vec = tf.placeholder(tf.float32, [None])
            self.fisher_product_op = self.get_fisher_product_op()

            self.new_params = tf.placeholder(tf.float32, [None])
            self.params_assign_op = assign_network_params_op(
                self.new_params, self.trainable_variables,
                self.trainable_variables_shapes)
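
The ppoloss built above is the standard PPO clipped surrogate objective. A minimal standalone sketch of the same formula on plain placeholders (graph construction only; clip_eps and all names are illustrative assumptions):

# Minimal sketch of the clipped PPO surrogate used in `ppoloss` (TF 1.x).
import tensorflow as tf

clip_eps = 0.2
log_prob_new = tf.placeholder(tf.float32, [None])  # log pi_new(a|s)
log_prob_old = tf.placeholder(tf.float32, [None])  # log pi_old(a|s)
advantage = tf.placeholder(tf.float32, [None])

ratio = tf.exp(log_prob_new - log_prob_old)
surr = ratio * advantage
clipped = tf.clip_by_value(ratio, 1. - clip_eps, 1. + clip_eps) * advantage
ppo_loss = -tf.reduce_mean(tf.minimum(surr, clipped))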