def __init__(self, board_width, board_height, model_file=None):
        """Assemble the policy/value network graph and open a session.

        Args:
            board_width: board width; last dimension of the input
                placeholder.
            board_height: board height; third dimension of the input
                placeholder.
            model_file: if not None, handed to ``self.restore_model`` once
                the variables have been initialised.
        """
        self.board_width = board_width
        self.board_height = board_height
        num_cells = board_height * board_width

        def relu_conv(inputs, filters, kernel_size):
            # Every convolution below shares padding, data layout and
            # activation; only the depth and the kernel size differ.
            return tf.layers.conv2d(inputs=inputs, filters=filters,
                                    kernel_size=kernel_size, padding="same",
                                    data_format="channels_last",
                                    activation=tf.nn.relu)

        # ---- Input: fed as [batch, 4, height, width], then moved to
        # channels-last for the convolutions.
        self.input_states = tf.placeholder(
            tf.float32, shape=[None, 4, board_height, board_width])
        self.input_state = tf.transpose(self.input_states, [0, 2, 3, 1])

        # ---- Shared convolutional trunk
        self.conv1 = relu_conv(self.input_state, 32, [3, 3])
        self.conv2 = relu_conv(self.conv1, 64, [3, 3])
        self.conv3 = relu_conv(self.conv2, 128, [3, 3])

        # ---- Policy head: 1x1 conv, flatten, dense layer producing the
        # log-probability of a move on each board cell.
        self.action_conv = relu_conv(self.conv3, 4, [1, 1])
        self.action_conv_flat = tf.reshape(
            self.action_conv, [-1, 4 * num_cells])
        self.action_fc = tf.layers.dense(
            inputs=self.action_conv_flat,
            units=num_cells,
            activation=tf.nn.log_softmax)

        # ---- Value head: 1x1 conv, flatten, two dense layers ending in a
        # single tanh unit that scores the current state.
        self.evaluation_conv = relu_conv(self.conv3, 2, [1, 1])
        self.evaluation_conv_flat = tf.reshape(
            self.evaluation_conv, [-1, 2 * num_cells])
        self.evaluation_fc1 = tf.layers.dense(
            inputs=self.evaluation_conv_flat,
            units=64,
            activation=tf.nn.relu)
        self.evaluation_fc2 = tf.layers.dense(
            inputs=self.evaluation_fc1,
            units=1,
            activation=tf.nn.tanh)

        # ---- Loss
        # Value term: squared error between the fed game outcome of each
        # state and the value-head prediction.
        self.labels = tf.placeholder(tf.float32, shape=[None, 1])
        self.value_loss = tf.losses.mean_squared_error(
            self.labels, self.evaluation_fc2)

        # Policy term: cross-entropy between the fed MCTS probabilities and
        # the predicted log-probabilities.
        self.mcts_probs = tf.placeholder(tf.float32, shape=[None, num_cells])
        weighted_log_probs = tf.multiply(self.mcts_probs, self.action_fc)
        self.policy_loss = tf.negative(
            tf.reduce_mean(tf.reduce_sum(weighted_log_probs, 1)))

        # L2 regularisation over every trainable variable whose name does
        # not contain "bias".
        l2_penalty_beta = 1e-4
        weight_l2_terms = [
            tf.nn.l2_loss(variable)
            for variable in tf.trainable_variables()
            if 'bias' not in variable.name.lower()
        ]
        l2_penalty = l2_penalty_beta * tf.add_n(weight_l2_terms)

        self.loss = self.value_loss + self.policy_loss + l2_penalty

        # ---- Optimiser; the learning rate is fed at every training step.
        self.learning_rate = tf.placeholder(tf.float32)
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.optimizer = adam.minimize(self.loss)

        # ---- Session
        self.session = tf.Session()

        # Policy entropy, used for monitoring only.
        move_probs = tf.exp(self.action_fc)
        self.entropy = tf.negative(
            tf.reduce_mean(tf.reduce_sum(move_probs * self.action_fc, 1)))

        # ---- Variable initialisation
        self.session.run(tf.global_variables_initializer())

        # ---- Checkpointing
        self.saver = tf.train.Saver()
        if model_file is not None:
            self.restore_model(model_file)
Exemplo n.º 2
0
    def __init__(self,
                 session,
                 player_id,
                 info_state_size,
                 num_actions,
                 loss_str="rpg",
                 loss_class=None,
                 hidden_layers_sizes=(128, ),
                 batch_size=16,
                 critic_learning_rate=0.01,
                 pi_learning_rate=0.001,
                 entropy_cost=0.01,
                 num_critic_before_pi=8,
                 additional_discount_factor=1.0,
                 max_global_gradient_norm=None,
                 optimizer_str="sgd"):
        """Initialize the PolicyGradient agent.

        Note that users must provide *only one of* `loss_str` or
        `loss_class`; this is enforced with an assertion.

        Args:
          session: Tensorflow session.
          player_id: int, player identifier. Usually its position in the
            game.
          info_state_size: int, info_state vector size.
          num_actions: int, number of actions per info state.
          loss_str: string or None. If string, it is resolved to a loss
            class through `_get_loss_class`. If None, a loss class must be
            passed through `loss_class`. Defaults to "rpg".
          loss_class: Class or None. If Class, it must define the policy
            gradient loss. If None, a loss class in a string format must be
            passed through `loss_str`. Defaults to None.
          hidden_layers_sizes: iterable, output sizes of the torso MLP.
            Defaults to (128,).
          batch_size: int, stored as `self._batch_size`. Defaults to 16.
          critic_learning_rate: float, learning rate used for the critic
            (Q or V) optimizer. Defaults to 0.01.
          pi_learning_rate: float, learning rate used for the Pi optimizer.
            Defaults to 0.001.
          entropy_cost: float, forwarded to the loss class constructor.
            Defaults to 0.01.
          num_critic_before_pi: int, stored as
            `self._num_critic_before_pi`. Defaults to 8.
          additional_discount_factor: float, stored as
            `self._extra_discount`. Defaults to 1.0.
          max_global_gradient_norm: float or None. When not None, gradients
            of both learn steps are clipped to this global norm before
            being applied.
          optimizer_str: String defining which optimizer to use. Supported
            values are {sgd, adam}. Defaults to "sgd".

        Raises:
          AssertionError: if both or neither of `loss_str` and `loss_class`
            are truthy.
          ValueError: if `optimizer_str` is neither "adam" nor "sgd".
        """
        assert bool(loss_str) ^ bool(
            loss_class), "Please provide only one option."
        # Snapshot of the constructor's local namespace at this point, i.e.
        # `self` and every argument as it was passed in.
        self._kwargs = locals()
        loss_class = loss_class if loss_class else self._get_loss_class(
            loss_str)

        self.player_id = player_id
        self._session = session
        self._num_actions = num_actions
        self._layer_sizes = hidden_layers_sizes
        self._batch_size = batch_size
        self._extra_discount = additional_discount_factor
        self._num_critic_before_pi = num_critic_before_pi

        self._episode_data = []
        self._dataset = collections.defaultdict(list)
        self._prev_time_step = None
        self._prev_action = None

        # Step counters
        self._step_counter = 0
        self._episode_counter = 0
        self._num_learn_steps = 0

        # Keep track of the last training loss achieved in an update step.
        self._last_loss_value = None

        # Placeholders
        self._info_state_ph = tf.placeholder(shape=[None, info_state_size],
                                             dtype=tf.float32,
                                             name="info_state_ph")
        self._action_ph = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name="action_ph")
        self._return_ph = tf.placeholder(shape=[None],
                                         dtype=tf.float32,
                                         name="return_ph")

        # Network
        # activate final as we plug logit and qvalue heads afterwards.
        self._net_torso = snt.nets.MLP(output_sizes=self._layer_sizes,
                                       activate_final=True)
        torso_out = self._net_torso(self._info_state_ph)
        self._policy_logits_layer = snt.Linear(output_size=self._num_actions,
                                               name="policy_head")

        # Torso followed by the policy head, exposed as a single module.
        self.policy_logits_network = snt.Sequential(
            [self._net_torso, self._policy_logits_layer])

        self._policy_logits = self._policy_logits_layer(torso_out)
        self._policy_probs = tf.nn.softmax(self._policy_logits)

        self._savers = []

        # The A2C loss uses a single-output baseline head; every other loss
        # uses one output per action.
        used_output_size = 1 if loss_class.__name__ == "BatchA2CLoss" else self._num_actions
        self._q_values_layer = snt.Linear(output_size=used_output_size,
                                          name="q_values_head")

        # Add baseline (V) head for A2C.
        if loss_class.__name__ == "BatchA2CLoss":
            self._baseline = tf.squeeze(self._q_values_layer(torso_out),
                                        axis=1)
        else:
            # Add q-values head otherwise
            self._q_values = self._q_values_layer(torso_out)

        # Critic loss
        # Baseline loss in case of A2C
        if loss_class.__name__ == "BatchA2CLoss":
            self._critic_loss = tf.reduce_mean(
                tf.losses.mean_squared_error(labels=self._return_ph,
                                             predictions=self._baseline))
        else:
            # Q-loss otherwise.
            # Pair each row index with the action taken in that row so that
            # gather_nd picks the value of the chosen action.
            action_indices = tf.stack(
                [tf.range(tf.shape(self._q_values)[0]), self._action_ph],
                axis=-1)
            value_predictions = tf.gather_nd(self._q_values, action_indices)
            self._critic_loss = tf.reduce_mean(
                tf.losses.mean_squared_error(labels=self._return_ph,
                                             predictions=value_predictions))
        if optimizer_str == "adam":
            self._critic_optimizer = tf.train.AdamOptimizer(
                learning_rate=critic_learning_rate)
        elif optimizer_str == "sgd":
            self._critic_optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=critic_learning_rate)
        else:
            raise ValueError("Not implemented, choose from 'adam' and 'sgd'.")

        def minimize_with_clipping(optimizer, loss):
            # Equivalent to optimizer.minimize(loss), with optional clipping
            # of the gradients by global norm.
            grads_and_vars = optimizer.compute_gradients(loss)
            if max_global_gradient_norm is not None:
                grads, variables = zip(*grads_and_vars)
                grads, _ = tf.clip_by_global_norm(grads,
                                                  max_global_gradient_norm)
                grads_and_vars = list(zip(grads, variables))

            return optimizer.apply_gradients(grads_and_vars)

        self._critic_learn_step = minimize_with_clipping(
            self._critic_optimizer, self._critic_loss)

        # Pi loss
        pg_class = loss_class(entropy_cost=entropy_cost)
        if loss_class.__name__ == "BatchA2CLoss":
            self._pi_loss = pg_class.loss(policy_logits=self._policy_logits,
                                          baseline=self._baseline,
                                          actions=self._action_ph,
                                          returns=self._return_ph)
        else:
            self._pi_loss = pg_class.loss(policy_logits=self._policy_logits,
                                          action_values=self._q_values)
        # No else branch is needed here: an unsupported optimizer_str has
        # already raised ValueError when the critic optimizer was built.
        if optimizer_str == "adam":
            self._pi_optimizer = tf.train.AdamOptimizer(
                learning_rate=pi_learning_rate)
        elif optimizer_str == "sgd":
            self._pi_optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=pi_learning_rate)

        self._pi_learn_step = minimize_with_clipping(self._pi_optimizer,
                                                     self._pi_loss)
        self._loss_str = loss_str
        self._initialize()
Exemplo n.º 3
0
    def predict(self,
                dataset: Dataset,
                loaded_model=False,
                batch_size=64,
                draw=False):
        """Rebuild the model in a fresh graph, restore weights and evaluate.

        A new ``tf.Graph`` is created, all model parameters are declared,
        the model is run through ``self.model()`` and ``self.calc_reward()``,
        the checkpoint at ``self.ckpt_path`` is restored, and
        ``self.evaluate`` is called on ``dataset``.

        Args:
            dataset: dataset forwarded to ``self.evaluate``.
            loaded_model: not used by this method; kept for interface
                compatibility.
            batch_size: fixed batch dimension of every placeholder; also
                stored on ``self.batch_size``.
            draw: forwarded to ``self.evaluate``.

        Side effects:
            Overwrites several attributes on ``self`` (``batch_size``, the
            location/baseline/glimpse lists, the placeholders and all weight
            variables) with tensors that belong to the new graph.
        """
        with tf.device('/cpu:1'):
            with tf.Graph().as_default():
                # resource preallocation
                self.batch_size = batch_size
                self.mean_locs = []  # expectation of locations
                self.sampled_locs = [
                ]  # sampled locations ~N(mean_locs[.], loc_sd)
                self.baselines = []  # baseline, the value prediction
                self.glimpse_images = []  # to show in window

                # set the learning rate
                self.global_step = tf.Variable(0, trainable=False)
                self.lr = tf.train.exponential_decay(self.initLr,
                                                     self.global_step,
                                                     self.lrDecayFreq,
                                                     self.lrDecayRate,
                                                     staircase=True)

                # preallocate x, y, baseline
                # NOTE: `labels` is not referenced again in this method; it
                # is kept so the constructed graph is unchanged.
                labels = tf.placeholder(
                    "float32", shape=[self.batch_size, self.n_classes])
                self.labels_placeholder = tf.placeholder(
                    tf.float32, shape=(self.batch_size), name="labels_raw")
                self.onehot_labels_placeholder = tf.placeholder(
                    tf.float32,
                    shape=(self.batch_size, self.n_classes),
                    name="labels_onehot")
                self.inputs_placeholder = tf.placeholder(
                    tf.float32,
                    shape=(self.batch_size, self.img_size * self.img_size),
                    name="images")

                # declare the model parameters; naming rule:
                # 1st capital letter: weights or bias (W = weights, B = bias)
                # 2nd lowercase letter: the network (e.g. g = glimpse network)
                # remaining letters: input-output mapping, spelled out in the
                # variable name argument

                self.Wg_l_h = self.weight_variable(
                    (2, self.hl_size), "glimpseNet_wts_location_hidden", True)
                self.Bg_l_h = self.weight_variable(
                    (1, self.hl_size), "glimpseNet_bias_location_hidden", True)

                self.Wg_g_h = self.weight_variable(
                    (self.totalSensorBandwidth, self.hg_size),
                    "glimpseNet_wts_glimpse_hidden", True)
                self.Bg_g_h = self.weight_variable(
                    (1, self.hg_size), "glimpseNet_bias_glimpse_hidden", True)

                self.Wg_hg_gf1 = self.weight_variable(
                    (self.hg_size, self.g_size),
                    "glimpseNet_wts_hiddenGlimpse_glimpseFeature1", True)
                self.Wg_hl_gf1 = self.weight_variable(
                    (self.hl_size, self.g_size),
                    "glimpseNet_wts_hiddenLocation_glimpseFeature1", True)
                self.Bg_hlhg_gf1 = self.weight_variable(
                    (1, self.g_size),
                    "glimpseNet_bias_hGlimpse_hLocs_glimpseFeature1", True)

                self.Wc_g_h = self.weight_variable(
                    (self.cell_size, self.g_size),
                    "coreNet_wts_glimpse_hidden", True)
                self.Bc_g_h = self.weight_variable(
                    (1, self.g_size), "coreNet_bias_glimpse_hidden", True)

                self.Wr_h_r = self.weight_variable(
                    (self.cell_out_size, self.img_size**2),
                    "reconstructionNet_wts_hidden_action", True)
                self.Br_h_r = self.weight_variable(
                    (1, self.img_size**2),
                    "reconstructionNet_bias_hidden_action", True)

                self.Wb_h_b = self.weight_variable(
                    (self.g_size, 1), "baselineNet_wts_hiddenState_baseline",
                    True)
                self.Bb_h_b = self.weight_variable(
                    (1, 1), "baselineNet_bias_hiddenState_baseline", True)

                self.Wl_h_l = self.weight_variable(
                    (self.cell_out_size, 2), "locationNet_wts_hidden_location",
                    True)
                self.Bl_h_l = self.weight_variable(
                    (1, 2), "locationNet_bias_hidden_location", True)

                self.Wa_h_a = self.weight_variable(
                    (self.cell_out_size, self.n_classes),
                    "actionNet_wts_hidden_action", True)
                self.Ba_h_a = self.weight_variable(
                    (1, self.n_classes), "actionNet_bias_hidden_action", True)

                # query the model output
                outputs = self.model()

                # convert the per-glimpse lists of tensors into single
                # tensors laid out as (batch, glimpse, 2)
                self.sampled_locs = tf.concat(axis=0, values=self.sampled_locs)
                self.sampled_locs = tf.reshape(
                    self.sampled_locs, (self.nGlimpses, self.batch_size, 2))
                self.sampled_locs = tf.transpose(self.sampled_locs, [1, 0, 2])
                self.mean_locs = tf.concat(axis=0, values=self.mean_locs)
                self.mean_locs = tf.reshape(
                    self.mean_locs, (self.nGlimpses, self.batch_size, 2))
                self.mean_locs = tf.transpose(self.mean_locs, [1, 0, 2])
                self.glimpse_images = tf.concat(axis=0,
                                                values=self.glimpse_images)

                # compute the reward; only reward, predicted_labels and
                # correct_labels are needed for evaluation
                (_cost, reward, predicted_labels, correct_labels, _train_op,
                 _b, _avg_b, _rminusb, _lr) = self.calc_reward(outputs)

                saver = tf.train.Saver()
                sess_config = tf.ConfigProto(allow_soft_placement=True,
                                             log_device_placement=False)
                sess_config.gpu_options.allow_growth = True
                sess = tf.Session(config=sess_config)
                # Close the session even when restoring or evaluating fails,
                # so its resources are not leaked.
                try:
                    saver.restore(sess, self.ckpt_path)

                    self.evaluate(dataset,
                                  sess,
                                  reward,
                                  predicted_labels,
                                  correct_labels,
                                  self.glimpse_images,
                                  draw=draw)
                finally:
                    sess.close()
Exemplo n.º 4
0
def main(_):
    """Search over alpha vectors for MNIST classifiers and report metrics.

    Builds ``FLAGS.num_parallel_alphas`` classifiers that are trained side by
    side, each with its own alpha vector. For each of
    ``FLAGS.num_alpha_batches`` rounds the classifier variables are
    re-initialised, a batch of alphas is chosen (all zeros, sampled from a
    ball, or picked with a Gaussian-process lower confidence bound), the
    classifiers are trained, and validation/test metrics are recorded. The
    metrics of the alpha with the lowest validation metric are printed.

    Args:
        _: unused positional argument.
    """
    mnist = input_data.read_data_sets('/tmp/data/', one_hot=True, seed=12345)
    random_weight_vector = np.random.uniform(
        low=0.1, high=1.9, size=TRAIN_INPUT_SIZE)

    # Graph inputs.
    x = tf.placeholder(tf.float32, shape=(None, INPUT_DIM), name='x')
    y = tf.placeholder(tf.float32, shape=(None, OUTPUT_DIM), name='y')
    weight = tf.placeholder(
        tf.float32, shape=(None, OUTPUT_DIM), name='weight')
    parallel_alphas = tf.placeholder(
        tf.float32,
        shape=(FLAGS.num_parallel_alphas, OUTPUT_DIM),
        name='parallel_alphas')
    unstack_parallel_alphas = tf.unstack(parallel_alphas, axis=0)

    parallel_logits, parallel_losses, parallel_optimizers = [], [], []
    validation_metrics, test_metrics, all_test_metrics = [], [], []

    # One classifier (with its own optimizer) per row of `parallel_alphas`.
    with tf.variable_scope('classifier'):
        for alpha_row in unstack_parallel_alphas:
            logits = classifier(x)
            alpha = tf.reshape(alpha_row, shape=[OUTPUT_DIM, 1])
            optimizer, loss = optimization(
                logits, y, weight, alpha, LEARNING_RATE)
            parallel_logits.append(logits)
            parallel_losses.append(loss)
            parallel_optimizers.append(optimizer)

    init = tf.global_variables_initializer()
    classifiers_init = tf.variables_initializer(
        tf.global_variables(scope='classifier'))

    with tf.Session() as sess:
        sess.run(init)

        # GetCandidatesAlpha (Algorithm 2 in paper)
        sample_alphas = np.zeros(shape=(0, OUTPUT_DIM))
        for round_index in range(FLAGS.num_alpha_batches):
            sess.run(classifiers_init)

            if FLAGS.uniform_weights:
                alpha_batch = np.zeros(
                    shape=(FLAGS.num_parallel_alphas, OUTPUT_DIM))
            elif FLAGS.random_alpha or round_index < 1:
                alpha_batch = sample_from_ball(
                    size=(FLAGS.num_parallel_alphas, OUTPUT_DIM),
                    sampling_radius=FLAGS.sampling_radius)
                sample_alphas = np.concatenate([sample_alphas, alpha_batch])
            else:
                # Pick candidates one at a time with a lower confidence
                # bound (LCB) on a Gaussian process fitted to the alphas
                # seen so far.
                alpha_batch = np.zeros(shape=(0, OUTPUT_DIM))
                sample_metrics = list(validation_metrics)
                radius = FLAGS.sampling_radius
                for _unused in range(FLAGS.num_parallel_alphas):
                    rbf = RBF(length_scale=radius,
                              length_scale_bounds=(radius * 1e-3,
                                                   radius * 1e3))
                    kernel = rbf * ConstantKernel(1.0, (1e-3, 1e3))
                    gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-4)
                    gp = gp.fit(sample_alphas, np.log1p(sample_metrics))

                    candidates = sample_from_ball((10000, OUTPUT_DIM),
                                                  FLAGS.sampling_radius)
                    metric_mles, metric_stds = gp.predict(
                        candidates, return_std=True)

                    lower_bounds = np.expm1(metric_mles - 1.0 * metric_stds)
                    metric_lcbs = np.maximum(lower_bounds, 0.0)
                    # break ties
                    metric_lcbs += np.random.random(
                        size=metric_lcbs.shape) * 0.001
                    best_index = np.argmin(metric_lcbs)

                    best_alpha = [candidates[best_index]]
                    # Upper estimate of the chosen alpha's metric, capped
                    # at 1, recorded so that the next fit sees this pick.
                    upper_bound = np.expm1(metric_mles[best_index] +
                                           1.0 * metric_stds[best_index])
                    best_alpha_metric_estimate = np.minimum(upper_bound, 1.0)

                    alpha_batch = np.concatenate([alpha_batch, best_alpha])
                    sample_alphas = np.concatenate(
                        [sample_alphas, best_alpha])
                    sample_metrics.append(best_alpha_metric_estimate)

            # Training classifiers
            for step in range(TRAINING_STEPS):
                offset = step * BATCH_SIZE % TRAIN_INPUT_SIZE
                batch_x, batch_y = mnist.train.next_batch(
                    BATCH_SIZE, shuffle=False)
                # Each example's weight is repeated across all outputs.
                batch_weight = [
                    [random_weight_vector[i]] * OUTPUT_DIM
                    for i in range(offset, offset + BATCH_SIZE)
                ]
                train_feed = {
                    x: batch_x,
                    y: batch_y,
                    weight: batch_weight,
                    parallel_alphas: alpha_batch,
                }
                _, _ = sess.run([parallel_optimizers, parallel_losses],
                                feed_dict=train_feed)

            # Validation metrics, one per classifier.
            validation_feed = {
                x: mnist.validation.images,
                y: mnist.validation.labels,
            }
            parallel_validation_logits = sess.run(
                parallel_logits, feed_dict=validation_feed)
            for validation_logits in parallel_validation_logits:
                validation_metrics.append(
                    metric(mnist.validation.labels,
                           validation_logits,
                           all_digits=False))

            # Test metrics, one per classifier.
            test_feed = {x: mnist.test.images, y: mnist.test.labels}
            parallel_test_logits = sess.run(
                parallel_logits, feed_dict=test_feed)
            for test_logits in parallel_test_logits:
                test_metrics.append(
                    metric(mnist.test.labels, test_logits, all_digits=False))
            for test_logits in parallel_test_logits:
                all_test_metrics.append(
                    metric(mnist.test.labels, test_logits, all_digits=True))

    # Report the alpha with the lowest validation metric.
    best_observed_index = np.argmin(validation_metrics)
    best_validation = validation_metrics[best_observed_index]
    best_test = test_metrics[best_observed_index]
    best_all_test = all_test_metrics[best_observed_index]
    print('[metric] validation={}'.format(best_validation))
    print('[metric] test={}'.format(best_test))
    for digit in range(10):
        print('[all test metrics] {}={}'.format(digit, best_all_test[digit]))
def main():
    """Train a 3-D convolutional denoising autoencoder and checkpoint it.

    Builds the encoder/decoder graph, then for each epoch fetches
    ``n_batches`` batches from the training input pipeline, adds Gaussian
    noise to the input and minimises the relative Frobenius-norm
    reconstruction error against the clean volume. A checkpoint is written
    after every epoch, one validation batch is scored at the end, and the
    last checkpoint is restored into a fresh session.

    Relies on module-level configuration defined outside this function
    (``input_shape``, ``stride``, ``padding``, ``learning_rate``,
    ``noise_factor``, ``n_epochs``, ``n_batches``, ``logs_path``,
    ``ws_path``, ``config`` and the two input pipelines).

    Side effects:
        Updates the globals ``nvol`` and ``batch_cost``, writes checkpoints
        under ``ws_path`` and creates a summary writer under ``logs_path``.
    """
    global nvol, batch_cost

    # ---- graph: input and target placeholders
    inputs_ = tf.placeholder(tf.float32, input_shape, name='inputs')
    targets_ = tf.placeholder(tf.float32, input_shape, name='targets')

    # ---- encoder
    conv1 = tf.keras.layers.Conv3D(
        filters=16, kernel_size=(3, 3, 3), strides=stride, padding=padding, activation=tf.nn.relu)(inputs_)
    maxpool1 = tf.keras.layers.MaxPool3D(
        pool_size=(2, 2, 2), strides=(3, 2, 2), padding=padding)(conv1)
    conv2 = tf.keras.layers.Conv3D(
        filters=32, kernel_size=(3, 3, 3), strides=stride, padding=padding, activation=tf.nn.relu)(maxpool1)
    maxpool2 = tf.keras.layers.MaxPool3D(
        pool_size=(2, 2, 2), strides=(3, 3, 2), padding=padding)(conv2)
    conv3 = tf.keras.layers.Conv3D(
        filters=96, kernel_size=(2, 2, 2), strides=stride, padding=padding, activation=tf.nn.relu)(maxpool2)
    maxpool3 = tf.keras.layers.MaxPool3D(
        pool_size=(2, 2, 2), strides=(1, 1, 2), padding=padding)(conv3)

    # ---- decoder: each resize factor mirrors the stride of the matching
    # pooling layer
    unpool1 = K.resize_volumes(maxpool3, 1, 1, 2, "channels_last")
    deconv1 = tf.keras.layers.Conv3DTranspose(filters=96, kernel_size=(2, 2, 2), strides=stride,
                                              padding=padding, activation=tf.nn.relu)(unpool1)
    unpool2 = K.resize_volumes(deconv1, 3, 3, 2, "channels_last")
    deconv2 = tf.keras.layers.Conv3DTranspose(filters=32, kernel_size=(3, 3, 3), strides=stride,
                                              padding=padding, activation=tf.nn.relu)(unpool2)
    unpool3 = K.resize_volumes(deconv2, 3, 2, 2, "channels_last")
    deconv3 = tf.keras.layers.Conv3DTranspose(filters=16, kernel_size=(3, 3, 3), strides=stride,
                                              padding=padding, activation=tf.nn.relu)(unpool3)
    output = tf.keras.layers.Dense(
        units=1, activation=None)(deconv3)

    # ---- loss: Frobenius norm of the error relative to that of the target,
    # taken over the first and last axes, then averaged
    loss = tf.divide(tf.norm(tf.subtract(targets_, output), ord='fro', axis=[0, -1]),
                     tf.norm(targets_, ord='fro', axis=[0, -1]))
    print("loss:", loss)
    cost = tf.reduce_mean(loss, name='loss')
    print("cost:", cost)
    opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    print("opt:", opt)

    all_saver = tf.train.Saver(max_to_keep=None)

    # ---- summary nodes
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("cost", cost)
    tf.summary.histogram("conv1", conv1)
    tf.summary.histogram("maxpool1", maxpool1)
    tf.summary.histogram("conv2", conv2)
    tf.summary.histogram("maxpool2", maxpool2)
    tf.summary.histogram("conv3", conv3)
    tf.summary.histogram("maxpool3", maxpool3)
    tf.summary.histogram("unpool3", unpool3)
    tf.summary.histogram("deconv3", deconv3)
    tf.summary.histogram("unpool2", unpool2)
    tf.summary.histogram("deconv2", deconv2)
    tf.summary.histogram("unpool1", unpool1)
    tf.summary.histogram("deconv1", deconv1)

    # NOTE: the merged summary op is built but never run in this function;
    # only the graph is handed to the writer.
    summary_op = tf.summary.merge_all(key='summaries')
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    # ---- session
    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init_op)

    # ops that yield one batch each time they are run
    fetch_op_tr = input_pipeline_tr()
    fetch_op_val = input_pipeline_val()

    # coordinator and queue runners manage the threads that fill the input
    # pipeline queues
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # ---- training
    counter = 0
    last_checkpoint = None
    try:
        while not coord.should_stop():
            print('\nEpoch\t' + str(counter + 1) + '/' + str(n_epochs))
            epoch_start = time.time()

            for i in range(n_batches):
                # fetch a batch and corrupt it with Gaussian noise
                vol = sess.run(fetch_op_tr)
                nvol = np.asarray(vol)
                noisy_nvol = nvol + noise_factor * np.random.randn(*nvol.shape)
                batch_cost, _ = sess.run([cost, opt], feed_dict={inputs_: noisy_nvol, targets_: nvol})
                if i % 1000 == 0:
                    print("batch_cost", batch_cost)
                # flush() returns None, so it must not be passed to print()
                # as an argument (that printed a stray "None").
                print('\r' + str(((i + 1) * 100) / n_batches) + '%')
                sys.stdout.flush()
            counter = counter + 1
            print("Epoch: {}/{}...".format(counter, n_epochs), "Training loss: {:.4f}".format(batch_cost))
            # report the duration of the epoch, not the wall-clock timestamp
            print("time cost: {}".format(time.time() - epoch_start))
            # save weights and biases of the model; keep the returned path
            # (it carries the "-<global_step>" suffix) for the restore below
            last_checkpoint = all_saver.save(sess, ws_path + "model.ckpt", global_step=counter)
            print('Weights saved')

            print('Summary saved')

            if counter >= n_epochs:
                break
        # checking validation error: evaluate the cost only, without running
        # the optimizer, so the validation batch is not trained on
        vol = sess.run(fetch_op_val)
        nvol = np.asarray(vol)
        batch_cost = sess.run(cost, feed_dict={inputs_: nvol, targets_: nvol})
        print('Validation error' + str(batch_cost))
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')

    finally:
        # always stop the input threads and release the session, even when
        # an unexpected error interrupts training
        coord.request_stop()
        try:
            coord.join(threads)
        finally:
            sess.close()

    # restore the weights written by the last completed epoch
    if last_checkpoint is not None:
        with tf.Session(config=config) as sess:
            all_saver.restore(sess, last_checkpoint)
            print("Model restored.")
Exemplo n.º 6
0
def placeholder(dim=None, name=None):
    """Create a float32 placeholder shaped by ``combined_shape(None, dim)``.

    :param dim: Forwarded as the second argument of ``combined_shape``.
    :param name: Optional name for the placeholder op.
    :return: The new ``tf.placeholder``.
    """
    shape = combined_shape(None, dim)
    return tf.placeholder(dtype=tf.float32, shape=shape, name=name)
Exemplo n.º 7
0
        return np.array([1, 0, 0, 0])


# Training dataset preparation.
# NOTE(review): encore_binary / one_hot_fizz_buss are defined elsewhere in the
# file; presumably they return a DIGIT_COUNT-wide binary encoding and a
# 4-way one-hot fizz-buzz label respectively -- confirm against their bodies.
DIGIT_COUNT = 10
# x = encodings of the integers 101 .. 2**DIGIT_COUNT - 1 (i.e. 101..1023)
train_x = np.array(
    [encore_binary(i, DIGIT_COUNT) for i in range(101, 2**DIGIT_COUNT)])
# y = one-hot encoded fizz buzz labels for the same integers
train_y = np.array([one_hot_fizz_buss(i) for i in range(101, 2**DIGIT_COUNT)])

# TensorFlow parameter preparation
HIDDEN_UNIT_COUNT = 100
# input is an n x DIGIT_COUNT matrix of floats
# output is an n x 4 matrix of floats
X = tf.placeholder('float', [None, DIGIT_COUNT])
Y = tf.placeholder('float', [None, 4])


# initial weight randomly
def init_weights(shape):
    """Return a ``tf.Variable`` of ``shape`` initialised from a normal
    distribution with standard deviation 0.01."""
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)


# Initialise the hidden-layer weights (DIGIT_COUNT x HIDDEN_UNIT_COUNT)
w_h = init_weights([DIGIT_COUNT, HIDDEN_UNIT_COUNT])
# Initialise the output-layer weights (HIDDEN_UNIT_COUNT x 4 classes)
w_o = init_weights([HIDDEN_UNIT_COUNT, 4])


# define model
Exemplo n.º 8
0
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

# Split features from the 'Y_1_month' label column and one-hot encode the label.
# NOTE(review): `train` / `test` look like pandas DataFrames (drop(..., axis=1))
# loaded earlier in the file -- confirm.
X_train, Y_train = np.array(train.drop(['Y_1_month'], axis=1)), dense_to_one_hot(train['Y_1_month'])
X_test, Y_test = np.array(test.drop(['Y_1_month'], axis=1)), dense_to_one_hot(test['Y_1_month'])

# hyperparameters
learning_rate = 0.01
num_epochs = 30
batch_size = 1000
# presumably the number of feature columns left after dropping 'Y_1_month' -- verify
input_size = 55
hidden1_size = 100
output_size = 2
display_step = 1

# Placeholders for a batch of feature rows and their one-hot labels.
x = tf.placeholder(tf.float32, shape = [None,input_size])
y = tf.placeholder(tf.float32, shape = [None, output_size])

def build_ANN(x):
    """Build a network with one ReLU hidden layer and return its logits.

    All weights and biases are fresh ``tf.Variable`` objects initialised from
    ``tf.random_normal``; sizes come from the module-level ``input_size``,
    ``hidden1_size`` and ``output_size``.

    :param x: Input tensor multiplied by an ``[input_size, hidden1_size]`` matrix.
    :return: Un-normalised output tensor (no softmax applied).
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden_weights = tf.Variable(
        tf.random_normal(shape=[input_size, hidden1_size]))
    hidden_bias = tf.Variable(tf.random_normal(shape=[hidden1_size]))
    hidden_pre_activation = tf.matmul(x, hidden_weights) + hidden_bias
    hidden_activation = tf.nn.relu(hidden_pre_activation)

    # Output layer: plain affine transform.
    output_weights = tf.Variable(
        tf.random_normal(shape=[hidden1_size, output_size]))
    output_bias = tf.Variable(tf.random_normal(shape=[output_size]))
    return tf.matmul(hidden_activation, output_weights) + output_bias

# Logits for the input placeholder, and the mean softmax cross-entropy between
# those logits and the one-hot label placeholder.
predicted_value = build_ANN(x)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predicted_value, labels=y))
Exemplo n.º 9
0
    def build_sub_graph(self, length=15, reuse=False):
        """Build the training graph for sequences of ``length`` tokens.

        The sequence placeholder alternates entity ids (even positions) and
        relation ids (odd positions). The first ``length - 1`` tokens are
        embedded, batch-normalised and fed through an RNN; each position is
        then trained to predict the token that follows it.

        :param length: Number of tokens per sequence.
        :param reuse: When truthy, every batch-norm call and the RNN scope
                      reuse existing variables; otherwise only the first
                      batch-norm call of each scope creates them.
        :return: Tuple ``(losses, seq)``: the summed loss divided by the batch
                 size, and the ``[batch_size, length]`` int32 placeholder to
                 feed.
        """
        options = self._options
        hidden_size = options.hidden_size
        batch_size = options.batch_size

        seq = tf.placeholder(tf.int32, [batch_size, length],
                             name='seq' + str(length))

        e_em, r_em = self._entity_embedding, self._relation_embedding

        # Read entities and relations separately; the last token is excluded
        # here because it is only ever used as a prediction target.
        ent = seq[:, :-1:2]
        rel = seq[:, 1::2]

        ent_em = tf.nn.embedding_lookup(e_em, ent)
        rel_em = tf.nn.embedding_lookup(r_em, rel)

        # Interleave the two embedding streams back into sequence order.
        em_seq = [
            ent_em[:, i // 2] if i % 2 == 0 else rel_em[:, i // 2]
            for i in range(length - 1)
        ]

        # Batch-normalise every position with shared parameters: the first
        # call creates the variables unless the caller asked for reuse.
        with tf.variable_scope('input_bn'):
            bn_em_seq = [
                tf.reshape(self.bn(em_seq[i], reuse=(bool(reuse) or i != 0)),
                           [-1, 1, hidden_size]) for i in range(length - 1)
            ]

        bn_em_seq = tf.concat(bn_em_seq, axis=1)

        ent_bn_em = bn_em_seq[:, ::2]

        with tf.variable_scope('rnn', reuse=reuse):

            cell = self.lstm_cell(True, options.keep_prob, options.num_layers)

            outputs, _ = tf.nn.dynamic_rnn(cell,
                                           bn_em_seq,
                                           dtype=tf.float32)

        rel_outputs = outputs[:, 1::2, :]
        outputs = [outputs[:, i, :] for i in range(length - 1)]

        ent_outputs = outputs[::2]

        # RSN residual: at relation positions, add a linear projection of the
        # RNN output to a linear projection of the preceding entity input.
        # Dense takes the layer configuration in its constructor and the input
        # tensor in the call; both projections are bias-free and linear.
        rel_projection = tf.keras.layers.Dense(
            hidden_size, use_bias=False, activation=None)(rel_outputs)
        ent_projection = tf.keras.layers.Dense(
            hidden_size, use_bias=False, activation=None)(ent_bn_em)
        res_rel_outputs = rel_projection + ent_projection

        # Recover the sequence order: plain RNN outputs at entity positions,
        # residual outputs at relation positions.
        res_rel_outputs = [
            res_rel_outputs[:, i, :] for i in range((length - 1) // 2)
        ]
        outputs = [
            ent_outputs[i // 2] if i % 2 == 0 else res_rel_outputs[i // 2]
            for i in range(length - 1)
        ]

        # Output batch norm, with the same variable-sharing rule as the input.
        with tf.variable_scope('output_bn'):
            bn_outputs = [
                tf.reshape(self.bn(outputs[i], reuse=(bool(reuse) or i != 0)),
                           [-1, 1, hidden_size]) for i in range(length - 1)
            ]

        def cal_loss(bn_outputs, seq):
            """Stack the per-position sampled losses along axis 1."""
            losses = []

            # The mask values are drawn once, at graph-construction time; only
            # their order is reshuffled when the graph runs.
            masks = np.random.choice([0., 1.0], size=batch_size, p=[0.5, 0.5])
            weight = tf.random_shuffle(tf.cast(masks, tf.float32))
            for i, output in enumerate(bn_outputs):
                # An entity position (even i) predicts the following relation,
                # a relation position predicts the following entity.
                if i % 2 == 0:
                    out_w, out_b = self._rel_w, self._rel_b
                else:
                    out_w, out_b = self._ent_w, self._ent_b
                losses.append(
                    self.sampled_loss(output,
                                      seq[:, i + 1],
                                      out_w,
                                      out_b,
                                      weight=weight,
                                      is_entity=i))
            losses = tf.stack(losses, axis=1)
            return losses

        seq_loss = cal_loss(bn_outputs, seq)

        losses = tf.reduce_sum(seq_loss) / batch_size

        return losses, seq
Exemplo n.º 10
0
# Hyperparameters.
# NOTE(review): names suggest an epsilon-greedy Q-learning setup with a replay
# memory; the code that consumes them is outside this excerpt -- confirm.
hiddenSize = 100
maxMemory = 500
batchSize = 50
epoch = 100
epsilonStart = 1
epsilonDiscount = 0.999
epsilonMinimumValue = 0.1
discount = 0.9
learningRate = 0.2
winReward = 1
#------------------------------------------------------------

#------------------------------------------------------------
# Hypothesis (model) definition
#------------------------------------------------------------
# nbStates / nbActions are defined earlier in the file.
X = tf.placeholder(tf.float32, [None, nbStates])
# First layer: weights drawn with stddev 1/sqrt(fan_in), followed by ReLU.
W1 = tf.Variable(
    tf.truncated_normal([nbStates, hiddenSize],
                        stddev=1.0 / math.sqrt(float(nbStates))))
b1 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
input_layer = tf.nn.relu(tf.matmul(X, W1) + b1)

# Second layer: hiddenSize -> hiddenSize, same initialisation scheme, ReLU.
W2 = tf.Variable(
    tf.truncated_normal([hiddenSize, hiddenSize],
                        stddev=1.0 / math.sqrt(float(hiddenSize))))
b2 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
hidden_layer = tf.nn.relu(tf.matmul(input_layer, W2) + b2)

# Output weights: hiddenSize -> nbActions.
W3 = tf.Variable(
    tf.truncated_normal([hiddenSize, nbActions],
                        stddev=1.0 / math.sqrt(float(hiddenSize))))
Exemplo n.º 11
0
        x, W, strides=[1, 1, 1, 1],
        padding='SAME')  # x为输入,W为参数矩阵,扫描跨度strides=[1,y_step,x_step,1]
    # 扫描方式 SAME  有两个取值'SAME'和'VALID',对应一个填充,一个不填充


# ------------ 2-D pooling (channel=1, grayscale) ------------
def max_pool_2x2(x):
    """Apply max pooling to ``x`` with a 2x2 window, stride 2 and 'SAME'
    padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool2d(x, ksize=window, strides=step, padding='SAME')


# ------------ Initialise the input/output structure ------------
# Initialise x -- the size of the input data
xs = tf.placeholder(tf.float32, [None, 784])  # None is the number of samples (left unspecified)
# Handwritten-digit images are 28*28
# Ground-truth values fed in alongside the data
ys = tf.placeholder(tf.float32, [None, 10])  # output is 1*10; the index of the element set to 1 is the digit
x_data = tf.reshape(
    xs, [-1, 28, 28, 1
         ])  # -1 is the number of samples m (set by each round's input size, batch_size=100); 28*28 is the image size; 1 is the channel

#--------------  Define the network structure --------------
#-------------- Convolution layer 1 -----------------
# output = 28*28*16
W_conv1 = weight_variable([5, 5, 1, 16
                           ])  # layer-1 weights (convolution kernels): a 5*5*1*16 tensor; 1 is the kernel's channel count, 16 is the number of kernels
b_conv1 = bias_variable([16])  # bias for conv1
h_conv1 = tf.nn.relu(conv2d(x_data, W_conv1) + b_conv1)  # linear step (convolution) first, then activation (non-linearity)
#-------------- Pooling layer 1 -----------------
Exemplo n.º 12
0
    def get_influence_on_test_loss(self,
                                   test_indices,
                                   train_idx,
                                   approx_type='cg',
                                   approx_params=None,
                                   force_refresh=True,
                                   test_description=None,
                                   loss_type='normal_loss',
                                   X=None,
                                   Y=None):
        """Predict how training points change the loss of one test point.

        Either ``train_idx`` is given, or ``train_idx`` is None and phantom
        points ``X``/``Y`` are supplied. The test index must stay consistent
        between models because mini-batching permutes the dataset order.

        Each call stores the test-specific parameters, gradient ops and
        Hessian-vector-product op on ``self`` and creates new placeholders in
        the graph.

        :param test_indices: Sequence holding exactly one test index.
        :param train_idx: Not None to score
                          ``self.train_indices_of_test_case``; None to score
                          the phantom points ``X``/``Y``. Only its None-ness
                          is inspected here.
        :param approx_type: Forwarded to ``self.get_inverse_hvp``; also part
                            of the cache file name.
        :param approx_params: Forwarded to ``self.get_inverse_hvp``.
        :param force_refresh: When false and a cached inverse HVP file exists
                              in ``self.train_dir``, load it instead of
                              recomputing.
        :param test_description: Label used in the cache file name; defaults
                                 to ``test_indices``.
        :param loss_type: Forwarded to ``self.get_r_grad_loss``; also part of
                          the cache file name.
        :param X: Phantom inputs, indexed as ``X[i, :]``; requires
                  ``train_idx is None``.
        :param Y: Phantom labels, same length as ``X``.
        :return: 1-D numpy array with one predicted loss difference per
                 scored training point.
        :raises ValueError: If the ``train_idx`` / ``X`` / ``Y`` combination
                            is inconsistent.
        """
        # `raise (ValueError, msg)` raises TypeError in Python 3, so the
        # exceptions are constructed explicitly.
        if train_idx is None:
            if (X is None) or (Y is None):
                raise ValueError(
                    'X and Y must be specified if using phantom points.')
            if X.shape[0] != len(Y):
                raise ValueError('X and Y must have the same length.')
        elif (X is not None) or (Y is not None):
            raise ValueError(
                'X and Y cannot be specified if train_idx is specified.')

        assert len(test_indices) == 1
        self.test_index = test_indices[0]
        self.train_indices_of_test_case = self.get_train_indices_of_test_case(
            test_indices)
        self.params_test = self.get_test_params(test_index=test_indices)
        self.vec_to_list_test = self.get_vec_to_list_fn_test()
        self.grad_total_loss_op_test = self.get_test_grad(
            self.grad_total_loss_op)
        self.grad_loss_no_reg_op_test = self.get_test_grad(
            self.grad_loss_no_reg_op)
        self.grad_loss_r_test = self.get_test_grad(self.grad_loss_r)

        self.v_placeholder_test = [
            tf.placeholder(tf.float32, shape=a.get_shape())
            for a in self.params_test
        ]
        self.hessian_vector_test = self.hessian_vector_product_test(
            self.total_loss, self.params, self.v_placeholder_test)

        test_grad_loss_r = self.get_r_grad_loss(test_indices,
                                                loss_type=loss_type)

        print('Norm of test gradient: %s' %
              np.linalg.norm(np.concatenate(test_grad_loss_r)))

        if test_description is None:
            test_description = test_indices

        approx_filename = os.path.join(
            self.train_dir, '%s-%s-%s-test-%s.npz' %
            (self.model_name, approx_type, loss_type, test_description))

        # Start the timer before branching so the duration is defined on both
        # the cache-hit and the recompute path.
        start_time = time.time()
        if os.path.exists(approx_filename) and not force_refresh:
            inverse_hvp = list(np.load(approx_filename)['inverse_hvp'])
            print('Loaded inverse HVP from %s' % approx_filename)
        else:
            inverse_hvp = self.get_inverse_hvp(test_grad_loss_r, approx_type,
                                               approx_params)
            np.savez(approx_filename, inverse_hvp=inverse_hvp)
            print('Saved inverse HVP to %s' % approx_filename)

        duration_1 = time.time() - start_time
        print('Inverse HVP took %s sec' % duration_1)

        start_time = time.time()
        # The flattened inverse HVP is the same for every training point.
        flat_inverse_hvp = np.concatenate(inverse_hvp)
        if train_idx is None:
            # Phantom points: use the full-parameter gradient op and normalise
            # by the total number of training examples.
            num_to_remove = len(Y)
            predicted_loss_diffs = np.zeros([num_to_remove])
            for counter in np.arange(num_to_remove):
                single_train_feed_dict = self.fill_feed_dict_manual(
                    X[counter, :], [Y[counter]])
                train_grad_loss_val = self.sess.run(
                    self.grad_total_loss_op, feed_dict=single_train_feed_dict)
                predicted_loss_diffs[counter] = np.dot(
                    flat_inverse_hvp,
                    np.concatenate(
                        train_grad_loss_val)) / self.num_train_examples

        else:
            # Real training points: use the test-specific gradient op and
            # normalise by the number of points related to the test case.
            num_to_remove = len(self.train_indices_of_test_case)
            predicted_loss_diffs = np.zeros([num_to_remove])
            for counter, idx_to_remove in enumerate(
                    self.train_indices_of_test_case):
                single_train_feed_dict = self.fill_feed_dict_with_one_ex(
                    self.data_sets.train, idx_to_remove)
                train_grad_loss_val = self.sess.run(
                    self.grad_total_loss_op_test,
                    feed_dict=single_train_feed_dict)
                predicted_loss_diffs[counter] = np.dot(
                    flat_inverse_hvp,
                    np.concatenate(train_grad_loss_val)) / \
                    self.train_indices_of_test_case.shape[0]

        duration_2 = time.time() - start_time
        print('Multiplying by %s train examples took %s sec' %
              (num_to_remove, duration_2))
        print("Total time is %s sec" % (duration_1 + duration_2))

        return predicted_loss_diffs
import matplotlib.pyplot as plt
# Graph-mode (TF1-style) script: placeholders below require eager execution off.
tf.disable_eager_execution()

# Training data: sin(x) sampled on [0, 10) in steps of 0.1.
x_train = np.arange(0, 10, 0.1)
y_train = np.sin(x_train)
plt.plot(x_train, y_train)
plt.show()

num_inputs = 1
num_outputs = 1
hidden_layers = 3
hidden_units = 16
learning_rate = 0.01
batch_size = 1024

inputs = tf.placeholder(tf.float32, shape=(None, 1))
targets = tf.placeholder(tf.float32, shape=(None, 1))

# Input layer: num_inputs -> hidden_units.
# NOTE(review): `shape=(hidden_units)` is a plain int, not a 1-tuple; the same
# applies to every bias below -- confirm this is intended.
w = tf.get_variable("weight-1", shape=(num_inputs, hidden_units))
b = tf.get_variable("bias-1", shape=(hidden_units))
output = tf.matmul(inputs, w) + b

# Hidden layers: hidden_units -> hidden_units, repeated hidden_layers times.
# NOTE(review): no activation is applied between layers, so the stacked
# matmul + bias steps compose into a single affine map of `inputs`.
for i in range(hidden_layers):
    w = tf.get_variable(f"weight{i}", shape=(hidden_units, hidden_units))
    b = tf.get_variable(f"bias{i}", shape=(hidden_units))
    output = tf.matmul(output, w) + b

# Output layer: hidden_units -> num_outputs.
w = tf.get_variable("weight-fin", shape=(hidden_units, num_outputs))
b = tf.get_variable("bias-fin", shape=(num_outputs))
output = tf.matmul(output, w) + b
Exemplo n.º 14
0
    def __init__(self, network_name, state_size, output_shape, session, k_step=1, settings=None, worker_only=False):
        """Build the network graph inside ``self.scope`` and initialise its variables.

        All arguments are forwarded unchanged to ``network.__init__``. This
        method then creates the input placeholders, instantiates
        ``self.network_type`` as ``self.main_net``, and -- unless
        ``self.worker_only`` is set -- adds the training placeholders,
        targets and training ops. Finally it collects the global variables
        under the scope, builds weight-setting ops for them and runs their
        initialiser in ``self.session``.

        NOTE(review): ``self.scope``, ``self.state_size_vec``,
        ``self.state_size_vis``, ``self.network_type``, ``self.settings`` and
        ``self.worker_only`` are presumably set by ``network.__init__`` --
        confirm against the base class.
        """
        network.__init__(self, network_name, state_size, output_shape, session, k_step=k_step, settings=settings, worker_only=worker_only)
        #Build network!
        with self.scope as scope:
            # One placeholder per entry of the state-size lists; the leading
            # dimension of each entry is replaced by None.
            self.vector_inputs          = [tf.placeholder(tf.float32, (None,)+s[1:], name='vector_input{}'.format(i)) for i,s in enumerate(self.state_size_vec)]
            self.visual_inputs          = [tf.placeholder(tf.float32, (None,)+s[1:], name='visual_input{}'.format(i)) for i,s in enumerate(self.state_size_vis)]
            # Scalar boolean fed at run time and passed to the net as `training`.
            self.training_tf = tf.placeholder(tf.bool, shape=())
            self.main_net  = self.network_type(
                network_name,
                self.output_shape,
                self.settings,
                full_network=(not worker_only or self.settings["workers_computes_advantages"]),
                training=self.training_tf,
                kbd_activation=N.action_softmax,
                raw_outputs=True,
            )
            self.v_tf, self.pi_tf = self.main_net(self.vector_inputs, self.visual_inputs)
            #
            if not self.worker_only: #For trainers
                # Rewards and done flags carry k_step+1 entries per sample.
                self.rewards_tf             =  tf.placeholder(tf.float32, (None, k_step+1, 1), name='reward'       )
                self.dones_tf               =  tf.placeholder(tf.int32,   (None, k_step+1, 1), name='done'         )
                self.actions_training_tf    =  tf.placeholder(tf.uint8,   (None, 2),           name='action'       )
                self.pieces_training_tf     =  tf.placeholder(tf.uint8,   (None, 1),           name='piece'        )
                self.probabilities_old_tf   =  tf.placeholder(tf.float32, (None, 1),           name='probabilities')
                self.target_value_tf, self.advantages_tf = self.create_targets(self.v_tf)

                #params
                # Scalar float placeholders handed to create_training_ops, so
                # their values are fed per run rather than baked into the graph.
                self.params = {
                    'ppo_epsilon'        : tf.placeholder(tf.float32, shape=(), name='ppo_epsilon'         ),
                    'clipping_parameter' : tf.placeholder(tf.float32, shape=(), name='clipping_parameter'  ),
                    'value_loss'         : tf.placeholder(tf.float32, shape=(), name='c_value_loss'        ),
                    'policy_loss'        : tf.placeholder(tf.float32, shape=(), name='c_policy_loss'       ),
                    'entropy_loss'       : tf.placeholder(tf.float32, shape=(), name='c_entropy_loss'      ),
                    'entropy_floor_loss' : tf.placeholder(tf.float32, shape=(), name='c_entropy_floor_loss'),
                    'rescaled_entropy'   : tf.placeholder(tf.float32, shape=(), name='c_rescaled_entropy'  ),
                    'lr'                 : tf.placeholder(tf.float32, shape=(), name='lr'                  ),
                }

                self.training_ops  = self.create_training_ops(
                    self.pi_tf,
                    self.v_tf,
                    self.target_value_tf,
                    self.advantages_tf,
                    self.actions_training_tf,
                    self.pieces_training_tf,
                    self.probabilities_old_tf,
                    self.params,
                )

            # Collected after the training ops are built, so any global
            # variables they created under this scope are included.
            self.variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)
            self.main_net_assign_list = self.create_weight_setting_ops(self.variables)
            self.init_ops = tf.variables_initializer(self.variables)
        #Run init-op
        self.session.run(self.init_ops)
def serving_input_receiver_fn():
    """Return a ``ServingInputReceiver`` for float32 inputs of shape
    ``[None, 28, 28]``.

    The same ``{'features': placeholder}`` mapping is used as both the
    features and the receiver tensors.
    """
    features_placeholder = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 28, 28])
    receiver_tensors = {'features': features_placeholder}
    return tf.estimator.export.ServingInputReceiver(receiver_tensors,
                                                    receiver_tensors)
Exemplo n.º 16
0
    def __init__(
        self,
        images: "tf.Tensor",
        model: Optional["FasterRCNNMetaArch"] = None,
        filename: Optional[str] = None,
        url: Optional[str] = None,
        sess: Optional["Session"] = None,
        is_training: bool = False,
        clip_values: Optional["CLIP_VALUES_TYPE"] = None,
        channels_first: bool = False,
        preprocessing_defences: Union["Preprocessor", List["Preprocessor"],
                                      None] = None,
        postprocessing_defences: Union["Postprocessor", List["Postprocessor"],
                                       None] = None,
        preprocessing: "PREPROCESSING_TYPE" = (0.0, 1.0),
        attack_losses: Tuple[str, ...] = (
            "Loss/RPNLoss/localization_loss",
            "Loss/RPNLoss/objectness_loss",
            "Loss/BoxClassifierLoss/localization_loss",
            "Loss/BoxClassifierLoss/classification_loss",
        ),
    ):
        """
        Create a TensorFlowFasterRCNN estimator around a Faster-RCNN detection graph.

        :param images: Input samples of shape (nb_samples, height, width, nb_channels). One set of groundtruth
                       placeholders is created per sample, so the first dimension is iterated with `range`.
        :param model: A TensorFlow Faster-RCNN model. The output that can be computed from the model includes a tuple
                      of (predictions, losses, detections):

                        - predictions: a dictionary holding "raw" prediction tensors.
                        - losses: a dictionary mapping loss keys (`Loss/RPNLoss/localization_loss`,
                                  `Loss/RPNLoss/objectness_loss`, `Loss/BoxClassifierLoss/localization_loss`,
                                  `Loss/BoxClassifierLoss/classification_loss`) to scalar tensors representing
                                  corresponding loss values.
                        - detections: a dictionary containing final detection results.

                      If `None`, a model is loaded from `filename` and `url`.
        :param filename: Filename of the detection model without filename extension. Only used when `model` is `None`.
        :param url: URL to download archive of detection model including filename extension. Only used when `model`
                    is `None`; if either `filename` or `url` is missing, both fall back to the built-in
                    `faster_rcnn_inception_v2_coco_2017_11_08` defaults.
        :param sess: Computation session. A new session is created (with a warning) when `None`.
        :param is_training: A boolean indicating whether the training version of the computation graph should be
                            constructed.
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
                            maximum values allowed for input image features. When given, every minimum must equal 0
                            and every maximum must equal 1.
        :param channels_first: Set channels first or last.
        :param preprocessing_defences: Must be `None`; this estimator rejects preprocessing defences.
        :param postprocessing_defences: Must be `None`; this estimator rejects postprocessing defences.
        :param preprocessing: Tuple of the form `(subtractor, divider)` of floats or `np.ndarray` of values to be
                              used for data preprocessing. The first value will be subtracted from the input. The
                              input will then be divided by the second one.
        :param attack_losses: Tuple of loss keys stored on the estimator; the default selects the four keys listed
                              under `model`.
        :raises ValueError: If `clip_values` is not (0, 1) or if any defence is supplied.
        """
        import tensorflow.compat.v1 as tf  # lgtm [py/repeated-import]

        # Delegate the common estimator setup to the parent class.
        super().__init__(
            model=model,
            clip_values=clip_values,
            channels_first=channels_first,
            preprocessing_defences=preprocessing_defences,
            postprocessing_defences=postprocessing_defences,
            preprocessing=preprocessing,
        )

        # Only inputs normalised to [0, 1] are accepted.
        if self.clip_values is not None:
            clip_error = "This classifier requires normalized input images with clip_vales=(0, 1)."
            if not np.all(self.clip_values[0] == 0):
                raise ValueError(clip_error)
            if not np.all(self.clip_values[1] == 1):  # pragma: no cover
                raise ValueError(clip_error)

        # Defences are not supported by this estimator.
        if self.preprocessing_defences is not None:
            raise ValueError(
                "This estimator does not support `preprocessing_defences`.")
        if self.postprocessing_defences is not None:
            raise ValueError(
                "This estimator does not support `postprocessing_defences`.")

        def make_placeholders(dtype, shape, prefix):
            # One placeholder per input sample, named "<prefix>_<sample index>".
            return [
                tf.placeholder(dtype=dtype, shape=shape, name=f"{prefix}_{i}")
                for i in range(images.shape[0])
            ]

        # Groundtruth boxes, classes and weights, in that creation order.
        self._groundtruth_boxes_list: List["tf.Tensor"] = make_placeholders(
            tf.float32, (None, 4), "groundtruth_boxes")
        self._groundtruth_classes_list: List["tf.Tensor"] = make_placeholders(
            tf.int32, (None, ), "groundtruth_classes")
        self._groundtruth_weights_list: List["tf.Tensor"] = make_placeholders(
            tf.float32, (None, ), "groundtruth_weights")

        # Work out where the detection model comes from: a caller-supplied
        # model takes precedence; otherwise it is downloaded from filename/url,
        # falling back to the default checkpoint if either is missing.
        if model is not None:
            source_filename, source_url = None, None
        else:
            if filename is None or url is None:
                filename, url = (
                    "faster_rcnn_inception_v2_coco_2017_11_08",
                    "http://download.tensorflow.org/models/object_detection/"
                    "faster_rcnn_inception_v2_coco_2017_11_08.tar.gz",
                )
            source_filename, source_url = filename, url

        loaded = self._load_model(
            images=images,
            filename=source_filename,
            url=source_url,
            obj_detection_model=model,
            is_training=is_training,
            groundtruth_boxes_list=self._groundtruth_boxes_list,
            groundtruth_classes_list=self._groundtruth_classes_list,
            groundtruth_weights_list=self._groundtruth_weights_list,
        )
        self._model, self._predictions, self._losses, self._detections = loaded

        # Save new attributes
        self._input_shape = images.shape.as_list()[1:]
        self.is_training: bool = is_training
        self.images: Optional["tf.Tensor"] = images
        self.attack_losses: Tuple[str, ...] = attack_losses

        # Use the caller's session when given, otherwise open a fresh one.
        if sess is not None:  # pragma: no cover
            self._sess = sess
        else:
            logger.warning("A session cannot be None, create a new session.")
            self._sess = tf.Session()

        # Initialise global, then local, variables in that session.
        self._sess.run(tf.global_variables_initializer())
        self._sess.run(tf.local_variables_initializer())
Exemplo n.º 17
0
def run_differentially_private_federated_averaging(loss,
                                                   train_op,
                                                   eval_correct,
                                                   data,
                                                   data_placeholder,
                                                   label_placeholder,
                                                   privacy_agent=None,
                                                   b=10,
                                                   e=4,
                                                   record_privacy=True,
                                                   m=0,
                                                   sigma=0,
                                                   eps=8,
                                                   save_dir=None,
                                                   log_dir=None,
                                                   max_comm_rounds=3000,
                                                   gm=True,
                                                   saver_func=create_save_dir,
                                                   save_params=False):
    """
    Simulate a federated learning setting with differential privacy tracking.

    All trainable tensorflow variables of the default graph are treated as the model. In every communication round a
    random subset of clients is drawn, each client trains on its own data starting from the current global model, and
    the client updates are combined through ``WeightsAccountant.Update_via_GaussianMechanism``.
    This function must therefore be run inside a Graph as follows:
    --------------------------------------------------------------------------------------------------------------------

    with tf.Graph().as_default():

        train_op, eval_correct, loss, data_placeholder, labels_placeholder = Some_function_that_builds_TF_graph()

        Accuracy_accountant, Delta_accountant, model = (
            run_differentially_private_federated_averaging(loss, train_op, eval_correct, DATA, data_placeholder,
                                                           labels_placeholder))
    --------------------------------------------------------------------------------------------------------------------
    The graph that train_op, loss and eval_op belong to should have a global_step variable.

    :param loss:                TENSORFLOW node that computes the current loss
    :param train_op:            TENSORFLOW Training_op
    :param eval_correct:        TENSORFLOW node that evaluates the number of correct predictions
    :param data:                A class instance; the attributes read by this function are:
                                .sorted_x_train : The training data (converted with np.asarray).
                                .sorted_y_train : The training labels (converted with np.asarray).
                                                  The indices should correspond to .sorted_x_train, i.e. a single
                                                  index identifies one data(x)-label(y) pair used for training.
                                .client_set     : A nested list or numpy array. len(data.client_set) is the total
                                                  number of clients. For any j, data.client_set[j] is a list (or
                                                  array) holding indices. These indices specify the data points
                                                  that client j holds,
                                                  i.e. if i in data.client_set[j], then client j owns (x_i, y_i)
                                .x_vali         : The validation data.
                                .y_vali         : The validation labels.
    :param data_placeholder:    The placeholder from the tensorflow graph that is used to feed the model with data
    :param label_placeholder:   The placeholder from the tensorflow graph that is used to feed the model with labels
    :param privacy_agent:       A class instance that has callables .get_m(r) .get_Sigma(r) .get_bound(), where r is
                                the communication round. If falsy, a default PrivAgent is created.
    :param b:                   Batch parameter forwarded to Flag. NOTE(review): the training loop passes FLAGS.b to
                                np.split as the number of equal sections per client - confirm how Flag /
                                check_validaity_of_FLAGS derive FLAGS.b from this value.
    :param e:                   Epochs to run on each client
    :param record_privacy:      Whether to record the privacy or not
    :param m:                   If specified, a privacyAgent is not used, instead the parameter is kept constant
    :param sigma:               If specified, a privacyAgent is not used, instead the parameter is kept constant
    :param eps:                 The epsilon for epsilon-delta privacy
    :param save_dir:            Directory to store the process
    :param log_dir:             Directory to store the graph
    :param max_comm_rounds:     The maximum number of allowed communication rounds
    :param gm:                  Whether to use a Gaussian Mechanism or not.
    :param saver_func:          A function that specifies where and how to save progress: Note that the usual tensorflow
                                tracking will not work
    :param save_params:         If True, weights_accountant.save_params(save_dir) is called after every round.

    :return:                    (accuracy_accountant, delta_accountant, model) when the last recorded delta exceeds
                                privacy_agent.get_bound(); ([], [], []) when the round loop ends in any other way
                                (maximum number of communication rounds reached).

    """

    # If no privacy agent was specified, the default privacy agent is used.
    if not privacy_agent:
        privacy_agent = PrivAgent(N=len(data.client_set),
                                  Name='default_agent',
                                  comm_round=max_comm_rounds)

    # A Flags instance is created that will fuse all specified parameters and default those that are not specified.
    FLAGS = Flag(len(data.client_set), b, e, record_privacy, m, sigma, eps,
                 save_dir, log_dir, max_comm_rounds, gm, privacy_agent)

    # Check whether the specified parameters make sense.
    FLAGS = check_validaity_of_FLAGS(FLAGS)

    # At this point, FLAGS.save_dir specifies both; where we save progress and where we assume the data is stored
    save_dir = saver_func(FLAGS)

    # This function will retrieve the variable associated to the global step and create nodes that serve to
    # increase and reset it to a certain value.
    increase_global_step, set_global_step = global_step_creator()

    # - model_placeholder : a dictionary in which there is a placeholder stored for every trainable variable defined
    #                       in the tensorflow graph. Each placeholder has the same shape as its variable and is
    #                       named through Vname_to_Pname; the dictionary keys come from Vname_to_FeedPname.
    model_placeholder = dict(
        zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()], [
            tf.placeholder(
                name=Vname_to_Pname(var), shape=var.shape, dtype=tf.float32)
            for var in tf.trainable_variables()
        ]))

    # - assignments : a list of nodes. when run, all trainable variables are set to the value specified through
    #                 the placeholders in 'model_placeholder'.
    assignments = [
        tf.assign(var, model_placeholder[Vname_to_FeedPname(var)])
        for var in tf.trainable_variables()
    ]

    # load_from_directory_or_initialize checks whether there is a model at 'save_dir' corresponding to the one we
    # are building and, if so, training is resumed. It also provides the privacy accountant 'acc'.
    model, accuracy_accountant, delta_accountant, acc, real_round, FLAGS, computed_deltas = \
        load_from_directory_or_initialize(save_dir, FLAGS)

    # - m : amount of clients participating in a round
    # - sigma : variable for the Gaussian Mechanism.
    # Both will only be used if no Privacy_Agent is deployed.
    m = int(FLAGS.m)
    sigma = float(FLAGS.sigma)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # If there was no loadable model, we initialize a model:
    # - model : dictionary having as keys the names of the placeholders associated to each variable. It will serve
    #           as a feed_dict to assign values to the placeholders which are used to set the variables to
    #           specific values.
    if not model:
        model = dict(
            zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()],
                [sess.run(var) for var in tf.trainable_variables()]))
        model['global_step_placeholder:0'] = 0

        real_round = 0

        weights_accountant = []

    # If a model is loaded, and we are not relearning it (relearning means that we once already finished such a model
    # and we are learning it again to average the outcomes), we have to get the privacy accountant up to date. This
    # means, that we have to iterate the privacy accountant over all the m, sigmas that correspond to already completed
    # communication
    if not FLAGS.relearn and real_round > 0:
        bring_Accountant_up_to_date(acc, sess, real_round, privacy_agent,
                                    FLAGS)

    # This is where the actual communication rounds start:

    data_set_asarray = np.asarray(data.sorted_x_train)
    label_set_asarray = np.asarray(data.sorted_y_train)

    for r in range(FLAGS.max_comm_rounds):

        # First, we check whether we are loading a model, if so, we have to skip the first allocation, as it took place
        # already.
        if not (FLAGS.loaded and r == 0):
            # Setting the trainable Variables in the graph to the values stored in feed_dict 'model'
            sess.run(assignments, feed_dict=model)

            # create a feed-dict holding the validation set.
            feed_dict = {
                str(data_placeholder.name): np.asarray(data.x_vali),
                str(label_placeholder.name): np.asarray(data.y_vali)
            }

            # compute the loss on the validation set.
            global_loss = sess.run(loss, feed_dict=feed_dict)
            count = sess.run(eval_correct, feed_dict=feed_dict)
            accuracy = float(count) / float(len(data.y_vali))
            accuracy_accountant.append(accuracy)

            print_loss_and_accuracy(global_loss, accuracy)

        last_delta = delta_accountant[-1]
        if last_delta > privacy_agent.get_bound() or math.isnan(last_delta):
            print('The last step exhausted the privacy budget!!!')
            # A NaN delta only prints the message above; training continues without saving.
            if not math.isnan(last_delta):
                # Mark the end of the run with a trailing NaN in the saved accountants, then stop.
                save_progress(save_dir, model,
                              delta_accountant + [float('nan')],
                              accuracy_accountant + [float('nan')],
                              privacy_agent, FLAGS)
                return accuracy_accountant, delta_accountant, model
        else:
            save_progress(save_dir, model, delta_accountant,
                          accuracy_accountant, privacy_agent, FLAGS)

        ############################################################################################################
        # Start of a new communication round

        real_round = real_round + 1
        if real_round >= FLAGS.max_comm_rounds:
            print('Max communication rounds meet. Stop.')
            break

        print_new_comm_round(real_round)

        if FLAGS.priv_agent:
            m = int(privacy_agent.get_m(int(real_round)))
            sigma = privacy_agent.get_Sigma(int(real_round))

        print('Clients participating: ' + str(m))

        # Randomly choose a total of m (out of n) client-indices that participate in this round
        # randomly permute a range-list of length n: [1,2,3...n] --> [5,2,7..3]
        perm = np.random.permutation(FLAGS.n)

        # Use the first m entries of the permuted list to decide which clients (and their sets) will participate in
        # this round. participating_clients is therefore a nested list of length m. participating_clients[i] should be
        # a list of integers that specify which data points are held by client i. Note that this nested list is a
        # mapping only; the actual data is read from data_set_asarray / label_set_asarray.
        s = perm[0:m].tolist()
        participating_clients = [data.client_set[k] for k in s]

        # For each client c (out of the m chosen ones):
        for c in range(m):

            # Assign the global model and set the global step. This is obsolete when the first client trains,
            # but as soon as the next client trains, all progress allocated before, has to be discarded and the
            # trainable variables reset to the values specified in 'model'
            sess.run(assignments + [set_global_step], feed_dict=model)

            # allocate a list, holding data indices associated to client c and split into batches.
            # np.split with an integer divides the indices into FLAGS.b equally sized sections.
            data_ind = np.split(np.asarray(participating_clients[c]), FLAGS.b,
                                0)

            # The loop variable is deliberately not called 'e', so the parameter 'e' is not shadowed.
            for _epoch in range(int(FLAGS.e)):
                for batch_ind in data_ind:
                    # increase the global_step count (it's used for the learning rate.)
                    real_step = sess.run(increase_global_step)

                    # Fill a feed dictionary with the actual set of data and labels using the data and labels
                    # associated to the indices stored in batch_ind:
                    batch_rows = [int(j) for j in batch_ind]
                    feed_dict = {
                        str(data_placeholder.name):
                        data_set_asarray[batch_rows],
                        str(label_placeholder.name):
                        label_set_asarray[batch_rows]
                    }

                    # Run one optimization step.
                    sess.run([train_op], feed_dict=feed_dict)

            if c == 0:

                # If we just trained the first client in a comm_round, We override the old weights_accountant (or,
                # if this was the first comm_round, we allocate a new one. The Weights_accountant keeps track of
                # all client updates throughout a communication round.
                weights_accountant = WeightsAccountant(sess, model, sigma,
                                                       real_round)
            else:
                # Allocate the client update, if this is not the first client in a communication round
                weights_accountant.allocate(sess)

        # End of a communication round
        ############################################################################################################

        print('Communication round %s completed' % str(real_round))
        # Compute a new model according to the updates and the Gaussian mechanism specifications from FLAGS
        # Also, if computed_deltas is an empty list, compute delta; the probability of Epsilon-Differential Privacy
        # being broken by allocating the model. If computed_deltas is passed, instead of computing delta, the
        # pre-computed value is used.
        model, delta = weights_accountant.Update_via_GaussianMechanism(
            sess, acc, FLAGS, computed_deltas)

        # append delta to a list.
        delta_accountant.append(delta)

        # Set the global_step to the current step of the last client, such that the next clients can feed it into
        # the learning rate.
        model['global_step_placeholder:0'] = real_step

        # PRINT the progress and stage of affairs.
        print(' - Epsilon-Delta Privacy:' + str([FLAGS.eps, delta]))

        if save_params:
            weights_accountant.save_params(save_dir)

    return [], [], []
Exemplo n.º 18
0
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Load the CSV file as float32: every column except the last is a feature,
# the last column holds the class label.
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
# Indexing with the list [-1] keeps the label column two-dimensional,
# which matches the [None, 1] shape declared for placeholder Y below.
y_data = xy[:, [-1]]

nb_classes = 7
# X takes 16 features per row; Y takes one integer class label per row.
X = tf.placeholder(tf.float32, shape=[None, 16])
Y = tf.placeholder(tf.int32, shape=[None, 1])

# tf.one_hot appends a class axis to Y's [None, 1] shape, so the result is
# reshaped to a flat [-1, nb_classes] matrix before it is used as labels.
Y_one_hot = tf.one_hot(Y, nb_classes)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])

# Single linear layer: 16 inputs -> nb_classes logits.
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Per-example cross entropy computed from the raw logits (not from
# `hypothesis`), then averaged into the scalar training cost.
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                 labels=Y_one_hot)

cost = tf.reduce_mean(cost_i)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy: fraction of rows whose arg-max prediction equals the arg-max
# of the one-hot label.
prediction = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Exemplo n.º 19
0
def function_train(image, full_x, full_y, full_sigma, intensities):
    """
    Train one model per Monte Carlo replica of ``full_y`` and save the best checkpoint of each.

    ``N_rep`` pseudo-data replicas are generated by adding Gaussian noise with scale ``full_sigma`` to ``full_y``.
    For every replica the data is split into a training and a validation part, the network returned by
    ``make_model`` is trained with RMSProp on the sigma-weighted squared error, a checkpoint is written every
    ``display_step`` epochs, and the checkpoint with the lowest recorded validation cost is restored and saved
    again under ``<map_name>/Best_models/<date>/best_model_<replica>``.

    :param image:       object providing ``deltaE`` and ``l``; used to build the prediction grid.
    :param full_x:      input array; it is indexed as ``full_x[:, 0]``, so it has to be two-dimensional.
    :param full_y:      target values, one per row of ``full_x``.
    :param full_sigma:  uncertainties of ``full_y``; used as noise scale for the replicas and as weight in the cost.
    :param intensities: sequence of intensity values used for the two-input prediction grid.
    :return:            None. Results are written to disk.
    """

    n_input = 1

    tf.reset_default_graph()
    tf.disable_eager_execution()
    now = datetime.now()
    # The two-input placeholder is created first and, for n_input == 1, replaced by a second placeholder
    # requested under the same name. Both creations are kept so the node names in the graph stay unchanged.
    x = tf.placeholder("float", [None, 2],
                       name="x")
    y = tf.placeholder("float", [None, 1], name="y")
    sigma = tf.placeholder("float", [None, 1], name="sigma")

    if n_input == 1:
        x = tf.placeholder("float", [None, 1], name="x")

    predictions = make_model(x, 1)

    # MONTE CARLO: one column of full_y_reps per replica.
    N_rep = 10
    N_full = len(full_y)

    full_y_reps = np.zeros(shape=(N_full, N_rep))
    for i in range(N_rep):
        full_rep = np.random.normal(0, full_sigma)
        full_y_reps[:, i] = (full_y + full_rep).reshape(N_full)

    std_reps = np.std(full_y_reps, axis=1)
    mean_reps = np.mean(full_y_reps, axis=1)

    print('MC pseudo data has been created for ', N_rep, ' replicas')

    # Passed to train_test_split as test_size, i.e. the share of the data held out for validation.
    ratio_test = 0.8

    # Prediction grid: every deltaE value paired with every intensity.
    predict_x = np.empty((0, 2))
    for i in range(len(intensities)):
        predict_x = np.concatenate(
            (predict_x,
             np.vstack((image.deltaE, np.ones(image.l) * intensities[i])).T))
    N_pred = image.l * len(intensities)
    predict_x = predict_x.reshape(N_pred, 2)
    if n_input == 1:
        # With a single input only deltaE is used, and only the first column of full_x.
        N_pred = image.l
        predict_x = image.deltaE.reshape(N_pred, 1)
        full_x = full_x[:, 0]

    chi_array = []

    cost = tf.reduce_mean(tf.square((y - predictions) / sigma),
                          name="cost_function")
    eta = 6.0e-3
    optimizer = tf.train.RMSPropOptimizer(learning_rate=eta,
                                          decay=0.9,
                                          momentum=0.0,
                                          epsilon=1e-10).minimize(cost)
    saver = tf.train.Saver(max_to_keep=1000)

    for i in range(0, N_rep):

        # Pick the first output directory name that does not exist yet: 'Models', 'Models0', 'Models1', ...
        # A dedicated counter is used here; reusing 'i' would overwrite the replica index, so that the
        # replica column selected below depended on how many 'Models*' directories already exist.
        map_name = 'Models'
        dir_suffix = 0
        while os.path.exists(map_name):
            map_name = 'Models' + str(dir_suffix)
            dir_suffix += 1

        replica_y = full_y_reps[:, i].reshape(N_full, 1)

        train_x, test_x, train_y, test_y, train_sigma, test_sigma = \
            train_test_split(full_x, replica_y, full_sigma, test_size=ratio_test)

        N_train = len(train_y)
        N_test = len(test_y)
        if n_input == 2:
            train_x, test_x = train_x.reshape(N_train,
                                              2), test_x.reshape(N_test, 2)
        else:
            train_x, test_x = train_x.reshape(N_train,
                                              1), test_x.reshape(N_test, 1)
        train_y, test_y = train_y.reshape(N_train,
                                          1), test_y.reshape(N_test, 1)
        train_sigma, test_sigma = train_sigma.reshape(N_train,
                                                      1), test_sigma.reshape(
                                                          N_test, 1)

        ### Train and validate
        prev_test_cost = 0
        prev_epoch = 0
        avg_cost = 0

        array_train = []
        array_test = []

        with tf.Session() as sess:

            sess.run(tf.global_variables_initializer())

            training_epochs = 20000
            display_step = 1000

            for epoch in range(training_epochs):

                _, c = sess.run([optimizer, cost],
                                feed_dict={
                                    x: train_x,
                                    y: train_y,
                                    sigma: train_sigma
                                })

                avg_cost = c

                test_cost = cost.eval({
                    x: test_x,
                    y: test_y,
                    sigma: test_sigma
                })

                if epoch % display_step == 0:
                    print("Epoch:", '%04d' % (epoch + 1), "| Training cost=",
                          "{:.9f}".format(avg_cost), "| Validation cost=",
                          "{:.9f}".format(test_cost))
                    array_train.append(avg_cost)
                    array_test.append(test_cost)
                    # Checkpoint on every display step; global_step=epoch puts the epoch into the file name.
                    path_to_data = map_name + '/All_models/'
                    Path(path_to_data).mkdir(parents=True, exist_ok=True)
                    saver.save(sess,
                               path_to_data + 'my-model.ckpt',
                               global_step=epoch,
                               write_meta_graph=False)

                # NOTE(review): prev_test_cost starts at 0 and the cost is a mean of squares, so this branch
                # looks unreachable; neither value is used for the model selection below.
                elif test_cost < prev_test_cost:
                    prev_test_cost = test_cost
                    prev_epoch = epoch

            # array_test holds one validation cost per checkpoint, so the position of the minimum times
            # display_step is the epoch number embedded in the corresponding checkpoint name.
            best_iteration = np.argmin(array_test)
            best_epoch = best_iteration * display_step
            best_model = map_name + '/All_models/my-model.ckpt-%(s)s' % {
                's': best_epoch
            }

            print("Optimization %(i)s Finished! Best model after epoch %(s)s" %
                  {
                      'i': i,
                      's': best_epoch
                  })

            dt_string = now.strftime("%d.%m.%Y %H:%M:%S")
            d_string = now.strftime("%d.%m.%Y")
            t_string = now.strftime("%H:%M:%S")

            saver.restore(sess, best_model)
            path_to_data = map_name + '/Best_models/%(s)s/' % {'s': d_string}
            Path(path_to_data).mkdir(parents=True, exist_ok=True)
            saver.save(sess, path_to_data + 'best_model_%(i)s' % {'i': i})

            predictions_values = sess.run(predictions,
                                          feed_dict={
                                              x: train_x,
                                              y: train_y
                                          })

            extrapolation = sess.run(predictions, feed_dict={x: predict_x})

        # The session is closed by the 'with' block above; no explicit close is needed.

        nownow = datetime.now()
        print("time elapsed", nownow - now)

        path_to_data = 'Data/Results/%(date)s/' % {"date": d_string}
        Path(path_to_data).mkdir(parents=True, exist_ok=True)
Exemplo n.º 20
0
def train(cfg: DictConfig, work_dir: Optional[str] = None, **session_kwargs):
    """Run meta-learning training as described by ``cfg``.

    Parameters
    ----------
    cfg : DictConfig
        The experiment configuration.

    work_dir : str, optional
        Directory that receives checkpoints and the per-task summary logs.
        Defaults to `os.getcwd()` when None.

    **session_kwargs : kwargs
        Forwarded unchanged to `utils.session` to configure the TF session.
    """
    if work_dir is None:
        work_dir = os.getcwd()

    # Seed python, numpy and tensorflow from the same configured value.
    for seed_fn in (random.seed, np.random.seed, tf.set_random_seed):
        seed_fn(cfg.run.seed)

    with utils.session(**session_kwargs) as sess:
        meta_learner = utils.build_and_initialize(cfg,
                                                  mode=common.ModeKeys.TRAIN)

        # One summary writer per training task, each in its own log dir.
        writers = []
        for task in cfg[common.ModeKeys.TRAIN].tasks:
            task_logdir = os.path.join(work_dir, task.log_dir)
            writers.append(tf.summary.FileWriter(logdir=task_logdir))

        # Scalar summaries are fed through placeholders because the logged
        # values are computed on the python side.
        label_budget_ph = tf.placeholder(tf.int32, shape=())
        loss_ph = tf.placeholder(tf.float32, shape=())
        tf.summary.scalar("label_budget", label_budget_ph)
        tf.summary.scalar("loss", loss_ph)
        merged = tf.summary.merge_all()

        checkpoint = tf.train.Checkpoint(
            model_state=meta_learner.model.trainable_parameters,
            optimizer=meta_learner.optimizer,
        )
        saver = tf.train.CheckpointManager(checkpoint,
                                           directory=work_dir,
                                           max_to_keep=5)

        logger.info("Training...")
        for i in range(cfg.train.max_steps):
            if cfg.train.optimizer.n is None:
                losses = train_step(meta_learner, sess=sess)
            else:
                # Multi-step optimizer: run n steps and average the losses
                # position-wise across the steps.
                repeated = [
                    train_step(meta_learner, sess=sess)
                    for _ in range(cfg.train.optimizer.n)
                ]
                losses = [np.mean(per_task) for per_task in zip(*repeated)]

            is_last_step = i + 1 == cfg.train.max_steps

            # Log metrics on the configured interval and on the final step.
            if i % cfg.train.log_interval == 0 or is_last_step:
                pieces = [f"step: {i}"]
                for loss, td in zip(losses, meta_learner.task_dists):
                    pieces.append(f"\n{td.name}:")
                    if td.num_requested_labels:
                        pieces.append(
                            f"\n* requested labels: {td.num_requested_labels}")
                    pieces.append(f"\n* loss: {loss:.6f}")
                logger.info("".join(pieces))

                for loss, td, writer in zip(losses, meta_learner.task_dists,
                                            writers):
                    summary = sess.run(
                        merged,
                        feed_dict={
                            loss_ph: loss,
                            label_budget_ph: td.num_requested_labels,
                        })
                    writer.add_summary(summary, i)
                    writer.flush()

            # Checkpoint on the configured interval and on the final step.
            if i % cfg.train.save_interval == 0 or is_last_step:
                saver.save(checkpoint_number=i)

            # Grow the task distributions on the budget interval, if one is
            # configured, and optionally re-initialize all global variables.
            budget_interval = cfg.train.budget_interval
            if budget_interval is not None and i % budget_interval == 0:
                for td, task in zip(meta_learner.task_dists, cfg.train.tasks):
                    td.expand(num_labeled_points=(task.labels_per_step * i),
                              sess=sess)
                if cfg.train.do_reinit:
                    sess.run(tf.global_variables_initializer())
Exemplo n.º 21
0
# Hyper-parameters: step size for gradient descent, number of training
# epochs, and how often (in epochs) progress is meant to be reported.
learning_Rate = 0.01
training_epochs = 2000
display_step = 200

# Training dataset
# Note: treat train_X as house sizes and train_y as house prices
train_X = np.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167,7.042,10.791,5.313,7.997,5.654,9.27,3.1])
train_y = np.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221,2.827,3.465,1.65,2.904,2.42,2.94,1.3])
n_samples = train_X.shape[0]

# Test dataset
test_X = np.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])
test_y = np.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])

# Placeholders for the predictor variable (X) and the target variable (y)
X = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

# Model weight and bias, each initialised with one random scalar
W = tf.Variable(np.random.randn(), name="weight")
b = tf.Variable(np.random.randn(), name="bias")

# Build the linear regression model
# Formula: y = w*X + b
linear_model = W * X + b

# Error: sum of squared residuals divided by 2 * n_samples
cost = tf.reduce_sum(tf.square(linear_model - y)) / (2*n_samples)

# Optimisation with gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_Rate).minimize(cost)
Exemplo n.º 22
0
def infer(train_dir,
          height,
          width,
          nch,
          d_i,
          d_o,
          G_dim,
          named_id_to_fps=None,
          id_name_tsv_fp=None):
    """Build the inference graph for the generator and export it to disk.

    The graph is written to ``<train_dir>/infer`` as ``infer.pbtxt`` and as
    the MetaGraph ``infer.meta``; the default graph is reset afterwards.
    Many nodes are created only for their names and are therefore not bound
    to python variables.

    train_dir: directory under which the ``infer`` folder is created.
    height, width, nch: image dimensions used to reshape the generated grid.
    d_i, d_o: sizes of the two latent parts ``zi`` and ``zo``.
    G_dim: forwarded to ``DCGANGenerator64x64`` as ``dim``.
    named_id_to_fps: optional mapping from identity name to a list of file
        paths; when given, identity metadata and sampling nodes are added.
    id_name_tsv_fp: optional TSV file whose second column (header row
        skipped) provides alternative names; only read when
        ``named_id_to_fps`` is given.
    """
    infer_dir = os.path.join(train_dir, 'infer')
    if not os.path.isdir(infer_dir):
        os.makedirs(infer_dir)

    # Number of zi / zo samples to draw in the sampling stage
    n_samp_zi = tf.placeholder(tf.int32, [], name='samp_zi_n')
    n_samp_zo = tf.placeholder(tf.int32, [], name='samp_zo_n')

    has_alt_names = id_name_tsv_fp is not None

    # Identity metadata and identity sampling nodes
    if named_id_to_fps is not None:
        num_ids = len(named_id_to_fps)
        tf.constant(num_ids, dtype=tf.int32, name='nids')
        sampled_ids = tf.random_uniform([n_samp_zi],
                                        0,
                                        num_ids,
                                        dtype=tf.int32,
                                        name='samp_id')

        # Identities sorted by name; each group's paths joined with commas
        ordered = sorted(named_id_to_fps.items(), key=lambda kv: kv[0])
        id_strings = [named_id for named_id, _ in ordered]
        fp_strings = [','.join(group_fps) for _, group_fps in ordered]
        id_tensor = tf.constant(id_strings,
                                dtype=tf.string,
                                name='meta_all_named_ids')
        fp_tensor = tf.constant(fp_strings,
                                dtype=tf.string,
                                name='meta_all_fps')

        # Alternative names (such as real names with spaces; not convenient for file paths)
        if has_alt_names:
            with open(id_name_tsv_fp, 'r') as tsv:
                rows = tsv.readlines()[1:]
            names = [row.split('\t')[1].strip() for row in rows]
            # From here on the id tensor refers to the alternative names.
            id_tensor = tf.constant(names,
                                    dtype=tf.string,
                                    name='meta_all_names')

        tf.gather(id_tensor, sampled_ids, name='samp_named_ids')
        tf.gather(fp_tensor, sampled_ids, name='samp_group_fps')
        if has_alt_names:
            tf.gather(names, sampled_ids, name='samp_names')

    # Uniform samplers for zi / zo in [-1, 1)
    tf.random_uniform([n_samp_zi, d_i],
                      -1.0,
                      1.0,
                      dtype=tf.float32,
                      name='samp_zi')
    tf.random_uniform([n_samp_zo, d_o],
                      -1.0,
                      1.0,
                      dtype=tf.float32,
                      name='samp_zo')

    # Latent inputs
    zi = tf.placeholder(tf.float32, [None, d_i], name='zi')
    zo = tf.placeholder(tf.float32, [None, d_o], name='zo')

    # Paired latent representation: row k of zi goes with row k of zo
    z = tf.concat([zi, zo], axis=1, name='z')

    # Grid of latents: every zi row combined with every zo row
    num_zi = tf.shape(zi)[0]
    num_zo = tf.shape(zo)[0]
    zi_tiled = tf.expand_dims(zi, axis=1)
    zi_tiled = tf.tile(zi_tiled, [1, num_zo, 1])
    zo_tiled = tf.expand_dims(zo, axis=0)
    zo_tiled = tf.tile(zo_tiled, [num_zi, 1, 1])
    latent_grid = tf.concat([zi_tiled, zo_tiled], axis=2, name='z_grid')

    # Generator on the paired latents
    with tf.variable_scope('G'):
        generated = DCGANGenerator64x64(z, nch, dim=G_dim)
    generated = tf.identity(generated, name='G_z')

    # Generator on the flattened grid, sharing the variables created above
    latent_grid = tf.reshape(latent_grid, [num_zi * num_zo, d_i + d_o])
    with tf.variable_scope('G', reuse=True):
        generated_grid = DCGANGenerator64x64(latent_grid, nch, dim=G_dim)
    generated_grid = tf.reshape(generated_grid,
                                [num_zi, num_zo, height, width, nch],
                                name='G_z_grid')

    # Encode to uint8
    encode_png_observation(generated, name='G_z_uint8')
    grid_uint8 = encode_png_observation(generated_grid,
                                        name='G_z_grid_uint8')

    # Flatten grid of images to one large image (row shares zi, column shares zo)
    grid_cols = tf.shape(grid_uint8)[1]
    preview = tf.transpose(grid_uint8, [1, 0, 2, 3, 4])
    preview = tf.reshape(preview, [grid_cols, num_zi * height, width, nch])
    preview = tf.transpose(preview, [1, 0, 2, 3])
    tf.reshape(preview,
               [num_zi * height, grid_cols * width, nch],
               name='G_z_grid_prev')

    # Saver restricted to the generator variables plus the global step
    generator_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope='G')
    step_var = tf.train.get_or_create_global_step()
    saver = tf.train.Saver(generator_vars + [step_var])

    # Export graph
    tf.train.write_graph(tf.get_default_graph(), infer_dir, 'infer.pbtxt')

    # Export MetaGraph
    tf.train.export_meta_graph(filename=os.path.join(infer_dir, 'infer.meta'),
                               clear_devices=True,
                               saver_def=saver.as_saver_def())

    # Reset graph (in case training afterwards)
    tf.reset_default_graph()
Exemplo n.º 23
0
def build_model(env, cfg):
    """Build the PPO training graph and return its placeholders, tensors and ops.

    The function creates observation/action/PPO-buffer placeholders, runs the
    actor-critic returned by ``core.actor_critic_fn(cfg)``, defines the clipped
    policy loss and the (optionally clipped / L2-regularised) value loss, and
    creates either one joint training op or separate policy/value training ops
    depending on ``cfg.architecture.unified_policy_value``.

    Args:
        env: environment object; only ``env.observation_space`` (keys
            "non_spatial", "spatial", "mask_spell", "mask_spatial",
            "if_spawn_spell") and ``env.action_space`` (keys "spell",
            "spatial") are read here.
        cfg: configuration object; ``cfg.architecture`` and
            ``cfg.hyperparameters`` are read here and ``cfg`` itself is
            forwarded to the actor-critic.

    Returns:
        dict mapping string names to the placeholders, policy/value outputs,
        losses, diagnostics and ops built here. ``train`` is None when policy
        and value are trained separately; ``train_pi`` and ``train_v`` are
        None when they are trained jointly.
    """

    # Inputs to computation graph - observations
    with tf.name_scope("Inputs"):
        with tf.name_scope("Observation"):
            (
                x_non_spatial_ph,
                x_spatial_ph,
                x_mask_spell_ph,
                x_mask_spatial_ph,
                x_if_spawn_spell_ph,
            ) = utils.placeholders_from_spaces(
                env.observation_space["non_spatial"],
                env.observation_space["spatial"],
                env.observation_space["mask_spell"],
                env.observation_space["mask_spatial"],
                env.observation_space["if_spawn_spell"],
                names=[
                    "NonSpatial",
                    "Spatial",
                    "MaskSpell",
                    "MaskSpatial",
                    "IfSpawnSpell",
                ],
            )

            # Recurrent-state input; its shape is taken from the configured
            # empty RNN state.
            x_state_in_ph = tf.placeholder(
                dtype=tf.float32,
                shape=cfg.architecture.empty_rnn_state.shape,
                name="StateIn",
            )
            # Scalar int64 placeholder carrying the batch size.
            x_batch_size_ph = tf.placeholder(dtype=tf.int64,
                                             shape=(),
                                             name="BatchSize")
            x_rnn_mask_ph = utils.placeholder(None, name="RnnMask")

        with tf.name_scope("Action"):
            # Inputs to computation graph - actions
            (
                a_spell_ph,
                a_spatial_ph,
            ) = utils.placeholders_from_spaces(
                env.action_space["spell"],
                env.action_space["spatial"],
                names=["Spell", "Spatial"],
            )

        # Per-sample PPO buffer quantities: advantage, return, and the
        # log-probability / value recorded when the data was collected.
        adv_ph, ret_ph, logp_old_ph, v_old_ph = utils.placeholders(
            None,
            None,
            None,
            None,
            names=["Advantage", "Return", "Logp", "Value"])

    actor_critic = core.actor_critic_fn(cfg)
    # Share information about action space with policy architecture
    # state_out is None, if Architecture.USE_RNN == false
    pi, logp, logp_pi, v, state_out = actor_critic(
        {
            "spatial": x_spatial_ph,
            "non_spatial": x_non_spatial_ph,
            "mask_spell": x_mask_spell_ph,
            "mask_spatial": x_mask_spatial_ph,
            "if_spawn_spell": x_if_spawn_spell_ph,
            "state_in": x_state_in_ph,
            "batch_size": x_batch_size_ph,
            "rnn_mask": x_rnn_mask_ph,
        },
        {
            "spell": a_spell_ph,
            "spatial": a_spatial_ph
        },
        env.action_space,
        cfg,
    )

    hyperp = cfg.hyperparameters
    # Optionally bound the value estimate to [-1, 1] before it is used in the
    # losses and returned to the caller.
    if hyperp.clip_vf_output:
        v = tf.clip_by_value(v, -1.0, 1.0)

    # PPO objectives
    with tf.name_scope("Objectives"):

        # policy block
        ratio = tf.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
        # Clipping bound on the surrogate: (1 + clip_ratio) * adv for positive
        # advantages, (1 - clip_ratio) * adv otherwise.
        min_adv = tf.where(
            adv_ph > 0,
            (1 + hyperp.clip_ratio) * adv_ph,
            (1 - hyperp.clip_ratio) * adv_ph,
        )
        pi_loss = -tf.reduce_mean(tf.minimum(ratio * adv_ph, min_adv),
                                  name="LossPi")

        # value block

        if hyperp.value_clipping_enabled:
            # value function clipping: keep the new value within
            # +/- clip_range_vf of the old value and take the larger of the
            # clipped and unclipped squared errors.
            v_clipped = v_old_ph + tf.clip_by_value(
                v - v_old_ph, -hyperp.clip_range_vf, hyperp.clip_range_vf)
            v_loss1 = tf.square(v - ret_ph)
            v_loss2 = tf.square(v_clipped - ret_ph)
            v_loss = tf.reduce_mean(tf.maximum(v_loss1, v_loss2), name="LossV")
        else:
            v_loss = tf.reduce_mean((ret_ph - v)**2, name="LossV")

        # value function regularization: L2 penalty over the trainable
        # variables in scope "ValueFunctionMain" whose name does not contain
        # "bias". Only applied when policy and value are separate networks.
        if hyperp.vf_reg_enabled and not cfg.architecture.unified_policy_value:
            params_v = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope="ValueFunctionMain")
            # NOTE(review): the comprehension variable is also called `v`;
            # under Python 3 it is local to the comprehension and does not
            # rebind the value tensor `v` above.
            v_loss_l2 = tf.add_n(
                [tf.nn.l2_loss(v) for v in params_v if "bias" not in v.name])
            v_loss = v_loss + hyperp.vf_reg * v_loss_l2

        # todo michalw: entropy bonus, value function clipping?
        # Joint loss; only minimised when policy and value are unified.
        loss = hyperp.pi_loss_coef * pi_loss + hyperp.vf_loss_coef * v_loss

    # Useful to watch during learning
    with tf.name_scope("Info"):
        # a sample estimate for KL-divergence, easy to compute
        approx_kl = tf.reduce_mean(logp_old_ph - logp, name="KL")
        # a sample estimate for entropy, also easy to compute
        approx_ent = tf.reduce_mean(-logp, name="Entropy")
        # Fraction of samples whose probability ratio falls outside
        # [1 - clip_ratio, 1 + clip_ratio].
        clipped = tf.logical_or(ratio > (1 + hyperp.clip_ratio), ratio <
                                (1 - hyperp.clip_ratio))
        clipfrac = tf.reduce_mean(tf.cast(clipped, tf.float32),
                                  name="ClipFrac")

    with tf.name_scope("Optimizers"):
        # All three optimizers are always constructed; which ones get a
        # training op depends on the branches below.
        pi_optimizer = MpiAdamOptimizer(learning_rate=hyperp.pi_lr)
        v_optimizer = MpiAdamOptimizer(learning_rate=hyperp.vf_lr)
        optimizer = MpiAdamOptimizer(learning_rate=hyperp.lr)
        train, train_pi, train_v = None, None, None

        if hyperp.grad_clipping_enabled:
            if cfg.architecture.unified_policy_value:
                # gradient clipping enabled, unified PV
                params = tf.trainable_variables()
                grads, _vars = zip(*optimizer.compute_gradients(loss, params))
                grads, _grad_norm = tf.clip_by_global_norm(
                    grads, hyperp.max_grad_norm)
                # Re-pair the clipped gradients with their variables.
                grads = list(zip(grads, params))
                train = optimizer.apply_gradients(grads)
            else:
                # gradient clipping enabled, separate PV
                params_pi = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                              scope="PolicyMain")
                pi_grads, _vars = zip(
                    *pi_optimizer.compute_gradients(pi_loss, params_pi))
                pi_grads, _grad_norm = tf.clip_by_global_norm(
                    pi_grads, hyperp.max_grad_norm)
                pi_grads = list(zip(pi_grads, params_pi))
                train_pi = pi_optimizer.apply_gradients(pi_grads)

                params_v = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope="ValueFunctionMain")
                v_grads, _vars = zip(
                    *v_optimizer.compute_gradients(v_loss, params_v))
                v_grads, _grad_norm = tf.clip_by_global_norm(
                    v_grads, hyperp.max_grad_norm)
                v_grads = list(zip(v_grads, params_v))
                train_v = v_optimizer.apply_gradients(v_grads)
        else:
            if cfg.architecture.unified_policy_value:
                # no gradient clipping, unified PV
                train = optimizer.minimize(loss)
            else:
                # no gradient clipping, separate PV
                train_pi = pi_optimizer.minimize(pi_loss)
                train_v = v_optimizer.minimize(v_loss)

    # Register each training op together with its gradients and variables in
    # a named graph collection.
    # NOTE(review): `grads_and_vars` is an attribute of MpiAdamOptimizer that
    # is not defined in this block - presumably populated by
    # compute_gradients(); confirm against the optimizer implementation.
    if not cfg.architecture.unified_policy_value:
        tf.add_to_collection("train_pi", train_pi)
        for grad, var in pi_optimizer.grads_and_vars:
            tf.add_to_collection("train_pi", grad)
            tf.add_to_collection("train_pi", var)

        tf.add_to_collection("train_v", train_v)
        for grad, var in v_optimizer.grads_and_vars:
            tf.add_to_collection("train_v", grad)
            tf.add_to_collection("train_v", var)
    else:
        tf.add_to_collection("train", train)
        for grad, var in optimizer.grads_and_vars:
            tf.add_to_collection("train", grad)
            tf.add_to_collection("train", var)

    # NOTE(review): sync_all_params() is defined elsewhere; presumably it
    # synchronises parameters across MPI workers - confirm.
    sync_op = sync_all_params()
    tf.add_to_collection("sync_op", sync_op)

    # merge_all() yields None when no summaries exist; substitute a no-op so
    # callers always have something to run.
    summary_op = tf.summary.merge_all()
    if summary_op is None:
        summary_op = tf.no_op()
    tf.add_to_collection("summary_op", summary_op)

    return {
        "x_non_spatial_ph": x_non_spatial_ph,
        "x_spatial_ph": x_spatial_ph,
        "x_mask_spell_ph": x_mask_spell_ph,
        "x_mask_spatial_ph": x_mask_spatial_ph,
        "x_if_spawn_spell_ph": x_if_spawn_spell_ph,
        "x_state_in_ph": x_state_in_ph,
        "x_rnn_mask_ph": x_rnn_mask_ph,
        "x_batch_size_ph": x_batch_size_ph,
        "a_spell_ph": a_spell_ph,
        "a_spatial_ph": a_spatial_ph,
        "adv_ph": adv_ph,
        "v_old_ph": v_old_ph,
        "ret_ph": ret_ph,
        "logp_old_ph": logp_old_ph,
        "pi_spell": pi["spell"],
        "pi_spell_argmax": pi["spell_argmax"],
        "pi_spatial": pi["spatial"],
        "pi_spatial_argmax": pi["spatial_argmax"],
        "v": v,
        "state_out": state_out,
        "pi_loss": pi_loss,
        "v_loss": v_loss,
        "loss": loss,
        "approx_ent": approx_ent,
        "approx_kl": approx_kl,
        "clipfrac": clipfrac,
        "logp_pi": logp_pi,
        "train_pi": train_pi,
        "train_v": train_v,
        "train": train,
        "sync_op": sync_op,
        "summary_op": summary_op,
    }
Exemplo n.º 24
0
    def init_placeholders(self):
        """Create every input placeholder of the model under ``input_layer``.

        Each placeholder is stored as an attribute on ``self``. Apart from
        ``timelast_list``, whose second dimension is ``self.position_count``,
        all dimensions are left unspecified (``None``).
        """
        i32, f32 = tf.int32, tf.float32
        rank1 = [None]
        rank2 = [None, None]
        rank3 = [None, None, None]

        with tf.variable_scope("input_layer"):
            # --- per-example identifiers -------------------------------
            # user id
            self.user_id = tf.placeholder(dtype=i32, shape=rank1, name="user")

            # --- per-example sequences ---------------------------------
            # item list (user history)
            self.item_list = tf.placeholder(
                dtype=i32, shape=rank2, name="item_seq")
            # category list
            self.category_list = tf.placeholder(
                dtype=i32, shape=rank2, name="category_list")
            # time list
            self.time_list = tf.placeholder(
                dtype=f32, shape=rank2, name="time_list")
            # interval between the current item and its last item
            self.timelast_list = tf.placeholder(
                dtype=f32,
                shape=[None, self.position_count],
                name="timelast_list")
            # interval between the current item and the target item
            self.timenow_list = tf.placeholder(
                dtype=f32, shape=rank2, name="timenow_list")
            # position list
            self.position_list = tf.placeholder(
                dtype=i32, shape=rank2, name="position_list")

            # --- target item -------------------------------------------
            # target item id
            self.target_item_id = tf.placeholder(
                dtype=i32, shape=rank1, name="target_item_id")
            # target item category
            self.target_item_category = tf.placeholder(
                dtype=i32, shape=rank1, name="target_item_category")
            # target item time
            self.target_item_time = tf.placeholder(
                dtype=f32, shape=rank1, name="target_item_time")

            # length of item list
            self.seq_length = tf.placeholder(
                dtype=i32, shape=rank1, name="seq_length")

            # --- re-consumption signals --------------------------------
            self.is_reconsume = tf.placeholder(
                dtype=f32, shape=rank1, name="is_reconsume")
            self.reconsume_list = tf.placeholder(
                dtype=f32, shape=rank2, name="reconsume_list")

            # --- adjacency inputs (rank-3 / rank-2 float tensors) ------
            self.adj_masks = tf.placeholder(
                dtype=f32, shape=rank3, name="adj_masks")
            self.eid_adj = tf.placeholder(
                dtype=f32, shape=rank3, name="eid_adj")
            self.adj_avg_time = tf.placeholder(
                dtype=f32, shape=rank2, name="adj_avg_time")
Exemplo n.º 25
0
    def _build_net(self):
        """Build the evaluation network, its loss/train op, and the target net.

        Both networks share the same architecture (one ReLU hidden layer of 20
        units followed by either a plain Q head or a dueling Value/Advantage
        head, selected by ``self.dueling``) but live in separate variable
        scopes (``eval_net`` / ``target_net``) and variable collections.
        """
        def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
            """Return the Q-value tensor for input ``s``.

            Variables are created with the given initializers and registered
            in the collections listed in ``c_names``.
            """
            def affine(inputs, n_in, n_out, w_name, b_name):
                # Creates the weight first, then the bias, in the current
                # variable scope, and returns inputs @ W + b.
                weights = tf.get_variable(w_name, [n_in, n_out],
                                          initializer=w_initializer,
                                          collections=c_names)
                bias = tf.get_variable(b_name, [1, n_out],
                                       initializer=b_initializer,
                                       collections=c_names)
                return tf.matmul(inputs, weights) + bias

            with tf.variable_scope('l1'):
                hidden = tf.nn.relu(
                    affine(s, self.n_features, n_l1, 'w1', 'b1'))

            if not self.dueling:
                # Plain DQN head.
                with tf.variable_scope('Q'):
                    return affine(hidden, n_l1, self.n_actions, 'w2', 'b2')

            # Dueling DQN: separate state-value and advantage streams.
            with tf.variable_scope('Value'):
                self.V = affine(hidden, n_l1, 1, 'w2', 'b2')

            with tf.variable_scope('Advantage'):
                self.A = affine(hidden, n_l1, self.n_actions, 'w2', 'b2')

            with tf.variable_scope('Q'):
                # Q = V(s) + (A(s, a) - mean_a A(s, a))
                mean_advantage = tf.reduce_mean(self.A, axis=1,
                                                keep_dims=True)
                return self.V + (self.A - mean_advantage)

        # ------------------ build evaluate_net ------------------
        # network input
        self.s = tf.placeholder(tf.float32, [None, self.n_features],
                                name='s')
        # regression target used by the loss
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                       name='Q_target')
        with tf.variable_scope('eval_net'):
            # layer configuration
            c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            n_l1 = 20
            w_initializer = tf.random_normal_initializer(0., 0.3)
            b_initializer = tf.constant_initializer(0.1)

            self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer,
                                       b_initializer)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(squared_error)
        with tf.variable_scope('train'):
            rmsprop = tf.train.RMSPropOptimizer(self.lr)
            self._train_op = rmsprop.minimize(self.loss)

        # ------------------ build target_net ------------------
        # next-state input
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                                 name='s_')
        with tf.variable_scope('target_net'):
            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

            self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer,
                                       b_initializer)
Exemplo n.º 26
0
def main(_):
    """Roll out a trained video-prediction model and write the frames to disk.

    The rollout is seeded with one batch from the training dataset. For
    ``FLAGS.num_steps`` steps the last ``hparams.video_num_input_frames``
    frames (plus actions and rewards when the problem defines
    ``num_actions``) are fed to ``model.infer`` and the first predicted frame
    is appended to the video written to ``FLAGS.output_gif``. Actions fed to
    the model after the seed frames are drawn uniformly at random.

    Args:
        _: unused positional argument supplied by the app runner.

    Raises:
        ValueError: if no checkpoint state is found in ``FLAGS.output_dir``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Create hparams
    hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set,
        FLAGS.hparams,
        data_dir=os.path.expanduser(FLAGS.data_dir),
        problem_name=FLAGS.problem)
    hparams.force_full_predict = True
    hparams.scheduled_sampling_k = -1

    # Params
    num_agents = 1  # TODO(mbz): fix the code for more agents
    num_steps = FLAGS.num_steps
    num_actions = getattr(hparams.problem, "num_actions", None)
    has_actions = num_actions is not None

    frame_shape = hparams.problem.frame_shape
    if hparams.preprocess_resize_frames is not None:
        # Build a NEW list: the previous `frame_shape += [...]` extended
        # hparams.preprocess_resize_frames in place through the alias, so the
        # hparams later handed to the dataset and model carried an extra
        # channel entry.
        frame_shape = (list(hparams.preprocess_resize_frames) +
                       [hparams.problem.num_channels])

    dataset = registry.problem(FLAGS.problem).dataset(
        tf_estimator.ModeKeys.TRAIN,
        shuffle_files=True,
        data_dir=os.path.expanduser(FLAGS.data_dir),
        hparams=hparams)

    dataset = dataset.batch(num_agents, drop_remainder=True)
    data = dataset.make_one_shot_iterator().get_next()

    # Setup input placeholders
    input_size = [num_agents, hparams.video_num_input_frames]
    placeholders = {
        "inputs": tf.placeholder(tf.float32, input_size + frame_shape)
    }
    if has_actions:
        placeholders["input_action"] = tf.placeholder(
            tf.int64, input_size + [1])
        placeholders["input_reward"] = tf.placeholder(
            tf.int64, input_size + [1])
        placeholders["reset_internal_states"] = tf.placeholder(
            tf.float32, [])

    # Create model.
    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf_estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(placeholders)

    # Sliding windows holding the most recent video_num_input_frames entries.
    states_q = Queue(maxsize=hparams.video_num_input_frames)
    actions_q = Queue(maxsize=hparams.video_num_input_frames)
    rewards_q = Queue(maxsize=hparams.video_num_input_frames)
    if has_actions:
        all_qs = [states_q, actions_q, rewards_q]
    else:
        all_qs = [states_q]

    writer = common_video.WholeVideoWriter(fps=FLAGS.fps,
                                           output_path=FLAGS.output_gif)

    saver = tf.train.Saver(tf.trainable_variables())
    with tf.train.SingularMonitoredSession() as sess:
        # Load latest checkpoint. get_checkpoint_state() returns None when
        # the directory holds no checkpoint; fail with a clear message
        # instead of an AttributeError on None.
        ckpt_state = tf.train.get_checkpoint_state(FLAGS.output_dir)
        if ckpt_state is None:
            raise ValueError(
                "No checkpoint found in %s" % FLAGS.output_dir)
        saver.restore(sess.raw_session(), ckpt_state.model_checkpoint_path)

        # get init frames from the dataset
        data_np = sess.run(data)

        frames = np.split(data_np["inputs"], hparams.video_num_input_frames, 1)
        for frame in frames:
            frame = np.squeeze(frame, 1)
            states_q.put(frame)
            writer.write(frame[0].astype(np.uint8))

        if has_actions:
            actions = np.split(data_np["input_action"],
                               hparams.video_num_input_frames, 1)
            for action in actions:
                actions_q.put(np.squeeze(action, 1))

            rewards = np.split(data_np["input_reward"],
                               hparams.video_num_input_frames, 1)
            for reward in rewards:
                rewards_q.put(np.squeeze(reward, 1))

        for step in range(num_steps):
            print(">>>>>>> ", step)

            if has_actions:
                # randint's upper bound is exclusive, so pass num_actions to
                # sample from the full range [0, num_actions). The previous
                # `num_actions - 1` never produced the last action and raised
                # when num_actions == 1.
                random_actions = np.random.randint(num_actions)
                random_actions = np.expand_dims(random_actions, 0)
                random_actions = np.tile(random_actions, (num_agents, 1))

                # Shape inputs and targets
                inputs, input_action, input_reward = (
                    np.stack(list(q.queue), axis=1) for q in all_qs)
                feed = {
                    placeholders["inputs"]: inputs,
                    placeholders["input_action"]: input_action,
                    placeholders["input_reward"]: input_reward,
                    placeholders["reset_internal_states"]: float(step == 0),
                }
            else:
                elems = list(states_q.queue)
                # Defensive: restore the leading batch axis on any frame that
                # was queued without it.
                for i, e in enumerate(elems):
                    if len(e.shape) < 4:
                        elems[i] = np.expand_dims(e, axis=0)
                inputs = np.stack(elems, axis=1)
                feed = {placeholders["inputs"]: inputs}

            # Predict next frames
            predictions = sess.run(prediction_ops, feed_dict=feed)

            # Update queues with the first predicted step.
            if has_actions:
                predicted_states = predictions["targets"][:, 0]
                predicted_reward = predictions["target_reward"][:, 0]
                new_data = (predicted_states, random_actions, predicted_reward)
            else:
                predicted_states = predictions[:, 0]
                # One-element tuple: without the trailing comma zip() would
                # iterate over the batch axis of the array itself and queue a
                # frame with its batch dimension stripped.
                new_data = (predicted_states,)
            for q, d in zip(all_qs, new_data):
                q.get()
                q.put(d.copy())

            writer.write(np.round(predicted_states[0]).astype(np.uint8))

        writer.finish_to_disk()
Exemplo n.º 27
0
    def train(self, train_dataset: Dataset, val_dataset: Dataset):
        """Build the model graph and train it for ``self.max_epochs`` epochs.

        A fresh ``tf.Graph`` is created, the learning-rate schedule, input
        placeholders and all weight/bias variables are declared, the model is
        unrolled via ``self.model()`` and the training op is obtained from
        ``self.calc_reward``. Progress is printed every 50 steps. After each
        epoch's batches, when the step counter is a multiple of
        ``num_examples // batch_size``, a checkpoint is written to
        ``self.ckpt_path``, ``self.evaluate`` is run on ``val_dataset`` and
        both datasets receive ``on_epoch_end()``.

        Args:
            train_dataset: dataset providing ``num_examples``,
                ``batch_count()``, ``next_batch(idx)`` and ``on_epoch_end()``.
            val_dataset: dataset passed to ``self.evaluate``; also receives
                ``on_epoch_end()``.
        """
        with tf.device('/cpu:1'):
            with tf.Graph().as_default():

                # set the learning rate
                self.global_step = tf.Variable(0, trainable=False)
                self.lr = tf.train.exponential_decay(self.initLr,
                                                     self.global_step,
                                                     self.lrDecayFreq,
                                                     self.lrDecayRate,
                                                     staircase=True)

                # preallocate x, y, baseline
                # NOTE(review): `labels` is not referenced again in this
                # method; it is kept so the constructed graph is unchanged.
                labels = tf.placeholder(
                    "float32", shape=[self.batch_size, self.n_classes])
                self.labels_placeholder = tf.placeholder(
                    tf.float32, shape=(self.batch_size), name="labels_raw")
                self.onehot_labels_placeholder = tf.placeholder(
                    tf.float32,
                    shape=(self.batch_size, self.n_classes),
                    name="labels_onehot")
                self.inputs_placeholder = tf.placeholder(
                    tf.float32,
                    shape=(self.batch_size, self.img_size * self.img_size),
                    name="images")

                # declare the model parameters; naming rule:
                # 1st capital letter: weights or bias (W = weights, B = bias)
                # 2nd lowercase letter: the network (e.g. g = glimpse network)
                # remaining letters: input-output mapping, spelled out in the
                # variable name argument

                self.Wg_l_h = self.weight_variable(
                    (2, self.hl_size), "glimpseNet_wts_location_hidden", True)
                self.Bg_l_h = self.weight_variable(
                    (1, self.hl_size), "glimpseNet_bias_location_hidden", True)

                self.Wg_g_h = self.weight_variable(
                    (self.totalSensorBandwidth, self.hg_size),
                    "glimpseNet_wts_glimpse_hidden", True)
                self.Bg_g_h = self.weight_variable(
                    (1, self.hg_size), "glimpseNet_bias_glimpse_hidden", True)

                self.Wg_hg_gf1 = self.weight_variable(
                    (self.hg_size, self.g_size),
                    "glimpseNet_wts_hiddenGlimpse_glimpseFeature1", True)
                self.Wg_hl_gf1 = self.weight_variable(
                    (self.hl_size, self.g_size),
                    "glimpseNet_wts_hiddenLocation_glimpseFeature1", True)
                self.Bg_hlhg_gf1 = self.weight_variable(
                    (1, self.g_size),
                    "glimpseNet_bias_hGlimpse_hLocs_glimpseFeature1", True)

                self.Wc_g_h = self.weight_variable(
                    (self.cell_size, self.g_size),
                    "coreNet_wts_glimpse_hidden", True)
                self.Bc_g_h = self.weight_variable(
                    (1, self.g_size), "coreNet_bias_glimpse_hidden", True)

                self.Wr_h_r = self.weight_variable(
                    (self.cell_out_size, self.img_size**2),
                    "reconstructionNet_wts_hidden_action", True)
                self.Br_h_r = self.weight_variable(
                    (1, self.img_size**2),
                    "reconstructionNet_bias_hidden_action", True)

                self.Wb_h_b = self.weight_variable(
                    (self.g_size, 1), "baselineNet_wts_hiddenState_baseline",
                    True)
                self.Bb_h_b = self.weight_variable(
                    (1, 1), "baselineNet_bias_hiddenState_baseline", True)

                self.Wl_h_l = self.weight_variable(
                    (self.cell_out_size, 2), "locationNet_wts_hidden_location",
                    True)
                self.Bl_h_l = self.weight_variable(
                    (1, 2), "locationNet_bias_hidden_location", True)

                self.Wa_h_a = self.weight_variable(
                    (self.cell_out_size, self.n_classes),
                    "actionNet_wts_hidden_action", True)
                self.Ba_h_a = self.weight_variable(
                    (1, self.n_classes), "actionNet_bias_hidden_action", True)

                # query the model output
                outputs = self.model()

                # convert list of tensors to one big tensor, then reorder
                # from (nGlimpses, batch, 2) to (batch, nGlimpses, 2)
                self.sampled_locs = tf.concat(axis=0, values=self.sampled_locs)
                self.sampled_locs = tf.reshape(
                    self.sampled_locs, (self.nGlimpses, self.batch_size, 2))
                self.sampled_locs = tf.transpose(self.sampled_locs, [1, 0, 2])
                self.mean_locs = tf.concat(axis=0, values=self.mean_locs)
                self.mean_locs = tf.reshape(
                    self.mean_locs, (self.nGlimpses, self.batch_size, 2))
                self.mean_locs = tf.transpose(self.mean_locs, [1, 0, 2])
                self.glimpse_images = tf.concat(axis=0,
                                                values=self.glimpse_images)

                # compute the reward and obtain the training op
                (cost, reward, predicted_labels, correct_labels, train_op,
                 _b, avg_b, rminusb, _lr) = self.calc_reward(outputs)

                # ---------------- start running the model ----------------

                sess_config = tf.ConfigProto(allow_soft_placement=True,
                                             log_device_placement=False)
                sess_config.gpu_options.allow_growth = True
                sess = tf.Session(config=sess_config)
                # try/finally guarantees the session is released even when
                # training or evaluation raises.
                try:
                    saver = tf.train.Saver()

                    init = tf.global_variables_initializer()
                    sess.run(init)

                    # iterations per epoch except last batch
                    iterations_per_epoch = (train_dataset.num_examples //
                                            self.batch_size)
                    print("iterations_per_epoch: " +
                          str(iterations_per_epoch))

                    fetches = [
                        train_op, cost, reward, predicted_labels,
                        correct_labels, self.glimpse_images, avg_b,
                        rminusb, self.mean_locs, self.sampled_locs, self.lr
                    ]

                    # Global step counter (named `step` so the builtin
                    # `iter` is not shadowed).
                    step = 0
                    # training
                    for epoch in range(0, self.max_epochs):
                        for batch_idx in range(0,
                                               train_dataset.batch_count()):

                            start_time = time.time()

                            # get the next batch of examples
                            nextX, nextY = train_dataset.next_batch(batch_idx)
                            if self.translateMnist:
                                nextX, _ = self.convertTranslated(
                                    nextX, self.ORIG_IMG_SIZE, self.img_size)

                            feed_dict = {
                                self.inputs_placeholder:
                                nextX,
                                self.labels_placeholder:
                                nextY,
                                self.onehot_labels_placeholder:
                                self.dense_to_one_hot(
                                    nextY, num_classes=self.n_classes)
                            }

                            # feed them to the model
                            results = sess.run(fetches, feed_dict=feed_dict)

                            # Only the scalars needed for logging are kept.
                            (_, cost_fetched, reward_fetched, _, _, _,
                             avg_b_fetched, rminusb_fetched, _, _,
                             lr_fetched) = results

                            duration = time.time() - start_time

                            if step % 50 == 0:
                                print((
                                    'Step %d: cost = %.5f reward = %.5f (%.3f sec) b = %.5f R-b = %.5f, LR = %.5f'
                                    % (step, cost_fetched, reward_fetched,
                                       duration, avg_b_fetched,
                                       rminusb_fetched, lr_fetched)))

                            step += 1

                        # End-of-epoch bookkeeping.
                        # NOTE(review): this only fires when the step counter
                        # is a multiple of num_examples // batch_size, i.e.
                        # when batch_count() matches that value - confirm
                        # against the Dataset implementation.
                        if step % iterations_per_epoch == 0:
                            print("EPOCH: " + str(epoch))
                            saver.save(sess, self.ckpt_path)
                            self.evaluate(val_dataset,
                                          sess,
                                          reward,
                                          predicted_labels,
                                          correct_labels,
                                          glimpse_images=self.glimpse_images)

                            train_dataset.on_epoch_end()
                            val_dataset.on_epoch_end()
                finally:
                    sess.close()
Exemplo n.º 28
0
    def fit(self, data):
        """Fit the generative model of the training data distribution.

        Builds the TensorFlow graph (generator, discriminator, gradient
        penalty, losses and Adam solvers), starts a session kept on
        ``self.sess`` and runs ``NITER`` training iterations. Every iteration
        performs ``NUM_TEACHERS`` discriminator updates against noise-perturbed
        real/fake labels, followed by a single generator update. On return
        ``self.trained`` is ``True``.

        :param data: DataFrame: Training set
        """

        X_train, Y_train, cols_to_reverse = self._one_hot(data)

        self.columns_to_reverse = cols_to_reverse

        self.no, self.X_dim = X_train.shape
        # Latent dimension is a quarter of the feature dimension.
        self.z_dim = int(self.X_dim / 4)
        self.h_dim = int(self.X_dim)

        # Feature matrix
        self.X = tf.placeholder(tf.float32, shape=[None, self.X_dim])
        # Target variable
        self.Y = tf.placeholder(tf.float32, shape=[None, C_DIM])
        # Latent space
        self.Z = tf.placeholder(tf.float32, shape=[None, self.z_dim])
        # Real/fake labels fed to the discriminator loss
        self.M = tf.placeholder(tf.float32, shape=[None, C_DIM])
        self.Y_train = Y_train

        # Standard deviation of the Gaussian noise added to the real/fake
        # labels: sqrt(2 * ln(1.25 * 10**delta)) / epsilon.
        # ``**`` is exponentiation; the previous ``^`` was bitwise XOR
        # (e.g. 10 ^ 5 == 15), which silently produced a far smaller scale.
        # NOTE(review): assumes self.delta is the exponent k such that the
        # privacy parameter is 10**-k -- confirm against the constructor.
        lamda = np.sqrt(2 * np.log(1.25 * (10 ** self.delta))) / self.epsilon

        # Data Preprocessing: per-column min-max scaling. The offsets and
        # ranges are kept on the instance.
        X_train = np.asarray(X_train)
        self.Min_Val = np.min(X_train, 0)
        X_train = X_train - self.Min_Val
        self.Max_Val = np.max(X_train, 0)
        # 1e-8 avoids division by zero for constant columns.
        X_train = X_train / (self.Max_Val + 1e-8)
        self.dim = len(X_train[:, 0])

        # Generator
        self.G_sample = self._generator(self.Z, self.Y)

        # Discriminator
        D_real = self._discriminator(self.X, self.Y)
        D_fake = self._discriminator(self.G_sample, self.Y)
        # Real scores first, fake scores second; the label batch fed through
        # self.M below is laid out in the same order.
        D_entire = tf.concat(axis=0, values=[D_real, D_fake])

        # Replacement of Clipping algorithm to Penalty term
        # 1. Line 6 in Algorithm 1: random interpolation between real and
        #    generated samples.
        eps = tf.random_uniform([MB_SIZE, 1], minval=0., maxval=1.)
        X_inter = eps * self.X + (1. - eps) * self.G_sample

        # 2. Line 7 in Algorithm 1: penalise deviation of the gradient norm
        #    (with respect to the interpolated samples) from 1.
        grad = tf.gradients(self._discriminator(X_inter, self.Y),
                            [X_inter, self.Y])[0]
        grad_norm = tf.sqrt(tf.reduce_sum((grad)**2 + 1e-8, axis=1))
        grad_pen = LAM * tf.reduce_mean((grad_norm - 1)**2)

        # Loss function
        D_loss = tf.reduce_mean((1 - self.M) * D_entire) - tf.reduce_mean(
            self.M * D_entire) + grad_pen
        G_loss = -tf.reduce_mean(D_fake)

        # Solver
        # NOTE(review): self.theta_D / self.theta_G are not assigned in this
        # method; presumably _discriminator / _generator (or __init__) set
        # them -- verify.
        D_solver = (tf.train.AdamOptimizer(
            learning_rate=LR, beta1=0.5).minimize(D_loss,
                                                  var_list=self.theta_D))
        G_solver = (tf.train.AdamOptimizer(
            learning_rate=LR, beta1=0.5).minimize(G_loss,
                                                  var_list=self.theta_G))

        # Start session
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        # Training iterations
        for _ in tqdm(range(NITER)):
            for _ in range(NUM_TEACHERS):
                # Teacher training
                Z_mb = self._sample_Z(MB_SIZE, self.z_dim)

                # Teacher 1
                X_idx = self._sample_X(self.no, MB_SIZE)
                X_mb = X_train[X_idx, :]

                Y_mb = np.reshape(Y_train[X_idx], [MB_SIZE, 1])

                # Clean labels: 1 for the real half, 0 for the fake half.
                M_real = np.ones([MB_SIZE])
                M_fake = np.zeros([MB_SIZE])

                M_entire = np.concatenate((M_real, M_fake), 0)

                # Perturb the labels with Gaussian noise, then re-binarise.
                Normal_Add = np.random.normal(loc=0.0,
                                              scale=lamda,
                                              size=MB_SIZE * 2)

                M_entire = M_entire + Normal_Add

                M_entire = (M_entire > 0.5)

                M_mb = np.reshape(M_entire.astype(float), (2 * MB_SIZE, 1))

                self.sess.run([D_solver, D_loss],
                              feed_dict={
                                  self.X: X_mb,
                                  self.Z: Z_mb,
                                  self.M: M_mb,
                                  self.Y: Y_mb
                              })

            # Generator Training
            Z_mb = self._sample_Z(MB_SIZE, self.z_dim)

            # Only the target values of the sampled rows are needed here; the
            # generator update does not consume real feature rows.
            X_idx = self._sample_X(self.no, MB_SIZE)

            Y_mb = np.reshape(Y_train[X_idx], [MB_SIZE, 1])

            self.sess.run([G_solver, G_loss],
                          feed_dict={
                              self.Z: Z_mb,
                              self.Y: Y_mb
                          })

        self.trained = True
def main(unused_argv):
    """Run a detection SavedModel over a set of images and write an HTML report.

    Steps, all driven by ``FLAGS``:

    1. Read the label map from ``FLAGS.label_map_file`` (only the ``csv``
       format is accepted; rows are ``id:name``).
    2. Load the SavedModel at ``FLAGS.saved_model_dir`` with the SERVING tag
       and look up the output tensor names of its ``serving_default``
       signature.
    3. For every file matching ``FLAGS.image_file_pattern``, run the model,
       post-process boxes/classes/scores/masks and draw them on the image.
    4. Embed all annotated images as base64 JPEGs into a single HTML file
       written to ``FLAGS.output_html``.

    Args:
        unused_argv: Ignored; deleted immediately.

    Raises:
        ValueError: If a label-map row does not have exactly two
            ``:``-separated fields, if ``FLAGS.label_map_format`` is not
            ``'csv'``, or if ``FLAGS.use_normalized_coordinates`` is false
            and the signature has no ``image_info`` output.
    """
    del unused_argv
    # Load the label map.
    print(' - Loading the label map...')
    label_map_dict = {}
    if FLAGS.label_map_format == 'csv':
        with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
            # Fields are separated by ':' rather than ','.
            reader = csv.reader(csv_file, delimiter=':')
            for row in reader:
                if len(row) != 2:
                    raise ValueError(
                        'Each row of the csv label map file must be in '
                        '`id:name` format.')
                id_index = int(row[0])
                name = row[1]
                label_map_dict[id_index] = {
                    'id': id_index,
                    'name': name,
                }
    else:
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))

    # A fresh graph is used so the loaded SavedModel does not mix with any
    # ops already present in the default graph.
    with tf.Session(graph=tf.Graph()) as sess:
        print(' - Loading saved model...')
        meta_graph_def = tf.saved_model.load(
            sess, [tf.saved_model.tag_constants.SERVING],
            FLAGS.saved_model_dir)
        print(dict(meta_graph_def.signature_def['serving_default'].inputs))
        inputs = dict(meta_graph_def.signature_def['serving_default'].inputs)
        outputs = dict(meta_graph_def.signature_def['serving_default'].outputs)

        # NOTE(review): `image_node` is a newly created placeholder, not a
        # tensor name taken from `inputs` (which is only printed above).
        # Feeding it may not reach the loaded model's real input -- confirm
        # and, if so, feed the signature's input tensor instead.
        image_node = tf.placeholder(shape=(), dtype=tf.string)

        # Map of result key -> tensor name to fetch from the session.
        output_nodes = {
            'num_detections': outputs['num_detections'].name,
            'detection_boxes': outputs['detection_boxes'].name,
            'detection_classes': outputs['detection_classes'].name,
            'detection_attributes': outputs['detection_attributes'].name,
            'detection_scores': outputs['detection_scores'].name,
        }
        # Masks are optional in the signature.
        if 'detection_masks' in outputs:
            output_nodes['detection_masks'] = outputs['detection_masks'].name
        if not FLAGS.use_normalized_coordinates:
            if 'image_info' not in outputs:
                raise ValueError(
                    'If `use_normalized_coordinates` = False, `image_info`'
                    ' node must be included in the SavedModel.')
            output_nodes['image_info'] = outputs['image_info'].name

        image_with_detections_list = []
        image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
        for i, image_file in enumerate(image_files):
            print(' - processing image %d...' % i)
            # Raw encoded bytes of the file; this is what gets fed below.
            with tf.gfile.GFile(image_file, 'rb') as f:
                image_bytes = f.read()
            # The same file is opened again with PIL to obtain pixel data for
            # visualisation.
            image = Image.open(image_file)
            #print(image)
            image = image.convert('RGB')  # needed for images with 4 channels.
            width, height = image.size
            # NOTE(review): `image_as_string` (JPEG re-encoding of the RGB
            # image) is computed but never used afterwards.
            output1 = io.BytesIO()
            image.save(output1, format="JPEG")
            image_as_string = output1.getvalue()

            # Pixel data as a (height, width, 3) uint8 array.
            np_image = (np.array(image.getdata()).reshape(height, width,
                                                          3).astype(np.uint8))

            # NOTE(review): `np_image_input` is only printed; it is not part
            # of the feed_dict below.
            np_image_input = input_utils.normalize_image_np(np_image)
            np_image_input = np.float32(np_image_input.reshape(-1))
            print(np_image_input)
            #np_image_input = np.float32(np_image_input.reshape(1))
            output_results = sess.run(output_nodes,
                                      feed_dict={image_node: image_bytes})

            # Index 0 selects the first batch entry; only the first
            # `num_detections` rows are kept.
            num_detections = int(output_results['num_detections'][0])
            np_boxes = output_results['detection_boxes'][0, :num_detections]
            if not FLAGS.use_normalized_coordinates:
                np_image_info = output_results['image_info'][0]
                # Row 1 of image_info is tiled twice to divide all four box
                # coordinates -- presumably the (height, width) the boxes are
                # expressed in; verify against the exporter.
                np_boxes = np_boxes / np.tile(np_image_info[1:2, :], (1, 2))
            # Scale (ymin, xmin, ymax, xmax) by the original image size.
            # NOTE(review): this scaling runs regardless of
            # `use_normalized_coordinates` -- confirm that is intended.
            ymin, xmin, ymax, xmax = np.split(np_boxes, 4, axis=-1)
            ymin = ymin * height
            ymax = ymax * height
            xmin = xmin * width
            xmax = xmax * width
            np_boxes = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)
            np_scores = output_results['detection_scores'][0, :num_detections]
            np_classes = output_results['detection_classes'][
                0, :num_detections]
            np_classes = np_classes.astype(np.int32)
            # NOTE(review): `np_attributes` is extracted but not used below.
            np_attributes = output_results['detection_attributes'][
                0, :num_detections, :]
            np_masks = None
            if 'detection_masks' in output_results:
                np_masks = output_results['detection_masks'][
                    0, :num_detections]
                # Boxes are converted from yxyx to xywh before the instance
                # masks are pasted at full image size.
                np_masks = mask_utils.paste_instance_masks(
                    np_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

            image_with_detections = (
                visualization_utils.visualize_boxes_and_labels_on_image_array(
                    np_image,
                    np_boxes,
                    np_classes,
                    np_scores,
                    label_map_dict,
                    instance_masks=np_masks,
                    use_normalized_coordinates=False,
                    max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                    min_score_thresh=FLAGS.min_score_threshold))
            image_with_detections_list.append(image_with_detections)

        print(' - Saving the outputs...')
        formatted_image_with_detections_list = [
            Image.fromarray(image.astype(np.uint8))
            for image in image_with_detections_list
        ]
        # Build one HTML page with every annotated image inlined as a
        # base64-encoded JPEG data URI.
        html_str = '<html>'
        image_strs = []
        for formatted_image in formatted_image_with_detections_list:
            with io.BytesIO() as stream:
                formatted_image.save(stream, format='JPEG')
                data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
            image_strs.append(
                '<img src="data:image/jpeg;base64,{}", height=800>'.format(
                    data_uri))
        images_str = ' '.join(image_strs)
        html_str += images_str
        html_str += '</html>'
        with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
            f.write(html_str)
Exemplo n.º 30
0
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias=0.1,
        attention_size=10,
    ):
        """Build the graph: two bidirectional LSTM stacks with attention.

        A first bidirectional RNN reads ``self.X``; an attention-weighted
        summary of each direction is concatenated with part of that
        direction's final state and used as the initial state of a second
        bidirectional RNN (under the ``decoder`` variable scope), which reads
        ``self.X`` again. Attention summaries of the second RNN's two
        directions are concatenated and projected to ``output_size``; the
        cost is the mean squared difference to ``self.Y``, minimised by Adam.

        Args:
            learning_rate: Learning rate passed to ``tf.train.AdamOptimizer``.
            num_layers: Number of ``LSTMCell``s stacked in each
                ``MultiRNNCell``.
            size: Size of the last dimension of the ``self.X`` placeholder.
            size_layer: Number of units of every ``LSTMCell``.
            output_size: Second dimension of ``self.Y`` and number of units of
                the final dense layer.
            forget_bias: Despite its name, this value is passed as
                ``output_keep_prob`` to every ``DropoutWrapper``.
            attention_size: Length of each attention vector and number of
                units of the query/key dense projections.
        """
        def lstm_cell():
            # state_is_tuple=False: cell and hidden state are kept in one
            # concatenated tensor, which the column slices below rely on.
            return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

        backward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
        forward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
        # Input sequences and regression targets.
        self.X = tf.placeholder(tf.float32, [None, None, size])
        self.Y = tf.placeholder(tf.float32, [None, output_size])
        drop_backward = tf.nn.rnn_cell.DropoutWrapper(
            backward_rnn_cells, output_keep_prob=forget_bias)
        drop_forward = tf.nn.rnn_cell.DropoutWrapper(
            forward_rnn_cells, output_keep_prob=forget_bias)
        # Externally fed initial states for the first RNN, one per direction;
        # width is num_layers * 2 * size_layer.
        self.backward_hidden_layer = tf.placeholder(tf.float32,
                                                    shape=(None, num_layers *
                                                           2 * size_layer))
        self.forward_hidden_layer = tf.placeholder(tf.float32,
                                                   shape=(None, num_layers *
                                                          2 * size_layer))
        outputs, last_state = tf.nn.bidirectional_dynamic_rnn(
            drop_forward,
            drop_backward,
            self.X,
            initial_state_fw=self.forward_hidden_layer,
            initial_state_bw=self.backward_hidden_layer,
            dtype=tf.float32,
        )
        # Convert the (forward, backward) tuple to a list so that each entry
        # can be replaced in place.
        outputs = list(outputs)

        # --- Attention over the forward outputs of the first RNN ---
        attention_w = tf.get_variable('attention_v1', [attention_size],
                                      tf.float32)
        # Query: final-state columns from `size_layer` onward, with a
        # singleton axis inserted at position 1 so it broadcasts against
        # `keys`.
        query = tf.layers.dense(
            tf.expand_dims(last_state[0][:, size_layer:], 1), attention_size)
        keys = tf.layers.dense(outputs[0], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        # Scores are squashed with tanh (not normalised with softmax).
        align = tf.nn.tanh(align)
        # Weighted sum of the outputs along axis 1 using `align` as weights.
        outputs[0] = tf.squeeze(
            tf.matmul(tf.transpose(outputs[0], [0, 2, 1]),
                      tf.expand_dims(align, 2)),
            2,
        )
        # Concatenate the summary with the sliced final state; the result is
        # used below as the second RNN's forward initial state.
        outputs[0] = tf.concat([outputs[0], last_state[0][:, size_layer:]], 1)

        # --- Attention over the backward outputs of the first RNN ---
        attention_w = tf.get_variable('attention_v2', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(last_state[1][:, size_layer:], 1), attention_size)
        keys = tf.layers.dense(outputs[1], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        outputs[1] = tf.squeeze(
            tf.matmul(tf.transpose(outputs[1], [0, 2, 1]),
                      tf.expand_dims(align, 2)),
            2,
        )
        outputs[1] = tf.concat([outputs[1], last_state[1][:, size_layer:]], 1)

        # Second bidirectional RNN; a separate variable scope keeps its
        # weights distinct from those of the first RNN.
        with tf.variable_scope('decoder', reuse=False):
            self.backward_rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
            self.forward_rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
            backward_drop_dec = tf.nn.rnn_cell.DropoutWrapper(
                self.backward_rnn_cells_dec, output_keep_prob=forget_bias)
            forward_drop_dec = tf.nn.rnn_cell.DropoutWrapper(
                self.forward_rnn_cells_dec, output_keep_prob=forget_bias)
            # Reads the same input `self.X`, initialised from the
            # attention-augmented states computed above.
            self.outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
                forward_drop_dec,
                backward_drop_dec,
                self.X,
                initial_state_fw=outputs[0],
                initial_state_bw=outputs[1],
                dtype=tf.float32,
            )
        self.outputs = list(self.outputs)

        # --- Attention over the forward outputs of the second RNN ---
        attention_w = tf.get_variable('attention_v3', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(self.last_state[0][:, size_layer:], 1),
            attention_size,
        )
        keys = tf.layers.dense(self.outputs[0], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        self.outputs[0] = tf.squeeze(
            tf.matmul(
                tf.transpose(self.outputs[0], [0, 2, 1]),
                tf.expand_dims(align, 2),
            ),
            2,
        )

        # --- Attention over the backward outputs of the second RNN ---
        attention_w = tf.get_variable('attention_v4', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(self.last_state[1][:, size_layer:], 1),
            attention_size,
        )
        keys = tf.layers.dense(self.outputs[1], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        self.outputs[1] = tf.squeeze(
            tf.matmul(
                tf.transpose(self.outputs[1], [0, 2, 1]),
                tf.expand_dims(align, 2),
            ),
            2,
        )
        # Join the two directional summaries and project to the output size.
        self.outputs = tf.concat(self.outputs, 1)
        self.logits = tf.layers.dense(self.outputs, output_size)
        # Mean squared error between targets and predictions.
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.cost)