Example #1
File: dqn.py  Project: rmst/chi
        def train(o: [so], a: (tf.int32, [[]]), r, t: tf.bool, o2: [so]):
            q = q_network(o)
            # ac = tf.argmax(q, axis=1)

            # compute targets
            q2 = q_network.tracked(o2)

            if double_dqn:
                a2 = tf.argmax(q_network(o2), axis=1)  # yep, that's really the only difference
            else:
                a2 = tf.argmax(q2, axis=1)

            mask2 = tf.one_hot(a2, env.action_space.n, 1.0, 0.0, axis=1)
            q_target = tf.where(t, r, r + 0.99 * tf.reduce_sum(q2 * mask2, axis=1))
            q_target = tf.stop_gradient(q_target)

            # compute loss
            mask = tf.one_hot(a, env.action_space.n, 1.0, 0.0, axis=1)
            qs = tf.reduce_sum(q * mask, axis=1, name='q_max')  # Q-value of the action actually taken
            td = tf.subtract(q_target, qs, name='td')
            # td = tf.clip_by_value(td, -10, 10)
            # loss = tf.reduce_mean(tf.abs(td), axis=0, name='mae')
            # loss = tf.where(tf.abs(td) < 1.0, 0.5 * tf.square(td), tf.abs(td) - 0.5, name='mse_huber')
            loss = tf.reduce_mean(tf.square(td), axis=0, name='mse')

            loss = q_network.minimize(loss)

            # logging
            layers.summarize_tensors([td, loss, r, o, a,
                                      tf.subtract(o2, o, name='state_dif'),
                                      tf.reduce_mean(tf.cast(t, tf.float32), name='frac_terminal'),
                                      tf.subtract(tf.reduce_max(q, 1, True), q, name='av_advantage')])
            # layers.summarize_tensors(chi.activations())
            # layers.summarize_tensors(chi.gradients())
            return loss
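
For reference, the target computation above is the standard (double) DQN backup. A self-contained restatement in plain TensorFlow 1.x, as a sketch only; the function name and argument layout are mine, not chi's API:

import tensorflow as tf

def dqn_target(r, t, q2_target, q2_online=None, gamma=0.99):
    # Bellman backup r + gamma * Q_target(s', a*), with no bootstrapping on terminal steps.
    # Double DQN selects a* with the online network but evaluates it with the target
    # network; vanilla DQN uses the target network for both (the "only difference" above).
    a2 = tf.argmax(q2_online if q2_online is not None else q2_target, axis=1)
    mask = tf.one_hot(a2, tf.shape(q2_target)[1], 1.0, 0.0, axis=1)
    bootstrap = tf.reduce_sum(q2_target * mask, axis=1)
    return tf.stop_gradient(tf.where(t, r, r + gamma * bootstrap))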
Example #2
 def act(o: [so], noise=True):
     with arg_scope([layers.batch_norm], is_training=False):
         s = preprocess(o)
         a = actors(s, noise=noise)
         q = critics(s, a)
         layers.summarize_tensors([s, *a, *q])
         return a
Example #3
File: ddpg.py  Project: jtib/chi-rl-alg
 def act(o: [so], noisy=True):
     with arg_scope([layers.batch_norm], is_training=False):
         s = preprocess(o)
         a = actor(s, noise=noisy)
         a = smart_cond(noisy, lambda: noise(a), lambda: a)
         q = critic(s, a)
         layers.summarize_tensors([s, a, q])
         return a
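
The only change from Example #2 is that exploration noise is applied conditionally through smart_cond, which short-circuits on plain Python booleans instead of always building both branches. A minimal sketch of the pattern (the import path moved around across TF 1.x releases, and the additive noise here is my stand-in):

import tensorflow as tf
from tensorflow.python.framework.smart_cond import smart_cond

a = tf.constant([0.1, 0.2])
noisy = True  # a Python bool: smart_cond folds the choice at graph-construction time

# With a Python bool only the chosen branch is built; with a bool tensor,
# smart_cond falls back to tf.cond and both branches enter the graph.
a_out = smart_cond(noisy,
                   lambda: a + tf.random_normal(tf.shape(a), stddev=0.1),
                   lambda: a)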
Example #4
File: wgan.py  Project: rmst/chi
    def train_generator():
        z = tf.random_normal([m, 100])  # m: batch size; 100-dim latent prior
        x = generator(z)
        loss = -critic(x)  # the generator ascends the critic's score
        loss = generator.minimize(loss)

        # logging
        tf.summary.image('x', x, max_outputs=16)
        layers.summarize_tensors(chi.activations() +
                                 generator.trainable_variables() +
                                 critic.trainable_variables())
        return loss
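
This example shows only the generator step; in WGAN the critic is trained in tandem to maximize the score gap between real and generated samples. A sketch of that companion update under the original weight-clipping formulation, where critic, generator, the 'critic' variable scope, and the hyperparameters are all my assumptions rather than chi's API:

import tensorflow as tf

def critic_step(critic, generator, real_images, m=64, c=0.01, lr=5e-5):
    z = tf.random_normal([m, 100])
    fake = generator(z)
    # Negated Wasserstein estimate: minimizing it maximizes E[f(real)] - E[f(fake)].
    loss = tf.reduce_mean(critic(fake)) - tf.reduce_mean(critic(real_images))
    critic_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic')
    train = tf.train.RMSPropOptimizer(lr).minimize(loss, var_list=critic_vars)
    # Weight clipping crudely enforces the 1-Lipschitz constraint on the critic.
    with tf.control_dependencies([train]):
        return tf.group(*[v.assign(tf.clip_by_value(v, -c, c)) for v in critic_vars])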
Example #5
        def log_weights():
            v = q_network.trainable_variables()
            # print(f'log weights {v}')

            f = q_network.tracker_variables
            # print(f'log weights EMA {f}')

            difs = []
            for g in v:
                a = q_network.tracker.average(g)
                difs.append(tf.subtract(g, a, name=f'ema/dif{g.name[:-2]}'))

            layers.summarize_tensors(v + f + difs)
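
The tracker in this snippet maintains an exponential moving average of the Q-network weights (the EMA in the commented-out print), and tracker.average matches the tf.train.ExponentialMovingAverage API. A minimal standalone sketch of that mechanism, assuming chi wraps it roughly like this:

import tensorflow as tf

w = tf.Variable([1.0, 2.0], name='w')
ema = tf.train.ExponentialMovingAverage(decay=0.999)
maintain_op = ema.apply([w])  # run alongside each training step

shadow_w = ema.average(w)  # the slow-moving tracked copy of w
drift = tf.subtract(w, shadow_w, name='ema/dif_w')  # what log_weights summarizes

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(maintain_op)
    print(sess.run(drift))  # all zeros until w moves away from its EMA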
Example #6
        def train_step(o: [observation_shape], a: (tf.int32, [[]]), r,
                       t: tf.bool, o2: [observation_shape]):
            q = q_network(o)
            # ac = tf.argmax(q, axis=1)

            # compute targets
            q2 = q_network.tracked(o2)

            if double_dqn:
                a2 = tf.argmax(
                    q_network(o2),
                    axis=1)  # yep, that's really the only difference
            else:
                a2 = tf.argmax(q2, axis=1)

            mask2 = tf.one_hot(a2, n_actions, 1.0, 0.0, axis=1)
            q_target = tf.where(
                t, r, r + self.discount * tf.reduce_sum(q2 * mask2, axis=1))
            q_target = tf.stop_gradient(q_target)

            # compute loss
            mask = tf.one_hot(a, n_actions, 1.0, 0.0, axis=1)
            qs = tf.reduce_sum(q * mask, axis=1, name='q_max')  # Q-value of the action actually taken
            td = tf.subtract(q_target, qs, name='td')
            if clip_td:
                td = tf.clip_by_value(td, -.5, .5, name='clipped_td')
            # loss = tf.reduce_mean(tf.abs(td), axis=0, name='mae')
            # loss = tf.where(tf.abs(td) < 1.0, 0.5 * tf.square(td), tf.abs(td) - 0.5, name='mse_huber')
            loss = tf.reduce_mean(tf.square(td), axis=0, name='mse')

            gav = q_network.compute_gradients(loss)
            if clip_gradients:
                gav = [(tf.clip_by_norm(g, clip_gradients), v) for g, v in gav]
            loss_update = q_network.apply_gradients(gav)

            # logging
            layers.summarize_tensors([
                td, loss, r, o, a,
                tf.subtract(o2, o, name='state_dif'),
                tf.reduce_mean(tf.cast(t, tf.float32), name='frac_terminal'),
                tf.subtract(tf.reduce_max(q, 1, True), q, name='av_advantage')
            ])
            # layers.summarize_tensors(chi.activations())
            # layers.summarize_tensors(chi.gradients())
            return loss_update
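
The compute_gradients / apply_gradients split above mirrors the standard tf.train.Optimizer interface, which is what exposes the gradients for per-tensor norm clipping. The same pattern without the chi wrapper, as a sketch:

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x)

opt = tf.train.AdamOptimizer(1e-4)
grads_and_vars = opt.compute_gradients(loss)
clipped = [(tf.clip_by_norm(g, 10.0), v)
           for g, v in grads_and_vars if g is not None]
train_op = opt.apply_gradients(clipped)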
Example #7
 def log_returns(rret: [], ret: [], qs, q_minus_ret, duration: []):
     layers.summarize_tensors(
         [rret, ret, qs, q_minus_ret, duration])
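
summarize_tensors (from tf.contrib.layers) picks a summary type per tensor: scalar summaries for rank-0 tensors, histograms otherwise. A rough hand-rolled equivalent, as a sketch of what these logging calls amount to:

import tensorflow as tf

def summarize(tensors):
    for x in tensors:
        if x.get_shape().ndims == 0:
            tf.summary.scalar(x.op.name, x)
        else:
            tf.summary.histogram(x.op.name, x)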
Example #8
def move_gen_cnn_model_fn(features, labels, mode, params):
    """
    Generates an EstimatorSpec for the model.
    """
    def numpy_style_repeat_1d(input, multiples):
        # np.repeat-style: emit input[i] repeated multiples[i] times.
        # The hard-coded tf.ones([100, 1]) caps the repeat count at 100.
        tiled_input = tf.multiply(tf.ones([100, 1]), input)
        return tf.boolean_mask(tiled_input, tf.sequence_mask(multiples))

    inception_module_outputs, activation_summaries = build_convolutional_modules(
        features["board"], params['inception_modules'], mode,
        params['kernel_initializer'], params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    if not params["conv_init_fn"] is None:
        params["conv_init_fn"]()

    # Build the fully connected layers
    dense_layers_outputs, activation_summaries = build_fully_connected_layers_with_batch_norm(
        inception_module_outputs,
        params['dense_shape'],
        params['kernel_initializer'],
        mode,
        activation_summaries=activation_summaries)

    # Create the final layer of the ANN
    logits = tf.layers.dense(inputs=dense_layers_outputs,
                             units=params['num_outputs'],
                             use_bias=False,
                             activation=None,
                             kernel_initializer=layers.xavier_initializer(),
                             name="logit_layer")

    loss = None
    train_op = None

    legal_move_logits = tf.gather_nd(logits, features["legal_move_indices"])

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope("loss"):
            loss = tf.losses.mean_squared_error(legal_move_logits,
                                                features["desired_scores"])
            loss_scalar_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"the_move_values": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default":
        tf.estimator.export.ClassificationOutput(scores=legal_move_logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        calculated_best_move_scores = tf.gather(
            legal_move_logits, features['desired_move_indices'])

        repeated_best_scores = numpy_style_repeat_1d(
            calculated_best_move_scores, features['num_moves'])

        ratio_moves_below_best = tf.reduce_mean(
            tf.cast(tf.greater_equal(repeated_best_scores, legal_move_logits),
                    dtype=np.float32))

        diff_from_desired = legal_move_logits - features["desired_scores"]

        mean_diff_from_desired = tf.reduce_mean(diff_from_desired)
        mean_calculated_value = tf.reduce_mean(legal_move_logits)

        to_create_metric_dict = {
            "loss/loss": (loss, loss_scalar_summary),
            "metrics/ratio_moves_below_best":
            ratio_moves_below_best,
            "metrics/mean_evaluation_value":
            mean_calculated_value,
            "metrics/mean_abs_evaluation_value":
            tf.abs(legal_move_logits),
            "metrics/mean_expected_value":
            features["desired_scores"],
            "metrics/mean_abs_expected_value":
            abs(features["desired_scores"]),
            "metrics/distance_from_desired":
            mean_diff_from_desired,
            "metrics/abs_distance_from_desired":
            tf.abs(diff_from_desired),
            "metrics/relative_distance_from_desired":
            tf.abs(mean_diff_from_desired / mean_calculated_value),
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    )  # Not sure if this needs to be stored in a variable; should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
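
numpy_style_repeat_1d above is the graph-mode analogue of np.repeat, used to broadcast each board's best-move score across that board's legal moves. A self-contained sketch of the same tile-then-mask idea with the shapes made explicit (the names are mine):

import tensorflow as tf

def repeat_1d(values, multiples, max_multiple=100):
    # Row i of `tiled` is values[i] repeated max_multiple times; the mask then
    # keeps the first multiples[i] entries of each row.
    tiled = tf.tile(tf.expand_dims(values, 1), [1, max_multiple])
    mask = tf.sequence_mask(multiples, maxlen=max_multiple)
    return tf.boolean_mask(tiled, mask)

with tf.Session() as sess:
    out = repeat_1d(tf.constant([1.0, 2.0]), tf.constant([3, 1]))
    print(sess.run(out))  # [1. 1. 1. 2.]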
Example #9
def board_eval_model_fn(features, labels, mode, params):
    """
    Generates an EstimatorSpec for the model.
    """

    if mode == tf.estimator.ModeKeys.PREDICT:
        input_layer = features["feature"]
    else:
        # Fold the leading dimension of the original [-1, 3, 8, 8, 16] features
        # into the batch so each position in a triplet is scored independently.
        input_layer = tf.reshape(features,
                                 [-1, 8, 8, params['num_input_filters']])

    inception_module_outputs, activation_summaries = build_convolutional_modules(
        input_layer, params['inception_modules'], mode,
        params['kernel_initializer'], params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    # Possibly also gate on: tf.train.global_step(tf.get_default_session(), tf.train.get_global_step()) == 0
    if params["conv_init_fn"] is not None:
        params["conv_init_fn"]()

    # Build the fully connected layers
    dense_layers_outputs, activation_summaries = build_fully_connected_layers_with_batch_norm(
        inception_module_outputs,
        params['dense_shape'],
        params['kernel_initializer'],
        mode,
        activation_summaries=activation_summaries)

    # Create the final layer of the ANN
    logits = tf.layers.dense(inputs=dense_layers_outputs,
                             units=params['num_outputs'],
                             use_bias=False,
                             activation=None,
                             kernel_initializer=params['kernel_initializer'](),
                             name="logit_layer")

    loss = None
    train_op = None
    ratio_old_new_sum_loss_to_negative_sum = None

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        to_split = tf.reshape(logits, [-1, 3])
        original_pos, desired_pos, random_pos = tf.split(
            to_split, [1, 1, 1], 1)

        # Implementing an altered version of the loss function defined in Deep Pink.
        # A few other variants are left commented out below, though none seemed
        # to work as well as the one proposed in Deep Pink.
        with tf.variable_scope("loss"):
            # adjusted_equality_sum = (original_pos + CONSTANT + desired_pos)
            adjusted_equality_sum = (original_pos + desired_pos)
            adjusted_real_rand_sum = (random_pos - desired_pos)

            real_greater_rand_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(adjusted_real_rand_sum)))

            # test_new_loss_component = tf.reduce_mean(-tf.log(tf.sigmoid(random_pos + original_pos)))
            ## test_new_loss_component = tf.reduce_mean(-tf.log(tf.sigmoid(-(original_pos + random_pos))))
            # test_new_loss_component_summary = tf.summary.scalar("test_new_loss_component", test_new_loss_component)

            equality_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(adjusted_equality_sum)))
            negative_equality_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(-adjusted_equality_sum)))

            ratio_old_new_sum_loss_to_negative_sum = tf.divide(
                equality_scalar_loss, negative_equality_scalar_loss)

            real_rand_loss_summary = tf.summary.scalar(
                "real_greater_rand_loss", real_greater_rand_scalar_loss)

            equality_sum_loss_summary = tf.summary.scalar(
                "mean_original_plus_desired_loss", equality_scalar_loss)
            negative_equality_sum_loss_summary = tf.summary.scalar(
                "mean_negative_original_plus_desired",
                negative_equality_scalar_loss)

            # loss = real_greater_rand_scalar_loss
            # loss = real_greater_rand_scalar_loss + test_new_loss_component
            loss = real_greater_rand_scalar_loss + equality_scalar_loss + negative_equality_scalar_loss
            # loss = real_greater_rand_scalar_loss + equality_scalar_loss + negative_equality_scalar_loss + test_new_loss_component

            loss_summary = tf.summary.scalar("loss", loss)

            ########################################################################################################

            # the_labels = tf.tile(tf.constant([[0, 0, 1]]), [tf.shape(to_split)[0], 1])
            #
            # softmax_logits = to_split * tf.constant([[-1,1,1]], dtype=tf.float32)
            #
            # cross_entropy_loss = tf.losses.softmax_cross_entropy(the_labels, softmax_logits)
            #
            # old_real_sum_squared_scalar_loss = tf.reduce_mean(tf.square(2*(original_pos + desired_pos)))
            #
            # loss = cross_entropy_loss + old_real_sum_squared_scalar_loss
            #
            # cross_entropy_summary = tf.summary.scalar("cross_entropy_loss", cross_entropy_loss)
            # old_real_sum_squared_summary = tf.summary.scalar("old_real_sum_squared_loss", old_real_sum_squared_scalar_loss)
            # loss_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"scores": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default": tf.estimator.export.RegressionOutput(value=logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        old_plus_desired = original_pos + desired_pos
        rand_real_diff = random_pos - desired_pos

        abs_rand_real_diff = tf.abs(rand_real_diff)

        abs_old_plus_desired = tf.abs(old_plus_desired)
        mean_abs_old_plus_desired = tf.reduce_mean(abs_old_plus_desired)

        # NOTE: despite the name, the numerator is the signed mean of
        # rand_real_diff, not its absolute value.
        abs_randreal_realold_ratio = tf.reduce_mean(
            rand_real_diff) / mean_abs_old_plus_desired

        rand_vs_real_accuracy = tf.cast(tf.less(desired_pos, random_pos),
                                        tf.float32)

        to_create_metric_dict = {
            "metrics/rand_vs_real_accuracy":
            rand_vs_real_accuracy,
            "metrics/mean_dist_rand_real":
            rand_real_diff,
            "metrics/mean_abs_rand_real_diff":
            abs_rand_real_diff,
            "metrics/mean_dist_old_real":
            old_plus_desired,
            "metrics/mean_abs_dist_old_real":
            mean_abs_old_plus_desired,  #abs_old_plus_desired,
            "metrics/abs_randreal_realold_ratio":
            abs_randreal_realold_ratio,
            "metrics/mean_old_pos":
            original_pos,
            "metrics/mean_new_pos":
            desired_pos,
            "metrics/mean_random_pos":
            random_pos,
            "metrics/mean_abs_old_pos":
            tf.abs(original_pos),
            "metrics/mean_abs_new_pos":
            tf.abs(desired_pos),
            "metrics/mean_abs_random_pos":
            tf.abs(random_pos),

            # "loss/cross_entropy_loss" : (cross_entropy_loss, cross_entropy_summary),
            # "loss/old_real_sum_squared_loss" : (old_real_sum_squared_scalar_loss, old_real_sum_squared_summary),

            # "loss/test_new_loss_component" : (test_new_loss_component, test_new_loss_component_summary),
            "loss/real_greater_rand_loss":
            (real_greater_rand_scalar_loss, real_rand_loss_summary),
            "loss/mean_original_plus_desired_loss":
            (equality_scalar_loss, equality_sum_loss_summary),
            "loss/mean_negative_original_plus_desired":
            (negative_equality_scalar_loss,
             negative_equality_sum_loss_summary),
            "loss/ratio_old_new_sum_loss_to_negative_sum":
            ratio_old_new_sum_loss_to_negative_sum,
            "loss/loss": (loss, loss_summary),
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    )  # Not sure if this needs to be stored in a variable; should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
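
In equation form, with o, d, r the network's scores for the original, desired, and random positions and σ the sigmoid, the loss assembled above is

    L = E[−log σ(r − d)] + E[−log σ(o + d)] + E[−log σ(−(o + d))]

The first term orders the random score against the desired one; the last two terms are jointly minimized when o + d = 0, tying the original and desired scores together as negatives of one another.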
Example #10
def encoder_builder_fn(features, labels, mode, params):
    """
    Generates an EstimatorSpec for the model.
    """

    if mode == tf.estimator.ModeKeys.PREDICT:
        input_layer = features["data"]
    else:
        input_layer = tf.reshape(features,
                                 [-1, 8, 8, params['num_input_filters']])

    logits, activation_summaries = build_convolutional_modules(
        input_layer, params['inception_modules'], mode,
        params['kernel_initializer'], params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    if not params["conv_init_fn"] is None:
        params["conv_init_fn"]()

    loss = None
    legal_move_loss = None
    pieces_loss = None
    train_op = None
    legal_move_summary = None
    pieces_loss_summary = None
    loss_summary = None

    # A square is empty iff no piece channel is set there; append emptiness as
    # an extra label channel.
    empty_squares = tf.expand_dims(1 - tf.reduce_sum(input_layer, axis=3),
                                   axis=3)
    one_hot_piece_labels = tf.concat([input_layer, empty_squares], axis=3)

    piece_logit_slices = logits[..., :16]  # piece-identity head
    move_logit_slices = logits[..., 16:]   # move-legality head

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope("loss"):

            index_to_move_dict = {
                value: key
                for key, value in generate_move_to_enumeration_dict().items()
            }
            possible_move_indices = tf.constant(
                [[index_to_move_dict[j][0], index_to_move_dict[j][1]]
                 for j in range(len(index_to_move_dict))],
                dtype=tf.int32)

            legal_move_ints = tf.transpose(tf.to_int32(labels))

            move_logits_to_from_format = tf.transpose(
                tf.reshape(move_logit_slices, (-1, 64, 64)), perm=[1, 2, 0])

            possible_move_logits = tf.gather_nd(move_logits_to_from_format,
                                                possible_move_indices)

            pieces_loss = tf.losses.softmax_cross_entropy(
                tf.reshape(one_hot_piece_labels, (-1, 16)),
                tf.reshape(piece_logit_slices, (-1, 16)))
            legal_move_loss = tf.losses.sigmoid_cross_entropy(
                legal_move_ints, possible_move_logits)

            loss = pieces_loss + legal_move_loss

            pieces_loss_summary = tf.summary.scalar("pieces_loss", pieces_loss)
            legal_move_summary = tf.summary.scalar("legal_move_loss",
                                                   legal_move_loss)
            loss_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"scores": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default": tf.estimator.export.RegressionOutput(value=logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        piece_predictions = tf.nn.softmax(piece_logit_slices, axis=3)

        calculated_diff = piece_predictions - one_hot_piece_labels

        filter_diff_sums = tf.reduce_sum(calculated_diff, axis=[1, 2])

        mean_abs_diffs = tf.reduce_mean(tf.abs(filter_diff_sums), axis=0)

        to_create_metric_dict = {
            "loss/pieces_loss": (pieces_loss, pieces_loss_summary),
            "loss/legal_move_loss": (legal_move_loss, legal_move_summary),
            "loss/loss": (loss, loss_summary),
            "metrics/mean_abs_ep_diff":
            mean_abs_diffs[0],
            "metrics/mean_abs_unoccupied_diff":
            mean_abs_diffs[15],
            "metrics/mean_abs_king_diff":
            (mean_abs_diffs[1] + mean_abs_diffs[8]) / 2,
            "metrics/mean_abs_queen_diff":
            (mean_abs_diffs[2] + mean_abs_diffs[9]) / 2,
            "metrics/mean_abs_not_castling_rook_diff":
            (mean_abs_diffs[3] + mean_abs_diffs[10]) / 2,
            "metrics/mean_abs_bishop_diff":
            (mean_abs_diffs[4] + mean_abs_diffs[11]) / 2,
            "metrics/mean_abs_knight_diff":
            (mean_abs_diffs[5] + mean_abs_diffs[12]) / 2,
            "metrics/mean_abs_pawn_diff":
            (mean_abs_diffs[6] + mean_abs_diffs[13]) / 2,
            "metrics/mean_abs_can_castle_rook_diff":
            (mean_abs_diffs[7] + mean_abs_diffs[14]) / 2,
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    )  # Not sure if this needs to be stored in a variable; should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
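
The move head above treats legality as one binary classification per enumerable (from, to) square pair: logits are reshaped to a 64×64 square-pair grid, gather_nd pulls out only the pairs that correspond to real chess moves, and sigmoid cross-entropy is applied against the legal-move labels. A sketch of that gather with hypothetical shapes and a made-up three-move table:

import tensorflow as tf

batch = 2
move_logits = tf.random_normal([batch, 64 * 64])  # one logit per (from, to) pair

# [n_moves, 2] table of (from_square, to_square) pairs, normally built from
# generate_move_to_enumeration_dict(); these three rows are placeholders.
possible_move_indices = tf.constant([[8, 16], [8, 24], [57, 42]], dtype=tf.int32)

# Rearrange to [from, to, batch] so gather_nd can index by square pair.
as_board = tf.transpose(tf.reshape(move_logits, (-1, 64, 64)), perm=[1, 2, 0])
per_move_logits = tf.gather_nd(as_board, possible_move_indices)  # [n_moves, batch]

labels = tf.ones([3, batch], dtype=tf.int32)  # 1 where the move is legal
loss = tf.losses.sigmoid_cross_entropy(labels, per_move_logits)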
Example #11
File: dqn.py  Project: jtib/chi-rl-alg
 def log_returns(real_return: [], ret: [], qs):
     layers.summarize_tensors(
         [real_return, ret, qs,
          tf.subtract(ret, qs, name='R-Q')])