def train(o: [so], a: (tf.int32, [[]]), r, t: tf.bool, o2: [so]):
    q = q_network(o)
    # ac = tf.argmax(q, axis=1)

    # compute targets
    q2 = q_network.tracked(o2)
    if double_dqn:
        a2 = tf.argmax(q_network(o2), axis=1)  # yep, that's really the only difference
    else:
        a2 = tf.argmax(q2, axis=1)
    mask2 = tf.one_hot(a2, env.action_space.n, 1.0, 0.0, axis=1)
    q_target = tf.where(t, r, r + 0.99 * tf.reduce_sum(q2 * mask2, axis=1))
    q_target = tf.stop_gradient(q_target)

    # compute loss
    mask = tf.one_hot(a, env.action_space.n, 1.0, 0.0, axis=1)
    qs = tf.reduce_sum(q * mask, axis=1, name='q_max')
    td = tf.subtract(q_target, qs, name='td')
    # td = tf.clip_by_value(td, -10, 10)
    # loss = tf.reduce_mean(tf.abs(td), axis=0, name='mae')
    # loss = tf.where(tf.abs(td) < 1.0, 0.5 * tf.square(td), tf.abs(td) - 0.5, name='mse_huber')
    loss = tf.reduce_mean(tf.square(td), axis=0, name='mse')
    loss = q_network.minimize(loss)

    # logging
    layers.summarize_tensors([td, loss, r, o, a,
                              tf.subtract(o2, o, name='state_dif'),
                              tf.reduce_mean(tf.cast(t, tf.float32), name='frac_terminal'),
                              tf.subtract(tf.reduce_max(q, 1, True), q, name='av_advantage')])
    # layers.summarize_tensors(chi.activations())
    # layers.summarize_tensors(chi.gradients())
    return loss
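# Editor's aside (a sketch, not part of the original function): the commented-out
# 'mse_huber' line above is the Huber loss written by hand. A standalone version
# of the same piecewise-quadratic penalty, plus the TF 1.x built-in that computes
# it; `delta=1.0` matches the threshold in the commented line:
def huber_td_loss(td, delta=1.0):
    abs_td = tf.abs(td)
    # quadratic near zero, linear in the tails: bounds the gradient magnitude
    # for large TD errors, which is why DQN variants often prefer it over MSE
    return tf.reduce_mean(tf.where(abs_td < delta,
                                   0.5 * tf.square(td),
                                   delta * (abs_td - 0.5 * delta)))
# equivalently: loss = tf.losses.huber_loss(labels=q_target, predictions=qs, delta=1.0)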
def act(o: [so], noise=True):
    with arg_scope([layers.batch_norm], is_training=False):
        s = preprocess(o)
        a = actors(s, noise=noise)
        q = critics(s, a)
        layers.summarize_tensors([s, *a, *q])
        return a
def act(o: [so], noisy=True):
    with arg_scope([layers.batch_norm], is_training=False):
        s = preprocess(o)
        a = actor(s, noise=noisy)
        a = smart_cond(noisy, lambda: noise(a), lambda: a)
        q = critic(s, a)
        layers.summarize_tensors([s, a, q])
        return a
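# Editor's sketch (hypothetical, not from the source): `noise` above is not
# defined in this snippet. A minimal additive-Gaussian exploration helper with
# the same call shape, assuming actions live roughly in [-1, 1]; the stddev is
# a placeholder:
def noise(a, stddev=0.1):
    # perturb the deterministic action, then keep it inside the valid range
    return tf.clip_by_value(a + tf.random_normal(tf.shape(a), stddev=stddev),
                            -1.0, 1.0)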
def train_generator():
    z = tf.random_normal([m, 100])
    x = generator(z)
    loss = -critic(x)
    loss = generator.minimize(loss)

    # logging
    tf.summary.image('x', x, max_outputs=16)
    layers.summarize_tensors(chi.activations() +
                             generator.trainable_variables() +
                             critic.trainable_variables())
    return loss
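# Editor's sketch (an assumption, not from the source): `loss = -critic(x)` is
# the Wasserstein generator objective, which only makes sense alongside a critic
# trained on the dual objective. A minimal counterpart under the same
# conventions; `x_real` and the 0.01 clipping constant (from the original WGAN
# paper) are assumptions:
def train_critic(x_real):
    z = tf.random_normal([m, 100])
    x_fake = generator(z)
    # maximize critic(real) - critic(fake), i.e. minimize the negation
    loss = tf.reduce_mean(critic(x_fake)) - tf.reduce_mean(critic(x_real))
    loss = critic.minimize(loss)
    # WGAN keeps the critic approximately Lipschitz by clipping its weights
    # after each update
    with tf.control_dependencies([loss]):
        clip = tf.group(*[v.assign(tf.clip_by_value(v, -0.01, 0.01))
                          for v in critic.trainable_variables()])
    return clip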
def log_weights():
    v = q_network.trainable_variables()
    # print(f'log weights {v}')
    f = q_network.tracker_variables
    # print(f'log weights EMA {f}')
    difs = []
    for g in v:
        a = q_network.tracker.average(g)
        # g.name[:-2] drops the ':0' output suffix from the variable name
        difs.append(tf.subtract(g, a, name=f'ema/dif{g.name[:-2]}'))
    layers.summarize_tensors(v + f + difs)
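# Editor's note (a sketch under assumptions): `q_network.tracker` behaves like
# TF 1.x's built-in exponential-moving-average tracker. The equivalent plain-TF
# pattern, for reference; the 0.999 decay is a placeholder:
ema = tf.train.ExponentialMovingAverage(decay=0.999)
maintain_ema = ema.apply(tf.trainable_variables())  # run this op once per train step
shadow = ema.average(tf.trainable_variables()[0])   # the tracked (shadow) value of a variable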
def train_step(o: [observation_shape], a: (tf.int32, [[]]), r, t: tf.bool,
               o2: [observation_shape]):
    q = q_network(o)
    # ac = tf.argmax(q, axis=1)

    # compute targets
    q2 = q_network.tracked(o2)
    if double_dqn:
        a2 = tf.argmax(q_network(o2), axis=1)  # yep, that's really the only difference
    else:
        a2 = tf.argmax(q2, axis=1)
    mask2 = tf.one_hot(a2, n_actions, 1.0, 0.0, axis=1)
    q_target = tf.where(t, r, r + self.discount * tf.reduce_sum(q2 * mask2, axis=1))
    q_target = tf.stop_gradient(q_target)

    # compute loss
    mask = tf.one_hot(a, n_actions, 1.0, 0.0, axis=1)
    qs = tf.reduce_sum(q * mask, axis=1, name='q_max')
    td = tf.subtract(q_target, qs, name='td')
    if clip_td:
        td = tf.clip_by_value(td, -.5, .5, name='clipped_td')
    # loss = tf.reduce_mean(tf.abs(td), axis=0, name='mae')
    # loss = tf.where(tf.abs(td) < 1.0, 0.5 * tf.square(td), tf.abs(td) - 0.5, name='mse_huber')
    loss = tf.reduce_mean(tf.square(td), axis=0, name='mse')

    gav = q_network.compute_gradients(loss)
    if clip_gradients:
        gav = [(tf.clip_by_norm(g, clip_gradients), v) for g, v in gav]
    loss_update = q_network.apply_gradients(gav)

    # logging
    layers.summarize_tensors([
        td, loss, r, o, a,
        tf.subtract(o2, o, name='state_dif'),
        tf.reduce_mean(tf.cast(t, tf.float32), name='frac_terminal'),
        tf.subtract(tf.reduce_max(q, 1, True), q, name='av_advantage')
    ])
    # layers.summarize_tensors(chi.activations())
    # layers.summarize_tensors(chi.gradients())
    return loss_update
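# Editor's sketch (plain-TF equivalent, not from the source): the
# compute/clip/apply split above mirrors the stock tf.train.Optimizer API.
# The learning rate and clip norm below are placeholders:
opt = tf.train.AdamOptimizer(1e-4)
gav = opt.compute_gradients(loss)
# guard against None gradients for variables the loss does not depend on
gav = [(tf.clip_by_norm(g, 1.0), v) for g, v in gav if g is not None]
train_op = opt.apply_gradients(gav, global_step=tf.train.get_global_step())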
def log_returns(rret: [], ret: [], qs, q_minus_ret, duration: []):
    layers.summarize_tensors([rret, ret, qs, q_minus_ret, duration])
def move_gen_cnn_model_fn(features, labels, mode, params):
    """Generates an EstimatorSpec for the model."""

    def numpy_style_repeat_1d(input, multiples):
        # tile the input up to a fixed cap of 100 copies, then mask down to
        # the requested per-element repeat counts
        tiled_input = tf.multiply(tf.ones([100, 1]), input)
        return tf.boolean_mask(tiled_input, tf.sequence_mask(multiples))

    inception_module_outputs, activation_summaries = build_convolutional_modules(
        features["board"],
        params['inception_modules'],
        mode,
        params['kernel_initializer'],
        params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    if params["conv_init_fn"] is not None:
        params["conv_init_fn"]()

    # Build the fully connected layers
    dense_layers_outputs, activation_summaries = build_fully_connected_layers_with_batch_norm(
        inception_module_outputs,
        params['dense_shape'],
        params['kernel_initializer'],
        mode,
        activation_summaries=activation_summaries)

    # Create the final layer of the ANN
    logits = tf.layers.dense(inputs=dense_layers_outputs,
                             units=params['num_outputs'],
                             use_bias=False,
                             activation=None,
                             kernel_initializer=layers.xavier_initializer(),
                             name="logit_layer")

    loss = None
    train_op = None

    legal_move_logits = tf.gather_nd(logits, features["legal_move_indices"])

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope("loss"):
            loss = tf.losses.mean_squared_error(legal_move_logits,
                                                features["desired_scores"])
            loss_scalar_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"the_move_values": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default": tf.estimator.export.ClassificationOutput(scores=legal_move_logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        calculated_best_move_scores = tf.gather(legal_move_logits,
                                                features['desired_move_indices'])
        repeated_best_scores = numpy_style_repeat_1d(calculated_best_move_scores,
                                                     features['num_moves'])
        ratio_moves_below_best = tf.reduce_mean(
            tf.cast(tf.greater_equal(repeated_best_scores, legal_move_logits),
                    dtype=tf.float32))

        diff_from_desired = legal_move_logits - features["desired_scores"]
        mean_diff_from_desired = tf.reduce_mean(diff_from_desired)
        mean_calculated_value = tf.reduce_mean(legal_move_logits)

        to_create_metric_dict = {
            "loss/loss": (loss, loss_scalar_summary),
            "metrics/ratio_moves_below_best": ratio_moves_below_best,
            "metrics/mean_evaluation_value": mean_calculated_value,
            "metrics/mean_abs_evaluation_value": tf.abs(legal_move_logits),
            "metrics/mean_expected_value": features["desired_scores"],
            "metrics/mean_abs_expected_value": tf.abs(features["desired_scores"]),
            "metrics/distance_from_desired": mean_diff_from_desired,
            "metrics/abs_distance_from_desired": tf.abs(diff_from_desired),
            "metrics/relative_distance_from_desired": tf.abs(mean_diff_from_desired / mean_calculated_value),
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))  # Not sure if this needs to be stored as a variable, should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
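# Editor's sketch (hypothetical wiring, not from the source): a model_fn like
# the one above is handed to tf.estimator.Estimator together with a params dict
# carrying the keys it reads. Every value below is a placeholder:
move_gen_estimator = tf.estimator.Estimator(
    model_fn=move_gen_cnn_model_fn,
    model_dir='/tmp/move_gen',
    params={
        'inception_modules': None,          # whatever spec build_convolutional_modules expects
        'kernel_initializer': layers.xavier_initializer,
        'kernel_regularizer': None,
        'trainable_cnn_modules': True,
        'conv_init_fn': None,
        'dense_shape': [1024, 512],
        'num_outputs': 4096,
        'optimizer': 'Adam',
        'learning_decay_function': lambda step: 1e-4,
        'train_summaries': ['gradients'],
        'log_interval': 100,
        'model_dir': '/tmp/move_gen',
    })
# then: move_gen_estimator.train(input_fn=train_input_fn), .evaluate(...), etc.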
def board_eval_model_fn(features, labels, mode, params):
    """Generates an EstimatorSpec for the model."""
    if mode == tf.estimator.ModeKeys.PREDICT:
        input_layer = features["feature"]
    else:
        # Reshape features from original shape of [-1, 3, 8, 8, 16]
        input_layer = tf.reshape(features, [-1, 8, 8, params['num_input_filters']])

    inception_module_outputs, activation_summaries = build_convolutional_modules(
        input_layer,
        params['inception_modules'],
        mode,
        params['kernel_initializer'],
        params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    if params["conv_init_fn"] is not None:  # and tf.train.global_step(tf.get_default_session(), tf.train.get_global_step()) == 0:
        params["conv_init_fn"]()

    # Build the fully connected layers
    dense_layers_outputs, activation_summaries = build_fully_connected_layers_with_batch_norm(
        inception_module_outputs,
        params['dense_shape'],
        params['kernel_initializer'],
        mode,
        activation_summaries=activation_summaries)

    # Create the final layer of the ANN
    logits = tf.layers.dense(inputs=dense_layers_outputs,
                             units=params['num_outputs'],
                             use_bias=False,
                             activation=None,
                             kernel_initializer=params['kernel_initializer'](),
                             name="logit_layer")

    loss = None
    train_op = None
    ratio_old_new_sum_loss_to_negative_sum = None

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        to_split = tf.reshape(logits, [-1, 3])
        original_pos, desired_pos, random_pos = tf.split(to_split, [1, 1, 1], 1)

        # Implementing an altered version of the loss function defined in Deep Pink.
        # There are a few other methods I've been trying out (commented out below),
        # though none seem to be as good as the one proposed in Deep Pink.
        with tf.variable_scope("loss"):
            # adjusted_equality_sum = (original_pos + CONSTANT + desired_pos)
            adjusted_equality_sum = (original_pos + desired_pos)
            adjusted_real_rand_sum = (random_pos - desired_pos)

            real_greater_rand_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(adjusted_real_rand_sum)))

            # test_new_loss_component = tf.reduce_mean(-tf.log(tf.sigmoid(random_pos + original_pos)))
            ## test_new_loss_component = tf.reduce_mean(-tf.log(tf.sigmoid(-(original_pos + random_pos))))
            # test_new_loss_component_summary = tf.summary.scalar("test_new_loss_component", test_new_loss_component)

            equality_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(adjusted_equality_sum)))
            negative_equality_scalar_loss = tf.reduce_mean(
                -tf.log(tf.sigmoid(-adjusted_equality_sum)))

            ratio_old_new_sum_loss_to_negative_sum = tf.divide(
                equality_scalar_loss, negative_equality_scalar_loss)

            real_rand_loss_summary = tf.summary.scalar(
                "real_greater_rand_loss", real_greater_rand_scalar_loss)
            equality_sum_loss_summary = tf.summary.scalar(
                "mean_original_plus_desired_loss", equality_scalar_loss)
            negative_equality_sum_loss_summary = tf.summary.scalar(
                "mean_negative_original_plus_desired", negative_equality_scalar_loss)

            # loss = real_greater_rand_scalar_loss
            # loss = real_greater_rand_scalar_loss + test_new_loss_component
            loss = real_greater_rand_scalar_loss + equality_scalar_loss + negative_equality_scalar_loss
            # loss = real_greater_rand_scalar_loss + equality_scalar_loss + negative_equality_scalar_loss + test_new_loss_component

            loss_summary = tf.summary.scalar("loss", loss)

            ############################################################################################
            # the_labels = tf.tile(tf.constant([[0, 0, 1]]), [tf.shape(to_split)[0], 1])
            #
            # softmax_logits = to_split * tf.constant([[-1, 1, 1]], dtype=tf.float32)
            #
            # cross_entropy_loss = tf.losses.softmax_cross_entropy(the_labels, softmax_logits)
            #
            # old_real_sum_squared_scalar_loss = tf.reduce_mean(tf.square(2 * (original_pos + desired_pos)))
            #
            # loss = cross_entropy_loss + old_real_sum_squared_scalar_loss
            #
            # cross_entropy_summary = tf.summary.scalar("cross_entropy_loss", cross_entropy_loss)
            # old_real_sum_squared_summary = tf.summary.scalar("old_real_sum_squared_loss", old_real_sum_squared_scalar_loss)
            # loss_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"scores": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default": tf.estimator.export.RegressionOutput(value=logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        old_plus_desired = original_pos + desired_pos
        rand_real_diff = random_pos - desired_pos
        abs_rand_real_diff = tf.abs(rand_real_diff)
        abs_old_plus_desired = tf.abs(old_plus_desired)
        mean_abs_old_plus_desired = tf.reduce_mean(abs_old_plus_desired)
        abs_randreal_realold_ratio = tf.reduce_mean(rand_real_diff) / mean_abs_old_plus_desired
        rand_vs_real_accuracy = tf.cast(tf.less(desired_pos, random_pos), tf.float32)

        to_create_metric_dict = {
            "metrics/rand_vs_real_accuracy": rand_vs_real_accuracy,
            "metrics/mean_dist_rand_real": rand_real_diff,
            "metrics/mean_abs_rand_real_diff": abs_rand_real_diff,
            "metrics/mean_dist_old_real": old_plus_desired,
            "metrics/mean_abs_dist_old_real": mean_abs_old_plus_desired,  # abs_old_plus_desired,
            "metrics/abs_randreal_realold_ratio": abs_randreal_realold_ratio,
            "metrics/mean_old_pos": original_pos,
            "metrics/mean_new_pos": desired_pos,
            "metrics/mean_random_pos": random_pos,
            "metrics/mean_abs_old_pos": tf.abs(original_pos),
            "metrics/mean_abs_new_pos": tf.abs(desired_pos),
            "metrics/mean_abs_random_pos": tf.abs(random_pos),
            # "loss/cross_entropy_loss": (cross_entropy_loss, cross_entropy_summary),
            # "loss/old_real_sum_squared_loss": (old_real_sum_squared_scalar_loss, old_real_sum_squared_summary),
            # "loss/test_new_loss_component": (test_new_loss_component, test_new_loss_component_summary),
            "loss/real_greater_rand_loss": (real_greater_rand_scalar_loss, real_rand_loss_summary),
            "loss/mean_original_plus_desired_loss": (equality_scalar_loss, equality_sum_loss_summary),
            "loss/mean_negative_original_plus_desired": (negative_equality_scalar_loss, negative_equality_sum_loss_summary),
            "loss/ratio_old_new_sum_loss_to_negative_sum": ratio_old_new_sum_loss_to_negative_sum,
            "loss/loss": (loss, loss_summary),
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))  # Not sure if this needs to be stored as a variable, should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
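# Editor's note (a mathematical identity, not a change to the code above): each
# -tf.log(tf.sigmoid(x)) term in the Deep-Pink-style loss equals softplus(-x),
# since -log(sigmoid(x)) = log(1 + exp(-x)). The softplus form avoids log(0)
# underflow when the sigmoid saturates:
def neg_log_sigmoid(x):
    # -log(sigmoid(x)) == log(1 + exp(-x)) == softplus(-x)
    return tf.nn.softplus(-x)
# e.g. real_greater_rand_scalar_loss == tf.reduce_mean(neg_log_sigmoid(adjusted_real_rand_sum))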
def encoder_builder_fn(features, labels, mode, params):
    """Generates an EstimatorSpec for the model."""
    if mode == tf.estimator.ModeKeys.PREDICT:
        input_layer = features["data"]
    else:
        input_layer = tf.reshape(features, [-1, 8, 8, params['num_input_filters']])

    logits, activation_summaries = build_convolutional_modules(
        input_layer,
        params['inception_modules'],
        mode,
        params['kernel_initializer'],
        params['kernel_regularizer'],
        params['trainable_cnn_modules'])

    if params["conv_init_fn"] is not None:
        params["conv_init_fn"]()

    loss = None
    legal_move_loss = None
    pieces_loss = None
    train_op = None
    legal_move_summary = None
    pieces_loss_summary = None
    loss_summary = None

    empty_squares = tf.expand_dims(1 - tf.reduce_sum(input_layer, axis=3), axis=3)
    one_hot_piece_labels = tf.concat([input_layer, empty_squares], axis=3)

    piece_logit_slices = logits[..., :16]
    move_logit_slices = logits[..., 16:]

    # Calculate loss (for both TRAIN and EVAL modes)
    if mode != tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope("loss"):
            index_to_move_dict = {
                value: key
                for key, value in generate_move_to_enumeration_dict().items()
            }
            possible_move_indices = tf.constant(
                [[index_to_move_dict[j][0], index_to_move_dict[j][1]]
                 for j in range(len(index_to_move_dict))],
                dtype=tf.int32)

            legal_move_ints = tf.transpose(tf.to_int32(labels))
            move_logits_to_from_format = tf.transpose(
                tf.reshape(move_logit_slices, (-1, 64, 64)), perm=[1, 2, 0])
            possible_move_logits = tf.gather_nd(move_logits_to_from_format,
                                                possible_move_indices)

            pieces_loss = tf.losses.softmax_cross_entropy(
                tf.reshape(one_hot_piece_labels, (-1, 16)),
                tf.reshape(piece_logit_slices, (-1, 16)))
            legal_move_loss = tf.losses.sigmoid_cross_entropy(legal_move_ints,
                                                              possible_move_logits)
            loss = pieces_loss + legal_move_loss

            pieces_loss_summary = tf.summary.scalar("pieces_loss", pieces_loss)
            legal_move_summary = tf.summary.scalar("legal_move_loss", legal_move_loss)
            loss_summary = tf.summary.scalar("loss", loss)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()
        learning_rate = params['learning_decay_function'](global_step)
        tf.summary.scalar("learning_rate", learning_rate)
        train_op = layers.optimize_loss(loss=loss,
                                        global_step=global_step,
                                        learning_rate=learning_rate,
                                        optimizer=params['optimizer'],
                                        summaries=params['train_summaries'])

    # Generate predictions
    predictions = {"scores": logits}

    # A dictionary for scoring used when exporting model for serving.
    the_export_outputs = {
        "serving_default": tf.estimator.export.RegressionOutput(value=logits)
    }

    # Create the validation metrics
    validation_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        piece_predictions = tf.nn.softmax(piece_logit_slices, axis=3)
        calculated_diff = piece_predictions - one_hot_piece_labels
        filter_diff_sums = tf.reduce_sum(calculated_diff, axis=[1, 2])
        mean_abs_diffs = tf.reduce_mean(tf.abs(filter_diff_sums), axis=0)

        to_create_metric_dict = {
            "loss/pieces_loss": (pieces_loss, pieces_loss_summary),
            "loss/legal_move_loss": (legal_move_loss, legal_move_summary),
            "loss/loss": (loss, loss_summary),
            "metrics/mean_abs_ep_diff": mean_abs_diffs[0],
            "metrics/mean_abs_unoccupied_diff": mean_abs_diffs[15],
            "metrics/mean_abs_king_diff": (mean_abs_diffs[1] + mean_abs_diffs[8]) / 2,
            "metrics/mean_abs_queen_diff": (mean_abs_diffs[2] + mean_abs_diffs[9]) / 2,
            "metrics/mean_abs_not_castling_rook_diff": (mean_abs_diffs[3] + mean_abs_diffs[10]) / 2,
            "metrics/mean_abs_bishop_diff": (mean_abs_diffs[4] + mean_abs_diffs[11]) / 2,
            "metrics/mean_abs_knight_diff": (mean_abs_diffs[5] + mean_abs_diffs[12]) / 2,
            "metrics/mean_abs_pawn_diff": (mean_abs_diffs[6] + mean_abs_diffs[13]) / 2,
            "metrics/mean_abs_can_castle_rook_diff": (mean_abs_diffs[7] + mean_abs_diffs[14]) / 2,
        }

        validation_metrics = metric_dict_creator(to_create_metric_dict)

    # Create the trainable variable summaries and merge them together to give to a hook
    trainable_var_summaries = layers.summarize_tensors(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))  # Not sure if this needs to be stored as a variable, should check
    merged_summaries = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=params['log_interval'],
                                             output_dir=params['model_dir'],
                                             summary_op=merged_summaries)

    # Return the EstimatorSpec object
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[summary_hook],
                                      export_outputs=the_export_outputs,
                                      eval_metric_ops=validation_metrics)
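# Editor's sketch (hypothetical, not from the source): recovering a board and a
# legal-move set from the encoder's logits at inference time. The channel
# layout follows the code above (16 piece channels, then the 64x64 from/to move
# channels); thresholding the move logits at 0.0 matches the sigmoid
# cross-entropy training target, since sigmoid(x) > 0.5 iff x > 0:
piece_ids = tf.argmax(logits[..., :16], axis=3)            # per-square piece class
move_scores = tf.reshape(logits[..., 16:], (-1, 64, 64))   # from-square x to-square logits
predicted_legal = tf.greater(move_scores, 0.0)             # boolean legal-move mask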
def log_returns(real_return: [], ret: [], qs):
    layers.summarize_tensors([real_return, ret, qs,
                              tf.subtract(ret, qs, name='R-Q')])