def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state through a stack of `film_gcn_res_block` layers
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding. It is used for the variable scope ('policy/<name>')
                     and to look up the film gammas / betas in `self.outputs`.
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import film_gcn_res_block, preprocess_adjacency

    # Film gammas and betas are read from the outputs, under a key derived from `name`
    film_gammas = self.outputs['_%s_film_gammas' % name]
    film_betas = self.outputs['_%s_film_betas' % name]

    # Normalized adjacency, repeated once per batch item
    adjacency = preprocess_adjacency(get_adjacency_matrix())
    adjacency = tf.expand_dims(adjacency, axis=0)
    norm_adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

    def _film_block(inputs, film_idx, out_dim, residual):
        """ Applies one film_gcn_res_block conditioned on the gamma / beta stored at index `film_idx` """
        return film_gcn_res_block(inputs=inputs,
                                  gamma=film_gammas[film_idx],
                                  beta=film_betas[film_idx],
                                  gcn_out_dim=out_dim,
                                  norm_adjacency=norm_adjacency,
                                  is_training=self.placeholders['is_training'],
                                  residual=residual)

    with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

        # Adding noise to break symmetry
        noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
        noisy_board_state = board_state + noise

        # Dense layer bringing the features to 'gcn_size' units
        gcn_size = self.hparams['gcn_size']
        graph_conv = tf.layers.Dense(units=gcn_size, activation=tf.nn.relu)(noisy_board_state)

        # First and intermediate layers (residual) - each one uses its own gamma / beta
        nb_residual_layers = self.hparams['nb_graph_conv'] - 1
        for layer_idx in range(nb_residual_layers):
            graph_conv = _film_block(graph_conv, layer_idx, gcn_size, residual=True)

        # Last layer (non-residual) - uses the last gamma / beta and outputs attn_size // 2 features
        graph_conv = _film_block(graph_conv, -1, self.hparams['attn_size'] // 2, residual=False)

    # Returning
    return graph_conv
def _unsparsify(tensor):
    """ Converts a tf.IndexedSlices (e.g. a sparse gradient) to a dense tensor
        :param tensor: Either a tf.IndexedSlices or any other object
        :return: The result of scattering the slices' values into `dense_shape` when given a tf.IndexedSlices,
                 otherwise the input object, unchanged.
    """
    if isinstance(tensor, tf.IndexedSlices):
        assert tensor.dense_shape is not None, 'Got sparse gradients of unknown shape'

        # Appending trailing axes to the indices until their rank matches the rank of the values
        scatter_indices = tensor.indices
        slice_values = tensor.values
        while scatter_indices.shape.ndims < slice_values.shape.ndims:
            scatter_indices = tf.expand_dims(scatter_indices, -1)
        return tf.scatter_nd(scatter_indices, slice_values, tensor.dense_shape)

    # Anything that is not an IndexedSlices is passed through
    return tensor
def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state through a stack of graph convolutions
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding (the variables live under 'policy/<name>')
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf, batch_norm
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

    hparams = self.hparams

    # Normalized adjacency, repeated once per batch item
    adjacency = preprocess_adjacency(get_adjacency_matrix())
    adjacency = tf.expand_dims(adjacency, axis=0)
    norm_adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

    def _graph_conv(inputs, input_dim, output_dim, activation_fn):
        """ Builds a biased GraphConvolution layer over the shared adjacency and applies it to `inputs` """
        return GraphConvolution(input_dim=input_dim,
                                output_dim=output_dim,
                                norm_adjacency=norm_adjacency,
                                activation_fn=activation_fn,
                                bias=True)(inputs)

    with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):

        # Adding noise to break symmetry
        noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
        graph_conv = board_state + noise

        # First layer - input size is read from the static shape                # (b, NB_NODES, gcn_size)
        graph_conv = _graph_conv(graph_conv, graph_conv.shape[-1].value, hparams['gcn_size'], tf.nn.relu)

        # Intermediate layers                                                   # (b, NB_NODES, gcn_size)
        # NOTE(review): batch norm is assumed to follow every intermediate layer (i.e. to be inside the loop)
        #               - confirm against the intended layout.
        for _ in range(1, hparams['nb_graph_conv'] - 1):
            graph_conv = _graph_conv(graph_conv, hparams['gcn_size'], hparams['gcn_size'], tf.nn.relu)
            graph_conv = batch_norm(graph_conv, is_training=self.placeholders['is_training'], fused=True)

        # Final layer - no activation                                           # (b, NB_NODES, attn_size)
        graph_conv = _graph_conv(graph_conv, hparams['gcn_size'], hparams['attn_size'], None)

    # Returning
    return graph_conv
def _encode_board(self, board_state, name, reuse=None):
    """ Encodes a board state or prev orders state through a stack of (residual) graph convolutions
        :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
        :param name: The name to use for the encoding (the variables live under 'policy/<name>')
        :param reuse: Whether to reuse or not the weights from another encoding operation
        :return: The encoded board state / prev_orders state
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

    hparams = self.hparams

    # Normalized adjacency, repeated once per batch item
    adjacency = preprocess_adjacency(get_adjacency_matrix())
    adjacency = tf.expand_dims(adjacency, axis=0)
    norm_adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

    with tf.variable_scope(tf.VariableScope(name='policy/%s' % name, reuse=reuse)):
        batch_size = tf.shape(board_state)[0]

        # Adding noise to break symmetry
        noise = tf.random_normal(tf.shape(board_state), stddev=0.01)
        noisy_state = board_state + noise

        # Projecting to 'gcn_size' only when the last dimension is exactly NB_FEATURES,
        # otherwise the (noisy) input is fed directly to the graph convolutions
        gcn_size = hparams['gcn_size']
        if noisy_state.shape[-1].value != NB_FEATURES:
            graph_conv = noisy_state
        else:
            with tf.variable_scope('proj', reuse=tf.AUTO_REUSE):
                proj_w = tf.get_variable('W', shape=[1, NB_FEATURES, gcn_size], dtype=tf.float32)
            tiled_proj_w = tf.tile(proj_w, [batch_size, 1, 1])
            graph_conv = tf.nn.relu(tf.matmul(noisy_state, tiled_proj_w))

        # Arguments shared by every graph convolution below
        shared_kwargs = {'input_dim': gcn_size,
                         'norm_adjacency': norm_adjacency,
                         'activation_fn': tf.nn.relu,
                         'bias': True}

        # First and intermediate layers (residual)                              # (b, NB_NODES, gcn_size)
        for _ in range(hparams['nb_graph_conv'] - 1):
            graph_conv = GraphConvolution(output_dim=gcn_size, residual=True, **shared_kwargs)(graph_conv)

        # Last layer (non-residual)                                             # (b, NB_NODES, attn_size // 2)
        graph_conv = GraphConvolution(output_dim=hparams['attn_size'] // 2,
                                      residual=False,
                                      **shared_kwargs)(graph_conv)

    # Returning
    return graph_conv
def _build_draw_initial(self):
    """ Builds the draw model (initial step)

        Reads the 'board_state', 'current_power' and 'draw_target' features and the 'stop_gradient_all'
        placeholder, then registers the 'draw_prob' and 'draw_loss' outputs through `add_meta_information`.
    """
    from diplomacy_research.utils.tensorflow import tf, to_float
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

    # Merging this model's placeholders with the ones already registered (if any)
    if self.placeholders:
        self.placeholders.update(self.get_placeholders())
    else:
        self.placeholders = self.get_placeholders()

    hparams = self.hparams
    worker_device = self.cluster_config.worker_device if self.cluster_config else None

    # Building the draw graph
    # NOTE(review): the dense embedding is assumed to live inside 'graph_conv_scope' and the outputs to be
    #               built after the scopes are closed - confirm (scope nesting affects variable names).
    with tf.variable_scope('draw', reuse=tf.AUTO_REUSE), tf.device(worker_device):

        # Features
        board_state = to_float(self.features['board_state'])        # tf.float32 - (b, NB_NODES, NB_FEATURES)
        current_power = self.features['current_power']              # tf.int32   - (b,)
        draw_target = self.features['draw_target']                  # tf.float32 - (b,)

        # Placeholders
        stop_gradient_all = self.placeholders['stop_gradient_all']

        # Normalized adjacency, repeated once per batch item
        batch_size = tf.shape(board_state)[0]
        adjacency = preprocess_adjacency(get_adjacency_matrix())
        adjacency = tf.expand_dims(adjacency, axis=0)
        norm_adjacency = tf.tile(adjacency, [batch_size, 1, 1])

        # Graph embeddings
        with tf.variable_scope('graph_conv_scope'):
            gcn_output_size = hparams['draw_gcn_1_output_size']
            node_embeddings = GraphConvolution(input_dim=NB_FEATURES,
                                               output_dim=gcn_output_size,
                                               norm_adjacency=norm_adjacency,
                                               activation_fn=tf.nn.relu,
                                               bias=True)(board_state)

            # Flattening all nodes together                         # (b, NB_NODES * draw_gcn_1_output_size)
            flat_embeddings = tf.reshape(node_embeddings,
                                         shape=[-1, NB_NODES * hparams['draw_gcn_1_output_size']])
            board_state_graph_conv = tf.layers.Dense(units=hparams['draw_embedding_size'],
                                                     activation=tf.nn.relu,
                                                     use_bias=True)(flat_embeddings)

        # Calculating draw for all powers
        with tf.variable_scope('draw_scope'):
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

            # Two relu layers, then one sigmoid unit per power       # (b, NB_POWERS)
            dense_specs = [(hparams['draw_h1_size'], tf.nn.relu),
                           (hparams['draw_h2_size'], tf.nn.relu),
                           (NB_POWERS, tf.nn.sigmoid)]
            draw_probs = board_state_graph_conv
            for nb_units, activation_fn in dense_specs:
                draw_probs = tf.layers.Dense(units=nb_units,
                                             activation=activation_fn,
                                             use_bias=True)(draw_probs)

            # Keeping only the probability of the current power     # (b,)
            draw_prob = tf.reduce_sum(draw_probs * current_power_mask, axis=1)

        # Computing draw loss (mean squared error, with gradients blocked if 'stop_gradient_all' is set)
        with tf.variable_scope('draw_loss'):
            mean_squared_error = tf.reduce_mean(tf.square(draw_target - draw_prob))
            draw_loss = tf.cond(stop_gradient_all,
                                lambda: tf.stop_gradient(mean_squared_error),
                                lambda: mean_squared_error)

    # Building output tags
    outputs = {'tag/draw/v001_draw_relu': True,
               'draw_prob': draw_prob,
               'draw_loss': draw_loss}

    # Adding features, placeholders and outputs to graph
    self.add_meta_information(outputs)
def _get_board_value(self, board_state, current_power, name='board_state_value', reuse=None):
    """ Computes the estimated value of a board state
        :param board_state: The board state - (batch, NB_NODES, NB_FEATURES)
        :param current_power: The power for which we want the board value - (batch,)
        :param name: The name to use for the operation (the variables live under 'value/<name>')
        :param reuse: Whether to reuse or not the weights from another operation
        :return: The value of the board state for the specified power - (batch,)
    """
    from diplomacy_research.utils.tensorflow import tf
    from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency

    hparams = self.hparams

    # Normalized adjacency, repeated once per batch item
    adjacency = preprocess_adjacency(get_adjacency_matrix())
    adjacency = tf.expand_dims(adjacency, axis=0)
    norm_adjacency = tf.tile(adjacency, [tf.shape(board_state)[0], 1, 1])

    # No need to use 'stop_gradient_value' - Because this model does not share parameters.
    # NOTE(review): the dense embedding is assumed to live inside 'graph_conv_scope' - confirm
    #               (scope nesting affects variable names).
    with tf.variable_scope(tf.VariableScope(name='value/%s' % name, reuse=reuse)):

        with tf.variable_scope('graph_conv_scope'):
            # Single graph convolution over the board                   # (b, NB_NODES, gcn_1)
            node_embeddings = GraphConvolution(input_dim=board_state.shape[-1].value,
                                               output_dim=hparams['value_gcn_1_output_size'],
                                               norm_adjacency=norm_adjacency,
                                               activation_fn=tf.nn.relu,
                                               bias=True)(board_state)

            # Flattening all nodes together, then embedding             # (b, value_emb_size)
            flat_graph_conv = tf.reshape(node_embeddings,
                                         shape=[-1, NB_NODES * hparams['value_gcn_1_output_size']])
            flat_graph_conv = tf.layers.Dense(units=hparams['value_embedding_size'],
                                              activation=tf.nn.relu,
                                              use_bias=True)(flat_graph_conv)

        with tf.variable_scope('value_scope'):
            current_power_mask = tf.one_hot(current_power, NB_POWERS, dtype=tf.float32)

            # Two relu layers, then one linear unit per power           # (b, NB_POWERS)
            dense_specs = [(hparams['value_h1_size'], tf.nn.relu),
                           (hparams['value_h2_size'], tf.nn.relu),
                           (NB_POWERS, None)]
            all_powers_value = flat_graph_conv
            for nb_units, activation_fn in dense_specs:
                all_powers_value = tf.layers.Dense(units=nb_units,
                                                   activation=activation_fn,
                                                   use_bias=True)(all_powers_value)

            # Keeping only the value of the requested power             # (b,)
            state_value = tf.reduce_sum(all_powers_value * current_power_mask, axis=1)

    # Returning
    return state_value