def _build_module(self, input_layer):
    """Build the DND-based Q head (NEC-style).

    Queries the differentiable neural dictionary for the nearest
    neighbors of the state embedding and produces a Q estimate as an
    inverse-distance-weighted average of the retrieved values.
    Sets ``self.action`` (feed placeholder), ``self.input``,
    ``self.dnd_embeddings``, ``self.dnd_values`` and ``self.output``.
    """
    from memories import differentiable_neural_dictionary

    # Either restore a saved dictionary or start a fresh one.
    if self.tp.checkpoint_restore_dir:
        self.DND = differentiable_neural_dictionary.load_dnd(
            self.tp.checkpoint_restore_dir)
    else:
        self.DND = differentiable_neural_dictionary.QDND(
            self.DND_size,
            input_layer.get_shape()[-1],
            self.num_actions,
            self.new_value_shift_coefficient,
            key_error_threshold=self.DND_key_error_threshold)

    # Retrieve info from DND dictionary.
    self.action = tf.placeholder(tf.int8, [None], name="action")
    self.input = self.action
    # py_func returns float64; cast back to the graph's float32.
    query_result = tf.py_func(
        self.DND.query,
        [input_layer, self.action, self.number_of_nn],
        [tf.float64, tf.float64])
    self.dnd_embeddings = tf.to_float(query_result[0])
    self.dnd_values = tf.to_float(query_result[1])

    # Inverse-distance kernel over the retrieved neighbors.
    # The small delta keeps the denominator strictly positive.
    sq_diff = tf.square(self.dnd_embeddings - tf.expand_dims(input_layer, 1))
    dist = tf.reduce_sum(sq_diff, axis=2) + self.l2_norm_added_delta
    kernel = 1.0 / dist
    kernel_sum = tf.reduce_sum(kernel, axis=1, keep_dims=True)
    norm_kernel = kernel / kernel_sum
    self.output = tf.reduce_sum(self.dnd_values * norm_kernel, axis=1)
def _build_module(self, input_layer):
    """Build the DND-based Q head.

    Creates (or restores) the differentiable neural dictionary and
    exposes ``self.output`` as a Python list with one Q-value tensor
    per action, each produced by ``self._q_value``.

    :param input_layer: state-embedding tensor feeding the head
        (assumed rank 2: batch x embedding_dim — TODO confirm).
    """
    # DND based Q head
    from memories import differentiable_neural_dictionary

    # Either restore a saved dictionary or start a fresh one.
    if self.tp.checkpoint_restore_dir:
        self.DND = differentiable_neural_dictionary.load_dnd(
            self.tp.checkpoint_restore_dir)
    else:
        self.DND = differentiable_neural_dictionary.QDND(
            self.DND_size,
            input_layer.get_shape()[-1],
            self.num_actions,
            self.new_value_shift_coefficient,
            key_error_threshold=self.DND_key_error_threshold)

    # One Q estimate per action, each via a DND nearest-neighbor lookup.
    # (Removed dead commented-out placeholder code left from the
    # previous revision of this head.)
    self.output = [
        self._q_value(input_layer, action)
        for action in range(self.num_actions)
    ]
def _build_module(self, input_layer):
    """Build the DND-based Q head.

    Creates (or restores) the differentiable neural dictionary, then
    queries one Q value per action via ``self._q_value`` and stacks the
    results so ``self.output`` has actions on the last axis.
    """
    from memories import differentiable_neural_dictionary

    if self.tp.checkpoint_restore_dir:
        # Resume from a previously persisted dictionary.
        self.DND = differentiable_neural_dictionary.load_dnd(
            self.tp.checkpoint_restore_dir)
    else:
        self.DND = differentiable_neural_dictionary.QDND(
            self.DND_size,
            input_layer.get_shape()[-1],
            self.num_actions,
            self.new_value_shift_coefficient,
            key_error_threshold=self.DND_key_error_threshold,
            learning_rate=self.tp.learning_rate)

    # Retrieve info from DND dictionary.
    # We assume that all actions have enough entries in the DND.
    per_action_q = []
    for action_idx in range(self.num_actions):
        per_action_q.append(self._q_value(input_layer, action_idx))
    # Transpose so the batch dimension comes first.
    self.output = tf.transpose(per_action_q)