class ResponseState(
        tfnamedtuple("ResponseStateBase", ("response", "volume", "cost", "time"))):
    """Named tuple with the fields ``response``, ``volume``, ``cost`` and ``time``.

    The static helpers slice the last axis of a response tensor: index 0 is
    exposed as the volume, index 1 as the cost and index 2 as the time. Every
    slice keeps the sliced axis (width 1).
    """

    # One (value, flag) pair per field, in field order.
    # NOTE(review): the boolean presumably means "trainable" -- confirm against
    # the code that consumes INITIAL_VALUES.
    INITIAL_VALUES = (
        (0., False),
        (0., False),
        (0., False),
        (0., False),
    )

    @staticmethod
    def extract_volume(response):
        """Return ``response[..., 0:1]`` as an op named ``response_volume``."""
        volume_slice = response[..., 0:1]
        return tf.identity(volume_slice, name="response_volume")

    @staticmethod
    def extract_cost(response):
        """Return ``response[..., 1:2]`` as an op named ``response_cost``."""
        cost_slice = response[..., 1:2]
        return tf.identity(cost_slice, name="response_cost")

    @staticmethod
    def extract_time(response):
        """Return ``response[..., 2:3]`` as an op named ``response_time``."""
        time_slice = response[..., 2:3]
        return tf.identity(time_slice, name="response_time")

    @staticmethod
    def extract_data(response):
        """Return the ``(volume, cost, time)`` slices of ``response``, in that order."""
        volume_part = ResponseState.extract_volume(response)
        cost_part = ResponseState.extract_cost(response)
        time_part = ResponseState.extract_time(response)
        return volume_part, cost_part, time_part

    @property
    def response_cost(self):
        """Cost slice of this state's ``response`` field."""
        raw_response = self.response
        return self.extract_cost(raw_response)

    @property
    def realized_volume(self):
        """Volume slice of this state's ``response`` field."""
        raw_response = self.response
        return self.extract_volume(raw_response)
class AllStates(tfnamedtuple("AllStatesBase", self.state_name)):
    """Named tuple whose fields are taken from ``self.state_name``.

    NOTE(review): the base class is built from ``self.state_name``, so this
    definition is expected to sit inside a method where ``self`` is bound.
    """

    @property
    def response_volume(self):
        """Volume slice of ``self.response``, computed once and then reused."""
        if hasattr(self, "_response_volume"):
            return self._response_volume
        # First access: store the extracted tensor on the instance so later
        # reads do not create another extraction op.
        self._response_volume = ResponseState.extract_volume(self.response)
        return self._response_volume
def __init__(self, controller_cell, actuator_cell, response_cell,
             trainable=True, name=NAME, dtype=FLOAT_TYPE):
    """Wire the controller, actuator and response cells into a single cell.

    Args:
        controller_cell: cell stored as ``_controller_cell``.
        actuator_cell: cell stored as ``_actuator_cell``.
        response_cell: cell stored as ``_response_cell``.
        trainable: forwarded to the parent constructor.
        name: forwarded to the parent constructor and assigned as ``root``
            of every sub-cell.
        dtype: forwarded to the parent constructor.
    """
    self._controller_cell = controller_cell
    self._actuator_cell = actuator_cell
    self._response_cell = response_cell

    # ``Cells`` receives the cells in the order response, actuator, controller.
    ordered_cells = (self._response_cell, self._actuator_cell, self._controller_cell)
    self._cells = self.Cells(*ordered_cells)
    for sub_cell in self.cells:
        sub_cell.root = name

    self.State = tfnamedtuple("LoopState", ["states"])

    class AllStates(tfnamedtuple("AllStatesBase", self.state_name)):
        """Flat tuple of the fields listed in ``state_name``."""

        @property
        def response_volume(self):
            """Volume slice of ``self.response``, computed on first access."""
            if hasattr(self, "_response_volume"):
                return self._response_volume
            self._response_volume = ResponseState.extract_volume(self.response)
            return self._response_volume

    self.AllStates = AllStates

    # The parent constructor runs last: ``state_size`` is read from this
    # instance, after the cells and state types above have been set.
    super().__init__(
        None,
        state_size=self.state_size,
        trainable=trainable,
        name=name,
        dtype=dtype,
    )
def __init__(self, volume_target, cell, total_time, relative=False,
             extra_control_input_names=(), trainable=True, name=NAME,
             dtype=FLOAT_TYPE, K=3):
    """Wrap ``cell`` as the control law of a volume controller.

    Args:
        volume_target: stored as ``_volume_target``.
        cell: inner cell; passed through ``upgrade_cell`` before being stored.
        total_time: stored as ``_total_time``.
        relative: stored as ``relative``.
        extra_control_input_names: names appended after ``remaining_time``
            and ``remaining_volume`` in the control-input name list.
        trainable: forwarded to the parent constructor.
        name: forwarded to the parent constructor; also the ``root`` given
            to ``upgrade_cell``.
        dtype: forwarded to the parent constructor.
        K: kept as ``self.K`` only when ``"time_embedding"`` is one of the
            control inputs; otherwise ``self.K`` is 1.
    """
    self._volume_target = volume_target
    self._cell = upgrade_cell(cell, "control", "control_output", root=name)

    # State layout: the inner cell's state fields, then this cell's own.
    state_fields = self._cell.state_name + self.ControllerState._fields
    self.State = tfnamedtuple("ControllerState", state_fields)

    own_initial_params = ((0, False),)
    initial_state_params = self.control.initial_state_params + own_initial_params
    super().__init__(
        initial_state_params,
        state_size=self._get_state_size(),
        trainable=trainable,
        name=name,
        dtype=dtype,
    )

    self._total_time = total_time
    self.relative = relative
    self._epsilon_volume = EPSILON_VOLUME

    base_input_names = ('remaining_time', 'remaining_volume',)
    self._control_input_names = base_input_names + extra_control_input_names

    self.K = K if self.with_time_embedding else 1
    self.time_embedding = None
def __init__(self, volume_target, control, total_time, volume_pattern_provider=None,
             relative=False, extra_control_input_names=(), trainable=True,
             name=NAME, dtype=FLOAT_TYPE):
    """Wrap ``control`` as the control law of a volume controller.

    Args:
        volume_target: stored as ``_volume_target``.
        control: inner cell; passed through ``upgrade_cell`` and its
            ``build`` wrapped by ``add_summary_variables``.
        total_time: stored as ``_total_time``.
        volume_pattern_provider: callable applied to a time value; when
            ``None`` it defaults to ``total_time - current``.
        relative: stored as ``relative``.
        extra_control_input_names: names appended after ``remaining_time``,
            ``current_volume_target`` and ``volume``.
        trainable: forwarded to the parent constructor.
        name: forwarded to the parent constructor; also the ``root`` given
            to ``upgrade_cell``.
        dtype: forwarded to the parent constructor.
    """
    self._volume_target = volume_target
    self._control = upgrade_cell(control, "control", "control_output", root=name)

    summary_wrapper = add_summary_variables(self._control)
    self._control.build = summary_wrapper(self._control.build)
    assert self._control.output_size > 1

    # State layout: the inner cell's state fields, then this cell's own.
    state_fields = self._control.state_name + self.ControllerState._fields
    self.State = tfnamedtuple("ControllerState", state_fields)

    own_initial_params = ((0., False), (0., False))
    initial_state_params = self.control.initial_state_params + own_initial_params
    super().__init__(
        initial_state_params,
        state_size=self._get_state_size(),
        trainable=trainable,
        name=name,
        dtype=dtype,
    )

    self._total_time = total_time

    if volume_pattern_provider is None:
        # Default pattern: whatever is left of the horizon at ``current``.
        volume_pattern_provider = lambda current: (self.total_time - current)
    self._volume_pattern_provider = volume_pattern_provider

    self.relative = relative
    self._epsilon_volume = EPSILON_VOLUME

    base_input_names = ('remaining_time', 'current_volume_target', 'volume',)
    self._control_input_names = base_input_names + extra_control_input_names
    self.t_0 = 0
class PIControlLevelCell(RNNCellInterface):
    """Proportional-integral (PI) controller written as an RNN cell.

    The recurrent state is ``State(control_output, i_error)``: the previous
    control signal and the accumulated integral term. Each step reads the
    volume target and the realized volume from the control input and emits
    the clipped sum of a bias, a proportional term and the integral term.
    """

    NAME = "pi_control_level_cell"
    State = tfnamedtuple("PiControlState", ("control_output", "i_error"))

    # TODO(nperrin16): Pay attention to the mutable dict.
    # The dict inside the default ``initial_state_params`` is created once at
    # definition time and shared by every instance that relies on the default.
    def __init__(self, k_l, t_i, t_s, plant_gain_var=1., plant_gain_var_constraint=None,
                 use_log=False, signal_bias=None,
                 initial_state_params=((0., False, {
                     'constraint': lambda u_level: clip_bid(u_level)
                 }), (0., False)),
                 trainable=True, name=NAME, dtype=FLOAT_TYPE):
        """
        k_l: large -> fast convergence but less robustness
        t_i: small -> fast convergence but more volatility

        Args:
            k_l: loop gain; the proportional gain is ``k_l / plant_gain``.
            t_i: integral time; the integral gain is ``t_s / t_i``.
            t_s: numerator of the integral gain.
            plant_gain_var: initial value of the plant gain.
            plant_gain_var_constraint: constraint passed to ``add_weight``
                for the plant-gain weight.
            use_log: when true, ``t_i`` and ``plant_gain_var`` are stored as
                logarithms and exponentiated wherever they are used.
            signal_bias: initial value of a bias weight added to the control
                output; ``None`` means a constant bias of 0.
            initial_state_params: forwarded to the parent constructor.
            trainable: forwarded to the parent constructor.
            name: forwarded to the parent constructor.
            dtype: forwarded to the parent constructor.
        """
        super().__init__(initial_state_params=initial_state_params,
                         state_size=self.State(control_output=1, i_error=1),
                         trainable=trainable,
                         name=name,
                         dtype=dtype)
        self.use_log = use_log
        self.k_l = k_l
        self.t_i = np.log(t_i) if use_log else t_i
        self.t_s = t_s
        self.plant_gain_var = np.log(plant_gain_var) if use_log else plant_gain_var
        self.plant_gain_var_constraint = plant_gain_var_constraint
        if use_log:
            self.plant_gain = lambda u: np.exp(self.plant_gain_var)
            # ``self.t_i`` holds log(t_i) in this mode, so exponentiate the
            # stored value. Exponentiating the raw ``t_i`` argument would give
            # t_s / e**t_i, which disagrees with the non-log branch and with
            # the value ``build`` computes.
            self.k_i = t_s / np.exp(self.t_i)
        else:
            self.plant_gain = lambda u: self.plant_gain_var
            self.k_i = t_s / t_i
        self.signal_bias = signal_bias

    def build(self, inputs_shape):
        """Create the weights and replace the Python-side gains by tensors.

        ``t_i`` and ``plant_gain_var`` become scalar weights initialised from
        the values stored by ``__init__``; ``plant_gain`` and ``k_i`` are
        rebuilt from those weights. Scalar summaries are emitted for ``t_i``,
        the plant gain and the signal bias.
        """
        super().build(inputs_shape)
        set_tf_tensor(self, 'k_l', dtype=self.dtype)
        set_tf_tensor(self, 't_s', dtype=self.dtype)

        self.t_i = self.add_weight('t_i',
                                   shape=(),
                                   dtype=self.dtype,
                                   trainable=True,
                                   initializer=tf.constant_initializer(self.t_i, dtype=self.dtype))
        tf.summary.scalar('control/t_i', tf.exp(self.t_i) if self.use_log else self.t_i)

        self.plant_gain_var = self.add_weight(
            'plant_gain',
            shape=(),
            dtype=self.dtype,
            trainable=True,
            initializer=tf.constant_initializer(self.plant_gain_var, dtype=self.dtype),
            constraint=self.plant_gain_var_constraint)
        tf.summary.scalar(
            'control/plant_gain',
            tf.exp(self.plant_gain_var) if self.use_log else self.plant_gain_var)

        if self.use_log:
            # Weights are stored in log space; exponentiate on use.
            self.plant_gain = lambda _control_signal_level: tf.exp(self.plant_gain_var)
            self.k_i = tf.identity(self.t_s / tf.exp(self.t_i), name='k_i')
        else:
            self.plant_gain = lambda _control_signal_level: self.plant_gain_var
            self.k_i = tf.identity(self.t_s / self.t_i, name='k_i')

        if self.signal_bias is not None:
            self.signal_bias = self.add_weight(
                'signal_bias',
                shape=(),
                dtype=self.dtype,
                trainable=True,
                initializer=tf.constant_initializer(self.signal_bias, dtype=self.dtype))
        else:
            self.signal_bias = 0.
            set_tf_tensor(self, 'signal_bias', dtype=self.dtype)
        tf.summary.scalar('control/signal_bias', self.signal_bias)

    def call(self, control_input: tf.Tensor, state: State, training=False) -> (tf.Tensor, State):
        """Run one PI step.

        The state carries the previous control signal and the integral term
        `i_error`. Column 1 of ``control_input`` is read as the current volume
        target and column 2 as the realized volume.

        Returns:
            The clipped control signal, and the new state holding that signal
            together with the updated integral term.
        """
        control_signal_level, i_error = state
        k_p = self.k_l / self.plant_gain(control_signal_level)
        current_volume_target = tf.identity(control_input[:, 1], name="volume_target")
        volume = tf.identity(control_input[:, 2], name="volume")
        # Re-add the trailing axis so the error lines up with the state tensors.
        error = tf.identity(current_volume_target - volume, name="error")[:, tf.newaxis]
        p_error = k_p * error
        i_error = i_error + p_error * self.k_i
        control_output = clip_bid(self.signal_bias + p_error + i_error, name='control_signal_level')
        return control_output, self.State(control_output=control_output, i_error=i_error)
class ControllerCell(RNNCellInterface):
    """Volume controller that turns responses into inputs for a control cell.

    On top of the inner cell's state it tracks two entries of its own: the
    cumulated realized volume and the current volume target.
    """

    NAME = "controller_cell"
    ControllerState = tfnamedtuple("ControllerSubState",
                                   ("realized_volume_cum", "current_volume_target"))

    def __init__(self, volume_target, control, total_time, volume_pattern_provider=None,
                 relative=False, extra_control_input_names=(), trainable=True,
                 name=NAME, dtype=FLOAT_TYPE):
        """Wrap ``control`` as the control law of this controller.

        Args:
            volume_target: stored as ``_volume_target``.
            control: inner cell; passed through ``upgrade_cell`` and its
                ``build`` wrapped by ``add_summary_variables``.
            total_time: stored as ``_total_time``.
            volume_pattern_provider: callable applied to a time value; when
                ``None`` it defaults to ``total_time - current``.
            relative: when true, ``call`` divides target, volume and cost by
                the volume target before feeding the inner cell.
            extra_control_input_names: names appended after
                ``remaining_time``, ``current_volume_target`` and ``volume``.
            trainable: forwarded to the parent constructor.
            name: forwarded to the parent constructor; also the ``root``
                given to ``upgrade_cell``.
            dtype: forwarded to the parent constructor.
        """
        self._volume_target = volume_target
        self._control = upgrade_cell(control, "control", "control_output", root=name)

        summary_wrapper = add_summary_variables(self._control)
        self._control.build = summary_wrapper(self._control.build)
        assert self._control.output_size > 1

        # State layout: the inner cell's state fields, then this cell's own.
        state_fields = self._control.state_name + self.ControllerState._fields
        self.State = tfnamedtuple("ControllerState", state_fields)

        own_initial_params = ((0., False), (0., False))
        initial_state_params = self.control.initial_state_params + own_initial_params
        super().__init__(
            initial_state_params,
            state_size=self._get_state_size(),
            trainable=trainable,
            name=name,
            dtype=dtype,
        )

        self._total_time = total_time

        if volume_pattern_provider is None:
            # Default pattern: whatever is left of the horizon at ``current``.
            volume_pattern_provider = lambda current: (self.total_time - current)
        self._volume_pattern_provider = volume_pattern_provider

        self.relative = relative
        self._epsilon_volume = EPSILON_VOLUME

        base_input_names = ('remaining_time', 'current_volume_target', 'volume',)
        self._control_input_names = base_input_names + extra_control_input_names
        self.t_0 = 0

    @property
    def trainable_weights(self):
        """Inner cell's trainable weights followed by the parent's."""
        inner_weights = self.control.trainable_weights
        return inner_weights + super().trainable_weights

    @property
    def non_trainable_weights(self):
        """Inner cell's non-trainable weights followed by the parent's."""
        inner_weights = self.control.non_trainable_weights
        return inner_weights + super().non_trainable_weights

    @property
    def losses(self):
        """Inner cell's losses followed by the parent's."""
        inner_losses = self.control.losses
        return inner_losses + super().losses

    @property
    def control(self):
        """The wrapped control cell."""
        return self._control

    @property
    def total_time(self):
        """Value stored as ``_total_time``."""
        return self._total_time

    @property
    def volume_target(self):
        """Value stored as ``_volume_target``; only readable once built.

        Raises:
            ValueError: if ``built`` is ``False``.
        """
        if self.built is False:
            raise ValueError("Illegal state, cell hasn't been built.")
        return self._volume_target

    @RNNCellInterface.root.setter
    def root(self, root):
        # Re-root the inner cell under this cell's full name.
        self._root = root
        self.control.root = self.full_name

    def _get_state_size(self):
        """Return the state sizes: the inner cell's, then 1 for each own entry."""
        inner_sizes = self.control.state_size
        if not hasattr(inner_sizes, '__len__'):
            # A size without a length is wrapped so it can be star-unpacked.
            inner_sizes = (inner_sizes,)
        return self.State(*inner_sizes, realized_volume_cum=1, current_volume_target=1)

    def get_control_state(self, state):
        """Drop this cell's own trailing entries and rebuild the inner state."""
        own_entries = len(self.ControllerState._fields)
        return self.control.State(*state[:-own_entries])

    def make_state(self, control_state, volume_cum, current_volume_target):
        """Assemble a full state from the inner state and the two own entries."""
        return self.State(*control_state, volume_cum, current_volume_target)

    def build(self, inputs_shape):
        """Build the parent, then set ``_volume_target``, ``_total_time`` and ``t_0``."""
        super().build(inputs_shape)
        set_tf_tensor(self, '_volume_target', dtype=self.dtype)
        set_tf_tensor(self, '_total_time', dtype=self.dtype)
        self.t_0 = tf.zeros(1, self.dtype)

    def call(self, response: tf.Tensor, state: namedtuple,
             training=False) -> (tf.Tensor, namedtuple):
        """Run one controller step.

        Splits ``response`` into volume, cost and time, updates the cumulated
        volume, derives the current volume target and feeds the inner control
        cell.

        Returns:
            The inner cell's output and the new state. The new state keeps
            the non-relative cumulated volume and volume target.
        """
        # The stored volume target is recomputed below; the old one is unused.
        *inner_state, cumulated_before, _ = state
        volume, cost, t = ResponseState.extract_data(response)

        time_left = self.total_time - t
        remaining_time = tf.identity(time_left / self.total_time, "remaining_time")
        volume_cum = tf.identity(cumulated_before + volume, name="volume_cum")

        # Pattern value at the start divided by the pattern value at time ``t``.
        pattern_at_start = self._volume_pattern_provider(self.t_0)
        pattern_at_t = self._volume_pattern_provider(t)
        time_of_day_ratio = pattern_at_start / pattern_at_t

        # Target for this step: the outstanding volume, scaled by the pattern
        # ratio and divided by the total time.
        outstanding_volume = self._volume_target - volume_cum
        current_volume_target = time_of_day_ratio * outstanding_volume / self._total_time

        cvt = current_volume_target
        if self.relative:
            # Express target, volume and cost as fractions of the volume target.
            cvt = tf.identity(current_volume_target / self.volume_target,
                              name="relative_current_target")
            volume = tf.identity(volume / self.volume_target, name="relative_volume")
            cost = tf.identity(cost / self.volume_target, name="relative_cost")

        # Volume target repeated to the shape of ``volume``.
        tiled_target = self.volume_target * tf.ones_like(volume, dtype=self.dtype)
        control_input = self.build_control_input(
            remaining_time=remaining_time,
            current_volume_target=cvt,
            volume_target=tiled_target,
            volume=volume,
            cost=cost,
        )

        previous_inner_state = self.control.State(*inner_state)
        control_output, next_inner_state = self.control(control_input,
                                                        previous_inner_state,
                                                        training)
        next_state = self.State(*next_inner_state,
                                realized_volume_cum=volume_cum,
                                current_volume_target=current_volume_target)
        return control_output, next_state

    def build_control_input(self, **kwargs):
        """Concatenate the configured control inputs, in order, along axis 1."""
        selected = [kwargs[input_name] for input_name in self._control_input_names]
        return tf.concat(selected, axis=1, name="control_input")
class ControllerRNNCell(RNNCellInterface):
    """Volume controller whose control law is an arbitrary wrapped cell.

    On top of the inner cell's state it tracks one entry of its own: the
    cumulated realized volume.
    """

    NAME = "controller_rnn_cell"
    ControllerState = tfnamedtuple("ControllerSubState", ("realized_volume_cum",))

    def __init__(self, volume_target, cell, total_time, relative=False,
                 extra_control_input_names=(), trainable=True, name=NAME,
                 dtype=FLOAT_TYPE, K=3):
        """Wrap ``cell`` as the control law of this controller.

        Args:
            volume_target: stored as ``_volume_target``.
            cell: inner cell; passed through ``upgrade_cell`` before being
                stored.
            total_time: stored as ``_total_time``.
            relative: when true, ``call`` divides remaining volume, volume
                and cost by the volume target before feeding the inner cell.
            extra_control_input_names: names appended after
                ``remaining_time`` and ``remaining_volume``.
            trainable: forwarded to the parent constructor.
            name: forwarded to the parent constructor; also the ``root``
                given to ``upgrade_cell``.
            dtype: forwarded to the parent constructor.
            K: kept as ``self.K`` only when ``"time_embedding"`` is one of
                the control inputs; otherwise ``self.K`` is 1.
        """
        self._volume_target = volume_target
        self._cell = upgrade_cell(cell, "control", "control_output", root=name)

        # State layout: the inner cell's state fields, then this cell's own.
        state_fields = self._cell.state_name + self.ControllerState._fields
        self.State = tfnamedtuple("ControllerState", state_fields)

        own_initial_params = ((0, False),)
        initial_state_params = self.control.initial_state_params + own_initial_params
        super().__init__(
            initial_state_params,
            state_size=self._get_state_size(),
            trainable=trainable,
            name=name,
            dtype=dtype,
        )

        self._total_time = total_time
        self.relative = relative
        self._epsilon_volume = EPSILON_VOLUME

        base_input_names = ('remaining_time', 'remaining_volume',)
        self._control_input_names = base_input_names + extra_control_input_names

        self.K = K if self.with_time_embedding else 1
        self.time_embedding = None

    @property
    def trainable_weights(self):
        """Inner cell's trainable weights followed by the parent's."""
        inner_weights = self.control.trainable_weights
        return inner_weights + super().trainable_weights

    @property
    def non_trainable_weights(self):
        """Inner cell's non-trainable weights followed by the parent's."""
        inner_weights = self.control.non_trainable_weights
        return inner_weights + super().non_trainable_weights

    @property
    def losses(self):
        """Parent's losses followed by the inner cell's."""
        parent_losses = super().losses
        return parent_losses + self._cell.losses

    @property
    def sub_state_size(self):
        """Number of state entries owned by this cell itself."""
        return len(self.ControllerState._fields)

    @property
    def with_time_embedding(self):
        """Whether ``"time_embedding"`` is one of the control inputs."""
        return "time_embedding" in self._control_input_names

    @property
    def control(self):
        """The wrapped control cell."""
        return self._cell

    @property
    def total_time(self):
        """Value stored as ``_total_time``."""
        return self._total_time

    @property
    def volume_target(self):
        """Value stored as ``_volume_target``; only readable once built.

        Raises:
            ValueError: if ``built`` is ``False``.
        """
        if self.built is False:
            raise ValueError("Illegal state, cell hasn't been built.")
        return self._volume_target

    @RNNCellInterface.root.setter
    def root(self, root):
        # Re-root the inner cell under this cell's full name.
        self._root = root
        self.control.root = self.full_name

    def _get_state_size(self):
        """Return the state sizes: the inner cell's, then 1 for the own entry."""
        inner_sizes = self._cell.state_size
        if not hasattr(inner_sizes, '__len__'):
            # A size without a length is wrapped so it can be star-unpacked.
            inner_sizes = (inner_sizes,)
        return self.State(*inner_sizes, realized_volume_cum=1)

    def get_control_state(self, state):
        """Drop this cell's own trailing entries and rebuild the inner state."""
        own_entries = self.sub_state_size
        return self.control.State(*state[:-own_entries])

    def make_state(self, control_state, remaining_volume):
        """Assemble a full state from the inner state and one trailing entry."""
        return self.State(*control_state, remaining_volume)

    def build(self, inputs_shape):
        """Build the parent, set the tensors and create the embedding if used."""
        super().build(inputs_shape)
        set_tf_tensor(self, '_volume_target', dtype=self.dtype)
        set_tf_tensor(self, '_total_time', dtype=self.dtype)
        if not self.with_time_embedding:
            return
        # FIXME(nperrin16): the second dimension (288) is hard-coded.
        self.time_embedding = self.add_weight(
            "time_embedding",
            (self.K, 288),
            initializer=tf.ones_initializer(dtype=self.dtype))

    def call(self, response: tf.Tensor, state: namedtuple,
             training=False) -> (tf.Tensor, namedtuple):
        """Run one controller step.

        Splits ``response`` into volume, cost and time, updates the cumulated
        volume, derives the remaining time and volume and feeds the inner
        cell.

        Returns:
            The inner cell's output and the new state, whose last entry is
            the updated cumulated volume.
        """
        *inner_state, cumulated_before = state
        volume, cost, time = ResponseState.extract_data(response)

        volume_cum = tf.identity(cumulated_before + volume, name="volume_cum")
        time_left = self.total_time - time
        remaining_time = tf.identity(time_left / self.total_time, "remaining_time")
        remaining_volume = tf.identity(self.volume_target - volume_cum,
                                       name="remaining_volume")

        if self.relative:
            # Express the quantities as fractions of the volume target.
            remaining_volume = tf.identity(remaining_volume / self.volume_target,
                                           name="relative_remaining_volume")
            volume = tf.identity(volume / self.volume_target, name="relative_volume")
            cost = tf.identity(cost / self.volume_target, name="relative_cost")

        emb = None
        if self.time_embedding is not None:
            # The embedding column is picked from the first element's time
            # value and repeated once per row of ``response``.
            row_count = tf.shape(response)[0]
            ones_column = tf.ones([row_count, 1], dtype=self.dtype)
            time_index = tf.cast(time[0][0], tf.int32)
            emb = ones_column * self.time_embedding[:, time_index]

        # Volume target repeated to the shape of ``volume``.
        tiled_target = self.volume_target * tf.ones_like(volume, dtype=self.dtype)
        control_input = self.build_control_input(
            remaining_time=remaining_time,
            remaining_volume=remaining_volume,
            volume_target=tiled_target,
            volume=volume,
            cost=cost,
            time_embedding=emb,
        )

        previous_inner_state = self.control.State(*inner_state)
        control_output, next_inner_state = self._cell(control_input,
                                                      previous_inner_state,
                                                      training)
        next_state = self.State(*next_inner_state, realized_volume_cum=volume_cum)
        return control_output, next_state

    def build_control_input(self, **kwargs):
        """Concatenate the configured control inputs, in order, along axis 1."""
        selected = [kwargs[input_name] for input_name in self._control_input_names]
        return tf.concat(selected, axis=1, name="control_input")