def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Small CNN policy/value Keras model.

    Three conv layers feed a shared dense "state" layer, from which both
    the action-logit head and the value head branch.
    """
    super(KerasCnnModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)
    self.inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")

    # Resolve each activation function once instead of once per layer.
    conv_activation = get_activation_fn(model_config.get("conv_activation"))
    fc_activation = get_activation_fn(model_config.get("fcnet_activation"))

    conv1 = tf.keras.layers.Conv2D(
        filters=6, kernel_size=3, strides=2,
        activation=conv_activation)(self.inputs)
    conv2 = tf.keras.layers.Conv2D(
        filters=16, kernel_size=3, strides=2,
        activation=conv_activation)(conv1)
    conv3 = tf.keras.layers.Conv2D(
        filters=32, kernel_size=3, strides=1,
        activation=conv_activation)(conv2)
    conv_flatten = tf.keras.layers.Flatten()(conv3)

    # Shared dense layer feeding both output heads.
    state = tf.keras.layers.Dense(
        model_config['custom_options']['hidden_units'],
        activation=fc_activation)(conv_flatten)
    layer_out = tf.keras.layers.Dense(num_outputs, name="act_output")(state)
    value_out = tf.keras.layers.Dense(1, name="value_output")(state)

    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kwargs):
    """Keras net over the 'board' component of the observation space.

    Builds either a conv stack (board viewed as a one-channel image) or a
    plain FC stack (board flattened), followed by shared hidden layers and
    separate policy-logit / value heads.
    """
    super().__init__(obs_space, action_space, num_outputs, model_config,
                     name, **kwargs)

    conv_filters = model_config['conv_filters']
    self.is_conv = bool(conv_filters)

    # Conv nets see the board as an image with a trailing channel axis;
    # FC nets see it as a flat vector.
    board_space = obs_space.original_space['board']
    if self.is_conv:
        input_shape = board_space.shape + (1, )
    else:
        input_shape = (np.prod(board_space.shape), )
    self.inputs = tf.keras.layers.Input(shape=input_shape,
                                        name='observations')

    x = self.inputs
    if self.is_conv:
        conv_activation = get_activation_fn(model_config['conv_activation'])
        for idx, (filters, kernel_size, stride) in enumerate(conv_filters, 1):
            x = tf.keras.layers.Conv2D(filters,
                                       kernel_size,
                                       stride,
                                       name="conv{}".format(idx),
                                       activation=conv_activation,
                                       padding='same')(x)
        x = tf.keras.layers.Flatten()(x)

    fc_activation = get_activation_fn(model_config['fcnet_activation'])
    for idx, size in enumerate(model_config['fcnet_hiddens'], 1):
        x = tf.keras.layers.Dense(
            size,
            name='fc{}'.format(idx),
            activation=fc_activation,
            kernel_initializer=normc_initializer(1.0))(x)

    # Policy-logit and value heads off the shared trunk.
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(x)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(x)

    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
    # Filled in during forward passes with the value head's output.
    self._value_out = None
def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
):
    """Feed-forward policy/value network built as one Keras model.

    Maps (optionally action-augmented) observation vectors through a
    shared FC trunk to action logits and a value estimate.
    """
    super(FeedForward, self).__init__(obs_space, action_space, num_outputs,
                                      model_config, name)

    # If the space was wrapped (dict preprocessing), unwrap the raw "obs"
    # component; otherwise use the space directly.
    if 'original_space' in dir(obs_space):
        curr_obs_space = obs_space.original_space.spaces["obs"]
    else:
        curr_obs_space = obs_space

    self.use_prev_action = model_config["custom_options"].get(
        "use_prev_action")

    # Input is either the observation alone or observation concatenated
    # with the previous action.
    if self.use_prev_action:
        in_size = curr_obs_space.shape[0] + action_space.shape[0]
    else:
        in_size = curr_obs_space.shape[0]
    input_layer = tf.keras.layers.Input(shape=(in_size), name="inputs")

    # Shared hidden trunk.
    hidden_activation = get_activation_fn(
        model_config.get("fcnet_activation"))
    last_layer = input_layer
    for idx, size in enumerate(model_config.get("fcnet_hiddens"), 1):
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(idx),
            activation=hidden_activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)

    # Two heads off the shared trunk: action logits and state value.
    logits = tf.keras.layers.Dense(self.num_outputs,
                                   activation=tf.keras.activations.linear,
                                   name="logits")(last_layer)
    values = tf.keras.layers.Dense(1, activation=None,
                                   name="values")(last_layer)

    self.model = tf.keras.Model(inputs=[input_layer],
                                outputs=[logits, values])
    self.register_variables(self.model.variables)
    self.model.summary()
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Build the vision (conv) tower over the raw observation tensor.

    Returns a tuple of (flattened output tensor, flattened last-hidden
    feature tensor).
    """
    last = input_dict["obs"]
    filters = options.get("conv_filters")
    if not filters:
        # Fall back to the default filter spec for this input resolution.
        filters = _get_filter_config(last.shape.as_list()[1:])
    activation = get_activation_fn(options.get("conv_activation"))

    with tf.name_scope("vision_net"):
        # All but the last filter spec become "same"-padded conv layers.
        for idx, (depth, kernel, stride) in enumerate(filters[:-1], 1):
            last = tf.layers.conv2d(
                last,
                depth,
                kernel,
                stride,
                activation=activation,
                padding="same",
                name="conv{}".format(idx))
        depth, kernel, stride = filters[-1]

        if options.get("no_final_linear"):
            # Skip the final linear layer: emit num_outputs channels
            # directly from an activated conv.
            fc_out = tf.layers.conv2d(
                last,
                num_outputs,
                kernel,
                stride,
                activation=activation,
                padding="valid",
                name="fc_out")
            return flatten(fc_out), flatten(fc_out)

        # Standard head: activated conv ("fc1") then a linear 1x1 conv
        # ("fc2") producing the outputs.
        fc1 = tf.layers.conv2d(
            last,
            depth,
            kernel,
            stride,
            activation=activation,
            padding="valid",
            name="fc1")
        fc2 = tf.layers.conv2d(
            fc1,
            num_outputs, [1, 1],
            activation=None,
            padding="same",
            name="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use _build_layers_v2().
    """
    hiddens = options.get("fcnet_hiddens")
    activation = get_activation_fn(options.get("fcnet_activation"))

    # Collapse any extra dimensions into a single feature vector.
    if len(inputs.shape) > 2:
        inputs = tf.layers.flatten(inputs)

    with tf.name_scope("fc_net"):
        last_layer = inputs
        for idx, size in enumerate(hiddens, 1):
            if options.get("no_final_linear") and idx == len(hiddens):
                # Fold the output into the final hidden layer: it is
                # activated and sized num_outputs instead of linear.
                output = tf.layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name="fc_out")
                return output, output
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=activation,
                name="fc{}".format(idx))
        # Final linear projection to the requested output size.
        output = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")
        return output, last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Keras model with one fully connected hidden layer and two heads."""
    super(KerasFcModel, self).__init__(obs_space, action_space, num_outputs,
                                       model_config, name)
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")

    # Only the first entry of "fcnet_hiddens" defines the hidden layer.
    hidden_size = model_config.get("fcnet_hiddens")[0]
    hidden = tf.keras.layers.Dense(
        hidden_size,
        name="my_layer1",
        activation=get_activation_fn(model_config.get("fcnet_activation")),
        kernel_initializer=normc_initializer(1.0))(self.inputs)

    # Policy-logit and value heads share the hidden layer.
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(hidden)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(hidden)

    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Recurrent comm-agent model: CNN -> FC -> LSTM trunk with action and
    message heads, plus optional CPC and "inference policy" extensions.

    Inputs are time-major observations plus previous action/reward and the
    LSTM h/c state tensors. Outputs are [logits, values] followed by any
    extra outputs produced by the LSTM builder (e.g. CPC tensors) and,
    when the inference policy is enabled, the softmaxed message
    distribution.

    NOTE(review): statement nesting was reconstructed from a collapsed
    source line (e.g. which statements sit inside
    `if self.use_inference_policy:`); confirm against the original
    indentation.
    """
    # Per-agent variable scope so multiple instances can share or isolate
    # variables by `name` (AUTO_REUSE).
    with tf.variable_scope(f"{name}_model", reuse=tf.AUTO_REUSE):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        custom_opts = model_config.get("custom_options", {})
        self.use_comm = custom_opts.get("use_comm", True)
        self.message_coeff = custom_opts.get("message_entropy_coeff", 0.0)

        # The CNN consumes `obs_shape`; any remaining flat observation
        # dimensions are fed to the FC stage as "extra" inputs.
        obs_space_shape = custom_opts.get("obs_shape", obs_space.shape)
        if len(obs_space.shape) == 1:
            n_extra_obs = obs_space.shape[0] - np.prod(obs_space_shape)
        else:
            n_extra_obs = 0

        # Conv stage: the time dimension is folded into the batch before
        # applying the CNN.
        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        if filters is None:
            filters = _get_filter_config(obs_space_shape)
        inputs = tf.keras.layers.Input(shape=(None, *obs_space_shape),
                                       name=f"{name}_observations_time")
        model_inputs = [inputs]
        cnn_in = tf.reshape(inputs, [-1, *obs_space_shape])
        conv_out = build_cnn(cnn_in, filters, activation,
                             name=f"{name}_conv")

        # FC stage (optionally concatenating the extra flat observations).
        activation = get_activation_fn(
            model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")
        if n_extra_obs > 0:
            extra_inputs = tf.keras.layers.Input(
                shape=(n_extra_obs, ),
                name=f"{name}_extra_observations")
            model_inputs.append(extra_inputs)
            fc_in = tf.keras.layers.Concatenate(name=f"{name}_fc_in")(
                [tf.keras.layers.Flatten()(conv_out), extra_inputs])
        else:
            fc_in = tf.keras.layers.Flatten(name=f"{name}_fc_in")(conv_out)
        fc_out = build_fc(fc_in, hiddens, activation, name=f"{name}_fc")

        # LSTM stage: recurrent state, sequence lengths, and (optionally)
        # previous action/reward conditioning.
        self.cell_size = model_config.get("lstm_cell_size", 256)
        state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ),
                                           name=f"{name}_h")
        state_in_c = tf.keras.layers.Input(shape=(self.cell_size, ),
                                           name=f"{name}_c")
        seq_in = tf.keras.layers.Input(shape=(), name=f"{name}_seq_in",
                                       dtype=tf.int32)
        prev_actions = tf.keras.layers.Input(shape=(),
                                             name=f"{name}_prev_actions",
                                             dtype=tf.int32)
        prev_rewards = tf.keras.layers.Input(shape=(),
                                             name=f"{name}_prev_rewards")
        model_inputs.extend(
            [prev_actions, prev_rewards, seq_in, state_in_h, state_in_c])
        if model_config.get("lstm_use_prev_action_reward"):
            # action_space[0] is the environment-action component of the
            # (action, message) tuple space.
            prev_actions_onehot = tf.one_hot(prev_actions,
                                             action_space[0].n)
            in_tensors = [fc_out, prev_actions_onehot, prev_rewards]
        else:
            in_tensors = [fc_out]

        # CPC (contrastive predictive coding) objective configuration.
        self.use_cpc = custom_opts.get("use_cpc", False)
        if self.use_cpc:
            cpc_params = custom_opts["cpc_opts"]
            self.cpc_in_shape = [cpc_params["cpc_code_size"]]
            self.cpc_out_shape = [
                cpc_params["cpc_len"], cpc_params["cpc_code_size"]
            ]
            cpc_params["name"] = f"{name}_cpc"
            # The actual CPC encodings (filled in during forward passes).
            self._cpc_ins = None
            self._cpc_preds = None
        else:
            cpc_params = {}
        lstm_out, model_outputs = build_lstm(
            in_tensors,
            state_in_h=state_in_h,
            state_in_c=state_in_c,
            seq_in=seq_in,
            cell_size=self.cell_size,
            add_cpc=self.use_cpc,
            cpc_params=cpc_params,
            name=f"{name}_lstm",
        )

        # Final layer; logits carry both actions and messages.
        self.use_inference_policy = custom_opts.get(
            "use_inference_policy", False)
        if self.use_inference_policy:
            inference_policy_opts = custom_opts["inference_policy_opts"]
            self.pm_type = inference_policy_opts["type"]
            self.ewma_momentum = inference_policy_opts.get("ewma_momentum")
            self.pm_hidden = inference_policy_opts.get(
                "pm_hidden", [64, 64])
            # action_space[1] is the message component of the tuple space.
            self.message_size = action_space[1].n
            action_logits = tf.keras.layers.Dense(
                action_space[0].n,
                activation=tf.keras.activations.linear,
                name=f"{name}_action_logits",
            )(lstm_out)
            unscaled_message_logits = tf.keras.layers.Dense(
                self.message_size,
                activation=tf.keras.activations.linear,
                name=f"{name}_unscaled_message_logits",
            )(lstm_out)
            unscaled_message_p = tf.nn.softmax(unscaled_message_logits)
            model_outputs.append(unscaled_message_p)
            if self.pm_type == "moving_avg":
                # Non-trainable running average of the message
                # distribution, plus a step counter when no EWMA momentum
                # is configured.
                self._avg_message_p = tf.Variable(
                    name=f"{name}_avg",
                    initial_value=tf.ones(
                        (self.message_size, )) / self.message_size,
                    trainable=False,
                )
                avg_message_vars = [self._avg_message_p]
                if self.ewma_momentum is None:
                    self._avg_message_t = tf.Variable(
                        name=f"{name}_t",
                        initial_value=tf.zeros(()),
                        trainable=False,
                    )
                    avg_message_vars.append(self._avg_message_t)
                self.register_variables(avg_message_vars)
            logits = tf.keras.layers.Concatenate(name=f"{name}_logits")(
                [action_logits, unscaled_message_logits])
        else:
            logits = tf.keras.layers.Dense(
                num_outputs,
                activation=tf.keras.activations.linear,
                name=f"{name}_logits",
            )(lstm_out)
        values = tf.keras.layers.Dense(1,
                                       activation=None,
                                       name=f"{name}_values")(lstm_out)
        self._value_out = None  # The actual value
        model_outputs = [logits, values] + model_outputs

        # Create the RNN model.
        self.rnn_model = tf.keras.Model(inputs=model_inputs,
                                        outputs=model_outputs)
        self.register_variables(self.rnn_model.variables)
        self._model_out = None  # Actual logits
        self.rnn_model.summary()

        if self.use_inference_policy and self.pm_type == "hyper_nn":
            # Hyper-network inference policy: predict the message
            # distribution from the gradient-stopped, flattened weights of
            # the message sub-network.
            flattened_vars = []
            message_model = tf.keras.Model(inputs=model_inputs,
                                           outputs=unscaled_message_logits)
            for e in message_model.variables:
                flattened_vars.append(
                    tf.reshape(tf.stop_gradient(e), shape=[1, -1]))
            concat_vars = tf.keras.layers.Concatenate()(flattened_vars)
            pm_fc_out, pm_fc_vars = build_fc(concat_vars,
                                             self.pm_hidden,
                                             "relu",
                                             name="pm_fc",
                                             return_vars=True)
            pm_logits_layer = tf.keras.layers.Dense(
                self.message_size,
                activation=tf.keras.activations.linear,
                name=f"{name}_pm_logits",
            )
            self._pm_logits = pm_logits_layer(pm_fc_out)
            self.register_variables(pm_fc_vars)
            self.register_variables(pm_logits_layer.variables)

        # Extra variable definitions (populated elsewhere).
        self.use_receiver_bias = custom_opts.get("use_receiver_bias", False)
        self.no_message_outputs = None
        self._unscaled_message_p = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Conv (optionally ConvLSTM-recurrent) Stratego model with separate
    policy and value towers.

    Each tower consumes its own observation key (partial and/or full
    observation depending on `observation_mode`) and keeps the board's
    spatial layout: the policy head emits one logit channel per
    per-cell action, and the value head is either a spatial Q-map
    (`q_fn`) or a scalar value.

    NOTE(review): statement nesting was reconstructed from a collapsed
    source line; confirm against the original indentation.
    """
    model_config = with_base_config(
        base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
        extra_config=model_config)
    TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                       model_config, name)
    print(model_config)

    # Select which observation components feed the policy and value nets.
    observation_mode = model_config['custom_options']['observation_mode']
    if observation_mode == PARTIALLY_OBSERVABLE:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'partial_observation'
    elif observation_mode == FULLY_OBSERVABLE:
        self.pi_obs_key = 'full_observation'
        self.vf_obs_key = 'full_observation'
    elif observation_mode == BOTH_OBSERVATIONS:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'full_observation'
        # Towers must be separate when pi and vf see different inputs.
        assert not model_config['vf_share_layers']
    else:
        assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

    if model_config["custom_preprocessor"]:
        print(obs_space)
        self.preprocessor = ModelCatalog.get_preprocessor_for_space(
            observation_space=self.obs_space.original_space,
            options=model_config)
    else:
        self.preprocessor = None
        logger.warn(
            "No custom preprocessor for StrategoModel was specified.\n"
            "Some tree search policies may not initialize their placeholders correctly without this."
        )

    self.use_lstm = model_config['use_lstm']
    self.fake_lstm = model_config['custom_options'].get('fake_lstm')
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.mask_invalid_actions = model_config['custom_options'][
        'mask_invalid_actions']

    conv_activation = get_activation_fn(
        model_config.get("conv_activation"))
    lstm_filters = model_config["custom_options"]['lstm_filters']
    cnn_filters = model_config.get("conv_filters")
    final_pi_filter_amt = model_config["custom_options"][
        "final_pi_filter_amt"]

    # Board dimensions taken from the policy observation.
    rows = obs_space.original_space[self.pi_obs_key].shape[0]
    colums = obs_space.original_space[self.pi_obs_key].shape[1]

    # Recurrent state shape: a dummy scalar for the "fake" LSTM,
    # otherwise the spatial ConvLSTM state.
    if self.use_lstm:
        if self.fake_lstm:
            self._lstm_state_shape = (1, )
        else:
            self._lstm_state_shape = (rows, colums, lstm_filters[0][0])
    if self.use_lstm:
        # Recurrent inputs: h/c state per tower plus sequence lengths;
        # observations gain a leading time dimension.
        state_in = [
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="pi_lstm_h"),
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="pi_lstm_c"),
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="vf_lstm_h"),
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="vf_lstm_c")
        ]
        seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")
        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self.pi_obs_key].shape),
            name="pi_observation")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
            name="vf_observation")
    else:
        state_in, seq_lens_in = None, None
        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.pi_obs_key].shape,
            name="pi_observation")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.vf_obs_key].shape,
            name="vf_observation")

    def maybe_td(layer):
        # Wrap a layer in TimeDistributed when operating on recurrent
        # (batch, time, ...) inputs; otherwise return it unchanged.
        if self.use_lstm:
            return tf.keras.layers.TimeDistributed(
                layer=layer, name=f"td_{layer.name}")
        else:
            return layer

    def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                             state_in: tf.Tensor):
        # Encapsulated in a function to either be called once for shared
        # policy/vf or twice for separate policy/vf towers.
        _last_layer = obs_in
        for i, (out_size, kernel, stride) in enumerate(cnn_filters):
            _last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=out_size,
                                       kernel_size=kernel,
                                       strides=stride,
                                       activation=conv_activation,
                                       padding="same",
                                       name="{}_conv_{}".format(
                                           prefix, i)))(_last_layer)
        state_out = state_in
        if self.use_lstm and not self.fake_lstm:
            for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                if i > 0:
                    raise NotImplementedError(
                        "Only single lstm layers are implemented right now"
                    )
                _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    return_sequences=True,
                    return_state=True,
                    name="{}_convlstm".format(prefix))(
                        inputs=_last_layer,
                        mask=tf.sequence_mask(seq_lens_in),
                        initial_state=state_in)
        return _last_layer, state_out

    # First two state tensors belong to the policy tower, last two to the
    # value tower.
    if self.use_lstm:
        pi_state_in = state_in[:2]
        vf_state_in = state_in[2:]
    else:
        pi_state_in, vf_state_in = None, None

    pi_last_layer, pi_state_out = build_primary_layers(
        prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)
    vf_last_layer, vf_state_out = build_primary_layers(
        prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

    if self.use_lstm:
        state_out = [*pi_state_out, *vf_state_out]
    else:
        state_out = None

    # Policy head: widen, then project to one logit channel per
    # board-cell action, preserving the spatial layout.
    pi_last_layer = maybe_td(
        tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                               kernel_size=[3, 3],
                               strides=1,
                               activation=conv_activation,
                               padding="same",
                               name="{}_conv_{}".format(
                                   'pi', "last")))(pi_last_layer)
    print(
        f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {int(action_space.n / (rows * colums))}"
    )
    unmasked_logits_out = maybe_td(
        tf.keras.layers.Conv2D(
            filters=int(action_space.n / (rows * colums)),
            kernel_size=[3, 3],
            strides=1,
            activation=None,
            padding="same",
            name="{}_conv_{}".format('pi',
                                     "unmasked_logits")))(pi_last_layer)

    self._use_q_fn = model_config['custom_options']['q_fn']
    if self._use_q_fn:
        # Q-function head: mirrors the policy head, one Q-value per
        # action channel laid out spatially.
        vf_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                   kernel_size=[3, 3],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="{}_conv_{}".format(
                                       'vf', "last")))(vf_last_layer)
        value_out = maybe_td(
            tf.keras.layers.Conv2D(
                filters=int(action_space.n / (rows * colums)),
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('vf', "q_out")))(vf_last_layer)
    else:
        # Scalar state-value head: 1x1 conv, flatten, dense(1).
        vf_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=1,
                                   kernel_size=[1, 1],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="{}_conv_{}".format(
                                       'vf', "last")))(vf_last_layer)
        vf_last_layer = maybe_td(
            tf.keras.layers.Flatten(name="vf_flatten"))(vf_last_layer)
        value_out = maybe_td(
            tf.keras.layers.Dense(
                units=1,
                name="vf_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

    model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
    model_outputs = [unmasked_logits_out, value_out]
    if self.use_lstm:
        model_inputs += [seq_lens_in, *state_in]
        model_outputs += state_out
    self.base_model = tf.keras.Model(inputs=model_inputs,
                                     outputs=model_outputs)
    print(self.base_model.summary())
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,):
    """Recurrent (GRU) policy/value network.

    Observations (optionally concatenated with the previous action) pass
    through FC layers and a single GRU cell; logits and values are read
    off the GRU output sequence.
    """
    super(GRU, self).__init__(obs_space, action_space, num_outputs,
                              model_config, name)

    # If the space was wrapped (dict preprocessing), unwrap the raw "obs"
    # component; otherwise use the space directly.
    if 'original_space' in dir(obs_space):
        curr_obs_space = obs_space.original_space.spaces["obs"]
    else:
        curr_obs_space = obs_space

    self.use_prev_action = model_config["custom_options"].get(
        "use_prev_action")
    if self.use_prev_action:
        feature_size = curr_obs_space.shape[0] + action_space.shape[0]
    else:
        feature_size = curr_obs_space.shape[0]
    # Time-major input batch: (batch, time, features).
    input_layer = tf.keras.layers.Input(shape=(None, feature_size),
                                        name="inputs")

    # Preprocess observations with the configured hidden layers.
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    last_layer = input_layer
    for idx, size in enumerate(model_config.get("fcnet_hiddens"), 1):
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(idx),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)

    self.cell_size = model_config["custom_options"].get("cell_size")
    state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ), name="h")
    seq_in = tf.keras.layers.Input(shape=(), name="seq_in")

    gru_out, state_h = tf.keras.layers.GRU(
        self.cell_size,
        return_sequences=True,
        return_state=True,
        name="gru")(
            inputs=last_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h])

    # Logits and values share the GRU output.
    logits = tf.keras.layers.Dense(self.num_outputs,
                                   activation=tf.keras.activations.linear,
                                   name="logits")(gru_out)
    values = tf.keras.layers.Dense(1, activation=None,
                                   name="values")(gru_out)

    # Create the RNN model.
    self.rnn_model = tf.keras.Model(
        inputs=[input_layer, seq_in, state_in_h],
        outputs=[logits, values, state_h])
    self.register_variables(self.rnn_model.variables)
    self.rnn_model.summary()
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             twin_q):
    """Fully connected SAC-style Stratego model with separate pi / Q nets
    and an optional twin Q network.

    Builds three independent Keras models: `pi_model` (unmasked logits),
    `main_q_model` (one Q-value per action) and, when `twin_q` is set,
    `twin_q_model`. Also creates the trainable SAC temperature
    `log_alpha`. LSTM support is explicitly not implemented.

    NOTE(review): statement nesting was reconstructed from a collapsed
    source line; confirm against the original indentation.
    """
    model_config = with_base_config(
        base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
        extra_config=model_config)
    TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                       model_config, name)
    print(model_config)

    # Select which observation components feed the policy and value nets.
    observation_mode = model_config['custom_options']['observation_mode']
    if observation_mode == PARTIALLY_OBSERVABLE:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'partial_observation'
    elif observation_mode == FULLY_OBSERVABLE:
        self.pi_obs_key = 'full_observation'
        self.vf_obs_key = 'full_observation'
    elif observation_mode == BOTH_OBSERVATIONS:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'full_observation'
        # Towers must be separate when pi and vf see different inputs.
        assert not model_config['vf_share_layers']
    else:
        assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

    if model_config["custom_preprocessor"]:
        print(obs_space)
        self.preprocessor = ModelCatalog.get_preprocessor_for_space(
            observation_space=self.obs_space.original_space,
            options=model_config)
    else:
        self.preprocessor = None
        logger.warn(
            "No custom preprocessor for StrategoModel was specified.\n"
            "Some tree search policies may not initialize their placeholders correctly without this."
        )

    self.use_lstm = model_config['use_lstm']
    if self.use_lstm:
        raise NotImplementedError
    self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.mask_invalid_actions = model_config['custom_options'][
        'mask_invalid_actions']
    self._use_q_fn = model_config['custom_options']['q_fn']
    self.twin_q = twin_q
    # A twin Q net only makes sense when Q-learning heads are enabled.
    assert not (not self._use_q_fn and self.twin_q)
    if self.twin_q and self.use_lstm:
        raise NotImplementedError
    self._sac_alpha = model_config.get("sac_alpha", False)

    # NOTE(review): the FC layers below reuse the *conv* activation
    # setting, not "fcnet_activation" — looks intentional here but
    # confirm.
    conv_activation = get_activation_fn(
        model_config.get("conv_activation"))

    if self.use_lstm:
        raise NotImplementedError
    else:
        state_in, seq_lens_in = None, None
        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.pi_obs_key].shape,
            name="pi_observation")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.vf_obs_key].shape,
            name="vf_observation")

    def maybe_td(layer):
        # Wrap in TimeDistributed for recurrent inputs; with use_lstm
        # always False here, this is an identity passthrough.
        if self.use_lstm:
            return tf.keras.layers.TimeDistributed(
                layer=layer, name=f"td_{layer.name}")
        else:
            return layer

    def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                             state_in: tf.Tensor):
        # Encapsulated in a function to either be called once for shared
        # policy/vf or twice for separate policy/vf towers.
        _last_layer = obs_in
        state_out = state_in
        for i, size in enumerate(model_config['fcnet_hiddens']):
            _last_layer = maybe_td(
                tf.keras.layers.Dense(size,
                                      name="{}_fc_{}".format(prefix, i),
                                      activation=conv_activation,
                                      kernel_initializer=normc_initializer(
                                          1.0)))(_last_layer)
        return _last_layer, state_out

    if self.use_lstm:
        pi_state_in = state_in[:2]
        vf_state_in = state_in[2:]
    else:
        pi_state_in, vf_state_in = None, None

    # Prefix distinguishes the main Q net from the twin when both exist.
    self.main_vf_prefix = "main_vf" if self.twin_q else "vf"

    pi_last_layer, pi_state_out = build_primary_layers(
        prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)
    vf_last_layer, vf_state_out = build_primary_layers(
        prefix=self.main_vf_prefix,
        obs_in=self.vf_obs_inputs,
        state_in=vf_state_in)
    if self.twin_q:
        twin_vf_last_layer, twin_vf_state_out = build_primary_layers(
            prefix="twin_vf", obs_in=self.vf_obs_inputs, state_in=None)
    else:
        twin_vf_last_layer, twin_vf_state_out = None, None

    if self.use_lstm:
        raise NotImplementedError
    else:
        state_out = None

    # NOTE(review): below, maybe_td wraps the *applied tensor* rather
    # than the layer (Dense(...)(x) sits inside maybe_td). Harmless while
    # use_lstm is always False (identity), but would break if LSTM
    # support were added — confirm before reusing.
    unmasked_logits_out = maybe_td(
        tf.keras.layers.Dense(
            action_space.n,
            name="{}_fc_{}".format('pi', 'unmasked_logits'),
            activation=None,
            kernel_initializer=normc_initializer(1.0))(pi_last_layer))
    value_out = maybe_td(
        tf.keras.layers.Dense(
            action_space.n,
            name="{}_fc_{}".format(self.main_vf_prefix, 'q_out'),
            activation=None,
            kernel_initializer=normc_initializer(1.0))(vf_last_layer))
    if self.twin_q:
        twin_value_out = maybe_td(
            tf.keras.layers.Dense(action_space.n,
                                  name="{}_fc_{}".format(
                                      'twin_vf', 'q_out'),
                                  activation=None,
                                  kernel_initializer=normc_initializer(
                                      1.0))(twin_vf_last_layer))

    # Three separate Keras models so pi and Q nets can be trained and
    # targeted independently.
    self.pi_model = tf.keras.Model(inputs=[self.pi_obs_inputs],
                                   outputs=[unmasked_logits_out])
    self.main_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                       outputs=[value_out])
    if self.twin_q:
        self.twin_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                           outputs=[twin_value_out])
        print(self.twin_q_model.summary())
        self.register_variables(self.twin_q_model.variables)
    print(self.pi_model.summary())
    print(self.main_q_model.summary())
    self.register_variables(self.pi_model.variables)
    self.register_variables(self.main_q_model.variables)

    # SAC temperature parameter (alpha = exp(log_alpha)).
    self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
    self.alpha = tf.exp(self.log_alpha)
    self.register_variables([self.log_alpha])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Fully connected policy network whose hidden activations are gated
    by trainable mask layers.

    Every hidden Dense layer is followed by a ``MultiplyMaskLayer``
    (multiplicative or additive, per ``custom_options["mask_mode"]``).
    The masked activations of each hidden layer are also exposed as extra
    model outputs so they can be inspected or perturbed externally.
    """
    super(FullyConnectedNetworkWithMask, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    activation_list = []
    self.activation_value = None

    mask_mode = model_config.get("custom_options")["mask_mode"]
    assert mask_mode in ['multiply', 'add']

    activation = get_activation_fn(model_config.get("fcnet_activation"))
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                   name="observations")
    last_layer = inputs

    mask_placeholder_dict = OrderedDict()
    self.mask_layer_dict = OrderedDict()
    self.default_mask = OrderedDict()

    # With no_final_linear, the output layer replaces the last hidden
    # layer (activated, sized num_outputs); otherwise every hidden size is
    # used and the output layer is linear.
    hidden_sizes = hiddens[:-1] if no_final_linear else hiddens
    for i, size in enumerate(hidden_sizes, 1):
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        # Gate the activations with a trainable mask.
        mask_name = "fc_{}_mask".format(i)
        mask_layer = MultiplyMaskLayer(size, name=mask_name,
                                       mask_mode=mask_mode)
        last_layer = mask_layer(last_layer)
        mask_placeholder_dict[mask_name] = mask_layer.get_kernel()
        self.mask_layer_dict[mask_name] = mask_layer
        activation_list.append(last_layer)

    if no_final_linear:
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    else:
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

    if not vf_share_layers:
        # Separate (unmasked) tower for the value function.
        value_layer = inputs
        for i, size in enumerate(hiddens, 1):
            value_layer = tf.keras.layers.Dense(
                size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(value_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(value_layer)
    else:
        # BUG FIX: value_out was previously left undefined when
        # vf_share_layers was set, raising NameError at model creation.
        # Share the policy trunk and read the value off its last
        # (masked) hidden layer.
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

    self.mask_placeholder_dict = mask_placeholder_dict
    self.base_model = tf.keras.Model(
        inputs=inputs, outputs=[layer_out, value_out] + activation_list)
    # TODO we can add a flag to determine whether to return activation.
    self.register_variables(self.base_model.variables)
    self.register_variables(list(self.mask_placeholder_dict.values()))
    # Remember each mask's initial weights so they can be restored later.
    for name, layer in self.mask_layer_dict.items():
        self.default_mask[name] = layer.get_weights()
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a Stratego policy/value model with optional per-branch LSTMs.

    Depending on ``custom_options["observation_mode"]``, the policy (pi) and
    value (vf) branches read either the partial or the full observation (or a
    different one each).  A pre-trained policy network can be loaded from a
    Keras file via ``custom_options["policy_keras_model_file_path"]``.
    """
    model_config = with_base_config(base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
                                    extra_config=model_config)
    TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                       model_config, name)
    print(model_config)
    # Select which dict-observation keys feed the pi and vf branches.
    observation_mode = model_config['custom_options']['observation_mode']
    if observation_mode == PARTIALLY_OBSERVABLE:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'partial_observation'
    elif observation_mode == FULLY_OBSERVABLE:
        self.pi_obs_key = 'full_observation'
        self.vf_obs_key = 'full_observation'
    elif observation_mode == BOTH_OBSERVATIONS:
        self.pi_obs_key = 'partial_observation'
        self.vf_obs_key = 'full_observation'
        # Branches see different observations, so they cannot share layers.
        assert not model_config['vf_share_layers']
    else:
        assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
    if model_config["custom_preprocessor"]:
        print(obs_space)
        self.preprocessor = ModelCatalog.get_preprocessor_for_space(observation_space=self.obs_space.original_space,
                                                                    options=model_config)
    else:
        self.preprocessor = None
        logger.warn("No custom preprocessor for StrategoModel was specified.\n"
                    "Some tree search policies may not initialize their placeholders correctly without this.")
    self.use_lstm = model_config['use_lstm']
    self.lstm_cell_size = model_config['lstm_cell_size']
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.mask_invalid_actions = model_config['custom_options']['mask_invalid_actions']
    conv_activation = get_activation_fn(model_config.get("conv_activation"))
    cnn_filters = model_config.get("conv_filters")
    fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
    hiddens = model_config.get("fcnet_hiddens")
    if self.use_lstm:
        # Separate (h, c) LSTM states for the pi and vf branches, plus a time
        # axis (None) on the observation inputs for TimeDistributed layers.
        state_in = [tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_h"),
                    tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_c"),
                    tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_h"),
                    tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_c")]
        seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")
        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self.pi_obs_key].shape), name="pi_observation")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self.vf_obs_key].shape), name="vf_observation")
    else:
        state_in, seq_lens_in = None, None
        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.pi_obs_key].shape, name="pi_observation")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.vf_obs_key].shape, name="vf_observation")
    if cnn_filters is None:
        # assuming board size will always remain the same for both pi and vf networks
        if self.use_lstm:
            single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
        else:
            single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
        cnn_filters = _get_filter_config(single_obs_input_shape)

    def maybe_td(layer):
        # Wrap in TimeDistributed only when a time axis is present (LSTM mode).
        if self.use_lstm:
            return tf.keras.layers.TimeDistributed(layer=layer)
        else:
            return layer

    def build_primary_layers(prefix: str, obs_in: tf.Tensor, state_in: tf.Tensor):
        # encapsulated in a function to either be called once for shared
        # policy/vf or twice for separate policy/vf
        _last_layer = obs_in
        for i, (out_size, kernel, stride) in enumerate(cnn_filters):
            _last_layer = maybe_td(tf.keras.layers.Conv2D(
                filters=out_size,
                kernel_size=kernel,
                strides=stride,
                activation=conv_activation,
                padding="same",
                name="{}_conv_{}".format(prefix, i)))(_last_layer)
        _last_layer = maybe_td(tf.keras.layers.Flatten())(_last_layer)
        for i, size in enumerate(hiddens):
            _last_layer = maybe_td(tf.keras.layers.Dense(
                size,
                name="{}_fc_{}".format(prefix, i),
                activation=fc_activation,
                kernel_initializer=normc_initializer(1.0)))(_last_layer)
        if self.use_lstm:
            _last_layer, *state_out = tf.keras.layers.LSTM(
                units=self.lstm_cell_size,
                return_sequences=True,
                return_state=True,
                name="{}_lstm".format(prefix))(
                    inputs=_last_layer,
                    mask=tf.sequence_mask(seq_lens_in),
                    initial_state=state_in)
        else:
            state_out = None
        return _last_layer, state_out

    if self.use_lstm:
        pi_state_in = state_in[:2]
        vf_state_in = state_in[2:]
    else:
        pi_state_in, vf_state_in = None, None
    policy_file_path = None
    if 'policy_keras_model_file_path' in model_config['custom_options']:
        policy_file_path = model_config['custom_options']['policy_keras_model_file_path']
    if policy_file_path is not None:
        # Use a pre-trained policy network loaded from disk instead of
        # building the pi branch from scratch.  Not supported with LSTM.
        if self.use_lstm:
            raise NotImplementedError
        pi_state_out = None
        self._pi_model = load_model(filepath=policy_file_path, compile=False)
        # remove loaded input layer
        # pi_model.layers.pop(0)
        # self.pi_obs_inputs = pi_model.layers[0]
        # Rename the loaded layers so they do not collide with the vf branch.
        for layer in self._pi_model.layers:
            layer._name = "pi_" + layer.name
        self._pi_model.layers[-1]._name = 'pi_unmasked_logits'
        self.unmasked_logits_out = self._pi_model(self.pi_obs_inputs)
    else:
        self._pi_model = None
        pi_last_layer, pi_state_out = build_primary_layers(
            prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)
        self.unmasked_logits_out = maybe_td(tf.keras.layers.Dense(
            num_outputs,
            name="pi_unmasked_logits",
            activation=None,
            kernel_initializer=normc_initializer(0.01)))(pi_last_layer)
    vf_last_layer, vf_state_out = build_primary_layers(
        prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)
    if self.use_lstm:
        state_out = [*pi_state_out, *vf_state_out]
    else:
        state_out = None
    # When q_fn is set, the "value" head emits one value per action (Q-values)
    # rather than a single state value.
    self._use_q_fn = model_config['custom_options']['q_fn']
    if self._use_q_fn:
        value_out_size = num_outputs
    else:
        value_out_size = 1
    value_out = maybe_td(tf.keras.layers.Dense(
        value_out_size,
        name="vf_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01)))(vf_last_layer)
    model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
    model_outputs = [self.unmasked_logits_out, value_out]
    if self.use_lstm:
        model_inputs += [seq_lens_in, *state_in]
        model_outputs += state_out
    self.base_model = tf.keras.Model(inputs=model_inputs, outputs=model_outputs)
    print(self.base_model.summary())
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a SAC-style model: one shared convolutional base (optionally a
    ConvLSTM) plus three convolutional heads — pi, q1, and q2.

    The heads are built as separate Keras models that consume a placeholder
    (`self._base_model_out`) matching the base model's output shape, so the
    base can be run once and its output fed to each head.  All heads emit one
    channel per action "plane", i.e. ``action_space.n / (rows * colums)``
    filters over the board grid.
    """
    model_config = with_base_config(
        base_config=DEFAULT_STRATEGO_MODEL_CONFIG, extra_config=model_config)
    TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                       model_config, name)
    print(model_config)
    observation_mode = model_config['custom_options']['observation_mode']
    if observation_mode == PARTIALLY_OBSERVABLE:
        self._obs_key = 'partial_observation'
    elif observation_mode == FULLY_OBSERVABLE:
        self._obs_key = 'full_observation'
    elif observation_mode == BOTH_OBSERVATIONS:
        raise NotImplementedError
    else:
        assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
    self._action_dist_class, self._logit_dim = ModelCatalog.get_action_dist(
        self.action_space, model_config)
    self.use_lstm = model_config['use_lstm']
    # fake_lstm keeps the LSTM state interface but skips the ConvLSTM layer.
    self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)
    self.mask_invalid_actions = model_config['custom_options'][
        'mask_invalid_actions']
    conv_activation = get_activation_fn(
        model_config.get("conv_activation"))
    base_lstm_filters = model_config["custom_options"]['base_lstm_filters']
    base_cnn_filters = model_config["custom_options"]['base_cnn_filters']
    pi_cnn_filters = model_config["custom_options"]['pi_cnn_filters']
    q_cnn_filters = model_config["custom_options"]['q_cnn_filters']
    # Board dimensions; assumed (rows, colums, channels) observation layout.
    rows = obs_space.original_space[self._obs_key].shape[0]
    colums = obs_space.original_space[self._obs_key].shape[1]
    if self.use_lstm:
        # ConvLSTM state keeps the spatial grid; channels from first filter.
        self._lstm_state_shape = (rows, colums, base_lstm_filters[0][0])
    if self.use_lstm and not self.fake_lstm:
        self._base_model_out_shape = (rows, colums, base_lstm_filters[0][0])
    else:
        self._base_model_out_shape = (rows, colums, base_cnn_filters[-1][0])
    if self.use_lstm:
        state_in = [
            tf.keras.layers.Input(shape=self._lstm_state_shape, name="base_lstm_h"),
            tf.keras.layers.Input(shape=self._lstm_state_shape, name="base_lstm_c")
        ]
        seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")
        self._obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self._obs_key].shape),
            name="observation")
        self._base_model_out = tf.keras.layers.Input(
            shape=self._base_model_out_shape, name="model_out")
    else:
        state_in, seq_lens_in = None, None
        self._obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self._obs_key].shape,
            name="observation")
        self._base_model_out = tf.keras.layers.Input(
            shape=self._base_model_out_shape, name="model_out")

    def maybe_td(layer):
        # Wrap in TimeDistributed only when a time axis exists (LSTM mode).
        if self.use_lstm:
            return tf.keras.layers.TimeDistributed(layer=layer,
                                                   name=f"td_{layer.name}")
        else:
            return layer

    def build_shared_base_layers(prefix: str, obs_in: tf.Tensor, state_in: tf.Tensor):
        # Shared trunk: CNN stack, then (optionally) a single ConvLSTM layer.
        # obs_in = tf.debugging.check_numerics(
        #     obs_in, f"nan found in obs_in", name=None)
        _last_layer = obs_in
        for i, (out_size, kernel, stride) in enumerate(base_cnn_filters):
            _last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=out_size,
                                       kernel_size=kernel,
                                       strides=stride,
                                       activation=conv_activation,
                                       padding="same",
                                       name="{}_conv_{}".format(
                                           prefix, i)))(_last_layer)
            # _last_layer = tf.debugging.check_numerics(
            #     _last_layer, f"nan found in _last_layer {i}", name=None)
        # In fake-LSTM mode the states pass through unchanged.
        base_state_out = state_in
        if self.use_lstm and not self.fake_lstm:
            for i, (out_size, kernel, stride) in enumerate(base_lstm_filters):
                if i > 0:
                    raise NotImplementedError(
                        "Only single lstm layers are implemented right now"
                    )
                _last_layer, *base_state_out = tf.keras.layers.ConvLSTM2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    data_format='channels_last',
                    return_sequences=True,
                    return_state=True,
                    name="{}_convlstm".format(prefix))(
                        inputs=_last_layer,
                        initial_state=state_in,
                        mask=tf.sequence_mask(seq_lens_in))
        return _last_layer, base_state_out

    def build_pi_layers(input_layer):
        # Policy head: CNN stack then a conv producing one logit channel per
        # action plane over the board grid.
        _last_layer = input_layer
        for i, (out_size, kernel, stride) in enumerate(pi_cnn_filters):
            _last_layer = tf.keras.layers.Conv2D(
                filters=out_size,
                kernel_size=kernel,
                strides=stride,
                activation=conv_activation,
                padding="same",
                name="{}_conv_{}".format('pi', i))(_last_layer)
        print(
            f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {int(action_space.n / (rows * colums))}"
        )
        unmasked_logits = tf.keras.layers.Conv2D(
            filters=int(action_space.n / (rows * colums)),
            kernel_size=[3, 3],
            strides=1,
            activation=None,
            padding="same",
            name="{}_conv_{}".format('pi', "unmasked_logits"))(_last_layer)
        return unmasked_logits

    def build_q_layers(input_layer, prefix):
        # Q head: same topology as the policy head, emitting per-action Q maps.
        _last_layer = input_layer
        for i, (out_size, kernel, stride) in enumerate(q_cnn_filters):
            _last_layer = tf.keras.layers.Conv2D(
                filters=out_size,
                kernel_size=kernel,
                strides=stride,
                activation=conv_activation,
                padding="same",
                name="{}_conv_{}".format(prefix, i))(_last_layer)
        q_val = tf.keras.layers.Conv2D(
            filters=int(action_space.n / (rows * colums)),
            kernel_size=[3, 3],
            strides=1,
            activation=None,
            padding="same",
            name="{}_conv_{}".format(prefix, "q_out"))(_last_layer)
        return q_val

    base_model_out, state_out = build_shared_base_layers(
        prefix="shared_base", obs_in=self._obs_inputs, state_in=state_in)
    # Heads consume the base-output placeholder, not the live base tensor, so
    # each head is an independent Keras model.
    pi_unmasked_logits_out = build_pi_layers(input_layer=self._base_model_out)
    q1_out = build_q_layers(input_layer=self._base_model_out, prefix="q1")
    q2_out = build_q_layers(input_layer=self._base_model_out, prefix="q2")
    base_inputs = [self._obs_inputs]
    base_outputs = [base_model_out]
    if self.use_lstm:
        base_inputs += [seq_lens_in, *state_in]
        base_outputs += [*state_out]
    self._base_model = tf.keras.Model(name=f"{name}_base",
                                      inputs=base_inputs,
                                      outputs=base_outputs)
    self.pi_model = tf.keras.Model(name=f"{name}_pi_head",
                                   inputs=[self._base_model_out],
                                   outputs=[pi_unmasked_logits_out])
    self.q1_model = tf.keras.Model(name=f"{name}_q1_head",
                                   inputs=[self._base_model_out],
                                   outputs=[q1_out])
    self.q2_model = tf.keras.Model(name=f"{name}_q2_head",
                                   inputs=[self._base_model_out],
                                   outputs=[q2_out])
    print(self._base_model.summary())
    print(self.pi_model.summary())
    print(self.q1_model.summary())
    print(self.q2_model.summary())
    self.register_variables(self._base_model.variables)
    self.register_variables(self.pi_model.variables)
    self.register_variables(self.q1_model.variables)
    self.register_variables(self.q2_model.variables)
    # SAC entropy temperature alpha, parameterized via log for positivity.
    self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
    self.alpha = tf.exp(self.log_alpha)
    self.register_variables([self.log_alpha])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a fully connected network whose policy head uses a tanh output
    and emits mixture-of-Gaussians parameters.

    ``num_outputs`` is assumed to pack, per mixture component k: means,
    log-stds, and one mixture weight (hence the divisibility asserts).  When
    ``custom_options["std_mode"]`` is "free" the log-stds are a separate
    learnable TF variable; when "zero" they are a fixed tensor; in both cases
    the network itself outputs fewer units and the log-stds are spliced back
    into the output tensor below.
    """
    super(FullyConnectedNetworkTanh, self).__init__(obs_space, action_space,
                                                    num_outputs, model_config,
                                                    name)
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    k = model_config["custom_options"]["num_components"]
    # Per-component layout: 2 * action_length (mean + log_std) + 1 weight.
    action_length = (num_outputs // k - 1) // 2
    assert num_outputs % k == 0
    assert (num_outputs // k - 1) % 2 == 0
    assert "std_norm" not in model_config["custom_options"]
    if model_config["custom_options"].get("std_mode") == "free":
        # Learnable state-independent log-stds (TF1-style variable).
        log_stds = tf.get_variable(name="learnable_log_std",
                                   shape=[k * action_length],
                                   initializer=tf.zeros_initializer)
        num_outputs -= k * action_length
    elif model_config["custom_options"].get("std_mode") == "zero":
        # Fixed log-stds.  NOTE(review): tf.ones gives log_std == 1 (std = e),
        # not 0, despite the mode name — confirm this is intended.
        log_stds = tf.ones(name="log_std", shape=[k * action_length])
        num_outputs -= k * action_length
    # we are using obs_flat, so take the flattened shape as input
    inputs = tf.keras.layers.Input(shape=(np.product(obs_space.shape), ),
                                   name="observations")
    last_layer = inputs
    i = 1
    if no_final_linear:
        raise NotImplementedError(
            "no_final_linear should be set to False.")
        # # the last layer is adjusted to be of size num_outputs
        # for size in hiddens[:-1]:
        #     last_layer = tf.keras.layers.Dense(
        #         size,
        #         name="fc_{}".format(i),
        #         activation=activation,
        #         kernel_initializer=normc_initializer(1.0))(last_layer)
        #     i += 1
        # layer_out = tf.keras.layers.Dense(
        #     num_outputs,
        #     name="fc_out",
        #     activation=activation,
        #     kernel_initializer=normc_initializer(1.0))(last_layer)
    else:
        # the last layer is a linear to size num_outputs
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1
        # tanh output keeps the raw policy head bounded in (-1, 1).
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation="tanh",
            kernel_initializer=normc_initializer(0.01))(last_layer)
    if model_config["custom_options"].get("std_mode") == "free":
        # Splice the external log-stds between the means and mixture weights.
        splits = tf.split(layer_out, [action_length * k, k], 1)
        with tf.control_dependencies(
                [tf.variables_initializer([log_stds])]):
            layer_out = tf.concat([
                splits[0],
                tf.broadcast_to(log_stds, tf.shape(splits[0])), splits[1]
            ],
                                  axis=1)
    elif model_config["custom_options"].get("std_mode") == "zero":
        splits = tf.split(layer_out, [action_length * k, k], 1)
        layer_out = tf.concat([
            splits[0],
            tf.broadcast_to(log_stds, tf.shape(splits[0])), splits[1]
        ],
                              axis=1)
    if not vf_share_layers:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        i = 1
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1
    # NOTE(review): placed outside the if-branch (standard RLlib layout) so
    # the value head reuses the shared trunk when vf_share_layers is set —
    # original formatting was ambiguous; confirm against upstream.
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(last_layer)
    self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
    if model_config["custom_options"].get("std_mode") == "free":
        self.register_variables([log_stds])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a recurrent vision+message model.

    The observation is a Tuple space of (vision, messages).  Vision frames go
    through TimeDistributed conv layers and dense layers, get concatenated
    with the message input, pass through an LSTM, and feed three heads:
    action logits, a value estimate, and message logits.

    Bug fix: ``Conv2D`` has no ``channels_last`` keyword argument (the
    original raised ``TypeError`` at construction); the correct spelling is
    ``data_format="channels_last"``.
    """
    super(ObedienceLSTM, self).__init__(obs_space, action_space, num_outputs,
                                        model_config, name)
    self._value_out = -1
    self.obs_space = obs_space
    vision_space = self.obs_space.original_space.spaces[0]
    message_space = self.obs_space.original_space.spaces[1]
    # The inputs of the shared trunk. We will concatenate the observation
    # space with shared info about the visibility of agents. Currently we
    # assume all the agents have equally sized action spaces.
    self.num_outputs = num_outputs
    self.num_agents = model_config["custom_options"]["num_agents"]
    self.num_symbols = model_config["custom_options"]["num_symbols"]
    self.cell_size = model_config["custom_options"].get("cell_size")
    # Extra leading None for the time dimension (recurrent model).
    inputs = tf.keras.layers.Input(
        shape=(None,) + vision_space.shape, name="observations")
    # Convolutional trunk; all but the last filter use "same" padding.
    last_layer = inputs
    activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            # FIX: was `channels_last=True`, an invalid Conv2D kwarg.
            data_format="channels_last",
            name="conv{}".format(i)))(last_layer)
    out_size, kernel, stride = filters[-1]
    if len(filters) == 1:
        # The loop above did not run, so `i` is unbound; -1 makes the final
        # layer name "conv0".
        i = -1
    # Output is batch x time x height x width x channel.
    conv_out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
        out_size,
        kernel,
        strides=(stride, stride),
        activation=activation,
        padding="valid",
        name="conv{}".format(i + 1)))(last_layer)
    flat_layer = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Flatten())(conv_out)
    # Fully connected layers on top of the flattened conv features.
    hiddens = model_config["custom_options"].get("fcnet_hiddens")
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    last_layer = flat_layer
    i = 1
    for size in hiddens:
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}_{}".format(i, name),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1
    # Concatenate incoming messages with the visual features before the LSTM.
    messages_layer = tf.keras.layers.Input(
        shape=((None,) + message_space.shape), name="messages")
    last_layer = tf.keras.layers.concatenate([last_layer, messages_layer])
    state_in_h = tf.keras.layers.Input(shape=(self.cell_size,), name="h")
    state_in_c = tf.keras.layers.Input(shape=(self.cell_size,), name="c")
    seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
    lstm_out, state_h, state_c = tf.keras.layers.LSTM(
        self.cell_size, return_sequences=True, return_state=True,
        name="lstm")(
            inputs=last_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c])
    # The flat output vector packs action logits first, message logits after.
    num_actions_out = action_space.nvec[0]
    num_messages_out = self.num_outputs - num_actions_out
    # Postprocess LSTM output with the output heads.
    logits = tf.keras.layers.Dense(
        num_actions_out,
        activation=tf.keras.activations.linear,
        name=name)(lstm_out)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(lstm_out)
    message_logits = tf.keras.layers.Dense(
        num_messages_out,
        activation=tf.keras.activations.linear,
        name=f'message_{name}')(lstm_out)
    inputs = [inputs, messages_layer, seq_in, state_in_h, state_in_c]
    self.rnn_model = tf.keras.Model(
        inputs=inputs,
        outputs=[logits, value_out, message_logits, state_h, state_c])
    self.register_variables(self.rnn_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a conv + LSTM recurrent model.

    Frames (batch x time x H x W x C) pass through TimeDistributed conv
    layers, are flattened, optionally concatenated with the previous actions,
    go through a first dense stack, an LSTM, a second dense stack, and then
    action-logit and value heads.

    Bug fix: the previous-actions Input used ``shape=(None,
    action_space.shape)``, which nests the shape tuple inside the time axis
    instead of concatenating it; corrected to ``(None,) + action_space.shape``
    (matching the sibling LSTM model).
    """
    super(ConvLSTM, self).__init__(obs_space, action_space, num_outputs,
                                   model_config, name)
    self.obs_space = obs_space
    self.num_outputs = num_outputs
    # Batch x Time x H x W x C
    input_layer = tf.keras.layers.Input(shape=(None,) + obs_space.shape,
                                        name="inputs")
    conv_activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    last_layer = input_layer
    for i, (out_size, kernel, stride) in enumerate(filters):
        # TimeDistributed ensures the conv operates on each frame
        # independently along the time axis.
        last_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=conv_activation,
            padding="same",
            name="conv{}".format(i)))(last_layer)
    last_layer = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Flatten())(last_layer)
    # If true we append the actions into the layer after the conv.
    self.use_prev_action = model_config["custom_options"].get(
        "use_prev_action")
    if self.use_prev_action:
        # FIX: was shape=(None, action_space.shape) — a nested tuple.
        actions_layer = tf.keras.layers.Input(
            shape=(None,) + action_space.shape, name="agent_actions")
        last_layer = tf.keras.layers.concatenate([last_layer, actions_layer])
    # fcnet_hiddens must be a pair of lists: sizes before and after the LSTM.
    hiddens = model_config["custom_options"].get("fcnet_hiddens")
    assert isinstance(hiddens, list)
    assert isinstance(hiddens[0], list)
    assert isinstance(hiddens[1], list)
    i = 1
    fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
    for size in hiddens[0]:
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=fc_activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1
    self.cell_size = model_config['lstm_cell_size']
    state_in_h = tf.keras.layers.Input(shape=(self.cell_size,), name="h")
    state_in_c = tf.keras.layers.Input(shape=(self.cell_size,), name="c")
    seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
    # Expects B x T x (H*W*C); outputs B x T x cell_size plus final states.
    lstm_out, state_h, state_c = tf.keras.layers.LSTM(
        self.cell_size, return_sequences=True, return_state=True,
        name="lstm")(
            inputs=last_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c])
    # Post-LSTM dense stack; `i` continues so layer names stay unique.
    last_layer = lstm_out
    for size in hiddens[1]:
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=fc_activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1
    action = tf.keras.layers.Dense(
        self.num_outputs,
        activation=tf.keras.activations.linear,
        name="action_logits")(last_layer)
    values = tf.keras.layers.Dense(
        1, activation=None, name="values")(last_layer)
    inputs = [input_layer, seq_in, state_in_h, state_in_c]
    if self.use_prev_action:
        inputs.insert(1, actions_layer)
    outputs = [action, values, state_h, state_c]
    self.rnn_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    self.register_variables(self.rnn_model.variables)
    self.rnn_model.summary()
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build an actor network plus two independent critic networks.

    The policy branch and up to two value branches (a task value head and,
    when ``custom_options["use_diversity_value_network"]`` is set, an extra
    "novelty" value head) are separate dense stacks over the flattened
    observation.  Improvement over the original: the three hand-unrolled,
    near-identical Dense loops are factored into one local helper.
    """
    super(ActorDoubleCriticNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    self.use_diversity_value_network = model_config['custom_options'][
        "use_diversity_value_network"]
    # we are using obs_flat, so take the flattened shape as input
    inputs = tf.keras.layers.Input(shape=(np.product(obs_space.shape), ),
                                   name="observations")

    def _dense_stack(sizes, name_fmt):
        # Stack Dense hidden layers over the observation input; layer names
        # are name_fmt.format(1), name_fmt.format(2), ...
        layer = inputs
        for idx, size in enumerate(sizes, 1):
            layer = tf.keras.layers.Dense(
                size,
                name=name_fmt.format(idx),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(layer)
        return layer

    if no_final_linear:
        # the last layer is adjusted to be of size num_outputs
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(
                _dense_stack(hiddens[:-1], "fc_{}"))
    else:
        # the last layer is a linear to size num_outputs
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(
                _dense_stack(hiddens, "fc_{}"))
    # We use separate same-sized networks for policy and value head(s);
    # sharing layers is not supported by this model.
    assert not vf_share_layers
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(
            _dense_stack(hiddens, "fc_value_{}"))
    if self.use_diversity_value_network:
        # Second critic estimating the novelty/diversity value.
        value_out_novel = tf.keras.layers.Dense(
            1,
            name="value_out_novel",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(
                _dense_stack(hiddens, "fc_value_novel_{}"))
        self.base_model = tf.keras.Model(
            inputs, [layer_out, value_out, value_out_novel])
    else:
        self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build the standard fully connected policy/value Keras graph.

    The policy branch is a Dense stack over the flattened observation; the
    value branch either shares that stack (vf_share_layers) or is an
    identically sized parallel stack.  With no_final_linear, the final policy
    layer itself is activated and sized to num_outputs.
    """
    super(FullyConnectedNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)
    act_fn = get_activation_fn(model_config.get("fcnet_activation"))
    layer_sizes = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    # The model consumes obs_flat, hence a single flattened input vector.
    obs_in = tf.keras.layers.Input(
        shape=(np.product(obs_space.shape), ), name="observations")

    def stack(sizes, prefix):
        # Dense tower over the observation input, layers named prefix + index.
        out = obs_in
        for idx, width in enumerate(sizes, 1):
            out = tf.keras.layers.Dense(
                width,
                name="{}{}".format(prefix, idx),
                activation=act_fn,
                kernel_initializer=normc_initializer(1.0))(out)
        return out

    if no_final_linear:
        # Final policy layer carries the activation and has num_outputs units.
        hidden = stack(layer_sizes[:-1], "fc_")
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=act_fn,
            kernel_initializer=normc_initializer(1.0))(hidden)
    else:
        # Linear read-out on top of the full hidden stack.
        hidden = stack(layer_sizes, "fc_")
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(hidden)
    if not vf_share_layers:
        # Independent tower for the value function.
        hidden = stack(layer_sizes, "fc_value_")
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(hidden)
    self.base_model = tf.keras.Model(obs_in, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a dense + LSTM recurrent model.

    Observations (batch x time x obs) optionally concatenated with the
    previous actions pass through Dense layers, an LSTM, and then action-logit
    and value heads.

    Bug fix: the original overwrote ``input_layer`` with the output of the
    ``Concatenate`` layer, then passed that (non-Input) tensor in the Keras
    ``Model(inputs=...)`` list, which breaks model construction whenever
    ``lstm_use_prev_action_reward`` is enabled.  The Input placeholder is now
    kept intact (matching the sibling ConvLSTM model).
    """
    super(LSTM, self).__init__(obs_space, action_space, num_outputs,
                               model_config, name)
    self.obs_space = obs_space
    self.num_outputs = num_outputs
    # Batch x Time x obs-features
    input_layer = tf.keras.layers.Input((None, ) + obs_space.shape,
                                        name="inputs")
    # If true we append the previous actions after the observation input.
    self.lstm_use_prev_action_reward = model_config.get(
        "lstm_use_prev_action_reward")
    last_layer = input_layer
    if self.lstm_use_prev_action_reward:
        actions_layer = tf.keras.layers.Input(
            shape=(None, ) + action_space.shape, name="agent_actions")
        # FIX: concatenate into last_layer so input_layer stays a Keras
        # Input and can be listed in Model(inputs=...).
        last_layer = tf.keras.layers.Concatenate()(
            [input_layer, actions_layer])
    hiddens = model_config.get("fcnet_hiddens")
    fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
    for i, size in enumerate(hiddens, 1):
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=fc_activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    self.cell_size = model_config['lstm_cell_size']
    state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ), name="h")
    state_in_c = tf.keras.layers.Input(shape=(self.cell_size, ), name="c")
    seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
    # Expects B x T x features; outputs B x T x cell_size plus final states.
    lstm_out, state_h, state_c = tf.keras.layers.LSTM(
        self.cell_size, return_sequences=True, return_state=True,
        name="lstm")(inputs=last_layer,
                     mask=tf.sequence_mask(seq_in),
                     initial_state=[state_in_h, state_in_c])
    action = tf.keras.layers.Dense(self.num_outputs,
                                   activation=tf.keras.activations.linear,
                                   name="action_logits")(lstm_out)
    values = tf.keras.layers.Dense(1, activation=None,
                                   name="values")(lstm_out)
    inputs = [input_layer, seq_in, state_in_h, state_in_c]
    if self.lstm_use_prev_action_reward:
        inputs.insert(1, actions_layer)
    outputs = [action, values, state_h, state_c]
    self.rnn_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    self.register_variables(self.rnn_model.variables)
    self.rnn_model.summary()
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a fully convolutional policy/value network for image-like
    observations.

    The policy head is produced by conv layers alone (a final 1x1 conv — or,
    with no_final_linear, the last strided conv — emits num_outputs
    channels); the value branch either squeezes the shared conv output or
    builds a parallel conv tower.
    """
    super(VisionNetwork, self).__init__(obs_space, action_space, num_outputs,
                                        model_config, name)
    activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    if not filters:
        # Fall back to the default filter spec for this input resolution.
        filters = _get_filter_config(obs_space.shape)
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                   name="observations")
    last_layer = inputs
    # Build the action layers
    # NOTE(review): `i` from this loop is reused below; assumes filters has at
    # least two entries, otherwise "conv{}".format(i + 1) raises NameError —
    # confirm against the filter configs used.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            name="conv{}".format(i))(last_layer)
    out_size, kernel, stride = filters[-1]
    if no_final_linear:
        # the last layer is adjusted to be of size num_outputs
        last_layer = tf.keras.layers.Conv2D(num_outputs,
                                            kernel,
                                            strides=(stride, stride),
                                            activation=activation,
                                            padding="valid",
                                            name="conv_out")(last_layer)
        conv_out = last_layer
    else:
        # "valid" padding here is expected to collapse the spatial dims to
        # 1x1 before the 1x1 read-out conv.
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            name="conv{}".format(i + 1))(last_layer)
        conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                          activation=None,
                                          padding="same",
                                          name="conv_out")(last_layer)
    # Build the value layers
    if vf_share_layers:
        # Reuse the conv trunk: squeeze the 1x1 spatial dims, then a Dense
        # read-out produces the scalar value.
        last_layer = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            name="conv_value_{}".format(i + 1))(last_layer)
        # 1x1 conv emits the scalar value map; squeeze the spatial dims.
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             q_hiddens=None, dueling=False, num_atoms=1, use_noisy=False,
             v_min=-10.0, v_max=10.0, sigma0=0.5, parameter_noise=False):
    """Build a conv Q-network over a single (partial or full) observation.

    Only the plain-Q configuration is supported: any of ``q_hiddens``,
    ``dueling``, ``num_atoms != 1`` or ``use_noisy`` raises
    NotImplementedError immediately. ``v_min``/``v_max``/``sigma0`` are
    accepted for interface compatibility but unused. ``parameter_noise``
    inserts LayerNormalization after each conv layer.

    Raises:
        NotImplementedError: For unsupported DQN extensions, or (later,
            at graph-build time) when ``use_lstm`` is enabled.
        ValueError: When observation_mode is BOTH_OBSERVATIONS, which has
            no meaning for a pure Q-function.
    """
    if q_hiddens or dueling or num_atoms != 1 or use_noisy:
        raise NotImplementedError
    # Overlay the caller's config on top of the model defaults.
    model_config = with_base_config(
        base_config=DEFAULT_STRATEGO_MODEL_CONFIG, extra_config=model_config)
    TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                       model_config, name)
    print(model_config)

    # Select which dict key of the (Dict) observation space feeds the net.
    observation_mode = model_config['custom_options']['observation_mode']
    if observation_mode == PARTIALLY_OBSERVABLE:
        self.vf_obs_key = 'partial_observation'
    elif observation_mode == FULLY_OBSERVABLE:
        self.vf_obs_key = 'full_observation'
    elif observation_mode == BOTH_OBSERVATIONS:
        raise ValueError(
            f"Using {BOTH_OBSERVATIONS} format doesn't make sense for a Q-network, there's no policy, just a Q-function"
        )
    else:
        assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

    if model_config["custom_preprocessor"]:
        print(obs_space)
        self.preprocessor = ModelCatalog.get_preprocessor_for_space(
            observation_space=self.obs_space.original_space,
            options=model_config)
    else:
        self.preprocessor = None
        # NOTE(review): logger.warn is deprecated; logger.warning is the
        # supported spelling.
        logger.warn(
            "No custom preprocessor for StrategoModel was specified.\n"
            "Some tree search policies may not initialize their placeholders correctly without this."
        )

    self.use_lstm = model_config['use_lstm']
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.mask_invalid_actions = model_config['custom_options'][
        'mask_invalid_actions']
    conv_activation = get_activation_fn(
        model_config.get("conv_activation"))
    lstm_filters = model_config["custom_options"]['lstm_filters']
    cnn_filters = model_config.get("conv_filters")
    final_pi_filter_amt = model_config["custom_options"][
        "final_pi_filter_amt"]

    # Board dimensions, taken from the selected observation component.
    rows = obs_space.original_space[self.vf_obs_key].shape[0]
    colums = obs_space.original_space[self.vf_obs_key].shape[1]

    if self.use_lstm:
        # ConvLSTM carries a spatial hidden state: (rows, cols, channels
        # of the first lstm filter spec).
        self._lstm_state_shape = (rows, colums, lstm_filters[0][0])
        # self._lstm_state_shape = (64,)

    if self.use_lstm:
        # Recurrent variant: extra time axis on observations plus h/c
        # state inputs and a sequence-length input.
        state_in = [
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="vf_lstm_h"),
            tf.keras.layers.Input(shape=self._lstm_state_shape,
                                  name="vf_lstm_c")
        ]
        seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
            name="vf_observation")
    else:
        state_in, seq_lens_in = None, None
        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.vf_obs_key].shape,
            name="vf_observation")

    # if pi_cnn_filters is None:
    #     assert False
    #     # assuming board size will always remain the same for both pi and vf networks
    #     if self.use_lstm:
    #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
    #     else:
    #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
    #     pi_cnn_filters = _get_filter_config(single_obs_input_shape)
    #
    # if v_cnn_filters is None:
    #     assert False
    #     # assuming board size will always remain the same for both pi and vf networks
    #     if self.use_lstm:
    #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
    #     else:
    #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
    #     v_cnn_filters = _get_filter_config(single_obs_input_shape)

    def maybe_td(layer):
        # Wrap the layer in TimeDistributed when observations carry a
        # time axis (LSTM mode); otherwise apply it directly.
        if self.use_lstm:
            return tf.keras.layers.TimeDistributed(layer=layer,
                                                   name=f"td_{layer.name}")
        else:
            return layer

    def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                             state_in: tf.Tensor):
        # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf
        _last_layer = obs_in
        for i, (out_size, kernel, stride) in enumerate(cnn_filters):
            _last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=out_size,
                                       kernel_size=kernel,
                                       strides=stride,
                                       activation=conv_activation,
                                       padding="same",
                                       name="{}_conv_{}".format(
                                           prefix, i)))(_last_layer)
            if parameter_noise:
                # assuming inputs shape (batch_size x w x h x channel)
                _last_layer = maybe_td(
                    tf.keras.layers.LayerNormalization(
                        axis=(1, 2),
                        name=f"{prefix}_LayerNorm_{i}"))(_last_layer)
        state_out = state_in
        if self.use_lstm:
            for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                if i > 0:
                    raise NotImplementedError(
                        "Only single lstm layers are implemented right now"
                    )
                _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    return_sequences=True,
                    return_state=True,
                    name="{}_convlstm".format(prefix))(
                        inputs=_last_layer,
                        mask=tf.sequence_mask(seq_lens_in),
                        initial_state=state_in)
            # Deliberate guard: the LSTM path is unfinished and always
            # aborts here before returning.
            raise NotImplementedError(
                "havent checked lstms for q model"
                "")
        return _last_layer, state_out

    if self.use_lstm:
        # NOTE(review): state_in holds exactly 2 tensors (h, c), so
        # state_in[2:] is always empty — looks like a leftover from a
        # 4-state pi+vf model. Unreachable in practice because the LSTM
        # path raises NotImplementedError above; confirm before enabling.
        vf_state_in = state_in[2:]
    else:
        # pi_state_in is assigned but never used in this Q-only model.
        pi_state_in, vf_state_in = None, None

    vf_last_layer, vf_state_out = build_primary_layers(
        prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

    if self.use_lstm:
        state_out = vf_state_out
    else:
        state_out = None

    # Final feature conv before projecting to per-action logits.
    vf_last_layer = maybe_td(
        tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                               kernel_size=[3, 3],
                               strides=1,
                               activation=conv_activation,
                               padding="same",
                               name="{}_conv_{}".format(
                                   'vf', "last")))(vf_last_layer)

    if parameter_noise:
        # assuming inputs shape (batch_size x w x h x channel)
        vf_last_layer = maybe_td(
            tf.keras.layers.LayerNormalization(
                axis=(1, 2), name=f"vf_LayerNorm_last"))(vf_last_layer)

    print(
        f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {int(action_space.n / (rows * colums))}"
    )

    # Q-values laid out spatially: action_space.n / (rows * cols) channels
    # per board cell (assumes n divides evenly — TODO confirm).
    unmasked_logits_out = maybe_td(
        tf.keras.layers.Conv2D(
            filters=int(action_space.n / (rows * colums)),
            kernel_size=[3, 3],
            strides=1,
            activation=None,
            padding="same",
            name="{}_conv_{}".format('vf',
                                     "unmasked_logits")))(vf_last_layer)

    # vf_last_layer = maybe_td(tf.keras.layers.Conv2D(
    #     filters=1,
    #     kernel_size=[1, 1],
    #     strides=1,
    #     activation=conv_activation,
    #     padding="same",
    #     name="{}_conv_{}".format('vf', "last")))(vf_last_layer)
    #
    # vf_last_layer = maybe_td(tf.keras.layers.Flatten(name="vf_flatten"))(vf_last_layer)
    #
    # value_out = maybe_td(tf.keras.layers.Dense(
    #     units=1,
    #     name="vf_out",
    #     activation=None,
    #     kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

    model_inputs = [self.vf_obs_inputs]
    model_outputs = [unmasked_logits_out]

    if self.use_lstm:
        model_inputs += [seq_lens_in, *state_in]
        model_outputs += state_out

    self.base_model = tf.keras.Model(inputs=model_inputs,
                                     outputs=model_outputs)
    print(self.base_model.summary())
    self.register_variables(self.base_model.variables)