import numpy as np
import tensorflow as tf

# Project-local helpers (get_training_dimensions, get_nn_inputs,
# add_dense_layers, create_categorical_output, create_linear_output,
# initialize_n_input, actions_to_numpy, game_state_to_numpy, Actions,
# Resources, Assignment, PolicyHolder, SimpleCriticFunction) are assumed
# to be defined elsewhere in this package.


def construct_policy():
    """Build the policy network: a shared dense trunk over the state
    encodings, with one output head per action component."""
    training_dimensions = get_training_dimensions()
    inputs, input_ends = get_nn_inputs(training_dimensions['state'])
    action_inputs, action_ends = get_nn_inputs(training_dimensions['actions'])

    # Shared trunk over the concatenated state encodings.
    x = tf.keras.layers.Concatenate()(input_ends)
    x = add_dense_layers(128, 3, x, name='policy')
    last_layer = x

    # Categorical heads for discrete choices, a linear head for movement.
    outputs = {
        name: method(last_layer, size, name)
        for name, method, size in [
            ('which-action', create_categorical_output, Actions.NUM_ACTIONS),
            ('movement', create_linear_output, 3),
            ('collect-resources', create_categorical_output, Resources.NUM_RESOURCES),
            ('deposit-resources', create_categorical_output, Resources.NUM_RESOURCES),
            ('strafe-direction', create_categorical_output, 2),
            ('change-action', create_categorical_output, 2),
        ]
    }

    model = tf.keras.Model(
        inputs=inputs + action_inputs,
        outputs=list(outputs.values()),
        name='ai_policy_function',
    )
    tf.keras.utils.plot_model(
        model, 'output/saved_networks/policy_function.png',
        rankdir='LR', show_shapes=True,
    )
    # Return the model, the ordered output-head names (matching the order of
    # the model's outputs), and the action-encoder end tensors.
    return model, list(outputs.keys()), action_ends
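# Illustrative sketch (not part of the original module): construct_policy()
# returns the head names in the same order as the model's outputs, so a
# caller can zip Model.predict() results back onto those names. The function
# name `_name_policy_outputs` is hypothetical.
def _name_policy_outputs(predictions, output_names):
    # predictions: the list of arrays returned by model.predict();
    # output_names: the second value returned by construct_policy().
    return dict(zip(output_names, predictions))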
def get_n_actions_input(actions, assignments):
    """Encode per-game action lists into a batched 'actions' network input."""
    num_games = len(actions)
    training_dimensions = get_training_dimensions()
    actions_input = initialize_n_input(training_dimensions['actions'], num_games)
    for idx in range(num_games):
        actions_input['actions'][idx] = actions_to_numpy(
            actions[idx], assignments[idx])['actions']
    return actions_input
def get_policy_input_layers():
    """Map each state-input key to the matching input layer of the policy."""
    ret = {}
    policy = PolicyHolder.get_policy()
    training_dimensions = get_training_dimensions()
    for input_key in training_dimensions['state']['inputs']:
        for layer in policy.layers:
            if layer.name == input_key:
                ret[input_key] = layer
                break
    return ret
def get_critic_input_layers():
    """Map each state and action input key to the critic's corresponding
    'flattened_*' layer."""
    ret = {}
    critic = SimpleCriticFunction.get_critic()
    training_dimensions = get_training_dimensions()
    for input_group in ['state', 'actions']:
        for input_key in training_dimensions[input_group]['inputs']:
            for layer in critic.layers:
                if layer.name == 'flattened_' + input_key:
                    ret[input_key] = layer
                    break
    return ret
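# Illustrative sketch (an assumption, not in the original module): the layer
# map above can expose the critic's flattened input encodings as a
# feature-extractor sub-model. `_critic_encoding_model` is a hypothetical
# name; dict-valued outputs require TF 2.x.
def _critic_encoding_model():
    critic = SimpleCriticFunction.get_critic()
    flattened = get_critic_input_layers()
    return tf.keras.Model(
        inputs=critic.inputs,
        outputs={key: layer.output for key, layer in flattened.items()},
    )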
def get_n_states_input(game_states, assignments=None):
    """Encode a batch of game states into batched state network inputs.

    When no assignments are given, a default Assignment is derived from
    each game state's player number.
    """
    num_games = len(game_states)
    training_dimensions = get_training_dimensions()
    state_input = initialize_n_input(training_dimensions['state'], num_games)
    for game_idx, game_state in enumerate(game_states):
        assignment = (Assignment(game_state['player-number'])
                      if assignments is None else assignments[game_idx])
        game_encoding = game_state_to_numpy(game_state, assignment=assignment)
        for input_key in training_dimensions['state']['inputs']:
            state_input[input_key][game_idx] = game_encoding[input_key]
    return state_input
def construct_critic():
    """Build the critic: state and action encodings mapped to a scalar
    score in (0, 1)."""
    training_dimensions = get_training_dimensions()
    state_inputs, state_input_ends = get_nn_inputs(training_dimensions['state'])
    action_inputs, action_input_ends = get_nn_inputs(training_dimensions['actions'])

    x = tf.keras.layers.Concatenate()(state_input_ends + action_input_ends)
    x = add_dense_layers(128, 3, x, name='critic')
    x = tf.keras.layers.Dense(1, activation='sigmoid', name='the_output')(x)

    model = tf.keras.Model(
        inputs=state_inputs + action_inputs,
        outputs=x,
        name='ai_critic_function',
    )
    tf.keras.utils.plot_model(
        model, 'output/saved_networks/critic_function.png',
        rankdir='LR', show_shapes=True,
    )
    return model
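# Illustrative sketch (an assumption, not in the original module): scoring a
# batch of game states with the critic under zeroed action inputs. Assumes
# the critic's input layers are named after the keys produced by
# get_n_states_input() and create_action_zeros(); the function name
# `_score_states_with_zero_actions` is hypothetical.
def _score_states_with_zero_actions(critic, game_states):
    state_input = get_n_states_input(game_states)
    # Tile the single-sample zero encodings across the batch dimension.
    action_input = {
        key: np.stack([value] * len(game_states))
        for key, value in create_action_zeros().items()
    }
    return critic.predict({**state_input, **action_input})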
def construct_value_function():
    """Build the value function: state encodings mapped to a scalar value
    in (0, 1)."""
    training_dimensions = get_training_dimensions()
    inputs, input_ends = get_nn_inputs(training_dimensions['state'])

    x = tf.keras.layers.Concatenate()(input_ends)
    # Per-layer widths are passed as a list here (1024 -> 512 -> 128),
    # unlike the fixed-width trunks of the policy and critic.
    x = add_dense_layers([1024, 512, 128], 1, x, name='value')
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inputs, outputs=x, name='ai_value_function')
    tf.keras.utils.plot_model(
        model, 'output/saved_networks/value_function.png',
        rankdir='LR', show_shapes=True,
    )
    return model
def create_action_zeros():
    """Return a zeroed single-sample action encoding, keyed by input name."""
    training_dimensions = get_training_dimensions()['actions']
    return {
        input_key: np.zeros(training_dimensions[input_key])
        for input_key in training_dimensions['inputs']
    }