示例#1
0
def construct_policy():
    """Build the policy network and render its diagram to disk.

    The state input ends are concatenated, passed through
    ``add_dense_layers`` and then fanned out into one output head per
    action component.

    Returns:
        A 3-tuple ``(model, output_names, action_ends)``:

        * ``model`` -- the ``tf.keras.Model`` named ``'ai_policy_function'``.
        * ``output_names`` -- list of head names, in the same order as the
          model outputs.
        * ``action_ends`` -- second value returned by ``get_nn_inputs`` for
          the ``'actions'`` dimensions.
    """
    dims = get_training_dimensions()
    state_inputs, state_ends = get_nn_inputs(dims['state'])
    # The action inputs are registered on the model below, but their ends
    # are not fed into the trunk here; they are only handed back to the
    # caller.
    action_inputs, action_ends = get_nn_inputs(dims['actions'])

    trunk = tf.keras.layers.Concatenate()(state_ends)
    trunk = add_dense_layers(128, 3, trunk, name='policy')

    # (head name, factory, size) -- heads are created in exactly this order.
    head_specs = (
        ('which-action', create_categorical_output, Actions.NUM_ACTIONS),
        ('movement', create_linear_output, 3),
        ('collect-resources', create_categorical_output, Resources.NUM_RESOURCES),
        ('deposit-resources', create_categorical_output, Resources.NUM_RESOURCES),
        ('strafe-direction', create_categorical_output, 2),
        ('change-action', create_categorical_output, 2),
    )
    heads = {}
    for head_name, make_head, head_size in head_specs:
        heads[head_name] = make_head(trunk, head_size, head_name)

    model = tf.keras.Model(
        inputs=state_inputs + action_inputs,
        outputs=list(heads.values()),
        name='ai_policy_function',
    )
    tf.keras.utils.plot_model(
        model,
        'output/saved_networks/policy_function.png',
        rankdir='LR',
        show_shapes=True,
    )
    return model, list(heads), action_ends
def get_n_actions_input(actions, assignments):
    """Assemble the batched ``'actions'`` network input for several games.

    Args:
        actions: Sized, positionally indexable collection with one entry
            per game.
        assignments: Indexed in parallel with ``actions``; entry ``i`` is
            passed to ``actions_to_numpy`` together with ``actions[i]``.

    Returns:
        The structure created by ``initialize_n_input`` with row ``i`` of
        its ``'actions'`` entry set from the encoding of game ``i``.
    """
    batch_size = len(actions)
    action_dims = get_training_dimensions()['actions']
    batch = initialize_n_input(action_dims, batch_size)
    for row in range(batch_size):
        encoded = actions_to_numpy(actions[row], assignments[row])
        batch['actions'][row] = encoded['actions']
    return batch
示例#3
0
def get_policy_input_layers():
    """Map each state input key to the policy layer carrying that name.

    Returns:
        Dict from input key to the first layer of the policy (as returned
        by ``PolicyHolder.get_policy()``) whose ``name`` equals the key.
        Keys with no matching layer are left out.
    """
    policy = PolicyHolder.get_policy()
    state_keys = get_training_dimensions()['state']['inputs']
    found = {}
    for key in state_keys:
        # First match wins, mirroring a linear scan that stops at the hit.
        match = next((lyr for lyr in policy.layers if lyr.name == key), None)
        if match is not None:
            found[key] = match
    return found
示例#4
0
def get_critic_input_layers():
    """Map each state/action input key to its ``flattened_<key>`` critic layer.

    Returns:
        Dict from input key to the first layer of the critic (as returned
        by ``SimpleCriticFunction.get_critic()``) named
        ``'flattened_' + key``. Keys with no matching layer are left out.
        ``'state'`` keys are processed before ``'actions'`` keys, so a key
        present in both groups ends up with the value from the later pass.
    """
    critic = SimpleCriticFunction.get_critic()
    dims = get_training_dimensions()
    found = {}
    for group in ('state', 'actions'):
        for key in dims[group]['inputs']:
            # First match wins, mirroring a linear scan that stops at the hit.
            match = next(
                (lyr for lyr in critic.layers
                 if lyr.name == 'flattened_' + key),
                None,
            )
            if match is not None:
                found[key] = match
    return found
def get_n_states_input(game_states, assignments=None):
    """Assemble the batched state network input for several games.

    Args:
        game_states: Sized iterable of game states, one per game. When
            ``assignments`` is omitted each state is read at
            ``'player-number'``.
        assignments: Optional collection indexed in parallel with
            ``game_states``. When ``None``, an ``Assignment`` is built for
            each game from that game's ``'player-number'``.

    Returns:
        The structure created by ``initialize_n_input`` with, for every
        state input key, row ``i`` set from the encoding of game ``i``.
    """
    batch_size = len(game_states)
    state_dims = get_training_dimensions()['state']
    batch = initialize_n_input(state_dims, batch_size)
    for row, state in enumerate(game_states):
        if assignments is None:
            who = Assignment(state['player-number'])
        else:
            who = assignments[row]
        encoded = game_state_to_numpy(state, assignment=who)
        for key in state_dims['inputs']:
            batch[key][row] = encoded[key]
    return batch
示例#6
0
def construct_critic():
    """Build the critic network and render its diagram to disk.

    State and action input ends are concatenated, passed through
    ``add_dense_layers`` and reduced to a single sigmoid unit named
    ``'the_output'``.

    Returns:
        The ``tf.keras.Model`` named ``'ai_critic_function'`` taking the
        state inputs followed by the action inputs.
    """
    dims = get_training_dimensions()
    state_inputs, state_ends = get_nn_inputs(dims['state'])
    action_inputs, action_ends = get_nn_inputs(dims['actions'])

    merged = tf.keras.layers.Concatenate()(state_ends + action_ends)
    hidden = add_dense_layers(128, 3, merged, name='critic')
    output_layer = tf.keras.layers.Dense(
        1, activation='sigmoid', name='the_output')
    score = output_layer(hidden)

    model = tf.keras.Model(
        inputs=state_inputs + action_inputs,
        outputs=score,
        name='ai_critic_function',
    )
    tf.keras.utils.plot_model(
        model,
        'output/saved_networks/critic_function.png',
        rankdir='LR',
        show_shapes=True,
    )
    return model
示例#7
0
def construct_value_function():
    """Build the state-value network and render its diagram to disk.

    The state input ends are concatenated, passed through
    ``add_dense_layers`` and reduced to a single sigmoid unit.

    Returns:
        The ``tf.keras.Model`` named ``'ai_value_function'``.
    """
    state_dims = get_training_dimensions()['state']
    state_inputs, state_ends = get_nn_inputs(state_dims)

    merged = tf.keras.layers.Concatenate()(state_ends)
    hidden = add_dense_layers([1024, 512, 128], 1, merged, name='value')
    value = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)

    model = tf.keras.Model(
        inputs=state_inputs, outputs=value, name='ai_value_function')
    tf.keras.utils.plot_model(
        model,
        'output/saved_networks/value_function.png',
        rankdir='LR',
        show_shapes=True,
    )
    return model
示例#8
0
def create_action_zeros():
    """Return a zero-filled array for every action input key.

    Returns:
        Dict from each key listed under ``'inputs'`` in the ``'actions'``
        training dimensions to ``np.zeros(shape)``, where ``shape`` is the
        value stored under that same key.
    """
    action_dims = get_training_dimensions()['actions']
    zeros = {}
    for key in action_dims['inputs']:
        zeros[key] = np.zeros(action_dims[key])
    return zeros