Exemplo n.º 1
0
def train_sarsa_models():
    """Train one SARSA experiment for every seed returned by ``get_seeds()``.

    Each run builds its experiment with ``get_sarsa_experiment`` and hands it
    to ``train_experiment`` together with freshly fetched states and rewards.
    """
    config = {"seed": None}

    for current_seed in get_seeds():
        # The same dict object is reused across runs; only its "seed" entry
        # is overwritten before each experiment is built.
        config.update(seed=current_seed)
        sarsa_experiment = get_sarsa_experiment(config)
        train_experiment(sarsa_experiment, get_states(), get_rewards())
Exemplo n.º 2
0
def aggregate_models_by_avg(experiment_getter, seeds, params_getter, state_getter, reward_getter):
	"""Average per-episode statistics of models across several seeds.

	For every seed an experiment is built, its models are fetched through
	``get_models_from_experiment`` (with ``train_if_missing=True``) and the
	``food_count_per_episode`` / ``rewards_per_episode`` attributes of models
	sharing the same ``(state, reward)`` pair are summed. Once all seeds have
	been processed, each sum is divided by the number of seeds.

	Args:
		experiment_getter: Callable taking the params produced by
			``params_getter`` and returning an experiment.
		seeds: Iterable of seeds to aggregate over.
		params_getter: Callable taking a seed and returning the params for
			``experiment_getter``.
		state_getter: Zero-argument callable; its result is forwarded to
			``get_models_from_experiment`` as the states argument.
		reward_getter: Zero-argument callable; its result is forwarded to
			``get_models_from_experiment`` as the rewards argument.

	Returns:
		A tuple ``(models, states, rewards)`` of three parallel lists, one
		entry per distinct ``(state, reward)`` pair, in first-seen order.
		All three lists are empty when ``seeds`` is empty.

	Note:
		The first model seen for a ``(state, reward)`` pair is used as the
		accumulator and is modified in place; no copy is made.
		NOTE(review): ``+=`` is presumably element-wise (e.g. numpy arrays);
		confirm against the model class.
	"""
	# Materialize so the seeds can be both iterated and counted.
	seeds = list(seeds)
	aggregated_models = {}

	for seed in seeds:
		experiment = experiment_getter(params_getter(seed))

		models, states, rewards, _ = get_models_from_experiment(
			experiment,
			state_getter(),
			reward_getter(),
			train_if_missing=True)

		for model, state, reward in zip(models, states, rewards):
			key = (state, reward)

			if key not in aggregated_models:
				aggregated_models[key] = model
			else:
				accumulated = aggregated_models[key]
				accumulated.food_count_per_episode += model.food_count_per_episode
				accumulated.rewards_per_episode += model.rewards_per_episode

	# Turn the sums into averages exactly once, after every seed has been
	# accumulated; dividing inside a per-seed loop would shrink the values
	# repeatedly.
	seed_count = len(seeds)
	for model in aggregated_models.values():
		model.food_count_per_episode = model.food_count_per_episode / seed_count
		model.rewards_per_episode = model.rewards_per_episode / seed_count

	models, states, rewards = [], [], []

	for (state, reward), model in aggregated_models.items():
		models.append(model)
		states.append(state)
		rewards.append(reward)

	return models, states, rewards
Exemplo n.º 3
0
def experiment01_sarsa():
	"""Run ``experiment01`` on SARSA models averaged over all seeds.

	A reference experiment is built with seed 42; the models passed to
	``experiment01`` come from ``aggregate_models_by_avg`` over every seed
	returned by ``get_seeds()``.
	"""
	def get_params(seed):
		"""Return the experiment params (seed and image output path) for ``seed``."""
		image_output = os.path.join(get_project_path(),
									"images",
									"sarsa",
									"state",
									str(seed),
									"state_sarsa_average_game_score_over_time.png")

		return {"seed": seed,
				"image_output": image_output}

	params = get_params(42)
	experiment = get_sarsa_experiment(params)
	# aggregate_models_by_avg calls its state/reward arguments itself, so the
	# getter functions are passed here rather than their results.
	models, states, rewards = aggregate_models_by_avg(get_sarsa_experiment, get_seeds(), get_params, get_states,
													  get_rewards)

	experiment01(experiment, models, states, rewards, params)
Exemplo n.º 4
0
def experiment01_sarsa_directional_state():
    """Run ``experiment01`` on averaged SARSA models with "Directional" states.

    Models are averaged over every seed returned by ``get_seeds()`` and then
    restricted to those whose state name starts with ``"Directional"``. The
    reference experiment is built with seed 42.
    """
    def get_params(seed):
        """Return the experiment params (seed and image output path) for ``seed``."""
        image_output = os.path.join(
            get_project_path(), "images", "sarsa", "reward", str(seed),
            "reward_sarsa_directional_state_average_game_score_over_time.png")

        return {"seed": seed, "image_output": image_output}

    params = get_params(42)
    # The reference experiment uses the SARSA factory so that it matches the
    # SARSA models aggregated below.
    experiment = get_sarsa_experiment(params)
    models, states, rewards = aggregate_models_by_avg(get_sarsa_experiment,
                                                      get_seeds(), get_params,
                                                      get_states, get_rewards)

    # Keep only the models trained on a "Directional" state representation.
    models, states, rewards = filter_models_with_rewards_by_state(
        models, states, rewards,
        lambda state: state.startswith("Directional"))
    experiment01(experiment, models, states, rewards, params)
Exemplo n.º 5
0
def get_experiment():
	"""Build the "info_augmentation" QLearning experiment.

	The experiment is created by ``setup_experiment`` with the first seed from
	``get_seeds()``, and its ``train_episodes`` attribute is set to 1000000
	before it is returned.
	"""
	episode_budget = 1000000
	qlearning_experiment = setup_experiment(
		QLearning,
		"info_augmentation",
		get_seeds()[0],
	)
	qlearning_experiment.train_episodes = episode_budget
	return qlearning_experiment