Пример #1
0
def analyze():
    """Run ``analyze_models`` for the SARSA reward experiment.

    A fresh ``ExperimentParameters`` object is created and its own ``seed``
    value is used to build the model and image directory paths that are
    stored on it before it is handed to ``analyze_models``.
    """
    experiment = ExperimentParameters()

    # Both directories are keyed by the seed carried by the parameters object.
    experiment.model_output_dir = (
        "../../../models/sarsa/reward/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/sarsa/reward/%i" % experiment.seed)

    analyze_models(experiment)
Пример #2
0
def analyze():
    """Run ``analyze_models`` for the Q-learning state experiment.

    A fresh ``ExperimentParameters`` object is created and its own ``seed``
    value is used to build the model and image directory paths that are
    stored on it before it is handed to ``analyze_models``.
    """
    experiment = ExperimentParameters()

    # Both directories are keyed by the seed carried by the parameters object.
    experiment.model_output_dir = (
        "../../../models/qlearning/state/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/qlearning/state/%i" % experiment.seed)

    analyze_models(experiment)
Пример #3
0
def analyze():
    """Run ``analyze_models`` for the Q-learning reward experiment.

    The seed is the element at index 2 of ``get_reward_seeds()``; the model
    and image directory paths are derived from it and stored on the
    experiment parameters before they are handed to ``analyze_models``.
    """
    experiment = ExperimentParameters()
    experiment.seed = get_reward_seeds()[2]

    # Both directories are keyed by the selected seed.
    experiment.model_output_dir = (
        "../../../models/qlearning/reward/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/qlearning/reward/%i" % experiment.seed)

    analyze_models(experiment)
Пример #4
0
def analyze():
    """Run ``analyze_models`` for the expected-SARSA parameter experiment.

    The seed comes from ``get_parameters_seed()``; the model and image
    directory paths are derived from it and stored on the experiment
    parameters before they are handed to ``analyze_models``.
    """
    experiment = ExperimentParameters()
    experiment.seed = get_parameters_seed()

    # Both directories are keyed by the parameters seed.
    experiment.model_output_dir = (
        "../../../models/expected_sarsa/params/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/expected_sarsa/params/%i" % experiment.seed)

    analyze_models(experiment)
Пример #5
0
def analyze_aggregated():
    """Run ``analyze_aggregated_models`` for expected SARSA / reward.

    The first entry of ``get_reward_seeds()`` is used as the seed and to
    name the image output directory. The aggregated models are fetched with
    ``get_aggregated_models`` for the full list of reward seeds, and their
    keys and values are passed on as parallel lists.
    """
    experiment = ExperimentParameters()
    experiment.seed = get_reward_seeds()[0]
    experiment.image_output_dir = (
        "../../../images/expected_sarsa/reward/%i" % experiment.seed)

    models_by_filename = get_aggregated_models(
        "expected_sarsa", "reward", experiment, get_reward_seeds())

    # Keys and values are materialised separately, in matching order.
    analyze_aggregated_models(
        list(models_by_filename.keys()),
        list(models_by_filename.values()),
        experiment,
    )
Пример #6
0
def analyze_aggregated():
    """Run ``analyze_aggregated_models`` for Q-learning / state.

    The first entry of ``get_state_seeds()`` is used as the seed and to name
    the image output directory. The aggregated models are fetched with
    ``get_aggregated_models`` for the full list of state seeds, and their
    keys and values are passed on as parallel lists.
    """
    experiment = ExperimentParameters()
    experiment.seed = get_state_seeds()[0]
    experiment.image_output_dir = (
        "../../../images/qlearning/state/%i" % experiment.seed)

    models_by_filename = get_aggregated_models(
        "qlearning", "state", experiment, get_state_seeds())

    # Keys and values are materialised separately, in matching order.
    analyze_aggregated_models(
        list(models_by_filename.keys()),
        list(models_by_filename.values()),
        experiment,
    )
Пример #7
0
def analyze_aggregated_reward_food_count_correlations(algorithm):
    """Print reward vs. food-count correlation statistics per state family.

    Aggregated "reward" models for ``algorithm`` are loaded via
    ``get_aggregated_models``. For the models whose state name starts with
    ``"Board"`` and then for those starting with ``"DirectionalDistance"``,
    the mean and the mean absolute Pearson correlation between the averaged
    reward series and the averaged food-count series are printed.

    Args:
        algorithm: Algorithm identifier forwarded to
            ``get_aggregated_models``.
    """
    def get_corr_coefs(models):
        """Return one reward/food-count correlation coefficient per model.

        Models whose averaged reward series is all zeros are skipped.
        """
        corr_coefs = []

        for model in models:
            average_reward_over_time = compute_mean_over_time(
                model.rewards_per_episode)
            average_food_count_over_time = compute_mean_over_time(
                model.food_count_per_episode)

            if np.all(average_reward_over_time == 0):
                continue

            # np.corrcoef returns the full correlation matrix. Keep only the
            # off-diagonal entry: averaging the whole matrix would mix in the
            # diagonal 1.0 values and bias the reported means.
            # NOTE(review): assumes both series are 1-D, giving a 2x2 matrix.
            corr_matrix = np.corrcoef(average_reward_over_time,
                                      average_food_count_over_time)
            corr_coefs.append(corr_matrix[0, 1])

        return corr_coefs

    def report(title, state_prefix):
        """Print the correlation summary for states with ``state_prefix``."""
        current_models, _ = filter_models_by_state(
            models, states,
            lambda state: state.startswith(state_prefix))

        corr_coefs = get_corr_coefs(current_models)

        print(title)
        print("Average correlation:", np.mean(corr_coefs))
        print("Average absolute correlation:", np.mean(np.abs(corr_coefs)))

    exp_params = ExperimentParameters()
    exp_params.seed = get_reward_seeds()[0]

    aggregated_models = get_aggregated_models(algorithm, "reward", exp_params,
                                              get_reward_seeds())

    # The state name is the part of each filename before the first "_".
    states = [filename.split("_")[0] for filename in aggregated_models.keys()]
    models = list(aggregated_models.values())

    report("Board State", "Board")
    report("Directional Distance State", "DirectionalDistance")
Пример #8
0
def train():
    """Train and store one Sarsa model per snake parameter set.

    For every parameter object yielded by ``get_snake_parameters()`` a
    ``SnakeEnvironment`` and an epsilon-greedy policy are created, an
    ``ExperimentParameters`` object is filled in, and
    ``train_and_store_model`` is invoked with it.
    """
    for snake_params in get_snake_parameters():
        environment = SnakeEnvironment(snake_params)
        snake_params.policy = EpsilonGreedyPolicy(
            environment, snake_params.epsilon)

        experiment = ExperimentParameters()
        experiment.env = environment
        experiment.model_class = Sarsa
        experiment.model_params = snake_params
        experiment.seed = get_parameters_seed()

        # Models are written to a directory named after the seed.
        experiment.model_output_dir = (
            "../../../models/sarsa/params/%i" % experiment.seed)

        train_and_store_model(experiment)
Пример #9
0
def analyze_test():
    """Run ``analyze_models_test`` for the SARSA reward experiment.

    Snake parameters are built with a static discount factor of 0.95, a
    static learning rate of 0.15 and a greedy policy on a fresh
    ``SnakeEnvironment``. The experiment parameters receive the environment,
    the ``Sarsa`` model class, the snake parameters and seed-specific model
    and image directories.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.95)
    snake_params.learning_rate = StaticLearningRate(0.15)

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = GreedyPolicy(environment)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = snake_params

    # Directory names use the seed already present on the parameters object.
    experiment.model_output_dir = (
        "../../../models/sarsa/reward/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/sarsa/reward/%i" % experiment.seed)

    analyze_models_test(experiment)
Пример #10
0
def train():
    """Train Sarsa models for every state seed.

    Snake parameters are built with a static discount factor of 0.95, a
    static learning rate of 0.15, the reward from ``get_state_reward()`` and
    an epsilon-greedy policy. One shared ``ExperimentParameters`` object is
    then re-seeded and passed to ``train_models`` once per entry of
    ``get_state_seeds()``.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.95)
    snake_params.learning_rate = StaticLearningRate(0.15)
    snake_params.reward = get_state_reward()

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = EpsilonGreedyPolicy(
        environment, snake_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = snake_params

    for current_seed in get_state_seeds():
        experiment.seed = current_seed
        # Each seed gets its own model output directory.
        experiment.model_output_dir = (
            "../../../models/sarsa/state/%i" % experiment.seed)

        train_models(experiment)
Пример #11
0
def train():
    """Train QLearning models for every reward seed and reward state.

    Snake parameters are built with a static discount factor of 0.85, a
    static learning rate of 0.85 and an epsilon-greedy policy. One shared
    ``ExperimentParameters`` object is re-seeded per entry of
    ``get_reward_seeds()``; for each seed, ``train_models`` is called once
    per state returned by ``get_reward_states()``.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.85)
    snake_params.learning_rate = StaticLearningRate(0.85)

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = EpsilonGreedyPolicy(
        environment, snake_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = QLearning
    experiment.model_params = snake_params

    for current_seed in get_reward_seeds():
        experiment.seed = current_seed
        # Each seed gets its own model output directory.
        experiment.model_output_dir = (
            "../../../models/qlearning/reward/%i" % experiment.seed)

        for reward_state in get_reward_states():
            # The state is swapped on the shared model parameters in place.
            experiment.model_params.state = reward_state
            train_models(experiment)