示例#1
0
def train_model(pathID, model_fit, numbvaccines, steps, efficacy, iterations,
                open_port):
    """Train an agent on a ``CustomEnv`` built from fitted-model data and save
    the resulting policy data.

    The function cleans ``model_fit`` with ``clean_data``, builds a
    ``CustomEnv`` from the cleaned pieces, calls ``train_agent`` on it, then
    calls ``rewards_from_policy`` on ``<save_path>/best_model`` and stores the
    result in ``<save_path>/policy_data.npy``.

    Parameters
    ----------
    pathID :
        Converted with ``str()`` and appended to
        ``master_path + '/user_data/'`` to form the per-user save directory.
    model_fit :
        Passed unchanged to ``clean_data``.
    numbvaccines, steps, efficacy :
        Forwarded positionally to the ``CustomEnv`` constructor.
    iterations :
        Forwarded to ``train_agent`` as its third positional argument.
    open_port :
        Forwarded to ``train_agent`` as its last positional argument.

    Returns
    -------
    None
        There is no ``return`` statement; results are written to disk.

    NOTE(review): everything from ``model = PPO2(...)`` onwards uses
    ``policy_kwargs``, ``tensorboard_path``, ``port``, ``total_episodes``,
    ``cb`` and ``path``. None of these are parameters or locals of this
    function, so unless they exist as module-level globals this part raises
    ``NameError``. It looks like the tail of a different function (possibly
    ``train_agent``) ended up here -- TODO confirm against the original file.
    """
    # ``master_path`` is not defined in this function; it must be a
    # module-level name.
    save_path = master_path + '/user_data/' + str(pathID)
    #Cleaning the data
    names, pars, total_pops, i_states = clean_data(model_fit)
    print("Cleaned Data")

    #Start training
    env = CustomEnv(pars, i_states, total_pops, numbvaccines, steps, names,
                    efficacy)
    # 0.0001 is presumably the learning rate -- verify against train_agent.
    train_agent(env, 0.0001, iterations, save_path, open_port)
    print("\n Training Complete \n")

    #Evaluate the model
    policy_data = rewards_from_policy(env, save_path + "/best_model")

    print("All Rewards Data")
    np.save(save_path + "/policy_data.npy", policy_data)
    # NOTE(review): from here on the names policy_kwargs, tensorboard_path,
    # port, total_episodes, cb and path are not defined in this function
    # (see the docstring).
    model = PPO2(MlpPolicy,
                 env,
                 policy_kwargs=policy_kwargs,
                 tensorboard_log=tensorboard_path,
                 gamma=1.0,
                 nminibatches=2)

    # Launch a TensorBoard process alongside training; the command string is
    # split on single spaces, so a tensorboard_path containing spaces would be
    # broken into several arguments.
    cmd_str = "tensorboard --logdir " + tensorboard_path + " --port=" + str(
        port)
    print(cmd_str)
    p = subprocess.Popen(cmd_str.split(" "))

    print(total_episodes * env.max_steps)
    model.learn(total_timesteps=total_episodes * env.max_steps, callback=cb)
    model.save(path + "/ppo2_vaccine")

    # The TensorBoard process is only killed if learn()/save() return
    # normally; an exception above leaves it running.
    p.kill()


if __name__ == "__main__":
    # Manual smoke test: only runs when this file is executed as a script.
    # Load the pickled fitting results first, then split them into the pieces
    # the environment constructor expects.
    fitted = np.load(
        "/home/steelshot/nwHacksVaccDistr/model_fitting/data/tests.npy",
        allow_pickle=True,
    )
    place_names, params, total_populations, initial_states = clean_data(fitted)

    # Positional arguments are kept in the original order.
    env = CustomEnv(
        params,
        initial_states,
        total_populations,
        500000,
        180,
        place_names,
        0.95,
    )

    # Train and write the model under the hard-coded models directory.
    train_agent(
        env,
        0.0003,
        10000,
        "/home/steelshot/nwHacksVaccDistr/optimization/models",
    )
示例#3
0
def run(countries,
        states,
        steps,
        numbvaccines,
        efficacy,
        pathID,
        iterations=100):
    """
    Fit the epidemic models, train an agent on the resulting environment and
    save the data produced by the learned policy.

    INPUT PARAMETERS:
    ----------------
    countries : array-like
        The countries that the user is interested in.

    states : array-like (or None)
        The states/provinces that the user would like to consider if they are
        keeping the country fixed. (The original text said "Default is None",
        but the signature gives this parameter no default.)

    steps : int
        The number of days that the user wants to run the simulation.

    numbvaccines : int
        The number of vaccines that the user wants to allocate per day.

    efficacy : float
        The percentage effectiveness of the vaccine that they have chosen

    pathID : str
        The ID of the path for the user; it is converted with ``str()`` and
        appended to ``master_path + "/user_data/"``.

    iterations : int (default to 100)
        The number of iterations for the training to run

    SIDE EFFECTS:
    ------------
    Writes ``policy_data.npy`` under ``master_path/user_data/<pathID>`` and
    prints progress messages. There is no ``return`` statement in the part of
    the function that uses the parameters.

    NOTE(review): the plotting code at the end of this function reads ``time``
    and ``policy_results``, neither of which is a parameter or local of
    ``run``. It also starts at subplot 3 of a 2x2 grid. It looks like the tail
    of a separate plotting function (possibly ``visualize_results``) ended up
    here -- TODO confirm against the original file.
    """
    #First getting the model fitting params
    fitting = covsir_models(countries, states)
    results = fitting.calling()
    print("Fitted data")
    print(results)

    #Cleaning the data
    names, pars, total_pops, i_states = clean_data(results)
    print("Cleaned Data")

    #Start training
    env = CustomEnv(pars, i_states, total_pops, numbvaccines, steps, names,
                    efficacy)
    # 0.0003 is presumably the learning rate -- verify against train_agent.
    train_agent(env, 0.0003, iterations,
                master_path + "/user_data/" + str(pathID))
    print("\n Training Complete \n")

    model_path = master_path + "/user_data/" + str(
        pathID) + "/best_model"  #Defining the model path

    #Evaluate the model
    # NOTE(review): all_rewards is not used again in this function.
    all_rewards = rewards_from_policy(env, numbvaccines, steps, efficacy,
                                      model_path)
    print("All Rewards Data")

    #Get data from policies
    data = data_from_policy(env, model_path)
    np.save(master_path + "/user_data/" + str(pathID) + "/policy_data.npy",
            data)
    print("Data from policy")
    # NOTE(review): `time` and `policy_results` are not defined in run(); see
    # the docstring. Unless they exist as module-level names this raises
    # NameError.
    plt.plot(time, policy_results["learned_policy"]["deaths"])
    plt.plot(time, policy_results["no_vaccine"]["deaths"])

    # Third panel of a 2x2 grid: susceptible counts for each policy key.
    plt.subplot(2, 2, 3)
    plt.ylabel("Succeptible")
    plt.plot(time, policy_results["infected_ratio"]["susceptible"])
    plt.plot(time, policy_results["population_ratio"]["susceptible"])
    plt.plot(time, policy_results["learned_policy"]["susceptible"])
    plt.plot(time, policy_results["no_vaccine"]["susceptible"])

    # Fourth panel: recovered counts for the same four policy keys.
    plt.subplot(2, 2, 4)
    plt.ylabel("Recovered")
    plt.plot(time, policy_results["infected_ratio"]["recovered"])
    plt.plot(time, policy_results["population_ratio"]["recovered"])
    plt.plot(time, policy_results["learned_policy"]["recovered"])
    plt.plot(time, policy_results["no_vaccine"]["recovered"])
    plt.show()


if __name__ == "__main__":
    # Manual check that the evaluation pipeline works; this block does not run
    # when the module is imported.
    fitted_model = np.load(
        "/home/steelshot/nwHacksVaccDistr/user_data/6/fitted_model.npy",
        allow_pickle=True,
    )
    place_names, params, total_populations, initial_states = clean_data(
        fitted_model)

    # Positional arguments are kept in the original order.
    env = CustomEnv(
        params,
        initial_states,
        total_populations,
        100000,
        30,
        place_names,
        0.95,
    )

    # Evaluate the previously saved best model for user directory 6.
    policy_results = rewards_from_policy(
        env,
        180,
        0.95,
        "/home/steelshot/nwHacksVaccDistr/user_data/6/best_model",
    )

    # Print the comparison table, then plot the results.
    print(policy_results["policy_comparision"])
    visualize_results(policy_results)