import numpy as np


def eval_policy(policy,
                env_name,
                seed,
                policy_type=0,
                eval_episodes=100,
                threshold_pol=7):
    # policy_type=0: evaluate the learned policy's greedy actions.
    # policy_type=1: evaluate a fixed baseline that offloads (action 1)
    # once state[1] reaches threshold_pol.
    #eval_env, _, _, _ = utils.make_env(env_name, atari_preprocessing)
    #eval_env.seed(seed + 100)
    global cpu_util
    global action_list
    eval_env = OffloadEnv()
    avg_dis_reward = 0.
    for _ in range(eval_episodes):
        state, done = eval_env.reset(), False
        for _ in range(200):
            if policy_type == 0:
                action = policy.select_action(np.array(state), eval=True)
            elif policy_type == 1:
                # Threshold baseline: offload once the load crosses threshold_pol.
                action = 0 if state[1] < threshold_pol else 1
            prev_state = state
            # cpu_util.append(state[1])
            # action_list.append(action)
            state, reward, done, _ = eval_env.step(action)
            avg_dis_reward += reward
            print("Eval policy action reward", prev_state, action, reward,
                  state)
            if done:
                # Stop the rollout once the environment signals termination.
                break

    avg_dis_reward /= eval_episodes

    print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_dis_reward:.3f}")
    print("---------------------------------------")
    #cpu_npy = np.array(cpu_util)
    #act_npy = np.array(action_list)
    #np.save('./buffers/cpu_util.npy', cpu_npy)
    #np.save('./buffers/action.npy', act_npy)
    return avg_dis_reward
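For reference, a minimal usage sketch (not part of the original source): it compares a learned agent against the fixed threshold baseline. The argument values are illustrative assumptions; eval_policy ignores env_name and seed internally, and with policy_type=1 the policy argument is never consulted, so None is safe there.

# Illustrative only: `agent` stands in for any object exposing
# select_action(state, eval=True); all values here are assumptions.
learned_return = eval_policy(agent, "offload", seed=0,
                             policy_type=0, eval_episodes=10)
baseline_return = eval_policy(None, "offload", seed=0,
                              policy_type=1, eval_episodes=10,
                              threshold_pol=7)
print("learned:", learned_return, "threshold baseline:", baseline_return)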
Example #2
    env_name = args.env_name
    setting = f"{env_name}_{args.seed}"
    buffer_name = f"{args.buffer_name}_{setting}"
    #env = DummyVecEnv([lambda: env])
    log_dir = args.logdir
    #env = make_vec_env(lambda: env, n_envs=1)
    #eval_env = make_vec_env(lambda: eval_env, n_envs=1)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #callback = SaveOnBestTrainingRewardCallback(check_freq=10, log_dir=log_dir)
    loop_range = int(args.train_iter / args.eval_freq)
    for j in range(0, 10):
        print("RANDOM SEED ", j)
        lambd = []
        N = []
        env = OffloadEnv(False, args.lambd, args.offload_cost,
                         args.overload_cost, args.holding_cost, args.reward,
                         args.N, j, args.env_name, args.folder)
        env = Monitor(env, log_dir)
        #env.seed(j)
        #torch.manual_seed(j)
        np.random.seed(j)
        if args.algo != 4:
            with open(f"../inop_salmut/{args.folder}/buffers/lambda.npy",
                      "rb") as fp:
                lambd = pickle.load(fp)
            with open(f"../inop_salmut/{args.folder}/buffers/N.npy",
                      "rb") as fp:
                N = pickle.load(fp)
        if args.algo == 0:
            model = PPO('MlpPolicy',
                        env,
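The PPO constructor above is cut off in the original listing. A plausible continuation is sketched below, assuming that loop_range (computed earlier as train_iter / eval_freq) alternates training with periodic evaluation; only the standard stable-baselines3 calls are confirmed here, the hyperparameters and loop structure are assumptions.

# Hedged sketch of a possible continuation, not the original code.
model = PPO('MlpPolicy', env, seed=j, verbose=0)
for i in range(loop_range):
    # Train for eval_freq steps without resetting the timestep counter.
    model.learn(total_timesteps=args.eval_freq, reset_num_timesteps=False)
    # Greedy rollout for a quick evaluation of the current policy.
    obs, ep_return = env.reset(), 0.0
    for _ in range(200):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, _ = env.step(action)
        ep_return += reward
        if done:
            break
    print(f"iteration {i}: evaluation return {ep_return}")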
Example #3

    if not os.path.exists(f"./{args.folder}/buffers"):
        os.makedirs(f"./{args.folder}/buffers")

    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    """
    # Make env and determine properties
    # env, is_atari, state_dim, num_actions = utils.make_env(
    #    args.env, atari_preprocessing)
    #is_atari = False
    #state_dim = 4
    # Offloading MDP dimensions: two state features, two actions
    # (action 1 is offloading, per the threshold baseline above).
    state_dim = 2
    num_actions = 2
    gamma = 0.95
    env = OffloadEnv(True, args.lambd, args.offload_cost, args.overload_cost,
                     args.holding_cost, args.reward, args.N, args.seed,
                     args.env_name, args.folder, args.start_iter, args.step)
    #eval_env = OffloadEnv(True, args.lambd, args.mdp_evolve, args.user_evolve, args.user_identical, args.env_name)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if args.algo == 3:
        model = structured_learning.structured_learning(
            False, num_actions, state_dim, device, args.BCQ_threshold)
    #env_name = "offload_dqn_mdp_5"
    env_name = args.env_name
    setting = f"{env_name}_{args.seed}"
    buffer_name = f"{args.buffer_name}_{setting}"
    #env = DummyVecEnv([lambda: env])
    #log_dir = "./off_a2c_res_5/"
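setting and buffer_name above build the naming scheme for saved replay buffers. Below is a hedged sketch of how such a buffer might be located and loaded; the exact file layout is an assumption modeled on the ./{args.folder}/buffers directories created earlier, not confirmed by the original code.

# Assumed layout: one .npy file per (env_name, seed) setting.
buffer_path = os.path.join(f"./{args.folder}/buffers", f"{buffer_name}.npy")
if os.path.exists(buffer_path):
    replay_data = np.load(buffer_path, allow_pickle=True)
    print("loaded replay buffer:", buffer_path, "with",
          len(replay_data), "entries")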
Example #4
    if not os.path.exists(f"./{args.folder}/models"):
        os.makedirs(f"./{args.folder}/models")

    if not os.path.exists(f"./{args.folder}/buffers"):
        os.makedirs(f"./{args.folder}/buffers")

    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    """
    state_dim = 2
    num_actions = 2
    gamma = 0.95

    # Defining environment
    env = OffloadEnv(True, args.lambd, args.offload_cost, args.overload_cost,
                     args.holding_cost, args.reward, args.N, args.seed,
                     args.env_name, args.folder)
    #eval_env = OffloadEnv(True, args.lambd, args.mdp_evolve, args.user_evolve, args.user_identical, args.env_name)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #env_name = "offload_dqn_mdp_5"

    # Setting variables
    env_name = args.env_name
    setting = f"{env_name}_{args.seed}"
    buffer_name = f"{args.buffer_name}_{setting}"
    #env = DummyVecEnv([lambda: env])
    #env = make_vec_env(lambda: env, n_envs=1)
    testing_eval_med = []
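testing_eval_med is initialized here, but its use falls outside the excerpt. Judging only from the name, it likely collects per-evaluation returns so a median can be reported across runs; the sketch below is an assumption on that basis, and `model` stands in for the agent built later in the script.

# Assumed usage, inferred from the variable name only:
# accumulate evaluation returns, then report a robust summary.
testing_eval_med.append(eval_policy(model, env_name, args.seed))
print(f"median evaluation reward: {np.median(testing_eval_med):.3f}")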