def eval_policy(policy, env_name, seed, type=0, eval_episodes=100, threshold_pol=7):
    # Evaluate a policy over eval_episodes episodes of fixed length 200 and
    # return the average (undiscounted) episode reward.
    #   type == 0: query the learned policy via select_action()
    #   type == 1: fixed threshold baseline on the load component state[1]
    #eval_env, _, _, _ = utils.make_env(env_name, atari_preprocessing)
    #eval_env.seed(seed + 100)
    global cpu_util
    global action_list
    eval_env = OffloadEnv()
    avg_dis_reward = 0.
    for _ in range(eval_episodes):
        state, done = eval_env.reset(), False
        for t in range(200):
            if type == 0:
                action = policy.select_action(np.array(state), eval=True)
            elif type == 1:
                # Threshold policy: offload (action 1) once the load
                # crosses threshold_pol, otherwise process locally.
                action = 0 if state[1] < threshold_pol else 1
            prev_state = state
            # cpu_util.append(state[1])
            # action_list.append(action)
            state, reward, done, _ = eval_env.step(action)
            avg_dis_reward += reward
            print("Eval policy action reward", prev_state, action, reward, state)

    avg_dis_reward /= eval_episodes
    print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_dis_reward:.3f}")
    print("---------------------------------------")
    #cpu_npy = np.array(cpu_util)
    #act_npy = np.array(action_list)
    #np.save('./buffers/cpu_util.npy', cpu_npy)
    #np.save('./buffers/action.npy', act_npy)
    return avg_dis_reward
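# A minimal usage sketch (not part of the original script): comparing a
# trained policy against the threshold baseline. Assumes `policy` exposes
# select_action() as eval_policy expects for type == 0; for type == 1 the
# policy argument is unused, so None is passed.
#
#   learned = eval_policy(policy, args.env_name, args.seed, type=0)
#   baseline = eval_policy(None, args.env_name, args.seed, type=1, threshold_pol=7)
#   print(f"learned: {learned:.3f}  threshold baseline: {baseline:.3f}")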
env_name = args.env_name
setting = f"{env_name}_{args.seed}"
buffer_name = f"{args.buffer_name}_{setting}"
#env = DummyVecEnv([lambda: env])
log_dir = args.logdir
#env = make_vec_env(lambda: env, n_envs=1)
#eval_env = make_vec_env(lambda: eval_env, n_envs=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#callback = SaveOnBestTrainingRewardCallback(check_freq=10, log_dir=log_dir)

# Number of train/evaluate rounds.
loop_range = int(args.train_iter / args.eval_freq)

for j in range(10):  # 10 random seeds
    print("RANDOM SEED ", j)
    lambd = []
    N = []
    env = OffloadEnv(False, args.lambd, args.offload_cost, args.overload_cost,
                     args.holding_cost, args.reward, args.N, j, args.env_name,
                     args.folder)
    env = Monitor(env, log_dir)
    #env.seed(j)
    #torch.manual_seed(j)
    np.random.seed(j)
    if args.algo != 4:
        # Load the saved lambda and N traces from the SALMUT run directory.
        with open(f"../inop_salmut/{args.folder}/buffers/lambda.npy", "rb") as fp:
            lambd = pickle.load(fp)
        with open(f"../inop_salmut/{args.folder}/buffers/N.npy", "rb") as fp:
            N = pickle.load(fp)
    if args.algo == 0:
        model = PPO('MlpPolicy', env)
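# A sketch of the train/evaluate cadence that loop_range implies (an
# assumption, since the original loop body is truncated here): train for
# eval_freq timesteps per round and evaluate in between. model.learn() with
# reset_num_timesteps and evaluate_policy() are standard stable-baselines3
# API (the latter from stable_baselines3.common.evaluation).
#
#   for i in range(loop_range):
#       model.learn(total_timesteps=args.eval_freq, reset_num_timesteps=False)
#       mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
#       print(f"round {i}: mean eval reward {mean_reward:.3f}")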
if not os.path.exists("./{args.folder}/buffers"): os.makedirs(f"./{args.folder}/buffers") if not os.path.exists(args.logdir): os.makedirs(args.logdir) """ # Make env and determine properties # env, is_atari, state_dim, num_actions = utils.make_env( # args.env, atari_preprocessing) #is_atari = False #state_dim = 4 state_dim = 2 num_actions = 2 gamma = 0.95 env = OffloadEnv(True, args.lambd, args.offload_cost, args.overload_cost, args.holding_cost, args.reward, args.N, args.seed, args.env_name, args.folder, args.start_iter, args.step) #eval_env = OffloadEnv(True, args.lambd, args.mdp_evolve, args.user_evolve, args.user_identical, args.env_name) env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if args.algo == 3: model = structured_learning.structured_learning( False, num_actions, state_dim, device, args.BCQ_threshold) #env_name = "offload_dqn_mdp_5" env_name = args.env_name setting = f"{env_name}_{args.seed}" buffer_name = f"{args.buffer_name}_{setting}" #env = DummyVecEnv([lambda: env]) #log_dir = "./off_a2c_res_5/"
if not os.path.exists("./{args.folder}/models"): os.makedirs(f"./{args.folder}/models") if not os.path.exists("./{args.folder}/buffers"): os.makedirs(f"./{args.folder}/buffers") if not os.path.exists(args.logdir): os.makedirs(args.logdir) """ state_dim = 2 num_actions = 2 gamma = 0.95 # Defining environment env = OffloadEnv(True, args.lambd, args.offload_cost, args.overload_cost, args.holding_cost, args.reward, args.N, args.seed, args.env_name, args.folder) #eval_env = OffloadEnv(True, args.lambd, args.mdp_evolve, args.user_evolve, args.user_identical, args.env_name) env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) #device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #env_name = "offload_dqn_mdp_5" # Setting variables env_name = args.env_name setting = f"{env_name}_{args.seed}" buffer_name = f"{args.buffer_name}_{setting}" #env = DummyVecEnv([lambda: env]) #env = make_vec_env(lambda: env, n_envs=1) testing_eval_med = []