def __init__(self, worker_id=0, hdims=[256, 256], actv=tf.nn.relu,
             ep_len_rollout=1000, max_ep_len_eval=1000):
    # Parse
    self.worker_id = worker_id
    self.ep_len_rollout = ep_len_rollout
    self.max_ep_len_eval = max_ep_len_eval
    # Each worker should maintain its own environment
    import gym
    from util import suppress_tf_warning
    suppress_tf_warning()  # suppress TF warnings
    gym.logger.set_level(40)
    self.env = get_env()
    odim, adim = self.env.observation_space.shape[0], self.env.action_space.shape[0]
    self.odim = odim
    self.adim = adim
    _ = self.env.reset()
    # Replay buffers to pass
    self.o_buffer = np.zeros((self.ep_len_rollout, self.odim))
    self.a_buffer = np.zeros((self.ep_len_rollout, self.adim))
    self.r_buffer = np.zeros((self.ep_len_rollout))
    self.o2_buffer = np.zeros((self.ep_len_rollout, self.odim))
    self.d_buffer = np.zeros((self.ep_len_rollout))
    # Create SAC model
    self.model, self.sess = create_sac_model(odim=self.odim, adim=self.adim,
                                             hdims=hdims, actv=actv)
    self.sess.run(tf.global_variables_initializer())
    print("Ray Worker [%d] Ready." % (self.worker_id))
    # Flag to initialize assign operations for 'set_weights()'
    self.FIRST_SET_FLAG = True
    # Flag to initialize rollout
    self.FIRST_ROLLOUT_FLAG = True
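
# The method below is an illustrative sketch, NOT part of the original file:
# it shows how the fixed-size rollout buffers allocated above would typically
# be filled and handed back to the learner. The `get_action` helper is an
# assumption for illustration.
def rollout(self):
    if self.FIRST_ROLLOUT_FLAG:  # reset the env only on the very first rollout
        self.FIRST_ROLLOUT_FLAG = False
        self.o = self.env.reset()
    for t in range(self.ep_len_rollout):
        a = self.get_action(self.o, deterministic=False)  # assumed helper
        o2, r, d, _ = self.env.step(a)
        # Store the transition into the per-worker numpy buffers
        self.o_buffer[t, :] = self.o
        self.a_buffer[t, :] = a
        self.r_buffer[t] = r
        self.o2_buffer[t, :] = o2
        self.d_buffer[t] = d
        self.o = self.env.reset() if d else o2
    return self.o_buffer, self.a_buffer, self.r_buffer, \
        self.o2_buffer, self.d_buffer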
def __init__(self, hdims=[256, 256], actv=tf.nn.relu, lr=1e-3, gamma=0.99,
             alpha_q=0.1, alpha_pi=0.1, polyak=0.995, epsilon=1e-2, seed=1):
    self.seed = seed
    # Each worker should maintain its own environment
    import gym
    from util import suppress_tf_warning
    suppress_tf_warning()  # suppress TF warnings
    gym.logger.set_level(40)
    self.env = get_eval_env()
    odim, adim = self.env.observation_space.shape[0], self.env.action_space.shape[0]
    self.odim = odim
    self.adim = adim
    _ = self.env.reset()
    # Create SAC model and computational graph
    self.model, self.sess = create_sac_model(odim=self.odim, adim=self.adim,
                                             hdims=hdims, actv=actv)
    self.step_ops, self.target_init = \
        create_sac_graph(self.model, lr=lr, gamma=gamma, alpha_q=alpha_q,
                         alpha_pi=alpha_pi, polyak=polyak, epsilon=epsilon)
    # Initialize model
    self.FIRST_SET_FLAG = True
    tf.set_random_seed(self.seed)
    np.random.seed(self.seed)
    self.sess.run(tf.global_variables_initializer())
    self.sess.run(self.target_init)
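
# The method below is an illustrative sketch, NOT part of the original file:
# it shows how a sampled replay batch would be fed through the `step_ops`
# built by create_sac_graph() above. The placeholder keys in `self.model`
# and the batch dictionary layout are assumptions for illustration.
def update(self, batch):
    feed_dict = {self.model['o_ph']: batch['obs1'],
                 self.model['a_ph']: batch['acts'],
                 self.model['r_ph']: batch['rews'],
                 self.model['o2_ph']: batch['obs2'],
                 self.model['d_ph']: batch['done']}
    # step_ops is assumed to bundle the Q/pi losses, their optimizers,
    # and the polyak-averaged target-network update
    return self.sess.run(self.step_ops, feed_dict)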
import util
util.suppress_tf_warning()

##########################
# Real fun code starts.
##########################
import random
from game import GameConfig
from policy import HumanPolicy
from policy import MCTSPolicy
from play import play_games

###########################
### Configuration to change
###########################
SHUFFLE_PLAYERS = True

###########################
### Initialize the env
###########################
config = GameConfig()
print(config)


def BestPolicy(b, c):
    return MCTSPolicy(b, c, explore=False, debug=True)
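
###########################
### Illustrative usage (sketch)
###########################
# NOT part of the original script: one plausible way to wire the pieces above
# together. The play_games() signature and the order of its policy arguments
# are assumptions for illustration.
if __name__ == '__main__':
    players = [BestPolicy, HumanPolicy]
    if SHUFFLE_PLAYERS:
        random.shuffle(players)  # randomize who plays which side
    play_games(config, players[0], players[1])  # assumed signature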
import datetime, gym, time, os, psutil, ray
import numpy as np
import tensorflow as tf
from util import gpu_sess, suppress_tf_warning, tic, toc, open_txt, write_txt
from sac import ReplayBuffer, create_sac_model, create_sac_graph, \
    save_sac_model_and_buffers, restore_sac_model_and_buffers

np.set_printoptions(precision=2)
suppress_tf_warning()  # suppress warning
gym.logger.set_level(40)  # gym logger
# from episci.environment_wrappers.tactical_action_adt_env_continuous import CustomADTEnvContinuous
from episci.agents.utils.constants import Agents


def train(expname='sac_adt_cont', n_cpu=30, n_workers=30,
          total_steps=50000, burnin_steps=10,
          evaluate_every=50, print_every=5,
          action_length=5, action_length_eval=5,
          ep_len_rollout=10 * 150,
          hdims=[128, 128], actv=tf.nn.relu,
          red_list_train={
              Agents.SPOT_4G: 0.15,
              Agents.SPOT_5G: 0.30,
              Agents.SPOT_RANDOM: 0.45,
              Agents.EXPERT_SYSTEM_TRIAL_2: 0.6,