Example #1
        def __init__(self,
                     worker_id=0,
                     hdims=[256, 256],
                     actv=tf.nn.relu,
                     ep_len_rollout=1000,
                     max_ep_len_eval=1000):
            # Parse
            self.worker_id = worker_id
            self.ep_len_rollout = ep_len_rollout
            self.max_ep_len_eval = max_ep_len_eval
            # Each worker should maintain its own environment
            import gym
            from util import suppress_tf_warning
            suppress_tf_warning()  # suppress TF warnings
            gym.logger.set_level(40)
            self.env = get_env()
            odim = self.env.observation_space.shape[0]
            adim = self.env.action_space.shape[0]
            self.odim = odim
            self.adim = adim
            _ = self.env.reset()

            # Replay buffers to pass
            self.o_buffer = np.zeros((self.ep_len_rollout, self.odim))
            self.a_buffer = np.zeros((self.ep_len_rollout, self.adim))
            self.r_buffer = np.zeros((self.ep_len_rollout))
            self.o2_buffer = np.zeros((self.ep_len_rollout, self.odim))
            self.d_buffer = np.zeros((self.ep_len_rollout))

            # Create SAC model
            self.model, self.sess = create_sac_model(odim=self.odim,
                                                     adim=self.adim,
                                                     hdims=hdims,
                                                     actv=actv)
            self.sess.run(tf.global_variables_initializer())
            print("Ray Worker [%d] Ready." % (self.worker_id))

            # Flag to initialize assign operations for 'set_weights()'
            self.FIRST_SET_FLAG = True

            # Flag to initialize rollout
            self.FIRST_ROLLOUT_FLAG = True
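
A minimal usage sketch for this rollout worker. Assumptions: the __init__ above belongs to a Ray actor class, here called RolloutWorkerClass; the class name, stand-in body, and worker count below are illustrative and not taken from the source.

import ray

@ray.remote
class RolloutWorkerClass(object):
    # Stand-in for the worker above: only the constructor signature matters here;
    # the real __init__ also builds the env, the replay buffers, and the SAC model.
    def __init__(self, worker_id=0, ep_len_rollout=1000):
        self.worker_id = worker_id
        self.ep_len_rollout = ep_len_rollout

ray.init(num_cpus=4)  # start Ray locally
# Each actor owns its own environment, so rollouts can run in parallel.
workers = [RolloutWorkerClass.remote(worker_id=i) for i in range(4)]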
Example #2
        def __init__(self,
                     hdims=[256, 256],
                     actv=tf.nn.relu,
                     lr=1e-3,
                     gamma=0.99,
                     alpha_q=0.1,
                     alpha_pi=0.1,
                     polyak=0.995,
                     epsilon=1e-2,
                     seed=1):
            self.seed = seed
            # Each worker should maintain its own environment
            import gym
            from util import suppress_tf_warning
            suppress_tf_warning()  # suppress TF warnings
            gym.logger.set_level(40)
            self.env = get_eval_env()
            odim = self.env.observation_space.shape[0]
            adim = self.env.action_space.shape[0]
            self.odim = odim
            self.adim = adim
            _ = self.env.reset()

            # Create SAC model and computational graph
            self.model, self.sess = create_sac_model(odim=self.odim,
                                                     adim=self.adim,
                                                     hdims=hdims,
                                                     actv=actv)
            self.step_ops, self.target_init = create_sac_graph(
                self.model, lr=lr, gamma=gamma, alpha_q=alpha_q,
                alpha_pi=alpha_pi, polyak=polyak, epsilon=epsilon)

            # Initialize model
            self.FIRST_SET_FLAG = True
            tf.set_random_seed(self.seed)
            np.random.seed(self.seed)
            self.sess.run(tf.global_variables_initializer())
            self.sess.run(self.target_init)
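
For context, `self.sess.run(self.target_init)` hard-copies the freshly initialized main-network weights into the target networks before training starts. Below is a rough sketch of how such target ops are commonly built in TF1-style SAC; this is an assumed pattern, not the actual internals of create_sac_graph.

import tensorflow as tf

def make_target_ops(main_vars, target_vars, polyak=0.995):
    # Hard copy, run once right after tf.global_variables_initializer()
    # (the role target_init plays in the example above).
    target_init = tf.group([tf.assign(v_targ, v_main)
                            for v_main, v_targ in zip(main_vars, target_vars)])
    # Soft (Polyak-averaged) update, run after every gradient step.
    target_update = tf.group([tf.assign(v_targ, polyak * v_targ + (1 - polyak) * v_main)
                              for v_main, v_targ in zip(main_vars, target_vars)])
    return target_init, target_update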
Example #3
import util
util.suppress_tf_warning()

##########################
# Real fun code starts.
##########################

import random

from game import GameConfig
from policy import HumanPolicy
from policy import MCTSPolicy
from play import play_games

###########################
### Configuration to change
###########################

SHUFFLE_PLAYERS = True

###########################
### Initialize the env
###########################

config = GameConfig()
print(config)

def BestPolicy(b, c):
    return MCTSPolicy(b, c, explore=False, debug=True)
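
An illustrative continuation: how the imported pieces might be wired together. The play_games call signature and the way policies are paired are assumptions, not taken from the project's play module.

players = [BestPolicy, HumanPolicy]
if SHUFFLE_PLAYERS:
    random.shuffle(players)  # randomize which policy plays first
play_games(config, players)  # assumed signature; see the project's play module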

Example #4
import datetime, gym, time, os, psutil, ray
import numpy as np
import tensorflow as tf
from util import gpu_sess, suppress_tf_warning, tic, toc, open_txt, write_txt
from sac import ReplayBuffer, create_sac_model, create_sac_graph, \
    save_sac_model_and_buffers, restore_sac_model_and_buffers
np.set_printoptions(precision=2)
suppress_tf_warning()  # suppress warning
gym.logger.set_level(40)  # gym logger
# from episci.environment_wrappers.tactical_action_adt_env_continuous import CustomADTEnvContinuous
from episci.agents.utils.constants import Agents


def train(
        expname='sac_adt_cont',
        n_cpu=30,
        n_workers=30,
        total_steps=50000,
        burnin_steps=10,
        evaluate_every=50,
        print_every=5,
        action_length=5,
        action_length_eval=5,
        ep_len_rollout=10 * 150,
        hdims=[128, 128],
        actv=tf.nn.relu,
        red_list_train={
            Agents.SPOT_4G: 0.15,
            Agents.SPOT_5G: 0.30,
            Agents.SPOT_RANDOM: 0.45,
            Agents.EXPERT_SYSTEM_TRIAL_2: 0.6,