def __init__(self):
    """Set up the training worker: agent model, replay memory, and IPC queues."""
    # Restore the latest saved network weights before training continues.
    self.model = DDPG()
    self.model.load_weights()
    self.memory = Memory()
    # Shared queues: experience samples in, model snapshots out.
    self.queue = fifo
    self.model_queue = model_queue
    # Steps to collect before learning updates begin.
    self.warmup = config.get(MODEL_WARMUP)
    # Checkpoint interval in steps; config value is scaled by 1000
    # (presumably expressed in thousands of steps — TODO confirm).
    self.modelsavefreq = config.get(MODEL_SAVE_FREQ) * 1000
def __init__(self):
    """Set up the evaluator: agent model, episode limits, and render/draw options."""
    self.model = DDPG()
    # Evaluation loop bounds.
    self.num_episodes = config.get(EVALUATOR_NUM_EPISODES)
    self.max_step = config.get(EVALUATOR_MAX_STEP)
    # Rendering and result-plotting preferences.
    self.visualize = config.get(EVALUATOR_VISABLE)
    self.save = config.get(EVALUATOR_DRAW)
    self.save_path = config.get(EVALUATOR_DRAW_PATH)
    # Shared queues: experience samples and model snapshots.
    self.queue = fifo
    self.model_queue = model_queue
    # Load the most recent weights so evaluation uses the current policy.
    self.model.load_weights()
def __init__(self):
    """Fixed-capacity replay memory backed by per-field ring buffers."""
    capacity = config.get(REPLAY_BUFFER_SIZE)
    # One ring buffer per transition component, all sharing the same capacity,
    # so entries at the same index form one (s, a, r, s', done) transition.
    self.actions = RingBuffer(capacity)
    self.rewards = RingBuffer(capacity)
    self.terminals = RingBuffer(capacity)
    self.current_observations = RingBuffer(capacity)
    self.next_observations = RingBuffer(capacity)
import logging

from utils.configsupport import config
from constfile.constkey import *

# Read logging preferences from the shared configuration.
# All three keys are read unconditionally (matching upstream behavior).
_file_out = config.get(LOG_ISFILEOUT)
_file_path = config.get(LOG_FILEPATH)
_log_level = config.get(LOG_LEVEL)

_LOG_FORMAT = "%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s"
_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S'

# Route records to a file only when file output is enabled; otherwise
# basicConfig falls back to stderr.
_cfg = {
    "level": _log_level,
    "format": _LOG_FORMAT,
    "datefmt": _DATE_FORMAT,
}
if _file_out:
    _cfg["filename"] = _file_path
logging.basicConfig(**_cfg)

# Module-wide logger shared by the DDPG training code.
log = logging.getLogger("ddpg")
def __init__(self):
    """Build the DDPG agent from configuration.

    Creates the actor/critic networks and their target copies, the
    optimizers, the replay memory, the Ornstein-Uhlenbeck exploration
    noise process, and reads all training hyper-parameters.
    """
    # Optional deterministic seeding; disabled when the configured seed is <= 0.
    seed_value = config.get(MODEL_SEED)
    self.policy_loss = []
    self.critic_loss = []
    if seed_value > 0:
        self.seed(seed_value)

    # State/action dimensionality of the environment.
    self.nb_states = config.get(MODEL_STATE_COUNT)
    self.nb_actions = config.get(MODEL_ACTION_COUNT)

    # Network layouts come straight from configuration.
    actor_cfg = {
        'hidden1': config.get(MODEL_ACTOR_HIDDEN1),
        'hidden2': config.get(MODEL_ACTOR_HIDDEN2),
        'init_w': config.get(MODEL_INIT_WEIGHT),
    }
    critic_cfg = {
        'hidden1': config.get(MODEL_CRITIC_HIDDEN1),
        'hidden2': config.get(MODEL_CRITIC_HIDDEN2),
        'init_w': config.get(MODEL_INIT_WEIGHT),
    }

    # Online and target networks share the same architecture.
    self.actor = Actor(self.nb_states, self.nb_actions, **actor_cfg)
    self.actor_target = Actor(self.nb_states, self.nb_actions, **actor_cfg)
    self.actor_optim = Adam(
        self.actor.parameters(),
        lr=config.get(MODEL_ACTOR_LR),
        weight_decay=config.get(MODEL_ACTOR_WEIGHT_DECAY))

    self.critic = Critic(self.nb_states, self.nb_actions, **critic_cfg)
    self.critic_target = Critic(self.nb_states, self.nb_actions, **critic_cfg)
    self.critic_optim = Adam(
        self.critic.parameters(),
        lr=config.get(MODEL_CRITIC_LR),
        weight_decay=config.get(MODEL_CRITIC_WEIGHT_DECAY))

    # Start the targets as exact copies of the online networks.
    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Replay buffer and Ornstein-Uhlenbeck exploration noise.
    self.memory = Memory()
    self.random_process = OrnsteinUhlenbeckProcess(
        size=self.nb_actions,
        theta=config.get(RANDOM_THETA),
        mu=config.get(RANDOM_MU),
        sigma=config.get(RANDOM_SIGMA))

    # Training hyper-parameters.
    self.batch_size = config.get(MODEL_BATCH_SIZE)
    self.tau = config.get(MODEL_TARGET_TAU)
    self.discount = config.get(MODEL_DISCOUNT)
    # Per-step epsilon decrement (1 / configured decay horizon).
    self.depsilon = 1.0 / config.get(MODEL_EPSILON)
    self.model_path = config.get(MODEL_SAVE_PATH)
    # NOTE(review): `self.epsilon = 1.0` was commented out upstream —
    # confirm where the running epsilon is initialized.

    # Select the compute device (CPU/GPU).
    self.device_init()