예제 #1
0
 def __init__(self):
     """Create the DDPG model with loaded weights and cache run settings.

     Takes no arguments: ``fifo``, ``model_queue`` and ``config`` are read
     from the enclosing module scope.
     """
     # Build the model and load its weights before exposing it on self.
     agent = DDPG()
     agent.load_weights()
     self.model = agent

     self.memory = Memory()

     # Queue objects are module-level names, stored here for later use.
     self.queue = fifo
     self.model_queue = model_queue

     self.warmup = config.get(MODEL_WARMUP)

     # The configured save frequency is scaled by 1000 before being stored.
     save_freq = config.get(MODEL_SAVE_FREQ)
     self.modelsavefreq = save_freq * 1000
예제 #2
0
 def __init__(self):
     """Create the DDPG model and read the evaluator settings from config.

     Takes no arguments: ``fifo``, ``model_queue`` and ``config`` are read
     from the enclosing module scope. Model weights are loaded as the
     final step, after all settings and queues have been attached.
     """
     read = config.get

     self.model = DDPG()

     # Episode count and per-episode step limit.
     self.num_episodes = read(EVALUATOR_NUM_EPISODES)
     self.max_step = read(EVALUATOR_MAX_STEP)

     # Display / drawing options and the path used for drawn output.
     self.visualize = read(EVALUATOR_VISABLE)
     self.save = read(EVALUATOR_DRAW)
     self.save_path = read(EVALUATOR_DRAW_PATH)

     # Queue objects are module-level names, stored here for later use.
     self.queue = fifo
     self.model_queue = model_queue

     self.model.load_weights()
예제 #3
0
    def __init__(self):
        """Allocate one ring buffer per stored transition field.

        All five buffers are constructed with the same value, taken from
        the ``REPLAY_BUFFER_SIZE`` config entry.
        """
        capacity = config.get(REPLAY_BUFFER_SIZE)

        # One buffer each for actions, rewards and terminal flags.
        self.actions = RingBuffer(capacity)
        self.rewards = RingBuffer(capacity)
        self.terminals = RingBuffer(capacity)

        # Observations before and after each step are kept separately.
        self.current_observations = RingBuffer(capacity)
        self.next_observations = RingBuffer(capacity)
예제 #4
0
import logging
from utils.configsupport import config
from constfile.constkey import *

# Logging settings read from the project config at import time.
__fileout = config.get(LOG_ISFILEOUT)
__filepath = config.get(LOG_FILEPATH)
__level = config.get(LOG_LEVEL)

# Record layout and timestamp format shared by both output modes.
__formated = "%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s"
__datefmt = '%a, %d %b %Y %H:%M:%S'

# Configure the root logger once; ``filename`` is passed only when file
# output is enabled, otherwise basicConfig falls back to its default handler.
logging.basicConfig(
    level=__level,
    format=__formated,
    datefmt=__datefmt,
    **({"filename": __filepath} if __fileout else {})
)

# Logger shared by the rest of the project.
log = logging.getLogger("ddpg")
예제 #5
0
    def __init__(self):
        """Assemble networks, optimizers, replay memory and hyper-parameters.

        Every setting is read from the project ``config``. Construction
        order is: optional seeding, actor and its target, actor optimizer,
        critic and its target, critic optimizer, ``hard_update`` on each
        (target, online) pair, replay memory, noise process,
        hyper-parameters, and finally ``device_init``.
        """
        cfg = config.get

        # random seed for torch; applied only when the configured seed is
        # positive, and before any network is created.
        seed_value = cfg(MODEL_SEED)
        self.policy_loss = []
        self.critic_loss = []
        if seed_value > 0:
            self.seed(seed_value)

        self.nb_states = cfg(MODEL_STATE_COUNT)
        self.nb_actions = cfg(MODEL_ACTION_COUNT)
        dims = (self.nb_states, self.nb_actions)

        # Keyword arguments shared by each online/target network pair.
        actor_kwargs = dict(
            hidden1=cfg(MODEL_ACTOR_HIDDEN1),
            hidden2=cfg(MODEL_ACTOR_HIDDEN2),
            init_w=cfg(MODEL_INIT_WEIGHT),
        )
        critic_kwargs = dict(
            hidden1=cfg(MODEL_CRITIC_HIDDEN1),
            hidden2=cfg(MODEL_CRITIC_HIDDEN2),
            init_w=cfg(MODEL_INIT_WEIGHT),
        )

        # Actor, its target, and the optimizer over the online actor only.
        self.actor = Actor(*dims, **actor_kwargs)
        self.actor_target = Actor(*dims, **actor_kwargs)
        self.actor_optim = Adam(
            self.actor.parameters(),
            lr=cfg(MODEL_ACTOR_LR),
            weight_decay=cfg(MODEL_ACTOR_WEIGHT_DECAY),
        )

        # Critic, its target, and the optimizer over the online critic only.
        self.critic = Critic(*dims, **critic_kwargs)
        self.critic_target = Critic(*dims, **critic_kwargs)
        self.critic_optim = Adam(
            self.critic.parameters(),
            lr=cfg(MODEL_CRITIC_LR),
            weight_decay=cfg(MODEL_CRITIC_WEIGHT_DECAY),
        )

        # Called with (target, online); presumably copies the online
        # weights into the target network -- confirm against hard_update.
        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = Memory()

        # Noise process with one dimension per action.
        self.random_process = OrnsteinUhlenbeckProcess(
            size=self.nb_actions,
            theta=cfg(RANDOM_THETA),
            mu=cfg(RANDOM_MU),
            sigma=cfg(RANDOM_SIGMA),
        )

        # Hyper-parameters
        self.batch_size = cfg(MODEL_BATCH_SIZE)
        self.tau = cfg(MODEL_TARGET_TAU)
        self.discount = cfg(MODEL_DISCOUNT)
        # depsilon is the reciprocal of the configured MODEL_EPSILON value.
        self.depsilon = 1.0 / cfg(MODEL_EPSILON)

        self.model_path = cfg(MODEL_SAVE_PATH)

        # Initial epsilon value.
        self.epsilon = 1.0

        # init device
        self.device_init()