def __init__(self, state_space, action_space, seed, opts):

        self.state_space = state_space
        self.action_space = action_space
        self.seed = seed
        random.seed(seed)

        self.opt = opts
        self.batch_size = opts.batch
        self.memory_size = opts.memory_size
        self.transfer_rate = opts.transfer_rate
        self.gamma = opts.discount_rate
        self.lr = opts.lr
        self.update_freq = opts.update_freq

        # DQNetwork
        self.local_model = DuelingQNetwork(state_space, action_space,
                                           seed).to(device)
        self.target_model = DuelingQNetwork(state_space, action_space,
                                            seed).to(device)
        self.optimizer = Adam(self.local_model.parameters(), lr=self.lr)

        # Replay Memory
        self.memory = replayMemory(action_space, self.memory_size,
                                   self.batch_size, self.seed)

        # How often to update the model
        self.update_every = 0
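
# A minimal sketch of what the DuelingQNetwork used above might look like. The
# real class is defined elsewhere in this repo; the hidden layer size (64) is an
# assumption. The dueling architecture keeps a shared feature layer, splits into
# a state-value head V(s) and an advantage head A(s, a), and recombines them as
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
import torch
import torch.nn as nn
import torch.nn.functional as F

class DuelingQNetwork(nn.Module):
    def __init__(self, state_space, action_space, seed, hidden=64):
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.feature = nn.Linear(state_space, hidden)
        self.value = nn.Linear(hidden, 1)                   # V(s)
        self.advantage = nn.Linear(hidden, action_space)    # A(s, a)

    def forward(self, state):
        x = F.relu(self.feature(state))
        value = self.value(x)
        advantage = self.advantage(x)
        # subtracting the mean advantage keeps the V/A decomposition identifiable
        return value + advantage - advantage.mean(dim=1, keepdim=True)
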
    def __init__(self):

        self.env = gym.make(Params['GAME'])
        
        # setting up parameters
        self.batch_size = Params['BATCH_SIZE']
        self.buffer_size = Params['MEMORY_BUFFER_SIZE']
        self.random_action_prob = Params['RANDOM_ACTION_PROB_START']
        self.random_action_prob_end = Params['RANDOM_ACTION_PROB_END']
        self.frame_skip = Params['FRAME_SKIP']
        self.update_freq = Params['UPDATE_FREQ']
        self.sync_freq = Params['SYNC_FREQ']
        self.rand_prob_step = (self.random_action_prob - self.random_action_prob_end)/Params['ANNEALING_STEP']
        self.reward_discount = Params['REWARD_DISCOUNT']
        self.IMG_X = Params['IMG_X']
        self.IMG_Y = Params['IMG_Y']
        
        self.action_space = self.env.action_space.n
        self.updates = 0
        
        # setting up utilities
        self.memory_buffer = replayMemory(self.IMG_X, self.IMG_Y, self.buffer_size)
        
        self.nn = DDQNet(self.action_space)

        # initialize variables    
        self.sess = tf.Session()
        self.saver = tf.train.Saver()
        self.sess.run(tf.global_variables_initializer())
        
        # restore variables
        self.logger = Logger(self.sess, self.saver)
        self.random_action_prob = self.random_action_prob_end if self.logger.restore() else self.random_action_prob
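
    # Hypothetical usage sketch (the method name anneal_epsilon is an assumption,
    # not part of the example above): rand_prob_step is sized so that subtracting
    # it once per environment step linearly anneals the exploration rate from
    # RANDOM_ACTION_PROB_START down to RANDOM_ACTION_PROB_END over ANNEALING_STEP steps.
    def anneal_epsilon(self):
        if self.random_action_prob > self.random_action_prob_end:
            self.random_action_prob = max(self.random_action_prob_end,
                                          self.random_action_prob - self.rand_prob_step)
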
Example #3
    def __init__(self, state_space, action_space, seed, batch_size):

        self.state_space = state_space
        self.action_space = action_space
        self.seed = seed
        random.seed(seed)
        self.batch_size = batch_size

        # DQNetwork
        self.local_model = pytorch_DQNetwork(state_space, action_space,
                                             seed).to(device)
        self.target_model = pytorch_DQNetwork(state_space, action_space,
                                              seed).to(device)
        self.optimizer = Adam(self.local_model.parameters(), lr=LR)

        # Replay Memory
        self.memory = replayMemory(action_space, MEMORY_SIZE, batch_size, seed)

        # How often to update the model
        self.update_every = 0
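
# A minimal sketch of the replayMemory interface implied by the constructor calls
# in the PyTorch examples above (action_space, memory_size, batch_size, seed).
# The real implementation lives in this repo's memory module, so the field names
# and tensor conversions below are assumptions.
import random
from collections import deque, namedtuple

import numpy as np
import torch

class replayMemory:
    def __init__(self, action_space, memory_size, batch_size, seed):
        self.action_space = action_space
        self.batch_size = batch_size
        self.memory = deque(maxlen=memory_size)
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        # store a single transition
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        # draw a random minibatch and stack it into tensors
        batch = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in batch])).float()
        actions = torch.from_numpy(np.vstack([e.action for e in batch])).long()
        rewards = torch.from_numpy(np.vstack([e.reward for e in batch])).float()
        next_states = torch.from_numpy(np.vstack([e.next_state for e in batch])).float()
        dones = torch.from_numpy(np.vstack([e.done for e in batch]).astype(np.uint8)).float()
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)
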
# import util module
from util import *

# gym and deque are used below; import them explicitly in case the wildcard
# import above does not provide them
import gym
from collections import deque

import torch

torch.manual_seed(0)  # set random seed
import torch.optim as optim

env = gym.make(opts.env)
env.seed(opts.env_seed)

# import replay memory
from memory import replayMemory

memory = replayMemory(env.action_space.n,
                      memory_size=opts.max_iteration,
                      batch_size=opts.batch,
                      seed=0)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

state_size = env.observation_space.shape[0]
policy = Policy(s_size=state_size, h_size=opts.hidden).to(device)

if opts.print_model:
    print("The model: ", policy)
optimizer = optim.Adam(policy.parameters(), lr=opts.lr)
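
# A minimal sketch of the Policy class this script assumes (the real one is
# expected to come from util): a single-hidden-layer MLP with a softmax output
# and an act() helper that samples an action and returns its log-probability,
# which is what a REINFORCE update needs. The default action size of 2 is an
# assumption.
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

class Policy(nn.Module):
    def __init__(self, s_size, h_size, a_size=2):
        super().__init__()
        self.fc1 = nn.Linear(s_size, h_size)
        self.fc2 = nn.Linear(h_size, a_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

    def act(self, state):
        # convert the observation into a batch of one and sample from the policy
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        probs = self.forward(state)
        m = Categorical(probs)
        action = m.sample()
        return action.item(), m.log_prob(action)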


def reinforce(n_episodes=1000, max_t=1000, gamma=1.0, print_every=100):
    scores_deque = deque(maxlen=100)
    scores = []