def __init__(self,
                 index,
                 model,
                 env,
                 memory,
                 lr=1e-2,
                 preprocess=T.ToTensor(),
                 path=None,
                 frompath=None,
                 num_episodes=1000,
                 epsend=0.05,
                 epsstart=0.9,
                 epsdecay=200,
                 k=4,
                 strategy='future'):
        """Set up one training worker and prepare (but do not start) its thread.

        Args:
            index: Worker identifier; used in the log message and in the
                stats file name (``logs<index>.csv``).
            model: Model whose ``parameters()`` are handed to the Adam
                optimizer and which is forwarded to ``train``.
            env: Environment id passed to ``gym.make``; kept as ``envstr``.
            memory: Replay memory object, stored and forwarded to ``train``.
            lr: Learning rate for the Adam optimizer.
            preprocess: Transform forwarded to ``train``.
            path: Location given to ``statsLogger`` and to ``train``.
            frompath: Forwarded to ``train`` unchanged.
            num_episodes: Forwarded to ``train`` unchanged.
            epsend, epsstart, epsdecay: Forwarded to ``train``; presumably
                the epsilon-greedy schedule -- confirm against ``train``.
            k, strategy: HER parameters, forwarded to ``train``.
        """
        # Plain configuration values.
        self.index = index
        self.model = model
        self.memory = memory
        self.lr = lr
        self.preprocess = preprocess
        self.path = path
        self.frompath = frompath
        self.num_episodes = num_episodes
        self.epsend, self.epsstart, self.epsdecay = epsend, epsstart, epsdecay

        # HER parameters.
        self.k, self.strategy = k, strategy

        # Environment: created from its id and reset once up front.
        self.envstr = env
        self.env = gym.make(env)
        self.env.reset()

        # Alternative left in the original source:
        # optim.RMSprop(self.model.parameters(), lr=self.lr)
        self.optimizer = optim.Adam(model.parameters(), lr=lr, eps=1.5e-4)
        bashlogger.info('Optimizer {}: ok.'.format(index))

        self.sl = statsLogger(path=path,
                              filename='logs{}.csv'.format(index))

        def _run_training():
            # Attributes are read when the worker runs, not when it is
            # built, so changes made before the thread starts are honoured.
            return train(model=self.model,
                         env=self.env,
                         memory=self.memory,
                         optimizer=self.optimizer,
                         logger=self.sl,
                         preprocess=self.preprocess,
                         path=self.path,
                         frompath=self.frompath,
                         num_episodes=self.num_episodes,
                         epsend=self.epsend,
                         epsstart=self.epsstart,
                         epsdecay=self.epsdecay,
                         k=self.k,
                         strategy=self.strategy)

        self.workerfn = _run_training
        # The thread is only constructed here; the caller starts it.
        self.thread = threading.Thread(target=self.workerfn)
예제 #2
0
	def __init__(self,index,model,env,memory,lr=1e-2,preprocess=T.ToTensor(),path=None,frompath=None,num_episodes=1000,epsend=0.05,epsstart=0.9,epsdecay=200,TAU=1e-3,k=4,strategy='future') :
		"""Set up one training worker with its own model copy; the thread is built but not started.

		Args:
			index: Worker identifier; used in the log message and in the
				stats file name (``logs<index>.csv``).
			model: Source model. Its ``parameters()`` feed the Adam
				optimizer; a deep copy of it (``wmodel``) is what gets
				passed to ``self.train``.
			env: Environment id passed to ``gym.make``; kept as ``envstr``.
			memory: Replay memory object, stored and forwarded.
			lr: Learning rate for the Adam optimizer.
			preprocess: Transform forwarded to ``self.train``.
			path: Location given to ``statsLogger`` and ``self.train``.
			frompath: Forwarded to ``self.train`` unchanged.
			num_episodes: Forwarded to ``self.train`` unchanged.
			epsend, epsstart, epsdecay: Forwarded to ``self.train``;
				presumably the epsilon-greedy schedule -- confirm there.
			TAU: Stored on the instance only; it is not forwarded here.
			k, strategy: HER parameters, forwarded to ``self.train``.
		"""
		self.index = index
		self.model = model

		# Worker-local copy of the model, synchronised with the source via
		# hard_update and moved to the GPU when the module-level use_cuda
		# flag is set.
		worker_model = copy.deepcopy(model)
		hard_update(worker_model,model)
		if use_cuda :
			worker_model = worker_model.cuda()
		self.wmodel = worker_model

		# Environment: created from its id and reset once up front.
		self.envstr = env
		self.env = gym.make(env)
		self.env.reset()

		self.memory = memory
		self.lr, self.TAU = lr, TAU

		# NOTE(review): the optimizer is built over self.model's parameters
		# while training receives self.wmodel -- presumably intentional
		# (shared model updated from worker copies); confirm in self.train.
		self.optimizer = optim.Adam(model.parameters(), lr=lr)
		bashlogger.info('Optimizer {}: ok.'.format(index) )

		self.preprocess = preprocess
		self.path, self.frompath = path, frompath
		self.num_episodes = num_episodes
		self.epsend, self.epsstart, self.epsdecay = epsend, epsstart, epsdecay

		# HER parameters.
		self.k, self.strategy = k, strategy

		self.sl = statsLogger(path=path,filename='logs{}.csv'.format(index) )

		def _run_training() :
			# Attributes are read when the worker runs, not when it is built.
			return self.train(model=self.wmodel,
							env=self.env,
							memory=self.memory,
							optimizer=self.optimizer,
							logger=self.sl,
							preprocess=self.preprocess,
							path=self.path,
							frompath=self.frompath,
							num_episodes=self.num_episodes,
							epsend=self.epsend,
							epsstart=self.epsstart,
							epsdecay=self.epsdecay,
							k=self.k,
							strategy=self.strategy,
							singlegoal=False)

		self.workerfn = _run_training
		# The thread is only constructed here; the caller starts it.
		self.thread = threading.Thread(target=self.workerfn)
예제 #3
0
	def __init__(self,index,model,env,memory,preprocess=T.ToTensor(),path=None,frompath=None,num_episodes=1000,nbr_ep_per_train=1,HER=None,use_cuda=True,rendering=False) :
		"""Set up one training worker; the thread is built but not started.

		Args:
			index: Worker identifier; forwarded to ``self.trainOUT`` and
				used in the stats file name (``logs<index>.csv``).
			model: Model object; must provide ``generate_optimizers()``.
			env: Environment id passed to ``gym.make``; kept as ``envstr``.
			memory: Replay memory object, stored and forwarded.
			preprocess: Transform forwarded to ``self.trainOUT``.
			path: Location given to ``statsLogger`` and ``self.trainOUT``.
			frompath: Forwarded to ``self.trainOUT`` unchanged.
			num_episodes: Forwarded to ``self.trainOUT`` unchanged.
			nbr_ep_per_train: Stored on the instance only; it is not
				forwarded to ``self.trainOUT`` here.
			HER: Dict of HER settings. When omitted (``None``) a fresh
				``{'use_her':True,'k':4,'strategy':'future','singlegoal':False}``
				is created for this instance. A dict supplied by the
				caller is stored as-is (not copied).
			use_cuda: Forwarded to ``self.trainOUT``.
			rendering: Forwarded to ``self.trainOUT``.
		"""
		# A dict literal as the default value would be created once and
		# shared by every instance; build a new one per call instead so
		# that one worker mutating its HER settings cannot affect others.
		if HER is None :
			HER = {'use_her':True,'k':4,'strategy':'future','singlegoal':False}

		self.index = index
		self.model = model
		self.envstr = env
		self.env = gym.make(self.envstr)
		self.env.reset()

		self.memory = memory
		self.optimizers = self.model.generate_optimizers()
		
		self.preprocess = preprocess
		self.path = path
		self.frompath = frompath
		
		self.num_episodes = num_episodes
		self.nbr_ep_per_train = nbr_ep_per_train
		#HER params :
		self.HER = HER

		self.use_cuda = use_cuda
		self.rendering = rendering
		
		self.sl = statsLogger(path=self.path,filename='logs{}.csv'.format(self.index) )
		
		# Attributes are read when the worker runs, not when it is built.
		#self.workerfn = lambda: self.trainIN( index=self.index,
		self.workerfn = lambda: self.trainOUT( index=self.index,
										model=self.model,
										env=self.env,
										memory=self.memory,
										optimizers=self.optimizers,
										logger=self.sl,
										preprocess=self.preprocess,
										path=self.path,
										frompath=self.frompath,
										num_episodes=self.num_episodes,
										HER=self.HER,
										use_cuda=self.use_cuda,
										rendering=self.rendering)

		# The thread is only constructed here; the caller starts it.
		self.thread = threading.Thread(target=self.workerfn)