def NumEpisodesAll(self, configDict):
    """Return the total number of recorded episodes across every run in configDict.

    Each entry of configDict["runs"] supplies the (zDistSuccess, restartType,
    initZOptions) triple that names one run's result file; the counts stored in
    those files are summed.
    """
    total = 0
    for run in configDict["runs"]:
        fileName = self.get_results_name(
            configDict["directory"], run[0], run[1], run[2])
        runResults = ResultFile(fileName)
        runResults.Load()
        total += runResults.NumRuns()
    return total
def next_model(self, configDict, load=False):
    """Switch self.resultFile to the result file described by configDict.

    With load=True, previously saved results are read and the episode
    counter resumes from the stored run count; otherwise it restarts at 0.
    """
    fileName = self.get_results_name(
        configDict["directory"],
        configDict["zDistSuccess"],
        configDict["restartType"],
        configDict["initZOptions"],
    )
    self.resultFile = ResultFile(fileName, numToWrite=self.numToWrite)
    if not load:
        self.episodes = 0
        return
    self.resultFile.Load()
    self.episodes = self.resultFile.NumRuns()
def __init__(self, modelType, modelParams, agentName='', decisionMakerName='', resultFileName='', historyFileName='', directory='', isMultiThreaded=False):
    """Build the algorithm decision maker: directory, result file, history
    manager, and the concrete model named by modelType.

    Args:
        modelType: name of the decision-maker class to instantiate (evaluated).
        modelParams: parameter object; numTrials2Save is read from it.
        agentName / decisionMakerName: identifiers passed through to sub-objects.
        resultFileName / historyFileName: file names under `directory`; empty
            resultFileName means no result file is kept.
        directory: subdirectory (created on demand) for all files; "" -> cwd.
        isMultiThreaded: use a real Lock instead of the no-op EmptyLock.
    """
    super(DecisionMakerAlgoBase, self).__init__(agentName)

    # params
    self.params = modelParams
    # count of trials that the model is active but not learning (used to trim history)
    self.nonTrainingHistCount = 0
    # trial at which the DQN should learn (inserted into model as learn trial num)
    self.trial2LearnModel = -1
    # sync mechanism: real lock only when threads share this object
    self.endRunLock = Lock() if isMultiThreaded else EmptyLock()
    self.printTrain = False

    # create directory
    if directory != "":
        fullDirectoryName = "./" + directory + "/"
        if not os.path.isdir(fullDirectoryName):
            os.makedirs(fullDirectoryName)
    else:
        fullDirectoryName = "./"

    # create result file.
    # Bug fix: default to None so later `self.resultFile != None` checks
    # (e.g. Save / ResetAllData) don't raise AttributeError when no result
    # file name was given.
    if resultFileName != '':
        self.resultFile = ResultFile(fullDirectoryName + resultFileName, modelParams.numTrials2Save, self.agentName)
    else:
        self.resultFile = None

    # create history mngr class
    self.historyMngr = History(modelParams, historyFileName, fullDirectoryName, isMultiThreaded)
    self.startScope = fullDirectoryName

    # create decision maker class
    # NOTE(review): eval executes arbitrary code -- modelType must come from
    # trusted configuration only, never from external input.
    decisionClass = eval(modelType)
    with tf.variable_scope(self.startScope):
        self.decisionMaker = decisionClass(modelParams, decisionMakerName, fullDirectoryName, isMultiThreaded=isMultiThreaded, agentName=self.agentName)
class BaseNaiveDecisionMaker(BaseDecisionMaker):
    """Base class for scripted (non-learning) decision makers.

    Optionally records per-trial results to a ResultFile; result writes are
    serialized with a lock because end_run may be called from several threads.
    """

    def __init__(self, numTrials2Save=None, agentName="", resultFName=None, directory=None):
        """Set counters and, when resultFName is given, open the result file
        (creating `directory` on demand)."""
        super(BaseNaiveDecisionMaker, self).__init__(agentName)
        self.resultFName = resultFName
        self.trialNum = 0
        self.numTrials2Save = numTrials2Save
        if resultFName is not None:
            # results may be written from multiple threads -> guard with a lock
            self.lock = Lock()
            if directory is not None:
                fullDirectoryName = "./" + directory + "/"
                if not os.path.isdir(fullDirectoryName):
                    os.makedirs(fullDirectoryName)
            else:
                fullDirectoryName = "./"
            self.resultFile = ResultFile(fullDirectoryName + resultFName, numTrials2Save, agentName)

    def ActionsValues(self, state, validActions, target=True):
        """Return the action-value estimates produced by choose_action."""
        _, values = self.choose_action(state, validActions)
        return values

    def end_run(self, r, score, steps):
        """Record the end of one trial.

        Returns True when this trial triggered a flush of results to disk
        (every numTrials2Save trials), False otherwise.
        """
        saveFile = False
        self.trialNum += 1
        print("\t", threading.current_thread().getName(), ":", self.agentName, " #trials =", self.trialNum, "reward =", r)
        if self.resultFName is not None:
            # Bug fix: use the context manager instead of bare acquire/release
            # so the lock is released even if resultFile.end_run raises.
            with self.lock:
                if self.trialNum % self.numTrials2Save == 0:
                    saveFile = True
                self.resultFile.end_run(r, score, steps, saveFile)
        return saveFile
def __init__(self, num_state, num_action, configDict, createResults=True, numToWrite=10):
    """Store the external environment's dimensions and, when createResults is
    True, open the "talkerResults" file under configDict["directory"]."""
    # parameters of the external environment
    self.num_state = num_state
    self.num_action = num_action
    self.numToWrite = numToWrite

    saveDir = "./" + configDict["directory"] + "/"
    if createResults:
        self.resultFile = ResultFile(saveDir + "talkerResults", numToWrite=numToWrite)
    else:
        self.resultFile = None

    self.episodes = 0
def __init__(self, numTrials2Save=None, agentName="", resultFName=None, directory=None):
    """Initialize the naive decision maker.

    When resultFName is given, a lock and a ResultFile are created (the
    target directory is created on demand); otherwise no result bookkeeping
    attributes are set up.
    """
    super(BaseNaiveDecisionMaker, self).__init__(agentName)
    self.resultFName = resultFName
    self.trialNum = 0
    self.numTrials2Save = numTrials2Save
    # Idiom fix: identity comparison with None (`is not None`), not `!= None`.
    if resultFName is not None:
        # result writes may come from several threads -> guard with a lock
        self.lock = Lock()
        if directory is not None:
            fullDirectoryName = "./" + directory + "/"
            if not os.path.isdir(fullDirectoryName):
                os.makedirs(fullDirectoryName)
        else:
            fullDirectoryName = "./"
        self.resultFile = ResultFile(fullDirectoryName + resultFName, numTrials2Save, agentName)
class DecisionMakerAlgoBase(BaseDecisionMaker):
    """Shared base for algorithm-backed decision makers.

    Owns the concrete model (built from modelType), a HistoryMngr, and an
    optional ResultFile, and mostly forwards calls to those collaborators.
    """

    def __init__(self, modelType, modelParams, agentName='', decisionMakerName='', resultFileName='', historyFileName='', directory='', isMultiThreaded=False):
        """Build directory, result file, history manager and the model.

        Args:
            modelType: name of the decision-maker class to instantiate (evaluated).
            modelParams: parameter object; numTrials2Save / numTrials2Learn /
                normalizeState are read from it.
            resultFileName: empty string means "keep no result file".
            directory: subdirectory (created on demand); "" -> cwd.
            isMultiThreaded: use a real Lock instead of the no-op EmptyLock.
        """
        super(DecisionMakerAlgoBase, self).__init__(agentName)

        # params
        self.params = modelParams
        # count of trials that the model is active but not learning (used to trim history)
        self.nonTrainingHistCount = 0
        # trial at which the DQN should learn (inserted into model as learn trial num)
        self.trial2LearnModel = -1
        # sync mechanism: real lock only when threads share this object
        self.endRunLock = Lock() if isMultiThreaded else EmptyLock()

        # create directory
        if directory != "":
            fullDirectoryName = "./" + directory + "/"
            if not os.path.isdir(fullDirectoryName):
                os.makedirs(fullDirectoryName)
        else:
            fullDirectoryName = "./"

        # create result file.
        # Bug fix: default to None -- Save/ResetAllData/end_test_run/NumTestRuns
        # below test `self.resultFile is not None` and would otherwise raise
        # AttributeError whenever resultFileName was left empty.
        if resultFileName != '':
            self.resultFile = ResultFile(fullDirectoryName + resultFileName, modelParams.numTrials2Save, self.agentName)
        else:
            self.resultFile = None

        # create history mngr class
        self.historyMngr = HistoryMngr(modelParams, historyFileName, fullDirectoryName, isMultiThreaded)
        self.startScope = fullDirectoryName

        # create decision maker class
        # NOTE(review): eval executes arbitrary code -- modelType must come
        # from trusted configuration only.
        decisionClass = eval(modelType)
        with tf.variable_scope(self.startScope):
            self.decisionMaker = decisionClass(modelParams, decisionMakerName, fullDirectoryName, isMultiThreaded=isMultiThreaded, agentName=self.agentName)

    def AddHistory(self):
        return self.historyMngr.AddHistory()

    def choose_action(self, state, validActions, targetValues=False):
        """Choose an action, normalizing the state first when the model is
        trained and normalization is configured."""
        if not self.decisionMaker.TakeDfltValues() and self.params.normalizeState:
            state = self.historyMngr.NormalizeState(state)
        return self.decisionMaker.choose_action(state, validActions, targetValues)

    def NumRuns(self):
        return self.decisionMaker.NumRuns()

    def TrimHistory(self):
        """Count a non-training trial; every numTrials2Learn of them, trim the
        stored history."""
        count = self.nonTrainingHistCount + 1
        if count % self.params.numTrials2Learn == 0:
            self.historyMngr.TrimHistory()
            print("\t", threading.current_thread().getName(), ":", self.agentName, "->Trim History to size =", self.historyMngr.Size())
        self.nonTrainingHistCount += 1

    def ResetHistory(self, dump2Old=True, save=False):
        self.historyMngr.Reset(dump2Old, save)

    def ResetAllData(self, resetDecisionMaker=True, resetHistory=True, resetResults=True):
        """Reset the selected sub-components (model / history / results)."""
        if resetDecisionMaker:
            self.decisionMaker.Reset()
        if resetHistory:
            self.historyMngr.Reset()
        if resetResults and self.resultFile is not None:
            self.resultFile.Reset()

    def ActionsValues(self, state, validActions, targetValues=False):
        """Return action-value estimates, normalizing the state when the model
        is past default values."""
        if not self.decisionMaker.TakeDfltValues():
            state = self.historyMngr.NormalizeState(state)
        return self.decisionMaker.ActionsValues(state, validActions, targetValues)

    def CopyTarget2Model(self, numRuns):
        print("\t", threading.current_thread().getName(), ":", self.agentName, "->Copy Target 2 Model")
        self.decisionMaker.CopyTarget2Model(numRuns)

    def DiscountFactor(self):
        return self.decisionMaker.DiscountFactor()

    def DrawStateFromHist(self, realState=True):
        return self.historyMngr.DrawState(realState)

    def GetMinReward(self):
        return self.historyMngr.GetMinReward()

    def SetMinReward(self, r):
        self.historyMngr.SetMinReward(r)

    def GetMaxReward(self):
        return self.historyMngr.GetMaxReward()

    def SetMaxReward(self, r):
        self.historyMngr.SetMaxReward(r)

    def DecisionMakerType(self):
        return self.decisionMaker.DecisionMakerType()

    def NumDfltRuns(self):
        return self.decisionMaker.NumDfltRuns()

    def DfltValueInitialized(self):
        return self.decisionMaker.DfltValueInitialized()

    def CheckModel(self, plotGraphs):
        # hook for subclasses; base implementation does nothing
        pass

    def Save(self):
        """Persist model, history, and (when present) results."""
        self.decisionMaker.Save()
        self.historyMngr.Save()
        if self.resultFile is not None:
            self.resultFile.Save()

    def end_test_run(self, r, score, steps):
        if self.resultFile is not None:
            self.resultFile.end_run(r, score, steps, True)

    def NumTestRuns(self):
        """Number of test runs recorded so far (0 when no result file)."""
        numRuns = 0
        if self.resultFile is not None:
            numRuns = self.resultFile.NumRuns()
        return numRuns
class AlgoBase(object):
    """Base class for talker algorithms: no-op learning hooks plus
    result-file bookkeeping shared by all concrete implementations."""

    def __init__(self, num_state, num_action, configDict, createResults=True, numToWrite=10):
        """Store environment dimensions; optionally open the default
        "talkerResults" file under configDict["directory"]."""
        # parameters of External Environment:
        self.num_state = num_state
        self.num_action = num_action
        self.numToWrite = numToWrite
        dir2Save = "./" + configDict["directory"] + "/"
        if createResults:
            self.resultFile = ResultFile(dir2Save + "talkerResults", numToWrite=numToWrite)
        else:
            self.resultFile = None
        self.episodes = 0

    def act(self, state):
        """Default policy: no action (-1). Subclasses override."""
        return -1

    def load(self):
        """Load saved results (if a result file exists) and resume the
        episode counter from the stored run count."""
        if self.resultFile is not None:
            loaded = self.resultFile.Load()
            if loaded:
                self.episodes = self.resultFile.NumRuns()
                # Bug fix: was a Python 2 `print` statement (SyntaxError under
                # Python 3, unlike every other print in this file) and had a
                # typo ("episdoes").
                print("model num episodes =", self.episodes)

    def observe(self, s, a, r, s_, done):
        # learning hook; no-op in the base class
        pass

    def replay(self):
        # learning hook; no-op in the base class
        pass

    def end_episode(self, r, sumR, steps, realR=0.0):
        """Forward episode results to the result file; returns (saved, "")."""
        saved = False
        if self.resultFile is not None:
            # NOTE(review): realR occupies the 4th ResultFile.end_run argument,
            # which other call sites in this file use as a save flag -- confirm
            # this ResultFile variant expects realR here.
            saved = self.resultFile.end_run(r, sumR, steps, realR)
        return saved, ""

    def real_action(self):
        return False

    def get_results_name(self, directory, zDistSuccess, restartype, initZ):
        """Build the (whitespace-free) result-file path for one run config."""
        name = "./" + directory + "/" + "talkerResults_" + str(
            zDistSuccess) + "_" + str(restartype) + "_" + str(initZ)
        return name.replace(" ", "")

    def next_model(self, configDict, load=False):
        """Switch self.resultFile to the file named by configDict; with
        load=True resume the episode counter from stored results."""
        resultFName = self.get_results_name(configDict["directory"], configDict["zDistSuccess"], configDict["restartType"], configDict["initZOptions"])
        self.resultFile = ResultFile(resultFName, numToWrite=self.numToWrite)
        if load:
            self.resultFile.Load()
            self.episodes = self.resultFile.NumRuns()
        else:
            self.episodes = 0

    def Results(self, size, key="reward"):
        # assumes a result file exists; callers must not use this with
        # createResults=False
        return self.resultFile.Results(size, key)

    def NumEpisodesAll(self, configDict):
        """Sum recorded episode counts over every run listed in
        configDict["runs"]."""
        allRuns = configDict["runs"]
        numEpisodes = 0
        for run in allRuns:
            resultName = self.get_results_name(configDict["directory"], run[0], run[1], run[2])
            result = ResultFile(resultName)
            result.Load()
            numEpisodes += result.NumRuns()
        return numEpisodes