def NumEpisodesAll(self, configDict):
        allRuns = configDict["runs"]
        numEpisodes = 0
        for run in allRuns:
            resultName = self.get_results_name(configDict["directory"], run[0],
                                               run[1], run[2])
            result = ResultFile(resultName)
            result.Load()
            numEpisodes += result.NumRuns()

        return numEpisodes

    def next_model(self, configDict, load=False):
        resultFName = self.get_results_name(configDict["directory"],
                                            configDict["zDistSuccess"],
                                            configDict["restartType"],
                                            configDict["initZOptions"])

        self.resultFile = ResultFile(resultFName, numToWrite=self.numToWrite)
        if load:
            self.resultFile.Load()
            self.episodes = self.resultFile.NumRuns()
        else:
            self.episodes = 0
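
The examples in this listing all depend on a ResultFile class that is never shown. Below is a minimal sketch of the interface implied by the calls above (Load, NumRuns, end_run, Save, Reset, Results); every body is an assumption, since the real implementation lives elsewhere in the project:

class ResultFile(object):
    """Inferred interface only; the real class persists episode results."""

    def __init__(self, name, numToWrite=100, agentName=""):
        self.name = name                  # base path of the results file
        self.numToWrite = numToWrite      # save cadence, in episodes
        self.agentName = agentName
        self.results = []                 # one (reward, score, steps) per episode

    def Load(self):
        # load previously saved results; return True on success
        return False

    def NumRuns(self):
        return len(self.results)

    def end_run(self, r, score, steps, saveFile):
        self.results.append((r, score, steps))
        if saveFile:
            self.Save()
        return saveFile

    def Save(self):
        pass  # persist self.results (storage format unknown from this listing)

    def Reset(self):
        self.results = []

    def Results(self, size, key="reward"):
        # return the last `size` recorded rewards (other keys omitted here)
        return [res[0] for res in self.results[-size:]]
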
Example #3
    def __init__(self,
                 modelType,
                 modelParams,
                 agentName='',
                 decisionMakerName='',
                 resultFileName='',
                 historyFileName='',
                 directory='',
                 isMultiThreaded=False):
        super(DecisionMakerAlgoBase, self).__init__(agentName)

        # params
        self.params = modelParams

        # count of trials in which the model is active but not learning (used to trim history)
        self.nonTrainingHistCount = 0
        # trial number at which the DQN should learn (passed to the model as the learn-trial number)
        self.trial2LearnModel = -1
        # sync mechanism
        self.endRunLock = Lock() if isMultiThreaded else EmptyLock()

        self.printTrain = False

        # create directory
        if directory != "":
            fullDirectoryName = "./" + directory + "/"
            if not os.path.isdir(fullDirectoryName):
                os.makedirs(fullDirectoryName)
        else:
            fullDirectoryName = "./"

        # create result file (explicitly None when no file name is given)
        if resultFileName != '':
            self.resultFile = ResultFile(fullDirectoryName + resultFileName,
                                         modelParams.numTrials2Save,
                                         self.agentName)
        else:
            self.resultFile = None

        # create history mngr class
        self.historyMngr = History(modelParams, historyFileName,
                                   fullDirectoryName, isMultiThreaded)

        self.startScope = fullDirectoryName
        # resolve the decision-maker class from its name; modelType must name a
        # class visible in this scope, because eval() evaluates the string directly
        decisionClass = eval(modelType)

        with tf.variable_scope(self.startScope):
            self.decisionMaker = decisionClass(modelParams,
                                               decisionMakerName,
                                               fullDirectoryName,
                                               isMultiThreaded=isMultiThreaded,
                                               agentName=self.agentName)
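
EmptyLock is used above as the single-threaded fallback but is not defined anywhere in this listing. A plausible minimal sketch, assuming it is a no-op stand-in that mirrors the acquire/release interface of threading.Lock:

class EmptyLock(object):
    # no-op lock: lets single-threaded code call acquire/release unconditionally
    def acquire(self):
        pass

    def release(self):
        pass
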
Example #4
class BaseNaiveDecisionMaker(BaseDecisionMaker):
    def __init__(self,
                 numTrials2Save=None,
                 agentName="",
                 resultFName=None,
                 directory=None):
        super(BaseNaiveDecisionMaker, self).__init__(agentName)
        self.resultFName = resultFName
        self.trialNum = 0
        self.numTrials2Save = numTrials2Save

        if resultFName is not None:
            self.lock = Lock()
            if directory is not None:
                fullDirectoryName = "./" + directory + "/"
                if not os.path.isdir(fullDirectoryName):
                    os.makedirs(fullDirectoryName)
            else:
                fullDirectoryName = "./"

            self.resultFile = ResultFile(fullDirectoryName + resultFName,
                                         numTrials2Save, agentName)

    def ActionsValues(self, state, validActions, target=True):
        _, values = self.choose_action(state, validActions)

        return values

    def end_run(self, r, score, steps):
        saveFile = False
        self.trialNum += 1

        print("\t",
              threading.current_thread().getName(), ":", self.agentName,
              " #trials =", self.trialNum, "reward =", r)
        if self.resultFName is not None:
            self.lock.acquire()
            if self.trialNum % self.numTrials2Save == 0:
                saveFile = True

            self.resultFile.end_run(r, score, steps, saveFile)
            self.lock.release()

        return saveFile
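
BaseNaiveDecisionMaker leaves choose_action to subclasses; ActionsValues above assumes it returns an (action, values) pair. A hypothetical minimal subclass, purely to illustrate that implied contract (RandomDecisionMaker is not a name from the source):

import random

class RandomDecisionMaker(BaseNaiveDecisionMaker):
    def choose_action(self, state, validActions, targetValues=False):
        # a naive agent picks without value estimates, so values is None
        return random.choice(validActions), None
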
Example #5
    def __init__(self,
                 num_state,
                 num_action,
                 configDict,
                 createResults=True,
                 numToWrite=10):
        # parameters of External Environment:
        self.num_state = num_state
        self.num_action = num_action
        self.numToWrite = numToWrite

        dir2Save = "./" + configDict["directory"] + "/"
        if createResults:
            self.resultFile = ResultFile(dir2Save + "talkerResults",
                                         numToWrite=numToWrite)
        else:
            self.resultFile = None

        self.episodes = 0
Example #6
    def __init__(self,
                 numTrials2Save=None,
                 agentName="",
                 resultFName=None,
                 directory=None):
        super(BaseNaiveDecisionMaker, self).__init__(agentName)
        self.resultFName = resultFName
        self.trialNum = 0
        self.numTrials2Save = numTrials2Save

        if resultFName is not None:
            self.lock = Lock()
            if directory is not None:
                fullDirectoryName = "./" + directory + "/"
                if not os.path.isdir(fullDirectoryName):
                    os.makedirs(fullDirectoryName)
            else:
                fullDirectoryName = "./"

            self.resultFile = ResultFile(fullDirectoryName + resultFName,
                                         numTrials2Save, agentName)
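
Note the precondition implied by this constructor (and the identical one in Example #4): when resultFName is given, numTrials2Save must be set as well, because end_run computes trialNum % numTrials2Save and the default of None would raise a TypeError.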
Example #7
class DecisionMakerAlgoBase(BaseDecisionMaker):
    def __init__(self,
                 modelType,
                 modelParams,
                 agentName='',
                 decisionMakerName='',
                 resultFileName='',
                 historyFileName='',
                 directory='',
                 isMultiThreaded=False):
        super(DecisionMakerAlgoBase, self).__init__(agentName)

        # params
        self.params = modelParams

        # count of trials in which the model is active but not learning (used to trim history)
        self.nonTrainingHistCount = 0
        # trial number at which the DQN should learn (passed to the model as the learn-trial number)
        self.trial2LearnModel = -1
        # sync mechanism
        self.endRunLock = Lock() if isMultiThreaded else EmptyLock()

        # create directory
        if directory != "":
            fullDirectoryName = "./" + directory + "/"
            if not os.path.isdir(fullDirectoryName):
                os.makedirs(fullDirectoryName)
        else:
            fullDirectoryName = "./"

        # create result file (explicitly None when no file name is given, so the
        # None checks in Save/end_test_run/NumTestRuns below are safe)
        if resultFileName != '':
            self.resultFile = ResultFile(fullDirectoryName + resultFileName,
                                         modelParams.numTrials2Save,
                                         self.agentName)
        else:
            self.resultFile = None

        # create history mngr class
        self.historyMngr = HistoryMngr(modelParams, historyFileName,
                                       fullDirectoryName, isMultiThreaded)

        self.startScope = fullDirectoryName
        # resolve the decision-maker class from its name; modelType must name a
        # class visible in this scope, because eval() evaluates the string directly
        decisionClass = eval(modelType)
        with tf.variable_scope(self.startScope):
            self.decisionMaker = decisionClass(modelParams,
                                               decisionMakerName,
                                               fullDirectoryName,
                                               isMultiThreaded=isMultiThreaded,
                                               agentName=self.agentName)

    def AddHistory(self):
        return self.historyMngr.AddHistory()

    def choose_action(self, state, validActions, targetValues=False):
        if (not self.decisionMaker.TakeDfltValues()
                and self.params.normalizeState):
            state = self.historyMngr.NormalizeState(state)

        return self.decisionMaker.choose_action(state, validActions,
                                                targetValues)

    def NumRuns(self):
        return self.decisionMaker.NumRuns()

    def TrimHistory(self):
        count = self.nonTrainingHistCount + 1
        if count % self.params.numTrials2Learn == 0:
            self.historyMngr.TrimHistory()
            print("\t",
                  threading.current_thread().getName(), ":", self.agentName,
                  "->Trim History to size =", self.historyMngr.Size())

        self.nonTrainingHistCount += 1

    def ResetHistory(self, dump2Old=True, save=False):
        self.historyMngr.Reset(dump2Old, save)

    def ResetAllData(self,
                     resetDecisionMaker=True,
                     resetHistory=True,
                     resetResults=True):
        if resetDecisionMaker:
            self.decisionMaker.Reset()

        if resetHistory:
            self.historyMngr.Reset()

        if resetResults and self.resultFile is not None:
            self.resultFile.Reset()

    def ActionsValues(self, state, validActions, targetValues=False):
        if not self.decisionMaker.TakeDfltValues():
            state = self.historyMngr.NormalizeState(state)

        return self.decisionMaker.ActionsValues(state, validActions,
                                                targetValues)

    def CopyTarget2Model(self, numRuns):
        print("\t",
              threading.current_thread().getName(), ":", self.agentName,
              "->Copy Target 2 Model")
        self.decisionMaker.CopyTarget2Model(numRuns)

    def DiscountFactor(self):
        return self.decisionMaker.DiscountFactor()

    def DrawStateFromHist(self, realState=True):
        return self.historyMngr.DrawState(realState)

    def GetMinReward(self):
        return self.historyMngr.GetMinReward()

    def SetMinReward(self, r):
        self.historyMngr.SetMinReward(r)

    def GetMaxReward(self):
        return self.historyMngr.GetMaxReward()

    def SetMaxReward(self, r):
        self.historyMngr.SetMaxReward(r)

    def DecisionMakerType(self):
        return self.decisionMaker.DecisionMakerType()

    def NumDfltRuns(self):
        return self.decisionMaker.NumDfltRuns()

    def DfltValueInitialized(self):
        return self.decisionMaker.DfltValueInitialized()

    def CheckModel(self, plotGraphs):
        pass

    def Save(self):
        self.decisionMaker.Save()
        self.historyMngr.Save()
        if self.resultFile is not None:
            self.resultFile.Save()

    def end_test_run(self, r, score, steps):
        if self.resultFile is not None:
            self.resultFile.end_run(r, score, steps, True)

    def NumTestRuns(self):
        numRuns = 0
        if self.resultFile is not None:
            numRuns = self.resultFile.NumRuns()

        return numRuns

class AlgoBase(object):
    def __init__(self,
                 num_state,
                 num_action,
                 configDict,
                 createResults=True,
                 numToWrite=10):
        # parameters of External Environment:
        self.num_state = num_state
        self.num_action = num_action
        self.numToWrite = numToWrite

        dir2Save = "./" + configDict["directory"] + "/"
        if createResults:
            self.resultFile = ResultFile(dir2Save + "talkerResults",
                                         numToWrite=numToWrite)
        else:
            self.resultFile = None

        self.episodes = 0

    def act(self, state):
        return -1

    def load(self):
        if self.resultFile is not None:
            loaded = self.resultFile.Load()
            if loaded:
                self.episodes = self.resultFile.NumRuns()
            print("model num episodes =", self.episodes)

    def observe(self, s, a, r, s_, done):
        pass

    def replay(self):
        pass

    def end_episode(self, r, sumR, steps, realR=0.0):
        saved = False
        if self.resultFile is not None:
            saved = self.resultFile.end_run(r, sumR, steps, realR)

        return saved, ""

    def real_action(self):
        return False

    def get_results_name(self, directory, zDistSuccess, restartype, initZ):
        name = "./" + directory + "/" + "talkerResults_" + str(
            zDistSuccess) + "_" + str(restartype) + "_" + str(initZ)
        return name.replace(" ", "")
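    # Illustrative values only (not from the source): with directory="out",
    # zDistSuccess=0.5, restartype="rand", initZ=[0, 1], this builds
    # "./out/talkerResults_0.5_rand_[0, 1]" and the final replace(" ", "")
    # strips the space inside str([0, 1]), giving "talkerResults_0.5_rand_[0,1]".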

    def next_model(self, configDict, load=False):
        resultFName = self.get_results_name(configDict["directory"],
                                            configDict["zDistSuccess"],
                                            configDict["restartType"],
                                            configDict["initZOptions"])

        self.resultFile = ResultFile(resultFName, numToWrite=self.numToWrite)
        if load:
            self.resultFile.Load()
            self.episodes = self.resultFile.NumRuns()
        else:
            self.episodes = 0

    def Results(self, size, key="reward"):
        return self.resultFile.Results(size, key)

    def NumEpisodesAll(self, configDict):
        allRuns = configDict["runs"]
        numEpisodes = 0
        for run in allRuns:
            resultName = self.get_results_name(configDict["directory"], run[0],
                                               run[1], run[2])
            result = ResultFile(resultName)
            result.Load()
            numEpisodes += result.NumRuns()

        return numEpisodes
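
A hypothetical end-to-end sketch of driving AlgoBase: the configDict keys mirror exactly the ones the class reads, but the concrete values are invented and ResultFile must come from the surrounding project:

configDict = {
    "directory": "talker",
    "zDistSuccess": 0.5,
    "restartType": "random",
    "initZOptions": [0, 1],
    "runs": [[0.5, "random", [0, 1]]],
}

algo = AlgoBase(num_state=4, num_action=2, configDict=configDict,
                createResults=False)
algo.next_model(configDict, load=False)   # bind a per-configuration result file
saved, _ = algo.end_episode(r=1.0, sumR=10.0, steps=20)
print("episodes recorded =", algo.NumEpisodesAll(configDict))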