Example #1
    def addToMemory(self, gameState, pastState):
        if type(pastState) in (np.ndarray, list, tuple):  # after a reset/start, pastState is simply False
            past_conv_inputs, past_other_inputs, _ = self.getAgentState(*pastState)
            s = (past_conv_inputs, past_other_inputs)
            a = self.getAction(*pastState)  # the (throttle, brake, steer) tuple
            r = self.calculateReward(*gameState)
            conv_inputs, other_inputs, _ = self.getAgentState(*gameState)
            s2 = (conv_inputs, other_inputs)
            markovtuple = [s, a, r, s2, False]  # not actually a tuple, because punish & endEpisode need something mutable
            self.memory.append(markovtuple)
            print("adding to Memory:", a, r, level=4)
            #values for evaluation:
            
#            statesample = np.array(self.model.getstatecountfeaturevec(self.makeInferenceUsable(s),[self.makeNetUsableAction(a)])[0])
#                
#            relativeNums = np.zeros_like(statesample)
#            for i in range(len(statesample)):
#                relativeNums[i] = (self.CountsByElement[i][statesample[i]]+0.5) / (self.allN+1)
#        
#            count = np.prod(np.array(relativeNums))*1e+23
#            
            count = 0
            
            stateval = self.model.statevalue(self.makeInferenceUsable(s))[0] 
            qval = self.model.qvalue(self.makeInferenceUsable(s),[self.makeNetUsableAction(a)])[0]
            self.episode_statevals.append(stateval)
            return a, r, qval, count, self.humantakingcontrolstring  # so that agents can print this if they want
        return None, 0, 0, 0, ""
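A note on the markovtuple above: it is stored as a list precisely so that later callbacks can mutate the reward and the terminal flag in place. The punish/endEpisode helpers are not shown in these examples, so the sketch below is hypothetical and only illustrates the mutation pattern.

# Hypothetical sketch: mutate the most recent (s, a, r, s2, done) entry in place.
# The project's real punishLastAction/endEpisode are not shown here.
def punish_last_entry(memory, punishment):
    if len(memory) > 0:
        memory[-1][2] -= punishment  # lower the stored reward
        memory[-1][4] = True         # mark the transition as terminal

memory = [[("conv", "other"), (1, 0, 0.0), 0.5, ("conv2", "other2"), False]]
punish_last_entry(memory, 2.0)
print(memory[-1][2], memory[-1][4])  # -1.5 True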
Example #2
    def dauerLearnANN(self, steps):
        i = 0
        res = 0
        while self.containers.KeepRunning and self.model.run_inferences() <= self.conf.train_for and i < steps:
            cando = True
            #this freezes learning until inference has caught up (if update_frequency is set)
            if self.conf.ForEveryInf and self.conf.ComesALearn and self.conf.learnMode == "parallel":
                if self.numLearnAfterInference == self.conf.ComesALearn and self.numInferencesAfterLearn == self.conf.ForEveryInf:
                    self.numLearnAfterInference = self.numInferencesAfterLearn = 0
                    self.unFreezeLearn("updateFrequency")   
                    self.unFreezeInf("updateFrequency")
                #every ComesALearn learn steps, wait until inference has run ForEveryInf times in the meantime
                if self.numLearnAfterInference >= self.conf.ComesALearn:
                    self.unFreezeInf("updateFrequency") 
                    if self.numInferencesAfterLearn < self.conf.ForEveryInf and self.canLearn():
                        self.freezeLearn("updateFrequency")
                        print("FREEZELEARN", self.numLearnAfterInference, self.numInferencesAfterLearn, level=2)
                        cando = False
                    else:
                        self.numInferencesAfterLearn = 0
            if cando and not self.containers.freezeLearn and self.canLearn():
                res += self.learnANN()
                if self.conf.ForEveryInf and self.conf.ComesALearn and self.conf.learnMode == "parallel":
                    self.numLearnAfterInference += 1
            i += 1         
#        print(res/steps)
        self.unFreezeInf("updateFrequency") #kann hier ruhig sein, da es eh nur unfreezed falls es aufgrund von diesem grund gefreezed war.
        if self.model.run_inferences() >= self.conf.train_for: #if you exited because you're completely done
            self.saveNet()
            print("Stopping learning because I'm done after", self.model.run_inferences(), "inferences", level=10)
Example #3
 def performAction(self, gameState, pastState):
     if self.checkIfAction():
         self.numsteps += 1
         self.repeated_action_for += 1
         self.stepsAfterStart += 1
         self.addToMemory(gameState, pastState)
         
         if self.stepsAfterStart <= self.conf.headstart_num:
             toUse, toSave = self.headstartAction() #otherwise the agent always drives to the edge at the start, because the first states have a completely atypical history
         elif self.repeated_action_for < self.action_repeat:
             toUse, toSave = self.last_action 
         else:
             agentState = self.getAgentState(*gameState) #may be overridden
             if len(self.memory) >= self.conf.replaystartsize or self.epsilon == 0:
                 self.epsilon = min(round(max(self.startepsilon-((self.startepsilon-self.minepsilon)*((self.model.run_inferences()-self.conf.replaystartsize)/self.finalepsilonframe)), self.minepsilon), 5), 1)
                 if np.random.random() < self.epsilon:
                     toUse, toSave = self.randomAction(agentState)
                 else:
                     toUse, toSave = self.policyAction(agentState)
             else:
                 toUse, toSave = self.randomAction(agentState)
             self.last_action = toUse, toSave
                       
         self.containers.outputval.update(toUse, toSave, self.containers.inputval.CTimestamp, self.containers.inputval.STimestamp)   #note that this happens BEFORE it learns <- parallel
         if self.conf.learnMode == "between":
             if self.numsteps % self.conf.ForEveryInf == 0 and self.canLearn():
                 print("freezing python because after", self.model.run_inferences(), "iterations I need to learn (between)", level=2)
                 self.freezeInf("LearningComes")
                 self.dauerLearnANN(self.conf.ComesALearn)
                 self.unFreezeInf("LearningComes")
     else:
         agentState = self.getAgentState(*gameState)  # needed here as well; agentState is otherwise undefined in this branch
         toUse, toSave = self.randomAction(agentState)
         self.containers.outputval.update(toUse, toSave, self.containers.inputval.CTimestamp, self.containers.inputval.STimestamp)
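The long epsilon line in the middle of performAction is a linear annealing schedule: epsilon decays from startepsilon to minepsilon over finalepsilonframe inferences, counted from the moment the replay memory reaches replaystartsize. A small extracted sketch of that formula, with illustrative parameter values:

# Extracted epsilon schedule (parameter values here are only illustrative).
def annealed_epsilon(run_inferences, replaystartsize=1000, startepsilon=1.0,
                     minepsilon=0.1, finalepsilonframe=100000):
    frac = (run_inferences - replaystartsize) / finalepsilonframe
    eps = startepsilon - (startepsilon - minepsilon) * frac
    return min(round(max(eps, minepsilon), 5), 1)

print(annealed_epsilon(1000))    # 1.0  (annealing just started)
print(annealed_epsilon(51000))   # 0.55 (halfway)
print(annealed_epsilon(200000))  # 0.1  (floor reached)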
Example #4
 def returnRelevant(self):
     print("Removed 4 elements from speedsteer here, seems necessary",
           level=-1)
     return (list(self.CenterDistVec) + [0] * 4 + list(self.SpeedSteer[4:])
             + list(self.StatusVector) + list(self.WallDistVec) + list(self.LookAheadVec))
Example #5
 def eval_episodeVals(self, endReason): #a bit hacked in here, but whatever
     _, _, otherinput_hist, _ = self.containers.inputval.read()
     progress = round(otherinput_hist[0].ProgressVec.Progress*100 if endReason != "lapdone" else 100, 2)
     laptime = round(otherinput_hist[0].ProgressVec.Laptime, 1)
     valid = otherinput_hist[0].ProgressVec.fValidLap
     evalstring = "progress:",progress,"laptime:",laptime,"(valid)" if valid else ""
     print(evalstring, level=8)
     if self.use_evaluator:
         self.evaluator.add_episode([progress, laptime])               
Example #6
    def getAgentState(self, *gameState): 
        vvec1_hist, vvec2_hist, otherinput_hist, action_hist = gameState
        assert self.conf.use_cameras, "You disabled cameras in the config, which is impossible for this agent!"
        conv_inputs = np.concatenate([vvec1_hist, vvec2_hist]) if vvec2_hist is not None else vvec1_hist
#        other_inputs = [otherinput_hist[0].SpeedSteer.velocity, action_hist]
        other_inputs = [otherinput_hist[0].SpeedSteer.velocity, [np.zeros_like(i) if i is not None else None for i in action_hist]]
        print("Removed actions as input to network, as it only learns from them then", level=-1)
        stands_inputs = otherinput_hist[0].SpeedSteer.velocity < 0.04
        return conv_inputs, other_inputs, stands_inputs
Example #7
 def save(self, session):
     folder = self.conf.pretrain_checkpoint_dir if self.isPretrain else self.conf.checkpoint_dir
     checkpoint_file = os.path.join(self.agent.folder(folder), 'model.ckpt')
     session.run(self.pretrain_episode_tf.assign(self.pretrain_episode))
     session.run(self.run_inferences_tf.assign(self.run_inferences))
     self.saver.save(session,
                     checkpoint_file,
                     global_step=self.pretrain_step_tf
                     if self.isPretrain else self.step_tf)
     print("Saved Model.", level=6)
Example #8
    def getAgentState(self, *gameState):  
        vvec1_hist, vvec2_hist, otherinput_hist, action_hist = gameState
        flat_actions = flatten([i if i is not None else (0,0,0) for i in action_hist])
#        other_inputs = np.ravel([i.returnRelevant() for i in otherinput_hist])
        other_inputs = np.ravel([i.returnRelevant() for i in otherinput_hist[:2]])
        flat_actions = list(np.zeros_like(flat_actions))
        print("Removed actions as input to network, as it only learns from them then", level=-1)
        other_inputs = np.concatenate((other_inputs,flat_actions))
        stands_inputs = otherinput_hist[0].SpeedSteer.velocity < 0.04
        return None, other_inputs, stands_inputs
Example #9
 def freezeInf(self, reason):
     if self.containers.UnityConnected:
         if reason not in self.freezeInfReasons:
             print("freezing Unity because", reason, level=10)
             self.containers.freezeInf = True
             self.freezeInfReasons.append(reason)
             try:
                 self.containers.outputval.freezeUnity()
             except:
                 pass
 def learnANN(self):
     tmp = super().learnANN()
     print("ReinfLearnSteps:", self.model.step(), level=3)
     if self.containers.showscreen:
         infoscreen.print(self.model.step(),
                          "Iterations: >" +
                          str(self.model.run_inferences()),
                          containers=self.containers,
                          wname="ReinfLearnSteps")
     return tmp
Example #11
 def handle_commands(self, command, wasValid=False):
     if command == "wallhit":
         self.punishLastAction(self.wallhitPunish)   #isn't this redundant, given that the 'if punish > 10' check is kept anyway?
         self.endEpisode("wallhit", self.containers.inputval.read())
     if command == "lapdone":
         print("Lap finished", level=6)
         #if wasValid, give +1000 reward?
         self.endEpisode("lapdone", self.containers.inputval.read())
     if command == "timeover":
         self.endEpisode("timeover", self.containers.inputval.read())
     if command == "turnedaround":
         self.punishLastAction(self.wrongDirPunish)
         self.endEpisode("turnedaround", self.containers.inputval.read())
Example #12
 def showqvals(self, qvals):
     amount = self.conf.steering_steps*4 if self.conf.INCLUDE_ACCPLUSBREAK else self.conf.steering_steps*3
     b = []
     for i in range(amount):
         a = [0]*amount
         a[i] = 1
         b.append(str(self.dediscretize(a)))
     b = list(zip(b, qvals))
     toprint = [str(i[0])[1:-1]+": "+str(i[1]) for i in b]
     toprint = "\n".join(toprint)
     print(b, level=3)
     if self.containers.showscreen:
         infoscreen.print(toprint, containers=self.containers, wname="Current Q Vals")
Example #13
 def unFreezeInf(self, reason):
     if self.containers.UnityConnected:
         try:
             self.freezeInfReasons.remove(reason)
             if len(self.freezeInfReasons) == 0:
                 self.containers.freezeInf = False
                 try: #TODO: have a unity_connected variable instead!
                     print("unfreezing Unity because",reason, level=10)
                     self.containers.outputval.unFreezeUnity()
                 except:
                     pass
         except ValueError:
             pass #you have nothing to do if it wasn't in there anyway
Example #14
def readOneDArrayFromString(string):
    tmpstrings = string.split(",")
    tmpfloats = []
    for i in tmpstrings:
        tmp = i.replace(" ", "")
        if len(tmp) > 0:
            try:
                tmp = ("1" if tmp == "T" else "0" if tmp == "F" else tmp)
                x = float(tmp)
                tmpfloats.append(x)
            except ValueError:
                print("I'm crying")  #cry.
    return tmpfloats
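Usage sketch for the parser above: comma-separated numbers with "T"/"F" mapped to 1/0 and empty fields skipped.

# Assuming readOneDArrayFromString from above is in scope:
print(readOneDArrayFromString("0.5, T, , F, 1.25"))  # [0.5, 1.0, 0.0, 1.25]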
Example #15
 def update(self, toSend, toSave, CTimestamp, STimestamp):
     self.lock.acquire()
     try:
         if int(self.STimestamp) < int(STimestamp):
             self.value = toSend
             self.containers.inputval.addAction(toSave)
             self.CTimestamp, self.STimestamp = CTimestamp, STimestamp #what matters is not the current time, but the time when the ANN run was started
             print("Updated output-value to", toSend, level=4)
             self.send_via_senderthread(self.value, self.CTimestamp, self.STimestamp)
         else:
             print("Didn't update output-value because the new one wouldn't be newer", level=10)
             #raise
     finally:
         self.lock.release()
Example #16
def readTwoDArrayFromString(string):
    tmpstrings = string.split(",")
    tmpreturn = []
    for i in tmpstrings:
        tmp = i.replace(" ", "")
        if len(tmp) > 0:
            try:
                currline = []
                for j in tmp:
                    currline.append(int(j))
                tmpreturn.append(currline)
            except ValueError:
                print("I'm crying")  #cry.
    return np.array(tmpreturn)
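Usage sketch for the 2-D parser above: each comma-separated field is read as one row of single digits.

# Assuming readTwoDArrayFromString from above is in scope:
print(readTwoDArrayFromString("101, 010"))  # [[1 0 1]
                                            #  [0 1 0]]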
Example #17
def showhelp():
    print("""Command-line arguments:
"-DQN" to run with the DQN-config
"-nolearn" to store agent's results to the memory, but not perform Reinforcement learning (sets the random-action-chance to 0)
"-noscreen" to turn off the screen showing Q-vals etc.
"-noplot" to turn off the plots evaluating each episode
"-startfresh" to use a RL-agent without and (supervised or reinforcement) pretraining
"-nomemorykeep" to not save the memory for this run.
"-nomemoryload" to not LOAD the memory, which can save a lot of time
"-help" shows this help and exits
Concerning agents:
Without any arguments, the agent defined in "dqn_rl_agent" is used
With the "-svplay"-argument, the agent defined in "dqn_sv_agent" is used
With the argument "--agent xyz", the agent defined in "xyz.py" is used""", level=999)
Example #18
 def send_via_senderthread(self, value, CTimestamp, STimestamp):
     #take the first connection that doesn't throw an error!
     print("PYTHON SENDING TIME:", STimestamp, time.time()*1000, level=4)
     if self.containers.KeepRunning:
         assert len(self.containers.senderthreads) > 0, "There is no senderthread at all! How will I send?"
         for i in range(len(self.containers.senderthreads)):
             try:
                 self.containers.senderthreads[i].send(value, CTimestamp, STimestamp)
             except (ConnectionResetError, ConnectionAbortedError):
                 #if unity restarted, the old connection is now useless and should be deleted
                 print("I assume you just restarted Unity.")
                 self.containers.senderthreads[i].delete_me()
                 self.containers.senderthreads[i].join()
                 if i >= len(self.containers.senderthreads)-1:
                     break
Example #19
 def eval_episodeVals(self, mem_epi_slice, gameState, endReason):
     vvec1_hist, vvec2_hist, otherinput_hist, action_hist = gameState
     avg_rewards = round(self.memory.average_rewards(mem_epi_slice[0], mem_epi_slice[1]),3)
     avg_values = round(np.mean(np.array(self.episode_statevals)), 3)
     self.episode_statevals = []
     #other evaluation-values we need are the time the agent took and the percentage the agent made. However, because those values are not necessarily
     #officially known to the agent (since agentstate != environmentstate), we need to take them from the environment-state
     progress = round(otherinput_hist[0].ProgressVec.Progress*100 if endReason != "lapdone" else 100, 2)
     laptime = round(otherinput_hist[0].ProgressVec.Laptime,1)
     valid = otherinput_hist[0].ProgressVec.fValidLap
     evalstring = "Avg-r:",avg_rewards,"Avg-Q:",avg_values,"progress:",progress,"laptime:",laptime,"(valid)" if valid else ""
     print(evalstring, level=8)
     if self.use_evaluator:
         self.evaluator.add_episode([avg_rewards, avg_values, progress, laptime], nr=self.episodes, startMemoryEntry=mem_epi_slice[0], endMemoryEntry=mem_epi_slice[1], endIteration=self.model.run_inferences(), reinfNetSteps=self.model.step(), endEpsilon=self.epsilon)
     return evalstring
Example #20
 def save_memory(self):
     with self._lock:
         if self.agent.keep_memory:
             self.agent.freezeEverything("saveMem")
             self.psave(self.memorypath + SAVENAME + 'TMP.pkl')
             print("Saving Memory at",
                   time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
                   level=6)
             if os.path.exists(self.memorypath + SAVENAME + 'TMP.pkl'):
                 #only use it as memory if you weren't disturbed while writing
                 if os.path.getsize(self.memorypath + SAVENAME + 'TMP.pkl') > 1024:
                     shutil.copyfile(self.memorypath + SAVENAME + 'TMP.pkl',
                                     self.memorypath + SAVENAME + '.pkl')
             self.lastsavetime = current_milli_time()
             self.agent.unFreezeEverything("saveMem")
Example #21
 def save(self):
     folder = self.conf.pretrain_checkpoint_dir if self.isPretrain else self.conf.checkpoint_dir
     critic_file = os.path.join(
         self.agent.folder(os.path.join(folder, "critic")), 'model.ckpt')
     self.critic.saver.save(self.session,
                            critic_file,
                            global_step=self.critic.pretrain_step_tf
                            if self.isPretrain else self.critic.step_tf)
     actor_file = os.path.join(
         self.agent.folder(os.path.join(folder, "actor")), 'model.ckpt')
     self.session.run(self.actor.run_inferences_tf.assign(self.run_inf))
     self.session.run(
         self.actor.pretrain_episode_tf.assign(self.pretrain_ep))
     self.actor.saver.save(self.session,
                           actor_file,
                           global_step=self.actor.pretrain_step_tf
                           if self.isPretrain else self.actor.step_tf)
     print("Saved Model.", level=6)
Example #22
 def extract_appropriate(self, TPList, TPmsperframe, wishmsperframe,
                         filename):
     if float(TPmsperframe) > float(wishmsperframe) * 1.05:
         print("%s could not be used because it did not record enough frames!" % filename)
         return None
     elif float(wishmsperframe) * 0.95 < float(
             TPmsperframe) < float(wishmsperframe) * 1.05:
         returntp = TPList
     else:
         fraction = round(wishmsperframe / TPmsperframe * 100) / 100
         i = 0
         returntp = []
         while round(i) < len(TPList):
             returntp.append(TPList[round(i)])
             i += fraction
     returntp[-1].endedAfter = True
     return returntp
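To make the fractional stepping above concrete, here is a tiny stand-alone sketch of the downsampling branch: a recording at 20 ms/frame thinned to a desired 50 ms/frame keeps roughly every 2.5th frame (the frame counts are made up).

# Stand-alone sketch of the downsampling branch in extract_appropriate.
TPmsperframe, wishmsperframe = 20.0, 50.0
fraction = round(wishmsperframe / TPmsperframe * 100) / 100  # 2.5
kept, i = [], 0.0
while round(i) < 12:       # pretend the recording has 12 frames
    kept.append(round(i))  # index of the frame that is kept
    i += fraction
print(kept)                # [0, 2, 5, 8, 10]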
Example #23
    def update(self, visionvec, vvec2, othervecs, STimestamp, CTimestamp):
        self.lock.acquire()
        try:
            if not self.just_reset:
                assert self.action_hist[0] is not None, "the output-val didn't add the last action before running again!"
                self.has_past_state = True
            #20.7.: deleted the "if is_new..." functionality, as I think it's absolutely not helpful
            otherinputs = make_otherinputs(othervecs).normalized() #is now a namedtuple instead of an array            
            
            if hasattr(self.containers.myAgent, "time_ends_episode") and self.containers.myAgent.time_ends_episode and otherinputs.ProgressVec.Laptime >= self.containers.myAgent.time_ends_episode:
                self.containers.myAgent.handle_commands("timeover") 
                                          
            if self.conf.use_cameras and self.agent.usesConv:
                self._append_vvec_hist(visionvec, vvec2)
            self.otherinput_hist = self._append_other(otherinputs, self.otherinput_hist)
            self.containers.myAgent.humantakingcontrolstring = "(H)" if self.action_hist[0] is None or otherinputs.Action is None or np.any([abs(self.action_hist[0][i] - otherinputs.Action[i]) > 0.1 for i in range(len(otherinputs.Action))]) else ""
            self.action_hist[0] = tuple(otherinputs.Action) #it was already added in addAction, and will only be overwritten here if humantakingcontrol changed it
            self.action_hist = self._append_other(None, self.action_hist)   #will be updated in addAction         
            
                                                 
            #if otherinputs.CenterDist was >= 10 and no new action has arrived since, it has to stay >= 10!
#            if self.otherinput_hist[0].CenterDist[0] >= 0.99: 
#                self.hit_a_wall = True 
#            #only set to False again once an action arrives.. and as long as it is True:
#            if self.hit_a_wall:
#                self.otherinput_hist[0] = self.otherinput_hist[0]._replace(CenterDist = [1])
                
            try:
                if not self.otherinput_hist[0].SpeedSteer.rightDirection:
                    self.containers.wrongdirectiontime += self.containers.conf.msperframe
                    if self.containers.wrongdirectiontime >= 2000: #after 2 seconds of driving in the wrong direction
                        self.containers.myAgent.handle_commands("turnedaround")
                else:
                    self.containers.wrongdirectiontime = 0
            except IndexError:
                self.containers.wrongdirectiontime = 0
                              
            self.alreadyread = False
            self.CTimestamp, self.STimestamp = CTimestamp, STimestamp
            print("Updated Input-Vec from", STimestamp, level=2)
            self.just_reset = False
        finally:
            self.lock.release()
Example #24
 def randomAction(self, agentState):
     print("Random Action", level=2)
     action = np.random.randint(4) if self.conf.INCLUDE_ACCPLUSBREAK else np.random.randint(3)
     if action == 0: brake, throttle = 0, 1
     if action == 1: brake, throttle = 0, 0
     if action == 2: brake, throttle = 1, 0
     if action == 3: brake, throttle = 1, 1
     if agentState[2]: #"carstands"
         brake, throttle = 0, 1 
     #alternative 1a: steer = ((np.random.random()*2)-1)
     #alternative 1b: steer = min(max(np.random.normal(scale=0.5), 1), -1)
     #for 1a and 1b:  steer = read_supervised.dediscretize_steer(read_supervised.discretize_steering(steer, self.conf.steering_steps))
     #alternative 2:
     tmp = [0]*self.conf.steering_steps
     tmp[np.random.randint(self.conf.steering_steps)] = 1
     steer = read_supervised.dediscretize_steer(tmp)
     #throttle, brake, steer = 1, 0, 0
     result = "["+str(throttle)+", "+str(brake)+", "+str(steer)+"]"
     return result, (throttle, brake, steer)  #it always returns toUse, toSave
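The random action above picks a one-hot steering vector and converts it back to a continuous value via read_supervised.dediscretize_steer, which is not shown in these examples. A hypothetical re-implementation, assuming the steering steps are spread evenly over [-1, 1]:

import numpy as np

def dediscretize_steer_sketch(onehot):
    # Hypothetical stand-in for read_supervised.dediscretize_steer:
    # maps the hot index to an evenly spaced steering value in [-1, 1].
    steps = len(onehot)
    idx = int(np.argmax(onehot))
    return -1.0 + 2.0 * idx / (steps - 1)

print(dediscretize_steer_sketch([1, 0, 0, 0, 0]))  # -1.0 (full left)
print(dediscretize_steer_sketch([0, 0, 1, 0, 0]))  #  0.0 (straight)
print(dediscretize_steer_sketch([0, 0, 0, 0, 1]))  #  1.0 (full right)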
Example #25
 def getAccuracy(self,
                 batch,
                 likeDDPG=True):  #dummy for consistency to DDDQN
     oldstates, actions, _, _, _ = batch
     predict = self.actor.predict(oldstates,
                                  useOnline=False,
                                  is_training=False)
     print(
         "throt",
         np.mean(
             np.array([
                 abs(np.linalg.norm(predict[i][0] - actions[i][0]))
                 for i in range(len(actions))
             ])))
     print(
         "brake",
         np.mean(
             np.array([
                 abs(np.linalg.norm(predict[i][1] - actions[i][1]))
                 for i in range(len(actions))
             ])))
     print(
         "steer",
         np.mean(
             np.array([
                 abs(np.linalg.norm(predict[i][2] - actions[i][2]))
                 for i in range(len(actions))
             ])))
     return np.mean(
         np.array([
             abs(np.linalg.norm(predict[i] - actions[i]))
             for i in range(len(actions))
         ]))
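The per-component error prints above can be written more compactly; the following sketch (with made-up numbers) computes the same quantities in vectorized form, assuming predict and actions are arrays of shape (batch, 3):

import numpy as np

# Vectorized sketch of the per-component and overall errors above (made-up data).
predict = np.array([[0.8, 0.0, 0.1], [0.5, 0.2, -0.3]])
actions = np.array([[1.0, 0.0, 0.0], [0.4, 0.0, -0.2]])
per_component = np.mean(np.abs(predict - actions), axis=0)     # throttle, brake, steer
overall = np.mean(np.linalg.norm(predict - actions, axis=1))   # what getAccuracy returns
print("throt", per_component[0], "brake", per_component[1], "steer", per_component[2])
print("overall", overall)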
Example #26
 def _load(self, from_pretrain=False):
     folder = self.conf.pretrain_checkpoint_dir if from_pretrain else self.conf.checkpoint_dir
     critic_ckpt = tf.train.get_checkpoint_state(
         self.agent.folder(os.path.join(folder, "critic")))
     actor_ckpt = tf.train.get_checkpoint_state(
         self.agent.folder(os.path.join(folder, "actor")))
     if critic_ckpt and actor_ckpt and critic_ckpt.model_checkpoint_path and actor_ckpt.model_checkpoint_path:
         self.critic.saver.restore(self.session,
                                   critic_ckpt.model_checkpoint_path)
         self.actor.saver.restore(self.session,
                                  actor_ckpt.model_checkpoint_path)
         self.run_inf = self.actor.run_inferences_tf.eval(self.session)
         self.pretrain_ep = self.actor.pretrain_episode_tf.eval(
             self.session)
     else:
         print("Couldn't load",
               ("from pretrain" if from_pretrain else "from RL-train"),
               level=10)
         return False
     print("Loaded",
           ("from pretrain" if from_pretrain else "from RL-train"),
           level=10)
     print("Pretrain-Step:",
           self.actor.pretrain_step_tf.eval(self.session),
           "Pretrain-Episode:",
           self.pretrain_ep,
           "Main-Step:",
           self.step(),
           "Run'n Iterations:",
           self.run_inf,
           level=10)
     return True
Example #27
 def load(self, session, from_pretrain=False):
     folder = self.conf.pretrain_checkpoint_dir if from_pretrain else self.conf.checkpoint_dir
     ckpt = tf.train.get_checkpoint_state(self.agent.folder(folder))
     if ckpt and ckpt.model_checkpoint_path:
         self.saver.restore(session, ckpt.model_checkpoint_path)
         print("Loaded",
               ("from pretrain" if from_pretrain else "from RL-train"),
               level=10)
         self.pretrain_step = self.pretrain_step_tf.eval(session)
         self.pretrain_episode = self.pretrain_episode_tf.eval(session)
         self.step = self.step_tf.eval(session)
         self.run_inferences = self.run_inferences_tf.eval(session)
         print("Pretrain-Step:",
               self.pretrain_step,
               "Pretrain-Episode:",
               self.pretrain_episode,
               "Main-Step:",
               self.step,
               "Run'n Iterations:",
               self.run_inferences,
               level=10)
         return True
     else:
         print("Couldn't load",
               ("from pretrain" if from_pretrain else "from RL-train"),
               level=10)
         return False
Example #28
 def run(self):
     print("Starting receiver_thread")
     while self.containers.KeepRunning and (not self.killme):
         try:        
             if not self.containers.freezeInf:
                 data = self.clientsocket.myreceive()
                 if data: 
                     #print("received data:", data, level=10)   
                     
                     if self.handle_special_commands(copy.deepcopy(data)):
                         continue
                     elif data[:6] == "STime(":
                     
                         #we MUST have the inputval, otherwise there wouldn't be the possibility for historyframes.           
                         STime, CTime, visionvec, vvec2, allOneDs = cutoutandreturnvectors(data) 
                         self.CTimestamp, self.STimestamp = CTime, STime
                         for i in self.containers.receiverthreads:
                             if int(i.STimestamp) < int(self.STimestamp):
                                 i.killme = True
                                 
                         print("PYTHON RECEIVES TIME:", STime, time.time()*1000, level=4)
                         self.containers.inputval.update(visionvec, vvec2, allOneDs, STime, CTime)  #note that visionvec and vvec2 can both be None                                                           
                         self.containers.myAgent.performAction(self.containers.inputval.read(), self.containers.inputval.read(pastState=True))
                     
         except TimeoutError:
             if len(self.containers.receiverthreads) < 2:
                 pass
             else:
                 break
             
     self.containers.receiverthreads.remove(self)
     print("stopping receiver_thread")
Example #29
 def checkIfAction(self):
     if self.containers.freezeInf:
         return False
     #this freezes inference until learning has caught up (if update_frequency is set)
     if self.conf.ForEveryInf and self.conf.ComesALearn and self.canLearn() and self.conf.learnMode == "parallel":
         if self.numLearnAfterInference == self.conf.ComesALearn and self.numInferencesAfterLearn == self.conf.ForEveryInf:
             self.numLearnAfterInference = self.numInferencesAfterLearn = 0            
             self.unFreezeLearn("updateFrequency")   
             self.unFreezeInf("updateFrequency")
          #every ForEveryInf inferences, wait until learning has happened ComesALearn times in the meantime
         if self.numInferencesAfterLearn == self.conf.ForEveryInf:
              #check whether it has learned ComesALearn times in the meantime; if not, freeze inference
             self.unFreezeLearn("updateFrequency")      
             if self.numLearnAfterInference < self.conf.ComesALearn:
                 self.freezeInf("updateFrequency")
                 print("FREEZEINF", self.numLearnAfterInference, self.numInferencesAfterLearn, level=2)
                 return super().checkIfAction()
             self.numLearnAfterInference = 0
         self.numInferencesAfterLearn += 1
     #print(self.numLearnAfterInference, self.numInferencesAfterLearn, level=10)
     if self.model.run_inferences() >= self.conf.train_for: 
         return False
     else:
         return super().checkIfAction()
 def preTrain(self, dataset, iterations, supervised=False):
     assert self.model.step() == 0, "I don't pretrain if the model already learned on real data!"
     iterations = self.conf.pretrain_iterations if iterations is None else iterations
     if supervised:
         raise ValueError("A DDPG-Model cannot learn supervisedly!")
     print("Starting pretraining", level=10)
     for i in range(iterations):
         start_time = time.time()
         self.model.inc_episode()
         dataset.reset_batch()
         while dataset.has_next(self.conf.pretrain_batch_size):
             trainBatch = self.make_trainbatch(
                 dataset, self.conf.pretrain_batch_size, 0.3)
             self.model.q_train_step(trainBatch, False)
         if (i + 1) % 25 == 0:
             self.model.save()
         dataset.reset_batch()
         trainBatch = self.make_trainbatch(dataset, dataset.numsamples)
         print(
             'Iteration %3d: Closeness = %.2f (%.1f sec)' %
             (self.model.pretrain_episode(),
              self.model.getAccuracy(trainBatch), time.time() - start_time),
             level=10)