def runEpisode(agent, environment, discount, decision, display, message, pause, episode, testFlag=0, foldN=0):
    """Run a single episode and return (returns, reward, AOP, explore).

    returns  -- discounted cumulative reward over the episode
    reward   -- reward from the final environment.doAction call (0 if none)
    AOP, explore -- extra values from the final doAction call (None if none)
    testFlag -- 0 runs a training episode (agent is updated); any other
                value runs a test episode (no learning).
    foldN    -- fold selector forwarded to environment.reset.
    """
    returns = 0
    totalDiscount = 1.0
    # Defaults so the terminal-state return is well defined even when the
    # episode is over before a single action is taken (was a NameError).
    reward, AOP, explore = 0, None, None
    environment.reset(testFlag, foldN)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            # returns: accumulative rewards, reward: last step's reward
            return returns, reward, AOP, explore
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward, AOP, explore = environment.doAction(action)
        if testFlag == 0:  # training episode
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
        else:  # testing episode: no learning update
            pass
        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, qtype, environment, discount, decision, display, message, pause, episode):
    """Run one gridworld episode and return the discounted return.

    qtype is forwarded to environment.doAction, which may substitute the
    action actually taken (action2); the learner observes action2, not the
    action originally specified. Environments may return a per-component
    reward tuple, which is summed before discounting.
    """
    ###########################
    # GET THE GRIDWORLD
    ###########################
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    # for state in mdp.getStates():
    #     display(state)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward, action2 = environment.doAction(action, qtype)
        message("Started in state: "+str(state)+
                "\nSpecified action: "+str(action)+
                "\nTook action: "+str(action2)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER with the action actually taken (action2)
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action2, nextState, reward, environment)
        # Collapse component-wise reward tuples into a scalar.
        if isinstance(reward, tuple):
            reward = sum(reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode and return the discounted sum of rewards.

    agent       -- learner; optional hooks startEpisode / observeTransition /
                   stopEpisode are invoked only when the agent defines them
    environment -- provides reset / getCurrentState / getPossibleActions / doAction
    discount    -- per-step discount factor
    decision    -- callable mapping a state to an action
    display, message, pause -- UI callbacks
    episode     -- episode number, used only in messages
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception("Error: Agent returned None action")
        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message(
            "Started in state: " + str(state) +
            "\nTook action: " + str(action) +
            "\nEnded in state: " + str(nextState) +
            "\nGot reward: " + str(reward) + "\n"
        )
        # UPDATE LEARNER
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode and return the discounted sum of rewards.

    The agent's optional hooks (startEpisode, observeTransition,
    stopEpisode) are invoked only when the agent defines them; the
    environment drives state, legal actions, and transitions.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # Display current state
        state = environment.getCurrentState()
        display(state)
        pause()
        # End if in a terminal state (no legal actions)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns
        # Get action (usually from agent)
        action = decision(state)
        if action is None:  # fix: identity comparison with None per PEP 8
            raise Exception("Error: Agent returned None action")
        # Execute action
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " + str(action) + "\nEnded in state: " + str(nextState) + "\nGot reward: " + str(reward) + "\n")
        # Update learner
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Drive one episode: show each state, ask the decision function for an
    action, step the environment, and feed the transition to the agent.
    Returns the discounted sum of rewards collected over the episode."""
    total = 0
    weight = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # Show the state we are about to act from.
        state = environment.getCurrentState()
        display(state)
        pause()
        # A state with no legal actions is terminal: report and stop.
        if not environment.getPossibleActions(state):
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(
                total) + "\n")
            return total
        # Ask the decision function (usually the agent) what to do.
        action = decision(state)
        if action == None:
            raise Exception('Error: Agent returned None action')
        # Apply the action and report the full transition.
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # Let a learning agent observe what just happened.
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        total += reward * weight
        weight *= discount
    # Unreachable: the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode,
               rates, offset, pmNum, locks, pllock, q, plinfo):
    """Run one episode for a single VM and return (returns, offset).

    returns -- discounted sum of rewards for the episode
    offset  -- input offset advanced by one per step taken (callers use it
               to index into `rates`; only the commented-out debug print
               reads `rates` here)
    The lock/queue arguments (locks, pllock, q, plinfo) and pmNum are
    passed straight through to environment.doAction.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    # message("BEGINNING EPISODE: "+str(episode)+"\n")
    pm = environment.baeplatform.pm
    while True:
        # END IF IN A TERMINAL STATE
        if environment.isTerminal():
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns, offset
        # print rates[offset%len(rates)]
        # CURRENT STATE
        state = environment.getCurrentState()
        pause()
        offset += 1
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward = environment.doAction(state, action, pmNum, locks, pllock, q, plinfo)
        message("VM " + str(pm.id) + " Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
# Final optimizer step of the preceding (not visible) setup/warm-up code.
optimizer.step()
begin = time()  # wall-clock start of the training run
best_result = 0.0
# Main training loop: one iteration per episode; lidar scans are the state.
for i_episode in range(n_episodes):
    # NOTE(review): a log handle is opened every episode but not closed in
    # this chunk — confirm it is closed further down.
    logs = open(log_path, 'a')
    # Initialize the environment and state
    env.start()
    #cur_state = transform(torch.from_numpy(env.getLidar())).unsqueeze(0).to(device)
    # State = raw lidar reading as a (1, ...) tensor on the target device.
    cur_state = torch.from_numpy(env.getLidar()).unsqueeze(0).to(device)
    for t in count():
        # Select and perform an action
        action = select_action(cur_state)
        reward, done = env.doAction(action)
        reward = torch.tensor([reward], device=device)
        # Observe new state
        #next_state = transform(torch.from_numpy(env.getLidar())).unsqueeze(0).to(device)
        next_state = torch.from_numpy(env.getLidar()).unsqueeze(0).to(device)
        if done:
            # Terminal transitions are stored with next_state = None.
            next_state = None
        # Store the transition in memory
        memory.push(cur_state, action, next_state, reward)
        # Perform one step of the optimization (on the target network)
        optimize_model()
        # NOTE(review): chunk ends here — the state advance / episode break
        # presumably follow but are outside this view.
def runEpisode(agent, environment, discount, decision, display, message, pause, episode,
               update=True, bounded=False):
    """Run one episode and return (timestep, returns).

    update=True drives an n-step agent: on reaching a terminal state the
    agent keeps receiving (state, None, None, None) flush updates until its
    should_end_episode() hook reports done. update=False is the plain-agent
    path, which terminates on a terminal state or, when bounded is set,
    after MAX_TIMESTEPS steps. display may be None to skip rendering.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode(environment.getCurrentState())
    # message("BEGINNING EPISODE: "+str(episode)+"\n")
    timestep = 0
    MAX_TIMESTEPS = 40
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        # The n-step agent decides when the episode is really over.
        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)
        # END IF IN A TERMINAL STATE (or time limit exceeded, when bounded)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0:
                # Terminal state with an n-step agent: flush pending updates
                # and loop until should_end_episode() fires.
                # NOTE(review): loops forever if the agent never signals done.
                agent.update(state, None, None, None, update)
                continue
            elif not update:
                # Plain agent: terminate on goal state or time exceeded.
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)
            # NOTE(review): when update=True and only the time bound was hit,
            # execution falls through here and the episode keeps running.
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)
        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode, logging visited flattened state indices to trajectories.txt.

    After stepsLimit steps (a module-level global — TODO confirm the caller
    defines it), the visited-state string is appended to the trajectory file
    and a fresh episode is started recursively.
    NOTE(review): the recursion never terminates on its own; a terminal
    state (no legal actions) is the only way out, via the early return.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    appendString = '['
    steps = 0
    grid_width = 10
    # with-statement fixes the handle leak on the early terminal return.
    with open("trajectories.txt", "a") as f:
        while True:
            # DISPLAY CURRENT STATE
            state = environment.getCurrentState()
            display(state)
            pause()
            # END IF IN A TERMINAL STATE
            actions = environment.getPossibleActions(state)
            if len(actions) == 0:
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                return returns
            # GET ACTION (USUALLY FROM AGENT)
            action = decision(state)
            if action is None:
                # Fix: raising a plain string is a TypeError in Python 3.
                raise Exception('Error: Agent returned None action')
            # EXECUTE ACTION ('stay' leaves the state unchanged)
            nextState = state
            reward = 0  # fix: was unbound (or stale) when action == 'stay'
            if action != 'stay':
                nextState, reward = environment.doAction(action)
            message("Started in state: "+str(state)+
                    "\nTook action: "+str(action)+
                    "\nEnded in state: "+str(nextState)+
                    "\nGot reward: "+str(reward)+"\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
            returns += reward * totalDiscount
            totalDiscount *= discount
            # Log the flattened grid index of the state just left.
            stateNumber = (state[1]*grid_width + state[0])
            appendString += str(stateNumber)
            steps += 1
            if steps == stepsLimit:  # NOTE(review): stepsLimit is an external global
                break
            appendString += ','
        appendString += '],'
        print("AppendString ", appendString)
        f.write(appendString+"\n")
    # Immediately start the next episode (unbounded recursion — see docstring).
    runEpisode(agent, environment, discount, decision, display, message, pause, episode)
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()