def runEpisode(agent, environment, discount, decision, display, message, pause, episode, testFlag=0, foldN=0):
    """Run a single episode and return (returns, reward, AOP, explore).

    returns  -- discounted cumulative reward over the episode
    reward   -- reward from the final environment.doAction call (0 if none)
    AOP, explore -- extra values from the final doAction call (None if none)
    testFlag -- 0 runs a training episode (agent is updated); any other
                value runs a test episode (no learning).
    foldN    -- fold selector forwarded to environment.reset.
    """
    returns = 0
    totalDiscount = 1.0
    # Defaults so the terminal-state return is well defined even when the
    # episode is over before a single action is taken (was a NameError).
    reward, AOP, explore = 0, None, None
    environment.reset(testFlag, foldN)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        # display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            # returns: accumulative rewards, reward: last step's reward
            return returns, reward, AOP, explore
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward, AOP, explore = environment.doAction(action)
        if testFlag == 0:  # training episode
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
        else:  # testing episode: no learning update
            pass
        returns += reward * totalDiscount
        totalDiscount *= discount
def runEpisode(agent, qtype, environment, discount, decision, display, message, pause, episode):
    """Run one gridworld episode and return the discounted return.

    qtype is forwarded to environment.doAction, which may substitute the
    action actually taken (action2); the learner observes action2, not the
    action originally specified. Environments may return a per-component
    reward tuple, which is summed before discounting.
    """
    ###########################
    # GET THE GRIDWORLD
    ###########################
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    # for state in mdp.getStates():
    #     display(state)
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward, action2 = environment.doAction(action, qtype)
        message("Started in state: "+str(state)+
                "\nSpecified action: "+str(action)+
                "\nTook action: "+str(action2)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER with the action actually taken (action2)
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action2, nextState, reward, environment)
        # Collapse component-wise reward tuples into a scalar.
        if isinstance(reward, tuple):
            reward = sum(reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode and return the discounted sum of rewards.

    agent       -- learner; optional hooks startEpisode / observeTransition /
                   stopEpisode are invoked only when the agent defines them
    environment -- provides reset / getCurrentState / getPossibleActions / doAction
    discount    -- per-step discount factor
    decision    -- callable mapping a state to an action
    display, message, pause -- UI callbacks
    episode     -- episode number, used only in messages
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        display(state)
        pause()
        # END IF IN A TERMINAL STATE
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception("Error: Agent returned None action")
        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        message(
            "Started in state: " + str(state) +
            "\nTook action: " + str(action) +
            "\nEnded in state: " + str(nextState) +
            "\nGot reward: " + str(reward) + "\n"
        )
        # UPDATE LEARNER
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode and return the discounted sum of rewards.

    The agent's optional hooks (startEpisode, observeTransition,
    stopEpisode) are invoked only when the agent defines them; the
    environment drives state, legal actions, and transitions.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if "startEpisode" in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # Display current state
        state = environment.getCurrentState()
        display(state)
        pause()
        # End if in a terminal state (no legal actions)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0:
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            return returns
        # Get action (usually from agent)
        action = decision(state)
        if action is None:  # fix: identity comparison with None per PEP 8
            raise Exception("Error: Agent returned None action")
        # Execute action
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) + "\nTook action: " + str(action) + "\nEnded in state: " + str(nextState) + "\nGot reward: " + str(reward) + "\n")
        # Update learner
        if "observeTransition" in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if "stopEpisode" in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Drive one episode: show each state, ask the decision function for an
    action, step the environment, and feed the transition to the agent.
    Returns the discounted sum of rewards collected over the episode."""
    total = 0
    weight = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        # Show the state we are about to act from.
        state = environment.getCurrentState()
        display(state)
        pause()
        # A state with no legal actions is terminal: report and stop.
        if not environment.getPossibleActions(state):
            message("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(
                total) + "\n")
            return total
        # Ask the decision function (usually the agent) what to do.
        action = decision(state)
        if action == None:
            raise Exception('Error: Agent returned None action')
        # Apply the action and report the full transition.
        nextState, reward = environment.doAction(action)
        message("Started in state: " + str(state) +
                "\nTook action: " + str(action) +
                "\nEnded in state: " + str(nextState) +
                "\nGot reward: " + str(reward) + "\n")
        # Let a learning agent observe what just happened.
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        total += reward * weight
        weight *= discount
    # Unreachable: the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode,
               rates, offset, pmNum, locks, pllock, q, plinfo):
    """Run one episode for a single VM and return (returns, offset).

    returns -- discounted sum of rewards for the episode
    offset  -- input offset advanced by one per step taken (callers use it
               to index into `rates`; only the commented-out debug print
               reads `rates` here)
    The lock/queue arguments (locks, pllock, q, plinfo) and pmNum are
    passed straight through to environment.doAction.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    # message("BEGINNING EPISODE: "+str(episode)+"\n")
    pm = environment.baeplatform.pm
    while True:
        # END IF IN A TERMINAL STATE
        if environment.isTerminal():
            message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
            return returns, offset
        # print rates[offset%len(rates)]
        # CURRENT STATE
        state = environment.getCurrentState()
        pause()
        offset += 1
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward = environment.doAction(state, action, pmNum, locks, pllock, q, plinfo)
        message("VM " + str(pm.id) + " Started in state: "+str(state)+
                "\nTook action: "+str(action)+
                "\nEnded in state: "+str(nextState)+
                "\nGot reward: "+str(reward)+"\n")
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward)
        returns += reward * totalDiscount
        totalDiscount *= discount
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
# Final optimizer step of the preceding (not visible) setup/warm-up code.
optimizer.step()
begin = time()  # wall-clock start of the training run
best_result = 0.0
# Main training loop: one iteration per episode; lidar scans are the state.
for i_episode in range(n_episodes):
    # NOTE(review): a log handle is opened every episode but not closed in
    # this chunk — confirm it is closed further down.
    logs = open(log_path, 'a')
    # Initialize the environment and state
    env.start()
    #cur_state = transform(torch.from_numpy(env.getLidar())).unsqueeze(0).to(device)
    # State = raw lidar reading as a (1, ...) tensor on the target device.
    cur_state = torch.from_numpy(env.getLidar()).unsqueeze(0).to(device)
    for t in count():
        # Select and perform an action
        action = select_action(cur_state)
        reward, done = env.doAction(action)
        reward = torch.tensor([reward], device=device)
        # Observe new state
        #next_state = transform(torch.from_numpy(env.getLidar())).unsqueeze(0).to(device)
        next_state = torch.from_numpy(env.getLidar()).unsqueeze(0).to(device)
        if done:
            # Terminal transitions are stored with next_state = None.
            next_state = None
        # Store the transition in memory
        memory.push(cur_state, action, next_state, reward)
        # Perform one step of the optimization (on the target network)
        optimize_model()
        # NOTE(review): chunk ends here — the state advance / episode break
        # presumably follow but are outside this view.
def runEpisode(agent, environment, discount, decision, display, message, pause, episode,
               update=True, bounded=False):
    """Run one episode and return (timestep, returns).

    update=True drives an n-step agent: on reaching a terminal state the
    agent keeps receiving (state, None, None, None) flush updates until its
    should_end_episode() hook reports done. update=False is the plain-agent
    path, which terminates on a terminal state or, when bounded is set,
    after MAX_TIMESTEPS steps. display may be None to skip rendering.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset(agent)
    if 'startEpisode' in dir(agent):
        agent.startEpisode(environment.getCurrentState())
    # message("BEGINNING EPISODE: "+str(episode)+"\n")
    timestep = 0
    MAX_TIMESTEPS = 40
    while True:
        # DISPLAY CURRENT STATE
        state = environment.getCurrentState()
        if display is not None:
            display(state)
        # The n-step agent decides when the episode is really over.
        if 'should_end_episode' in dir(agent) and agent.should_end_episode():
            if 'stopEpisode' in dir(agent):
                agent.stopEpisode()
            return (timestep, returns)
        # END IF IN A TERMINAL STATE (or time limit exceeded, when bounded)
        actions = environment.getPossibleActions(state)
        if len(actions) == 0 or (bounded and timestep >= MAX_TIMESTEPS):
            if update and len(actions) == 0:
                # Terminal state with an n-step agent: flush pending updates
                # and loop until should_end_episode() fires.
                # NOTE(review): loops forever if the agent never signals done.
                agent.update(state, None, None, None, update)
                continue
            elif not update:
                # Plain agent: terminate on goal state or time exceeded.
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                if 'stopEpisode' in dir(agent):
                    agent.stopEpisode()
                return (timestep, returns)
            # NOTE(review): when update=True and only the time bound was hit,
            # execution falls through here and the episode keeps running.
        # GET ACTION (USUALLY FROM AGENT)
        action = decision(state)
        if action is None:
            # Fix: raising a plain string is a TypeError in Python 3.
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState, reward = environment.doAction(action)
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(state, action, nextState, reward, update)
        returns += reward * totalDiscount
        totalDiscount *= discount
        timestep += 1
    # NOTE(review): unreachable — the loop above only exits via return.
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()
def runEpisode(agent, environment, discount, decision, display, message, pause, episode):
    """Run one episode, logging visited flattened state indices to trajectories.txt.

    After stepsLimit steps (a module-level global — TODO confirm the caller
    defines it), the visited-state string is appended to the trajectory file
    and a fresh episode is started recursively.
    NOTE(review): the recursion never terminates on its own; a terminal
    state (no legal actions) is the only way out, via the early return.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    message("BEGINNING EPISODE: "+str(episode)+"\n")
    appendString = '['
    steps = 0
    grid_width = 10
    # with-statement fixes the handle leak on the early terminal return.
    with open("trajectories.txt", "a") as f:
        while True:
            # DISPLAY CURRENT STATE
            state = environment.getCurrentState()
            display(state)
            pause()
            # END IF IN A TERMINAL STATE
            actions = environment.getPossibleActions(state)
            if len(actions) == 0:
                message("EPISODE "+str(episode)+" COMPLETE: RETURN WAS "+str(returns)+"\n")
                return returns
            # GET ACTION (USUALLY FROM AGENT)
            action = decision(state)
            if action is None:
                # Fix: raising a plain string is a TypeError in Python 3.
                raise Exception('Error: Agent returned None action')
            # EXECUTE ACTION ('stay' leaves the state unchanged)
            nextState = state
            reward = 0  # fix: was unbound (or stale) when action == 'stay'
            if action != 'stay':
                nextState, reward = environment.doAction(action)
            message("Started in state: "+str(state)+
                    "\nTook action: "+str(action)+
                    "\nEnded in state: "+str(nextState)+
                    "\nGot reward: "+str(reward)+"\n")
            # UPDATE LEARNER
            if 'observeTransition' in dir(agent):
                agent.observeTransition(state, action, nextState, reward)
            returns += reward * totalDiscount
            totalDiscount *= discount
            # Log the flattened grid index of the state just left.
            stateNumber = (state[1]*grid_width + state[0])
            appendString += str(stateNumber)
            steps += 1
            if steps == stepsLimit:  # NOTE(review): stepsLimit is an external global
                break
            appendString += ','
        appendString += '],'
        print("AppendString ", appendString)
        f.write(appendString+"\n")
    # Immediately start the next episode (unbounded recursion — see docstring).
    runEpisode(agent, environment, discount, decision, display, message, pause, episode)
    if 'stopEpisode' in dir(agent):
        agent.stopEpisode()