Example #1
class AgentRealistic:

    def __init__(self, agent_host, agent_port, mission_type, mission_seed, solution_report, state_space_graph):
        """ Constructor for the realistic agent """
        self.AGENT_MOVEMENT_TYPE = 'Discrete'  # Can be one of {Absolute, Discrete, Continuous}
        self.AGENT_NAME = 'Realistic'
        self.AGENT_ALLOWED_ACTIONS = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"]

        self.agent_host = agent_host
        self.agent_port = agent_port
        self.mission_seed = mission_seed
        self.mission_type = mission_type
        self.state_space = None  # Note: a truly realistic agent cannot know anything about the state space a priori!
        self.solution_report = solution_report
        self.solution_report.setMissionType(self.mission_type)
        self.solution_report.setMissionSeed(self.mission_seed)
        self.last_reward = 0
        self.accumulative_reward = 0
        self.brain = Dqn(2, len(self.AGENT_ALLOWED_ACTIONS), 0.9)  # 2-element state signal, one output per allowed action, gamma = 0.9
        self.brain.load()


    #----------------------------------------------------------------------------------------------------------------#
    def __ExecuteActionForRealisticAgentWithNoisyTransitionModel__(self, idx_requested_action, noise_level):
        """ Creates a well-defined transition model with a certain noise level """
        n = len(self.AGENT_ALLOWED_ACTIONS)
        pp = noise_level/(n-1) * np.ones((n,1))
        pp[idx_requested_action] = 1.0 - noise_level
        idx_actual = np.random.choice(n, 1, p=pp.flatten()) # sample from the distribution of actions
        actual_action = self.AGENT_ALLOWED_ACTIONS[int(idx_actual)]
        self.agent_host.sendCommand(actual_action)
        return actual_action

    #----------------------------------------------------------------------------------------------------------------#
    def run_agent(self):
        """ Run the Realistic agent and log the performance and resource use """

        partialReward = 0
        #-- Load and initiate mission --#
        print('Generate and load the ' + self.mission_type + ' mission with seed ' + str(self.mission_seed) + ' allowing ' +  self.AGENT_MOVEMENT_TYPE + ' movements')
        mission_xml = init_mission(self.agent_host, self.agent_port, self.AGENT_NAME, self.mission_type, self.mission_seed, self.AGENT_MOVEMENT_TYPE)
        self.solution_report.setMissionXML(mission_xml)

        self.solution_report.start()
        time.sleep(1)

        state_t = self.agent_host.getWorldState()
        first = True
        xpos, ypos = 0, 0  # Defaults so the state signal is defined before the first observation arrives
        # -- Get a state-space model by observing the Oracle/GridObserver --#
        while state_t.is_mission_running:
            if first:
                time.sleep(2)
                first = False
            # -- Basic map --#
            state_t = self.agent_host.getWorldState()
            if state_t.number_of_observations_since_last_state > 0:
                msg = state_t.observations[-1].text  # Get the details for the last observed state
                oracle_and_internal = json.loads(msg)  # Parse the Oracle JSON
                grid = oracle_and_internal.get(u'grid', 1)
                xpos = oracle_and_internal.get(u'XPos', 1)
                zpos = oracle_and_internal.get(u'ZPos', 1)
                ypos = oracle_and_internal.get(u'YPos', 1)
                yaw = oracle_and_internal.get(u'Yaw', 1)  
                pitch = oracle_and_internal.get(u'Pitch', 1)

            #last_signal = [xpos, zpos, ypos, yaw, pitch]
            last_signal = [xpos, ypos]  # Two-element state signal fed to the DQN


            action = self.brain.update(self.last_reward, last_signal)
            print("Requested Action:", self.AGENT_ALLOWED_ACTIONS[action])
            self.__ExecuteActionForRealisticAgentWithNoisyTransitionModel__(action, 0.1)
            time.sleep(0.02)
            self.solution_report.action_count += 1
            for reward_t in state_t.rewards:
                partialReward += reward_t.getValue()
                #self.last_reward = reward_t.getValue()

                self.accumulative_reward += reward_t.getValue()
                self.solution_report.addReward(reward_t.getValue(), datetime.datetime.now())
                print("Reward_t:",reward_t.getValue())
                print("Cummulative reward so far:", self.accumulative_reward)

            print("Last Reward:{0}".format(partialReward))
            self.last_reward = partialReward
            partialReward = 0


        return
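
This and the following examples all drive the same Dqn agent through the same three calls: construct it with an input size (or shape), a number of actions, and a discount factor; call update(reward, signal) once per step to obtain the next action index; and load()/save() for checkpoints. A minimal sketch of the interface the snippets assume (the class lives in the project's own module; the parameter names here are assumptions, not the original source):

# Minimal sketch of the assumed Dqn interface -- not the original implementation.
class Dqn:
    def __init__(self, input_size, nb_action, gamma):
        self.input_size = input_size  # Size (or shape) of the state signal
        self.nb_action = nb_action    # Number of discrete actions available to the agent
        self.gamma = gamma            # Discount factor for future rewards

    def update(self, reward, signal):
        """Record the latest transition, train on a replay batch, and return the next action index."""
        ...

    def load(self):
        """Restore network weights from a checkpoint, if one exists."""
        ...

    def save(self):
        """Write the current network weights to a checkpoint."""
        ...
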
Example #2
last_reward = 0
memo = []
brain = Dqn((1, 80, 80), 3, 0.9)
condition = True
last_state = np.zeros((1, 80, 80))
counter = 0
steps_count = 0
mean_steps = 0
scores = []
up_pressed = False
dino = cv2.cvtColor(cv2.imread('game_over.png'), cv2.COLOR_BGR2GRAY)  # "Game over" template image
h, w = dino.shape[:2]  # OpenCV arrays are (rows, cols) = (height, width)
brain.load()
while condition:

    action = brain.update(last_reward, last_state)

    if action == 2:
        # Jump: release 'down', hold 'up'
        pyautogui.keyUp('down')
        pyautogui.keyDown('up')
    elif action == 0:
        # Do nothing: release both keys
        pyautogui.keyUp('up')
        pyautogui.keyUp('down')
    else:
        # Duck: release 'up', hold 'down'
        pyautogui.keyUp('up')
        pyautogui.keyDown('down')
    screen = np.array(ImageGrab.grab(bbox=(5, 155, 600, 296)))  # Grab the game region of the screen
    a = process_img(screen)
    proc_img = np.expand_dims(a, axis=0)  # Add the channel dimension -> (1, 80, 80)
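
The snippet stops before process_img is shown. Given that the network above was built for a (1, 80, 80) input, a plausible sketch of that helper (an assumption, not the original code) is a grayscale resize:

import cv2
import numpy as np

def process_img(screen):
    # Assumed helper: convert the RGB screen grab to grayscale, resize to 80x80,
    # and scale to [0, 1] so it matches the DQN's (1, 80, 80) input after expand_dims.
    gray = cv2.cvtColor(screen, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (80, 80))
    return small.astype(np.float32) / 255.0
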
Example #3
            ITE.append(iteration)
            save()
        time.sleep(0.5)


if __name__ == '__main__':
    try:
        address = ('192.168.43.166', 6666)  # Remote server (car) address
        readdr = ('192.168.43.76', 6666)  # Local PC address to bind
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(readdr)
        sensorResult = [0, 0, 0, 0, 0]
        reward = 0
        last_dis = 100
        action = train.update(reward, sensorResult)
        Time = time.strftime('%H:%M:%S', time.localtime(time.time()))
        recordfile = open('PCRecord.txt', 'w')
        recordfile.write( '_________\t' + 'L_Sen' + '\t' + 'M_Sen' + '\t' + 'R_Sen' \
            + '\t' + 'Dis' + '\t' + 'Rew' + '\t' + 'Act' + '\n')
        recordfile.close()
        recivedata = '0'.encode()
        iteration = 0
        try:
            load()  # Restore any previously saved training state
        except Exception:
            pass  # No saved state yet; start fresh
        threads = []
        learnThread = Process(target=learnCarState)  # Run the learning loop in a separate process
        learnThread.start()
        threads.append(learnThread)
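
The header row written above fixes a tab-separated layout (timestamp, three sensor readings, distance, reward, action). A hypothetical helper for appending one row per step in that same layout (not shown in the snippet, included only for illustration):

def record_step(time_str, sensors, dis, rew, act):
    # Hypothetical helper: append one tab-separated row matching the header columns.
    with open('PCRecord.txt', 'a') as f:
        f.write(time_str + '\t' + '\t'.join(str(v) for v in sensors[:3])
                + '\t' + str(dis) + '\t' + str(rew) + '\t' + str(act) + '\n')
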
Example #4
        time.sleep(0.1)
        # Trigger the right ultrasonic sensor with a 10 microsecond pulse
        gpio.output(trig_right, 1)
        time.sleep(0.00001)
        gpio.output(trig_right, 0)

        # Time the echo pulse
        while gpio.input(echo_right) == 0:
            pass
        start2 = time.time()
        while gpio.input(echo_right) == 1:
            pass
        stop2 = time.time()

        # Convert the echo time to a distance in cm
        dist_right = (stop2 - start2) * 17000

        last_signal = [dist_straight, dist_left, dist_right]  # Three distance readings fed to the DQN
        action = brain.update(last_reward, last_signal)

        print(action)
        if action == 0:
            action1()
        elif action == 1:
            action3()
        elif action == 2:
            action2()

        if dist_straight < 30 and action == 0:
            last_reward = -1  # Penalize action 0 when an obstacle is close ahead

        act(last_reward, last_signal)
        c = max(last_signal)
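
The trigger/echo pattern above is repeated once per ultrasonic sensor. A small helper in the same style (a sketch assuming the snippet's RPi.GPIO setup, imported as gpio) would keep the main loop shorter, e.g. dist_right = measure_distance(trig_right, echo_right):

import time
import RPi.GPIO as gpio

def measure_distance(trig_pin, echo_pin):
    # Hypothetical helper: fire a 10 microsecond trigger pulse, then time the echo pulse.
    gpio.output(trig_pin, 1)
    time.sleep(0.00001)
    gpio.output(trig_pin, 0)
    while gpio.input(echo_pin) == 0:
        pass
    start = time.time()
    while gpio.input(echo_pin) == 1:
        pass
    stop = time.time()
    # Pulse width times ~17000 (speed of sound in cm/s divided by 2), as in the snippet above.
    return (stop - start) * 17000
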
Example #5

#brain.load()
saved_grids = []
rewards = []
for i in range(1000):
    state = game.reset()
    reward = 0
    game_over = False
    sum_rewards = 0
    grids = []
    while not game_over:
        grids.append(np.array(state).reshape(6, 6))  # Keep a copy of the 6x6 board for inspection
        state = logger(state)
        reward2 = reward / 2048  # Scale the raw reward down before feeding it to the DQN
        action = brain.update(reward2, state)
        new_state, new_reward, game_over = game.step(int(action))
        state, reward = new_state, new_reward
        sum_rewards += reward
    rewards.append(sum_rewards)

    #if sum_rewards > 4999:
    #    print(np.array(grids))

    print(i, sum_rewards)

    #if i % 100 == 0:
    #    print("saving")
    #    brain.save()
    #    with open("rewards", "wb") as f:
    #        pickle.dump(rewards, f)