Example #1
        cumulated_reward = 0  # should going forward give more reward than L/R?
        print("Episode = " + str(x))
        observation = env.reset()
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        #render()
        env.render()

        state = ''.join(map(str, observation))

        for i in range(1000):

            # Pick an action based on the current state
            action = qlearn.chooseAction(state)
            #print ("Action Chosen"+str(action))
            # Execute the action and get feedback
            observation, reward, done, info = env.step(action)
            cumulated_reward += reward
            #print ("Reward="+str(reward))
            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))

            qlearn.learn(state, action, reward, nextState)

            #env.monitor.flush(force=True)

            # assumed completion: the listing breaks off after this condition
            if not done:
                state = nextState
            else:
                break
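The snippets on this page all call qlearn.chooseAction(state) and qlearn.learn(state, action, reward, next_state) without showing the agent itself. A minimal tabular, epsilon-greedy Q-learner with matching signatures could look like the following sketch; the class body, the dict-based Q-table, and the default hyperparameters are assumptions, not code from the listings.

    import random

    class QLearn:
        """Minimal tabular Q-learner (sketch; names and defaults assumed)."""
        def __init__(self, actions, epsilon=0.9, alpha=0.2, gamma=0.8):
            self.q = {}             # Q-table: (state, action) -> value
            self.actions = actions  # discrete action ids
            self.epsilon = epsilon  # exploration rate, decayed by the caller
            self.alpha = alpha      # learning rate
            self.gamma = gamma      # discount factor

        def getQ(self, state, action):
            return self.q.get((state, action), 0.0)

        def chooseAction(self, state):
            # Epsilon-greedy: random action with probability epsilon,
            # otherwise the greedy action for this state.
            if random.random() < self.epsilon:
                return random.choice(self.actions)
            q_values = [self.getQ(state, a) for a in self.actions]
            return self.actions[q_values.index(max(q_values))]

        def learn(self, state, action, reward, next_state):
            # One-step Q-learning update:
            # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
            best_next = max(self.getQ(next_state, a) for a in self.actions)
            old = self.getQ(state, action)
            self.q[(state, action)] = old + self.alpha * (
                reward + self.gamma * best_next - old)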
Example #2

        # for each episode, we test the robot for nsteps
        for t in itertools.count():
            state_ = np.zeros(2)
            #state_[0] = int(np.digitize(state[2],vertical_bin))# z first
            state_[0] = int(np.digitize(state[0], horizontal_bins[0]))
            state_[1] = int(np.digitize(state[1], horizontal_bins[1]))

            #Clip the state
            for j in range(2):
                if state_[j] < 0:
                    state_[j] = 0
                elif state_[j] > env.shape[j] - 1:
                    state_[j] = env.shape[j] - 1

            # Pick an action based on the current state
            action = qlearn.chooseAction(tuple(state_))

            # Execute the action in the environment and get feedback
            next_state, reward, done, info = env.step(action)

            cumulated_reward += reward
            next_state_ = np.zeros(2)

            #next_state_[0] = int(np.digitize(next_state[2],vertical_bin)) # z first
            next_state_[0] = int(np.digitize(next_state[0],
                                             horizontal_bins[0]))
            next_state_[1] = int(np.digitize(next_state[1],
                                             horizontal_bins[1]))
            for j in range(2):
                if next_state_[j] < 0:
                    next_state_[j] = 0
                elif next_state_[j] > env.shape[j] - 1:
                    # mirror the clipping applied to state_ above
                    # (assumed completion; the listing breaks off here)
                    next_state_[j] = env.shape[j] - 1
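The discretization above leans on numpy.digitize, which returns the index of the bin each value falls into; values below the first edge map to 0 and values above the last edge map to len(edges), which is exactly why both loops clip the indices into [0, env.shape[j] - 1]. A standalone check (the edges here are made up for illustration):

    import numpy as np

    edges = np.linspace(-1.0, 1.0, 5)   # [-1.0, -0.5, 0.0, 0.5, 1.0]
    print(np.digitize(-2.0, edges))     # 0 -> below every edge
    print(np.digitize(0.1, edges))      # 3 -> between 0.0 and 0.5
    print(np.digitize(2.0, edges))      # 5 -> above every edge, needs clipping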
Example #3
    for x in range(total_episodes):
        done = False

        cumulated_reward = 0  # should going forward give more reward than L/R?

        observation = env.reset()

        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        step_counter = 0

        for i in range(1500):
            step_counter += 1
            binarized_observation = binarize_observation(observation)
            action = qlearn.chooseAction(binarized_observation)
            newObservation, reward, done, info = env.step(action)
            binarized_new_observation = binarize_observation(newObservation)
            cumulated_reward += reward

            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            qlearn.learn(binarized_observation, action, reward,
                         binarized_new_observation)
            if not done:
                observation = newObservation
            else:
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
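Example #3 depends on a binarize_observation helper that the listing does not include. For a laser-scan observation it plausibly thresholds each range reading into 0/1 so the state becomes hashable for the Q-table; the body and the threshold below are assumptions, only the name comes from the listing.

    def binarize_observation(observation, threshold=0.85):
        # Hypothetical helper: 1 = obstacle closer than threshold, 0 = clear.
        # A tuple is hashable, so it can serve as a Q-table key.
        return tuple(1 if r < threshold else 0 for r in observation)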
Example #4

    for t in range(total_steps):

            step_counter += 1

            image, laser = observation
            hint_pos = getTargetPoints(image)
            if hint_pos == 0:
                hint_pos = getHintPoints(image)

            height, width, depth = image.shape
            image_action = get_image_action(width, hint_pos)
            laser_action = get_expert_action(laser)

            state = threshold_laser(laser, threshold_value)

            rl_choice = qlearn.chooseAction(state)

            if rl_choice == 1:
                action = 20 - image_action
            else:
                action = laser_action

            next_observation, reward, done, info = env.step(action)

            cumulated_reward += reward
            next_image, next_laser = next_observation
            next_state = threshold_laser(next_laser, threshold_value)

            qlearn.learn(state, rl_choice, reward, next_state)

            # assumed completion: the listing breaks off after this condition
            if not done:
                observation = next_observation
            else:
                break
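In this last snippet the learner arbitrates between two controllers instead of picking a motor command itself: rl_choice == 1 trusts the camera (action = 20 - image_action, i.e. the image-derived index is mirrored across what appears to be a 21-action range), and any other choice defers to the laser expert. None of the helpers (getTargetPoints, getHintPoints, get_image_action, get_expert_action, threshold_laser) are shown; the get_image_action below is a purely hypothetical reconstruction consistent with the call site.

    def get_image_action(width, hint_pos, n_actions=21):
        # Hypothetical: map the target's horizontal pixel position to one
        # of n_actions discrete steering bins; steer straight if nothing
        # was detected.
        if not hint_pos:
            return n_actions // 2
        x = float(hint_pos) / width        # normalize to [0, 1)
        return min(int(x * n_actions), n_actions - 1)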