Example #1
    def play_step(self, epsilon=0.0):
        done_reward = None

        # Epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if np.random.random() < epsilon:
            action = self.env.action_space.sample()
        else:
            # Add a batch dimension and scale the pixel observation to [0, 1]
            state_a = np.expand_dims(np.array(self.state, copy=False, dtype=np.float32), 0) / 255.0
            state_v = tf.convert_to_tensor(state_a)
            if self.use_categorical:
                q_vals_v = self.net.q_values(state_v)
            else:
                q_vals_v = self.net(state_v)
            act_v = tf.math.argmax(q_vals_v, axis=1)
            action = int(act_v.numpy()[0])

        new_state, reward, is_done, _ = self.env.step(action)
        self.total_reward += reward

        exp = xp.Experience(self.state, action, reward, is_done)
        self.exp_buffer.append(exp)
        self.state = new_state
        if is_done:
            done_reward = self.total_reward
            self._reset()
        return done_reward
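The buffer entries above come from an `xp.Experience(...)` constructor that the listing does not show. A minimal sketch of what such a record and a deque-backed replay buffer could look like, assuming a plain 4-field namedtuple matching the call above (the real `xp` module may differ):

import collections
import numpy as np

# Hypothetical 4-field record matching xp.Experience(state, action, reward, is_done)
Experience = collections.namedtuple("Experience", ["state", "action", "reward", "done"])

class ExperienceBuffer:
    """Fixed-capacity replay buffer: append transitions, sample uniformly."""
    def __init__(self, capacity):
        self.buffer = collections.deque(maxlen=capacity)

    def append(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        indices = np.random.choice(len(self.buffer), batch_size, replace=False)
        return [self.buffer[i] for i in indices]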
Example #2
    def compute_experience(self):
        """
        Compute from the start date of project (i.e. first commit) until today
        all the experience of the developer based on their first commit date and
        when they left the project (if they left it).

        """
        curr_date = self.vcs_mgr.first_commit_repo + datetime.timedelta(1)
        end_date = datetime.date.today()
        while curr_date <= end_date:
            curr_xp = experience.Experience(curr_date)
            for dev in self.vcs_mgr.author_dict.values():
                if not dev.exclude:
                    curr_xp.process_dev(dev)
            self.experiences.append(curr_xp)
            curr_date = XPAnalyser.increment_date(curr_date, XPAnalyser.DEFAULT_INCREMENT)
        self.save_analyse()
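`XPAnalyser.increment_date` and `DEFAULT_INCREMENT` are not part of the listing. A minimal sketch under the assumption that the increment is a fixed number of days (the step size of 7 is a placeholder, not the project's actual value):

import datetime

class XPAnalyser:
    DEFAULT_INCREMENT = 7  # assumed step in days; the real project may use another value

    @staticmethod
    def increment_date(curr_date, increment):
        """Advance the analysis date by `increment` days."""
        return curr_date + datetime.timedelta(days=increment)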
Example #3
def simulate(model):

    # Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    discount_factor = 0.99

    rewards = []

    num_streaks = 0
    env.render()

    # Initialize experience replay object
    experience = exp.Experience(model, max_memory=max_memory)

    for n_episode in range(NUM_EPISODES):

        loss = 0.0

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)
        total_reward = 0
        n_episodes = 0
        envstate = env.render()
        envstate = resize(envstate, (10, 10))
        envstate = envstate.reshape((1, -1))

        for t in range(MAX_T):

            # Select an action
            action = select_action(envstate, model, explore_rate)
            prev_envstate = envstate

            # execute the action
            obv, reward, done, _ = env.step(action)

            # Observe the result
            state = state_to_bucket(obv)
            total_reward += reward
            envstate = env.render()
            envstate = resize(envstate, (10, 10))
            envstate = envstate.reshape((1, -1))

            # Store episode (experience)
            episode = [
                prev_envstate, action, reward, envstate,
                env.is_game_over()
            ]
            experience.remember(episode)
            n_episodes += 1

            # Setting up for the next iteration
            state_0 = state

            # Train neural network model
            inputs, targets = experience.get_data(data_size=data_size)

            h = model.fit(
                inputs,
                targets,
                epochs=8,
                batch_size=16,
                verbose=0,
            )
            loss = model.evaluate(inputs, targets, verbose=0)

            # Print data
            template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d} | Win count: {:d} | Win rate: {:.3f}"
            print(
                template.format(n_episode, NUM_EPISODES - 1, loss, n_episodes,
                                sum(win_history), win_rate, t))
            # we simply check if training has exhausted all free cells and if in all
            # cases the agent won
            if win_rate > 0.9: epsilon = 0.05
            if sum(win_history[-hsize:]) == hsize and completion_check(
                    model, qmaze):
                print("Reached 100%% win rate at epoch: %d" % (epoch, ))
                break

            # Render the maze
            if RENDER_MAZE:
                env.render()

            if REALTIME_RENDERING:
                time.sleep(0.1)

            if env.is_game_over():
                sys.exit()

            if done:
                rewards.append(total_reward)
                print(
                    "Episode %d finished after %f time steps with total reward = %f (streak %d)."
                    % (n_episode, t, total_reward, num_streaks))

                if t <= SOLVED_T:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break

            elif t >= MAX_T - 1:
                print("Episode %d timed out at %d with total reward = %f." %
                      (n_episode, t, total_reward))

        # Update parameters
        explore_rate = get_explore_rate(n_episode)
        learning_rate = get_learning_rate(n_episode)

    plt.plot(rewards)
    plt.title('Episode rewards')
    plt.xlabel('n_episode')
    plt.ylabel('Reward')
    plt.show()
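`exp.Experience` above is only exercised through `remember(episode)` and `get_data(data_size=...)`. A minimal sketch of a replay class with that interface, assuming a Keras-style model with `predict` and `output_shape` and a discount factor (the project's own class may differ):

import numpy as np

class Experience:
    """Replay memory: store transitions and build (inputs, targets) Q-learning batches."""
    def __init__(self, model, max_memory=100, discount=0.95):
        self.model = model
        self.max_memory = max_memory
        self.discount = discount
        self.memory = []

    def remember(self, episode):
        # episode = [prev_envstate, action, reward, envstate, game_over]
        self.memory.append(episode)
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_data(self, data_size=10):
        env_size = self.memory[0][0].shape[1]
        num_actions = self.model.output_shape[-1]
        data_size = min(len(self.memory), data_size)
        inputs = np.zeros((data_size, env_size))
        targets = np.zeros((data_size, num_actions))
        for i, j in enumerate(np.random.choice(len(self.memory), data_size, replace=False)):
            prev_envstate, action, reward, envstate, game_over = self.memory[j]
            inputs[i] = prev_envstate[0]
            # Current Q estimates, then a one-step TD update on the taken action
            targets[i] = self.model.predict(prev_envstate, verbose=0)[0]
            q_next = np.max(self.model.predict(envstate, verbose=0)[0])
            targets[i, action] = reward if game_over else reward + self.discount * q_next
        return inputs, targets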
Example #4
File: main.py Project: pqhuy98/DQN
from random import random
from numpy.random import randint as rdi

max_explore_rate = 0.40
min_explore_rate = 0.10
epoch = 1000000
height = 32
width = 32
channel = 4
xp_capacity = 1000
xp_nb_batch = 32
play_interval = 1
save_interval = 1000

game = game.Game()
net = model.CNN(height, width, channel, game.actions, "version01")
exp = xp.Experience(xp_capacity, game, channel, net)

for i in range(epoch):
    print "Iteration", i
    #    if (i%play_interval==play_interval-1) :
    #        explore_rate = 0.0
    #    else :
    #        explore_rate = min_explore_rate+(epoch-i)*1.0/epoch*(max_explore_rate-min_explore_rate)
    explore_rate = 0.00
    step = 0
    while True:
        step += 1
        x = random()
        if (x < explore_rate):
            action = rdi(0, game.actions)
        else:
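The greedy branch of the loop does not appear above. A minimal epsilon-greedy sketch in the same style, assuming a hypothetical `q_values(state)` method on the network (the CNN's actual interface is not shown in the listing):

from random import random
from numpy.random import randint as rdi
import numpy as np

def choose_action(net, state, n_actions, explore_rate):
    """Epsilon-greedy: random action with probability explore_rate, else argmax of Q-values."""
    if random() < explore_rate:
        return rdi(0, n_actions)
    return int(np.argmax(net.q_values(state)))  # q_values is an assumed method name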
Example #5
"""
##if __name__ == "__main__":
##    p = int(sys.argv[1])
##    m = int(sys.argv[2])
"""
p = 5
m = 10

assert m > 0, "The number of markers must be greater than 0"
assert p <= m, "The number of positive markers must be less than or equal to the number of markers"

exp = experience.Experience(p,m)
markers = exp.get_markers()
positive = exp.get_positive_markers()

print("Markers: %s" % (markers))
print("Positive markers: %s" % (positive))

# test strategy 1
cpt = 0
print("Negative markers: %s" % (negative_markers1(markers,positive)))
print("Nb. comparisons: %d" % (cpt))

# test strategy 2
cpt = 0
print("Negative markers: %s" % (negative_markers2(markers,positive)))
print("Nb. comparisons: %d" % (cpt))
Example #6
def simulate(model):

    # Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = 0.1
    discount_factor = 0.99

    rewards = []

    num_streaks = 0
    env.render()

    # Initialize experience replay object
    experience = exp.Experience(model, max_memory=max_memory)

    for n_episode in range(NUM_EPISODES):

        loss = 0.0
        if n_episode > 20:
            explore_rate = 0.05

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)
        total_reward = 0
        n_episodes = 0
        envstate = env.render()
        envstate = resize(envstate, (10, 10))
        envstate = envstate.reshape((1, -1))

        for t in range(MAX_T):

            prev_envstate = envstate
            # Get next action
            action = select_action(prev_envstate, model, explore_rate)

            # execute the action
            obv, reward, done, _ = env.step(action)

            # Observe the result
            state = state_to_bucket(obv)
            total_reward += reward
            envstate = env.render()
            envstate = resize(envstate, (10, 10))
            envstate = envstate.reshape((1, -1))

            # Store episode (experience)
            episode = [prev_envstate, action, reward, envstate, env.is_game_over()]
            experience.remember(episode)
            n_episodes += 1

            # Setting up for the next iteration
            state_0 = state

            # Train neural network model
            inputs, targets = experience.get_data(data_size=data_size)

            h = model.fit(
                inputs,
                targets,
                epochs=8,
                batch_size=64,
                verbose=0,
            )

            loss = model.evaluate(inputs, targets, verbose=0)

            # Render the maze
            if RENDER_MAZE:
                env.render()

            if REALTIME_RENDERING:
                time.sleep(0.1)

            if env.is_game_over():
                sys.exit()

            if done:
                rewards.append(total_reward)
                print("Episode %d finished after %f time steps with total reward = %f (streak %d)."
                      % (n_episode, t, total_reward, num_streaks))

                if t <= SOLVED_T:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break

            elif t >= MAX_T - 1:
                print("Episode %d timed out at %d with total reward = %f."
                      % (n_episode, t, total_reward))

        # Print data
        template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d}"
        print(template.format(n_episode, NUM_EPISODES - 1, loss, n_episodes))

        # It's considered done when it's solved over 100 times consecutively
        if num_streaks > STREAK_TO_END:
            break

        # Update parameters
        explore_rate = get_explore_rate(n_episode)
        learning_rate = get_learning_rate(n_episode)

    plt.plot(rewards)
    plt.title('Episode rewards')
    plt.xlabel('n_episode')
    plt.ylabel('Reward')
    plt.show()
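`get_explore_rate` and `get_learning_rate` are not included in the listing. A minimal sketch, assuming the logarithmic decay commonly used in gym Q-learning examples (the bounds and the decay constant are assumptions, not the project's values):

import math

MIN_EXPLORE_RATE = 0.01
MIN_LEARNING_RATE = 0.1
DECAY_CONSTANT = 25.0  # assumed number of episodes over which the rates decay

def get_explore_rate(episode):
    """Decay exploration from 1.0 toward MIN_EXPLORE_RATE as episodes progress."""
    return max(MIN_EXPLORE_RATE, min(1.0, 1.0 - math.log10((episode + 1) / DECAY_CONSTANT)))

def get_learning_rate(episode):
    """Decay the learning rate on the same logarithmic schedule."""
    return max(MIN_LEARNING_RATE, min(0.5, 1.0 - math.log10((episode + 1) / DECAY_CONSTANT)))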