Example 1
# Standard imports used by this snippet; A2CAgent, Memory, and LOOKAHEAD
# come from the surrounding project.
import gym
import tensorflow as tf
from collections import deque


def run():
    env = gym.make('CartPole-v0')
    episode_num = 1
    episode_done = True
    a2c_agent = A2CAgent(env.action_space.n, env.observation_space.shape[0])
    memory = Memory()
    episode_reward = 1
    task_done = deque(maxlen=20)
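    # Repeat: collect an n-step rollout, then train the actor-critic on it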
    while True:
        if episode_done:
            state = env.reset()
            next_state = []  # used later to bootstrap the n-step return
            episode_done = False
            template = 'episode num {}  ends after {} time steps'
            print(template.format(episode_num, episode_reward))
            task_done.append(episode_reward)
            episode_num += 1
            episode_reward = 0
            if sum(task_done) / len(task_done) > 195:
                print("#### CONGRATULATIONS! TASK IS DONE ####")
                exit()

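        # Roll out up to LOOKAHEAD steps, storing states, actions, and rewards for the update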
        for i in range(LOOKAHEAD):
            env.render()

            state = tf.convert_to_tensor(state)
            state = tf.expand_dims(state, 0)
            action = a2c_agent.act(state)
            memory.store(s=state, a=action)
            state, reward, episode_done, _ = env.step(action)
            memory.store(r=reward)
            episode_reward += reward
            next_state = state
            if episode_done:
                next_state = []  # episode finished: no next state to bootstrap from
                break

        a2c_agent.prepare_train(memory, next_state)
        memory.clear()
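
The Memory buffer above comes from the surrounding project and is not shown here. A minimal sketch that matches only the calls made in this example (keyword-argument store() and clear()) might look like the following; the class body is an assumption, not the project's implementation.

# Hypothetical Memory buffer, reconstructed only from how it is called above.
class Memory:
    def __init__(self):
        self.states, self.actions, self.rewards = [], [], []

    def store(self, s=None, a=None, r=None):
        # Each field is optional so the state/action pair and the reward
        # can be recorded at different points within a step.
        if s is not None:
            self.states.append(s)
        if a is not None:
            self.actions.append(a)
        if r is not None:
            self.rewards.append(r)

    def clear(self):
        self.states, self.actions, self.rewards = [], [], []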
Example 2
    def run(self):
        total_step = 1
        mem = Memory()
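        # This worker trains a local model and periodically applies its gradients to the shared global model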
        # Loop for all the episodes
        while Worker.global_episode < args.max_eps:
            current_state = self.env.reset()

            # Rescale the raw observation from [mn_d, mx_d] into [new_mind, new_maxd]
            obs = current_state.clip(self.mn_d, self.mx_d)
            current_state = ((obs - self.mn_d) * (self.new_maxd - self.new_mind)
                             / (self.mx_d - self.mn_d)) + self.new_mind

            mem.clear()
            ep_reward = 0.
            ep_steps = 0
            self.ep_loss = 0
            time_count = 1
            # Loop through one episode, until done or reached maximum steps per episode
            for ep_t in range(args.max_step_per_ep):
                # Take action based on current state
                mu, sigma, _ = self.local_model(
                    tf.convert_to_tensor(current_state[None, :],
                                         dtype=tf.float32))
                # normal_dist backs the commented-out sampling variant below;
                # cov_matrix is currently unused.
                cov_matrix = np.diag(sigma[0])
                normal_dist = tfp.distributions.Normal(mu, tf.sqrt(sigma))
                # action = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0),
                #                           clip_value_min=-0.999999,
                #                           clip_value_max=0.999999)

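                # Deterministic action: clip the policy mean to the environment's action bounds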
                action = tf.clip_by_value(mu,
                                          clip_value_min=self.env.action_space.low,
                                          clip_value_max=self.env.action_space.high)

                # Receive new state and reward
                # print(action.numpy()[0])
                new_state, reward, done_game, _ = self.env.step(action.numpy()[0])
                obs = new_state.clip(self.mn_d, self.mx_d)
                new_state = ((obs - self.mn_d) * (self.new_maxd - self.new_mind)
                             / (self.mx_d - self.mn_d)) + self.new_mind

                # Treat reaching the per-episode step cap as the end of the episode
                done = done_game or ep_t == args.max_step_per_ep - 1

                reward = max(min(float(reward), 1.0), -10.0)  # clip reward to [-10, 1]
                ep_reward += reward

                mem.store(current_state, action, reward)

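                # Every update_freq steps, or at the end of the episode, sync with the global model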
                if time_count == args.update_freq or done:

                    # Compute the loss inside tf.GradientTape so that the
                    # local model's variables are tracked for the gradient.
                    with tf.GradientTape() as tape:
                        total_loss = self.compute_loss(done,
                                                       new_state,
                                                       mem,
                                                       args.gamma)

                    self.ep_loss += total_loss
                    # Calculate local gradients
                    grads = tape.gradient(total_loss, self.local_model.trainable_weights)
                    # Push local gradients to global model
                    try:
                        self.opt.apply_gradients(zip(grads,
                                                     self.global_model.trainable_weights))
                    except ValueError:
                        print("ValueError while applying gradients to the global model")

                    # Update local model with new weights
                    self.local_model.set_weights(self.global_model.get_weights())

                    mem.clear()
                    time_count = 0

                    if done:  # done and print information
                        Worker.global_moving_average_reward = \
                            record(Worker.global_episode, ep_reward, self.worker_idx,
                                   Worker.global_moving_average_reward, self.result_queue,
                                   self.ep_loss, ep_steps)
                        # We must use a lock to save our model and to print to prevent data races.
                        if ep_reward > Worker.best_score:
                            with Worker.save_lock:
                                print("Saving best model to {}, "
                                      "episode score: {}".format(self.save_dir, ep_reward))
                                self.global_model.save_weights(
                                    os.path.join(self.save_dir,
                                                 'model_{}.h5'.format(self.game_name))
                                )
                                Worker.best_score = ep_reward
                        Worker.global_episode += 1
                        ep_steps += 1
                        time_count += 1
                        total_step += 1
                        break
                ep_steps += 1

                time_count += 1
                current_state = new_state
                total_step += 1
        self.result_queue.put(None)
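
compute_loss above is defined elsewhere in this worker class and is not shown on this page. As a rough illustration of the kind of n-step actor-critic loss such an update loop expects, here is a standalone sketch; the function name, the (mu, sigma, value) model outputs, the Memory attribute names, and the gamma/entropy coefficients are assumptions rather than the project's code.

# Hypothetical n-step loss for a continuous-action A3C worker; NOT the
# project's compute_loss, only a sketch of the computation it stands for.
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp


def a3c_loss_sketch(model, done, new_state, memory, gamma=0.99, entropy_beta=0.01):
    # Bootstrap the return from the critic's value of the last state,
    # unless the episode has ended.
    if done:
        reward_sum = 0.0
    else:
        _, _, value = model(
            tf.convert_to_tensor(new_state[None, :], dtype=tf.float32))
        reward_sum = float(tf.squeeze(value))

    # Discounted n-step returns, accumulated backwards over the stored rewards.
    discounted = []
    for r in memory.rewards[::-1]:
        reward_sum = r + gamma * reward_sum
        discounted.append(reward_sum)
    discounted.reverse()

    mu, sigma, values = model(
        tf.convert_to_tensor(np.vstack(memory.states), dtype=tf.float32))
    returns = tf.convert_to_tensor(np.array(discounted)[:, None], dtype=tf.float32)
    advantage = returns - values

    # Critic term: squared advantage. Actor term: policy gradient on the
    # Gaussian log-probability, plus an entropy bonus for exploration.
    dist = tfp.distributions.Normal(mu, tf.sqrt(sigma))
    actions = tf.convert_to_tensor(np.vstack(memory.actions), dtype=tf.float32)
    log_prob = tf.reduce_sum(dist.log_prob(actions), axis=1, keepdims=True)
    entropy = tf.reduce_sum(dist.entropy(), axis=1, keepdims=True)
    policy_loss = -log_prob * tf.stop_gradient(advantage) - entropy_beta * entropy
    value_loss = tf.square(advantage)
    return tf.reduce_mean(policy_loss + value_loss)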
Example 3
from Memory import Memory
from Parser import Parser

if __name__ == '__main__':

    files = ["julia1.txt"]
    JuliaCode = "julia1.txt"
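    # Parse and execute the sample program, then clear the interpreter's Memory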
    parser = Parser(JuliaCode)
    print('Executing...----------------------------------------------------\n')
    print(format(JuliaCode))
    print('\nOutput')
    interpreter = parser.parse()
    interpreter.execute()

    Memory.clear()
    print('Test complete, no errors')
"""
Julia code samples to copy and paste into the julia1.txt file

Code 1

function a ( )
x = 1
while <= x 3
print ( x )
 x = + x 1
end
print ( 9999 )
end
"""