Example #1
    if train_model:
        trainer.write_text(summary_writer, 'Hyperparameters', options, steps)
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
            trainer.reset_buffers(info, total=True)
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name, steps, normalize)
        info = new_info
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to TensorBoard.
            trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
        if steps % save_freq == 0 and steps != 0 and train_model:
            # Save TensorFlow model
            save_model(sess, model_path=model_path, steps=steps, saver=saver)
        if train_model:
            steps += 1
            sess.run(ppo_model.increment_step)
            if len(trainer.stats['cumulative_reward']) > 0:
                mean_reward = np.mean(trainer.stats['cumulative_reward'])
                sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
                last_reward = sess.run(ppo_model.last_reward)
    # Final save of the TensorFlow model
    if steps != 0 and train_model:
        save_model(sess, model_path=model_path, steps=steps, saver=saver)
env.close()
# Strip platform extensions from the environment name and export the trained graph
graph_name = (env_name.strip()
              .replace('.app', '').replace('.exe', '')
              .replace('.x86_64', '').replace('.x86', ''))
graph_name = os.path.basename(os.path.normpath(graph_name))
export_graph(model_path, graph_name)
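For later reuse, here is a minimal sketch (not part of the example above; it assumes model_path is the same checkpoint directory used by save_model) of restoring the saved TensorFlow 1.x checkpoint:

# Sketch: restore the latest checkpoint written by save_model() above.
import tensorflow as tf

with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(model_path)
    # Rebuild the graph from the .meta file, then load the trained weights.
    saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
    saver.restore(sess, ckpt.model_checkpoint_path)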
Example #2
        sess.run(init)
    steps = sess.run(ppo_model.global_step)
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model)[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations,
                      use_states)
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model)[brain_name]
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name)
        info = new_info
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to TensorBoard.
            trainer.write_summary(summary_writer, steps)
        if steps % save_freq == 0 and steps != 0 and train_model:
            # Save TensorFlow model
            save_model(sess, model_path=model_path, steps=steps, saver=saver)
        steps += 1
        sess.run(ppo_model.increment_step)
    # Final save of the TensorFlow model
    if steps != 0 and train_model:
        save_model(sess, model_path=model_path, steps=steps, saver=saver)
env.close()
export_graph(model_path, env_name)
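Both loops start from an already-opened environment and brain name. A minimal sketch of that setup, assuming the unityagents package shipped with the same early ML-Agents releases (env_name is a placeholder path to a built Unity environment):

# Sketch of the setup the loops above assume.
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name=env_name)
brain_name = env.external_brain_names[0]  # first externally controlled brain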
Example #3
if env.global_done:  # assumed guard; the excerpt starts inside the episode-reset branch
    trainer.reset_buffers(info, total=True)
# Decide and take an action
info = trainer.take_action(info,
                           env,
                           brain_name,
                           steps,
                           normalize,
                           stochastic=True)
trainer.process_experiences(info, time_horizon, gamma, lambd)
if len(trainer.training_buffer['actions']) > buffer_size and train_model:
    # Perform gradient descent with experience buffer
    trainer.update_model(batch_size, num_epoch)
if steps % summary_freq == 0 and steps != 0 and train_model:
    # Write training statistics to TensorBoard.
    trainer.write_summary(summary_writer, steps, episode_number)
if steps % save_freq == 0 and steps != 0 and train_model:
    # Save TensorFlow model
    save_model(sess, model_path=model_path, steps=steps, saver=saver)
if train_model:
    steps += 1
    sess.run(ppo_model.increment_step)
    if len(trainer.stats['cumulative_reward']) > 0:
        mean_reward = np.mean(trainer.stats['cumulative_reward'])
        sess.run(ppo_model.update_reward,
                 feed_dict={ppo_model.new_reward: mean_reward})
        last_reward = sess.run(ppo_model.last_reward)
if not watcher_started and render:
    watcher = threading.Thread(target=watch,
                               args=(tf.get_default_session(),))
    watcher.start()
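The last lines start a background watcher thread at most once per run. A self-contained sketch of that start-only-once pattern (the watch body below is a placeholder, not the original rendering function):

# Sketch of the "start the watcher only once" pattern used above.
import threading
import time

watcher_started = False

def watch(session):
    # Placeholder body; the original watcher renders the environment.
    while True:
        time.sleep(1.0)

if not watcher_started:
    watcher = threading.Thread(target=watch, args=(None,), daemon=True)
    watcher.start()
    watcher_started = True  # later iterations will not spawn a second thread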