def setup_ignite(
    engine: Engine,
    params: SimpleNamespace,
    exp_source,
    run_name: str,
    extra_metrics: Iterable[str] = (),
):
    """Attach episode/FPS handlers, console logging and TensorBoard output.

    Args:
        engine: ignite Engine driving the training loop.
        params: hyper-parameters namespace; uses ``stop_reward`` and ``run_name``.
        exp_source: ptan experience source that episode statistics come from.
        run_name: suffix appended to the TensorBoard log directory name.
        extra_metrics: extra metric names to write under the "train" tag.
    """
    # Suppress ignite's "missing metrics" UserWarning noise.
    warnings.simplefilter("ignore", category=UserWarning)
    # Fires episode-end events and raises BOUND_REWARD_REACHED once the
    # average reward crosses params.stop_reward.
    handler = ptan_ignite.EndOfEpisodeHandler(
        exp_source, bound_avg_reward=params.stop_reward)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        # Per-episode console progress line.
        passed = trainer.state.metrics.get("time_passed", 0)
        print(
            "Episode %d: reward=%.2f, steps=%s, "
            "speed=%.1f f/s, elapsed=%s"
            % (
                trainer.state.episode,
                trainer.state.episode_reward,
                trainer.state.episode_steps,
                trainer.state.metrics.get("avg_fps", 0),
                timedelta(seconds=int(passed)),
            )
        )

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        passed = trainer.state.metrics["time_passed"]
        print(
            "Game solved in %s, after %d episodes "
            "and %d iterations!"
            % (timedelta(seconds=int(passed)),
               trainer.state.episode, trainer.state.iteration)
        )
        # Stop the training loop once the reward bound is reached.
        trainer.should_terminate = True

    # FIX: drop ":" from the ISO timestamp -- it is not a valid character in
    # Windows directory names (the sibling setup_ignite variants do the same).
    now = datetime.now().isoformat(timespec="minutes").replace(":", "")
    logdir = f"runs/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v["loss"])
    run_avg.attach(engine, "avg_loss")

    # Per-episode metrics, written on every EPISODE_COMPLETED event.
    metrics = ["reward", "steps", "avg_reward"]
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ["avg_loss", "avg_fps"]
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(
        tag="train", metric_names=metrics,
        output_transform=lambda a: a
    )
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
def setup_ignite(engine: Engine, params: SimpleNamespace, exp_source,
                 run_name: str, extra_metrics: Iterable[str] = ()):
    """Wire console progress output and TensorBoard logging onto *engine*.

    Args:
        engine: ignite Engine running training.
        params: hyper-parameters; ``stop_reward`` and ``run_name`` are used.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        extra_metrics: additional metric names logged under the "train" tag.
    """
    # Silence ignite's "missing metrics" UserWarning.
    warnings.simplefilter("ignore", category=UserWarning)

    # Emits episode-end events (it listens on ITERATION_COMPLETED itself) and
    # signals BOUND_REWARD_REACHED when avg reward hits params.stop_reward.
    end_handler = ptan_ignite.EndOfEpisodeHandler(
        exp_source, bound_avg_reward=params.stop_reward)
    end_handler.attach(engine)
    fps_handler = ptan_ignite.EpisodeFPSHandler()
    fps_handler.attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        # Console progress line after every finished episode.
        elapsed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.0f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" % (
                  trainer.state.episode,
                  trainer.state.episode_reward,
                  trainer.state.episode_steps,
                  trainer.state.metrics.get('avg_fps', 0),
                  timedelta(seconds=int(elapsed))))

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        # Reward bound reached -- report and terminate the run.
        elapsed = trainer.state.metrics['time_passed']
        print("Game solved in %s, after %d episodes "
              "and %d iterations!" % (
                  timedelta(seconds=int(elapsed)),
                  trainer.state.episode,
                  trainer.state.iteration))
        trainer.should_terminate = True

    avg_loss = RunningAverage(output_transform=lambda out: out['loss'])
    avg_loss.attach(engine, "avg_loss")

    # ":" is not allowed in Windows directory names, so drop it.
    stamp = datetime.now().isoformat(timespec='minutes').replace(":", "")
    tb = tb_logger.TensorboardLogger(
        log_dir=f"runs/{stamp}-{params.run_name}-{run_name}")

    episode_out = tb_logger.OutputHandler(
        tag="episodes", metric_names=['reward', 'steps', 'avg_reward'])
    tb.attach(engine, log_handler=episode_out,
              event_name=ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)

    # Flush training metrics to TensorBoard every 100 iterations.
    ptan_ignite.PeriodicEvents().attach(engine)
    train_out = tb_logger.OutputHandler(
        tag="train",
        metric_names=['avg_loss', 'avg_fps', *extra_metrics],
        output_transform=lambda out: out)
    tb.attach(engine, log_handler=train_out,
              event_name=ptan_ignite.PeriodEvents.ITERS_100_COMPLETED)
def setup_ignite(engine: Engine, exp_source, run_name: str,
                 extra_metrics: Iterable[str] = ()):
    """Attach monitoring handlers with an early stop on short episodes.

    Args:
        engine: ignite Engine running training.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        extra_metrics: additional metric names logged under the "train" tag.
    """
    # get rid of missing metrics warning
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(exp_source)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get("time_passed", 0)
        # avg_steps defaults to 50 so the early-stop below cannot trigger
        # before the metric has been populated.
        avg_steps = trainer.state.metrics.get("avg_steps", 50)
        avg_reward = trainer.state.metrics.get("avg_reward", 0.0)
        print("Episode %d: reward=%.0f (avg %.2f), "
              "steps=%s (avg %.2f), speed=%.1f f/s, "
              "elapsed=%s" % (
                  trainer.state.episode,
                  trainer.state.episode_reward,
                  avg_reward,
                  trainer.state.episode_steps,
                  avg_steps,
                  trainer.state.metrics.get("avg_fps", 0),
                  timedelta(seconds=int(passed)),
              ))
        # Early stop once episodes get short (after a 100-episode warm-up).
        # FIX: message previously said "below 10" while the condition tests
        # against 15 -- message now matches the actual threshold.
        if avg_steps < 15 and trainer.state.episode > 100:
            print("Average steps has fallen below 15, stop training")
            trainer.should_terminate = True

    # Timestamped TensorBoard run directory.
    now = datetime.now().isoformat(timespec="minutes")
    logdir = f"runs/{now}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v["loss"])
    run_avg.attach(engine, "avg_loss")

    # Per-episode metrics, written on every EPISODE_COMPLETED event.
    metrics = ["reward", "steps", "avg_reward", "avg_steps"]
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ["avg_loss", "avg_fps"]
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train", metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
def setup_ignite(engine: Engine, exp_source, run_name: str,
                 extra_metrics: Iterable[str] = ()):
    """Attach monitoring handlers and return the TensorBoard logger.

    Args:
        engine: ignite Engine running training.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        extra_metrics: additional metric names logged under the "train" tag.

    Returns:
        The created ``TensorboardLogger`` (caller may close it).
    """
    # Hide ignite's missing-metrics UserWarning.
    warnings.simplefilter('ignore', category=UserWarning)
    # NOTE(review): subsample_end_of_episode=100 -- presumably throttles the
    # per-step end-of-episode processing; confirm against ptan docs.
    handler = ptan_ignite.EndOfEpisodeHandler(exp_source, subsample_end_of_episode=100)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        # Console progress line after each finished episode.
        passed = trainer.state.metrics.get('time_passed', 0)
        print('Episode %d: reward=%0.f, steps=%s, speed=%.1f f/s, elapsed=%s' % (
            trainer.state.episode, trainer.state.episode_reward,
            trainer.state.episode_steps,
            trainer.state.metrics.get('avg_fps', 0),
            timedelta(seconds=int(passed))))

    # ":" is stripped -- not a valid character in Windows directory names.
    now = datetime.now().isoformat(timespec='minutes')
    logdir = f'runs-{now}-{run_name}'.replace(':', '')
    # FIX: keyword was "logdir", which is rejected by torch's SummaryWriter
    # backend; every sibling setup_ignite variant uses "log_dir".
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, 'avg_loss')

    # Per-episode metrics, written on every EPISODE_COMPLETED event.
    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag='episodes', metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 1000 iterations
    # (FIX: comment said 100, but the event below is ITERS_1000_COMPLETED)
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag='train', metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
    return tb
def setup_ignite(engine: Engine, exp_source, run_name: str,
                 extra_metrics: Iterable[str] = ()):
    """Configure console and TensorBoard monitoring for a training engine.

    Args:
        engine: ignite Engine running training.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        extra_metrics: additional metric names logged under the "train" tag.

    Returns:
        The created ``TensorboardLogger``.
    """
    # Hide ignite's "missing metrics" UserWarning.
    warnings.simplefilter("ignore", category=UserWarning)

    episode_handler = ptan_ignite.EndOfEpisodeHandler(
        exp_source, subsample_end_of_episode=100)
    episode_handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        # Console progress line after each finished episode.
        elapsed = int(trainer.state.metrics.get("time_passed", 0))
        print("Episode %d: reward=%.0f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" % (
                  trainer.state.episode,
                  trainer.state.episode_reward,
                  trainer.state.episode_steps,
                  trainer.state.metrics.get("avg_fps", 0),
                  timedelta(seconds=elapsed)))

    # Timestamped TensorBoard run directory.
    stamp = datetime.now().isoformat(timespec="minutes")
    tb = tb_logger.TensorboardLogger(log_dir=f"runs/{stamp}-{run_name}")

    loss_avg = RunningAverage(output_transform=lambda out: out["loss"])
    loss_avg.attach(engine, "avg_loss")

    episode_out = tb_logger.OutputHandler(
        tag="episodes", metric_names=["reward", "steps", "avg_reward"])
    tb.attach(engine, log_handler=episode_out,
              event_name=ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)

    # Periodic TensorBoard output of training metrics (every 1000 iterations).
    ptan_ignite.PeriodicEvents().attach(engine)
    train_out = tb_logger.OutputHandler(
        tag="train",
        metric_names=["avg_loss", "avg_fps", *extra_metrics],
        output_transform=lambda out: out)
    tb.attach(engine, log_handler=train_out,
              event_name=ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED)
    return tb
def setup_ignite(engine: Engine, params: SimpleNamespace, exp_source,
                 run_name: str, net, extra_metrics: Iterable[str] = ()):
    """Attach monitoring handlers plus per-episode model checkpointing.

    Args:
        engine: ignite Engine running training.
        params: hyper-parameters namespace; ``run_name`` is used.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        net: network whose state_dict is saved after every episode.
        extra_metrics: additional metric names logged under the "train" tag.

    Returns:
        The created ``TensorboardLogger``.
    """
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(exp_source, subsample_end_of_episode=100)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.0f, steps=%s, "
              "elapsed=%s" % (
                  trainer.state.episode, trainer.state.episode_reward,
                  trainer.state.episode_steps,
                  timedelta(seconds=int(passed))))
        # FIX: path was './saves/(episode-%.3f.data' -- a stray "(" in the
        # file name and a float format applied to the integer episode counter.
        # Use a zero-padded integer file name instead.
        # NOTE(review): this saves the full state_dict after *every* episode;
        # confirm ./saves exists before training starts.
        path = './saves/episode-%03d.data' % trainer.state.episode
        torch.save(net.state_dict(), path)

    # ":" is stripped -- not a valid character in Windows directory names.
    now = datetime.now().isoformat(timespec='minutes').replace(':', '')
    logdir = f"runs2/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, 'avg_loss')

    # Per-episode metrics, written on every EPISODE_COMPLETED event.
    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag='episodes', metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tb every 1000 iterations
    # (FIX: comment said 100, but the event below is ITERS_1000_COMPLETED)
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train", metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
    return tb
# NOTE(review): this chunk begins mid-statement -- it is the tail of a DQN
# training script (end of a loss-computation call and of a process_batch
# function) whose opening lines are outside the visible range.  Code kept
# byte-identical; comments only.
        gamma=params.gamma, device=device)
    # Backpropagate the TD loss and take one optimizer step.
    loss_v.backward()
    optimizer.step()
    # One engine iteration consumes args.envs frames, so the epsilon schedule
    # is advanced by iteration * envs.
    epsilon_tracker.frame(engine.state.iteration * args.envs)
    # Hard-copy the online net into the target net every target_net_sync
    # iterations.
    if engine.state.iteration % params.target_net_sync == 0:
        tgt_net.sync()
    # The returned dict becomes engine.state.output, consumed by the attached
    # metric/logging handlers.
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }


engine = Engine(process_batch)
# Fires episode events; BOUND_REWARD_REACHED once the average reward
# crosses 17.0.
ptan_ignite.EndOfEpisodeHandler(exp_source, bound_avg_reward=17.0).attach(engine)
# fps_mul scales the frames/s metric by the number of parallel environments.
ptan_ignite.EpisodeFPSHandler(fps_mul=args.envs).attach(engine)


@engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
def episode_completed(trainer: Engine):
    # Console progress line after each finished episode.
    print(
        "Episode %d: reward=%s, steps=%s, speed=%.3f frames/s, elapsed=%s"
        % (trainer.state.episode, trainer.state.episode_reward,
           trainer.state.episode_steps, trainer.state.metrics.get('fps', 0),
           timedelta(seconds=trainer.state.metrics.get('time_passed', 0))))


@engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
def game_solved(trainer: Engine):
    # NOTE(review): unlike the setup_ignite variants in this file, this
    # handler does not set trainer.should_terminate, so training continues
    # after the bound is reached -- confirm that is intended.
    print("Game solved in %s, after %d episodes and %d iterations!"
          % (timedelta(seconds=trainer.state.metrics['time_passed']),
             trainer.state.episode, trainer.state.iteration))
# NOTE(review): this chunk begins mid-statement -- it is the tail of a DQN
# training script (end of a loss-computation call and of a process_batch
# function) whose opening lines are outside the visible range.  Code kept
# byte-identical; comments only.
        gamma=params.gamma, device=device)
    # Backpropagate the TD loss and take one optimizer step.
    loss_v.backward()
    optimizer.step()
    # Advance the epsilon schedule by one frame per engine iteration
    # (single-environment variant).
    epsilon_tracker.frame(engine.state.iteration)
    # Hard-copy the online net into the target net every target_net_sync
    # iterations.
    if engine.state.iteration % params.target_net_sync == 0:
        tgt_net.sync()
    # The returned dict becomes engine.state.output, consumed by the attached
    # metric/logging handlers.
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }


engine = Engine(process_batch)
# Fires episode events; BOUND_REWARD_REACHED once the average reward
# crosses 17.0.
ptan_ignite.EndOfEpisodeHandler(exp_source, bound_avg_reward=17.0).attach(engine)
ptan_ignite.EpisodeFPSHandler().attach(engine)


@engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
def episode_completed(trainer: Engine):
    # Console progress line after each finished episode.
    print(
        "Episode %d: reward=%s, steps=%s, speed=%.3f frames/s, elapsed=%s"
        % (trainer.state.episode, trainer.state.episode_reward,
           trainer.state.episode_steps, trainer.state.metrics.get('avg_fps', 0),
           timedelta(seconds=trainer.state.metrics.get('time_passed', 0))))


@engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
def game_solved(trainer: Engine):
    # NOTE(review): unlike the setup_ignite variants in this file, this
    # handler does not set trainer.should_terminate, so training continues
    # after the bound is reached -- confirm that is intended.
    print("Game solved in %s, after %d episodes and %d iterations!"
          % (timedelta(seconds=trainer.state.metrics['time_passed']),
             trainer.state.episode, trainer.state.iteration))
# NOTE(review): fragment -- starts mid-call (trailing argument of an
# environment/wrapper constructor) and is cut off inside the final
# "return {".  Code kept byte-identical; comments only.
    frame_stack_count=2)
net = dqn_model.DQN(env.observation_space.shape,
                    env.action_space.n).to(device)
tgt_net = ptan.agent.TargetNet(net)
# experience_source=None: the buffer is filled externally (via exp_queue and
# BatchGenerator below) rather than by a ptan experience source.
buffer = ptan.experience.ExperienceReplayBuffer(
    experience_source=None, buffer_size=params.replay_size)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

# start subprocess and experience queue
exp_queue = mp.Queue(maxsize=BATCH_MUL * 2)
play_proc = mp.Process(target=play_func,
                       args=(params, net, args.cuda, exp_queue))
play_proc.start()
fps_handler = ptan_ignite.EpisodeFPSHandler()
batch_generator = BatchGenerator(buffer, exp_queue, fps_handler,
                                 params.replay_initial, params.batch_size)


def process_batch(engine, batch):
    # One gradient step on a sampled batch; returns the training metrics dict
    # (truncated below -- the rest of this function is outside the visible
    # range).
    optimizer.zero_grad()
    loss_v = common.calc_loss_dqn(batch, net, tgt_net.target_model,
                                  gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    # Periodic hard sync of the target network.
    if engine.state.iteration % params.target_net_sync == 0:
        tgt_net.sync()
    return {
def setup_ignite(
    engine: Engine,
    params: SimpleNamespace,
    exp_source,
    run_name: str,
    model,
    optimizer,
    extra_metrics: Iterable[str] = (),
):
    """Attach monitoring, TensorBoard logging and periodic checkpointing.

    Every 1000 iterations the model, optimizer and trainer state are written
    under ./models (the two most recent checkpoints are kept); if a
    checkpoint already exists there, training resumes from the newest one.

    Args:
        engine: ignite Engine running training.
        params: hyper-parameters; ``stop_reward`` and ``run_name`` are used.
        exp_source: ptan experience source episode statistics come from.
        run_name: suffix for the TensorBoard log directory.
        model: network to checkpoint/restore.
        optimizer: optimizer to checkpoint/restore.
        extra_metrics: additional metric names logged under the "train" tag.
    """
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(
        exp_source, bound_avg_reward=params.stop_reward)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    # Objects captured in each checkpoint (and restored on resume).
    objects_to_checkpoint = {
        'model': model,
        'optimizer': optimizer,
        'trainer': engine
    }
    checkpoint_dir = Path("models")
    saver = DiskSaver(str(checkpoint_dir), create_dir=True, require_empty=False)
    handler = Checkpoint(objects_to_checkpoint, saver, n_saved=2)
    engine.add_event_handler(Events.ITERATION_COMPLETED(every=1000), handler)

    checkpoints_paths = list(checkpoint_dir.iterdir())
    if checkpoints_paths:
        # FIX: iterdir() yields entries in arbitrary, filesystem-dependent
        # order, so "[-1]" was not guaranteed to be the newest checkpoint.
        # Resume from the most recently modified file instead.
        latest = max(checkpoints_paths, key=lambda p: p.stat().st_mtime)
        checkpoint = torch.load(latest)
        print(f"Loading checkpoint {latest.name}")
        Checkpoint.load_objects(to_load=objects_to_checkpoint,
                                checkpoint=checkpoint)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        # Console progress line after each finished episode.
        passed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.2f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" % (
                  trainer.state.episode, trainer.state.episode_reward,
                  trainer.state.episode_steps,
                  trainer.state.metrics.get('avg_fps', 0),
                  timedelta(seconds=int(passed))))

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        passed = trainer.state.metrics['time_passed']
        print("Game solved in %s, after %d episodes "
              "and %d iterations!" % (
                  timedelta(seconds=int(passed)),
                  trainer.state.episode, trainer.state.iteration))
        # Stop the training loop once the reward bound is reached.
        trainer.should_terminate = True

    # ":" is replaced -- not a valid character in Windows directory names.
    now = datetime.now().isoformat(timespec='minutes').replace(":", "-")
    logdir = f"runs/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, "avg_loss")

    # Per-episode metrics, written on every EPISODE_COMPLETED event.
    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train", metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)