def __init__(self, config):
    """Build the frame-stacked Atari environment and the agent from *config*.

    Side effects: forces ``config.model_type`` to the conv2d variant, prints
    the configured device, and — when a non-default data path is supplied —
    restores the agent from a saved checkpoint.
    """
    config.model_type = Config.MODEL_TYPE_CONV2D
    print(config.device)
    self.config = config

    # DeepMind-style preprocessing with frame stacking on top of the raw env.
    self.env = wrap_deepmind(make_atari(config.env), frame_stack=True)
    self.num_states = self.env.observation_space.shape[-1]
    self.num_actions = self.env.action_space.n

    self.agent = Agent(config, self.num_states, self.num_actions,
                       config.num_atoms)

    # Fixed-size buffer of 100 step counts (rolling episode statistics).
    self.total_step = np.zeros(100)

    # A non-default data path signals "resume from checkpoint".
    self.data_path = config.data_path
    if self.data_path != Config.DATA_PATH_DEFAULT:
        self.agent.load_model()
def get_env():
    """Create the seeded, DeepMind-wrapped Atari env for task 3 of Atari40M."""
    benchmark = gym.benchmark_spec('Atari40M')
    # Change the index to select a different game.
    task = benchmark.tasks[3]
    env = gym.make(task.env_id)
    env.seed(0)
    # Kept for the (currently disabled) video monitor below.
    expt_dir = '/tmp/hw3_vid_dir2/'
    # env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    return utils.wrap_deepmind(env)
# NOTE(review): truncated fragment — it opens with the trailing kwargs of a call
# whose beginning is outside this view and ends at a dangling `else:`; left
# byte-identical because a safe rewrite needs the missing context. It builds a
# Plotter over several score/loss/timestep plot types, constructs a frame-stacked
# (unscaled) Atari env, and in 'train' mode instantiates a DDQNLearner from the
# parsed CLI args — presumably the `else:` branch handles evaluation; confirm
# against the full file.
log_freq=args.log_freq, mode=args.mode) plotter = Plotter(save_dirs=save_dirs, plot_types=[ 'avg_scores_ep', 'avg_scores_ts', 'avg_scores_100_ep', 'avg_scores_100_ts', 'scores_ep', 'scores_ts', 'high_scores_ep', 'high_scores_ts', 'low_scores_ep', 'low_scores_ts', 'avg_loss_ep', 'avg_acc_ep', 'timesteps_ep' ], interval_types=['overall', 'window'], plot_freq=args.plot_freq, mode=args.mode) env = make_atari(ENV_GYM) env = wrap_deepmind(env, frame_stack=True, scale=False) if args.mode == 'train': agent = DDQNLearner( env=env, save_dirs=save_dirs, save_freq=args.save_freq, gamma=args.gamma, batch_size=args.batch_size, learning_rate=args.learning_rate, buffer_size=args.buffer_size, learn_start=args.learn_start, target_network_update_freq=args.target_network_update_freq, train_freq=args.train_freq, tot_steps=args.total_step_lim) else:
# NOTE(review): truncated fragment — it opens with the tail of a
# `parser.add_argument` call whose start is outside this view; left
# byte-identical. After parsing args it creates the `logs` directory if absent,
# derives a run-specific save dir, warns when --cuda is requested without CUDA
# being available (falling back to CPU), builds a frame-stacked Atari env with
# configured episode length / frameskip, and trains an Agent for args.episodes.
'--freeze_layers', type=int, default=0, help= 'Number of initial layers to freeze when fine-tuning | Choose from 1, 2, 3' ) args = parser.parse_args() logs_dir = 'logs' if not os.path.exists(logs_dir): os.makedirs(logs_dir) args.save_dir = utils.get_save_dir(logs_dir, args.name) # Training if not torch.cuda.is_available() and args.cuda: print( '--cuda is passed but torch.cuda.is_available() returned False. Will use CPU instead.' ) env = utils.wrap_deepmind(utils.make_atari( args.env, max_episode_steps=args.episode_length, frameskip=args.frameskip), frame_stack=True, stacks=args.agent_history_length) agent = Agent(env, args) agent.train(args.episodes)
def _thunk():
    """Return a DeepMind-wrapped Breakout env seeded by the captured `rank`."""
    seed = 0 + rank
    breakout = make_atari('BreakoutNoFrameskip-v4')
    breakout.seed(seed)
    return wrap_deepmind(breakout)