def f():
    """Create and return a wrapped environment.

    Builds the gym environment identified by ``ENV_NAME + ENV_SUFFIX`` and
    passes it through ``common.wrap_env`` with resizing, PyTorch layout and
    frame stacking enabled. ``frames_per_state`` is read from the enclosing
    scope when the function is called, not when it is defined.
    """
    raw_env = gym.make(ENV_NAME + ENV_SUFFIX)
    return common.wrap_env(
        raw_env,
        resize=True,
        pytorch_layout=True,
        stack_frames=frames_per_state,
    )
# Install the handlers for SIGHUP and SIGINT before any other setup runs.
signal.signal(signal.SIGHUP, handle_sighup)
signal.signal(signal.SIGINT, handle_sigint)

args = parse_args()
print(args)

# Expose only the GPU ids listed in args.gpu. This is assigned before the
# CUDA availability query below, so the query sees the restricted set.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, args.gpu))
USE_CUDA = t.cuda.is_available()
if USE_CUDA:
    device = t.device('cuda')
    print(f'Using CUDA device: {t.cuda.get_device_name(device)}')
else:
    device = t.device('cpu')

# Number of frames passed as stack_frames to the wrapper and to build_model.
frames_per_state = 4

env = gym.make(ENV_NAME + ENV_SUFFIX)
env = common.wrap_env(
    env,
    resize=True,
    pytorch_layout=True,
    stack_frames=frames_per_state,
)

model = build_model(frames_per_state).to(device)
# With args.double set, a second independently built network is used as the
# target; otherwise target_model is the very same object as model.
if args.double:
    target_model = build_model(frames_per_state).to(device)
else:
    target_model = model

opt = t.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = t.nn.SmoothL1Loss()

# NOTE(review): presumably an exploration-rate schedule (start value, final
# value, number of steps) and a discount factor -- confirm against the
# training loop, which is not visible here.
eps = 1
eps_final = 0.01
eps_steps = 50000
gamma = 0.99

replay = common.ReplayCnnBuffer(100000)
batch_size = 32

# Bounded histories: once full, the oldest entries are dropped automatically.
all_rewards = collections.deque(maxlen=250)
losses = collections.deque(maxlen=10000)