# Training-loop script: a `student_dqn` agent is constructed with a loaded
# predictor passed as its `teacher`, then run against `env` for a fixed
# number of episodes.
#
# NOTE(review): the layout below is reconstructed from a whitespace-collapsed
# source, so the nesting is inferred from syntax -- confirm against the
# original file. `env` is defined outside the visible part of the script.

# Predictor restored from the saved model file; it is fed through the input
# named 'state' and read from the output named 'policy'.
pred = OfflinePredictor(PredictConfig(
    model=Model(),
    session_init=get_model_loader("models/MsPacman-v0.tfmodel"),
    input_names=['state'],
    output_names=['policy']))

# The agent receives the predictor as `teacher`; how it is used is decided
# inside student_dqn, which is not visible here.
student = student_dqn(env, teacher=pred)

episodes = 1000000  # upper bound on outer-loop iterations

# Bookkeeping containers; they are only initialised in the visible part, so
# presumably they are filled in code beyond this fragment -- verify.
scores = []
teacher_step_nums = []
step_nums = []
ep_avgs = [0]
ep_avg = []

for ee in range(episodes):
    # Every outer iteration starts from a freshly reset environment.
    ob = env.reset()
    score_ = 0   # sum of rewards received since the reset
    steps = 0    # number of inner-loop iterations since the reset
    t_steps = 0  # initialised here; not used in the visible code
    while True:
        steps += 1
        action = student.act(ob)
        # env.step is unpacked as a 4-tuple; the last element is discarded.
        next_ob, reward, done, _ = env.step(action)
        score_ += reward
        # Hand the full transition to the agent (presumably stored for
        # later training -- confirm in student_dqn.build_memory).
        student.build_memory(ob, action, reward, next_ob, done)
        ob = next_ob
        if done:  # the body of this branch lies beyond the visible source
# Training-loop script: builds a wrapped MsPacman-v0 environment, creates a
# `student_dqn` agent for it and runs the agent for a fixed number of episodes.
#
# NOTE(review): the layout below is reconstructed from a whitespace-collapsed
# source, so the nesting (in particular of the last two statements under
# `if done:`) is inferred from syntax -- confirm against the original file.

# Environment pipeline: each wrapper takes the previous env as its argument.
env = gym.make('MsPacman-v0')
env = FireResetEnv(env)
# Every image handed to the lambda is resized with cv2.resize to (84, 84).
env = MapState(env, lambda im: cv2.resize(im, (84, 84)))
env = FrameStack(env, 4)

student = student_dqn(env)

episodes = 10000000  # upper bound on outer-loop iterations
save_every = 1000    # not used in the visible code; presumably a checkpoint interval -- verify
record_scores = 50   # period (in episodes) of the `ee` check at the end of this fragment
scores = []          # only initialised in the visible part
mini_score = []      # receives one total score per finished episode

for ee in range(episodes):
    ob_big = env.reset()
    # Add a leading axis, keep entries from index 6 onward on the last
    # indexed axis, and divide by 255.
    # NOTE(review): presumably this keeps the most recent of the stacked
    # frames' channels and scales pixels to [0, 1] -- confirm.
    ob = ob_big[None, :, :, 6:] / 255.
    score_ = 0  # sum of rewards received since the reset
    while True:
        action = student.act(ob)
        # env.step is unpacked as a 4-tuple; the last element is discarded.
        next_ob_big, reward, done, _ = env.step(action)
        # Same slicing and scaling as applied to the reset observation.
        next_ob = next_ob_big[None, :, :, 6:] / 255.
        score_ += reward
        # Hand the full transition to the agent (presumably stored for
        # later training -- confirm in student_dqn.build_memory).
        student.build_memory(ob, action, reward, next_ob, done)
        ob = next_ob
        if done:
            mini_score.append(score_)
            # True on the first episode and on every `record_scores`-th one.
            if (ee + 1) % record_scores == 0 or ee == 0:  # body lies beyond the visible source