Пример #1
0
    pred = OfflinePredictor(PredictConfig(
                model=Model(),
                session_init=get_model_loader("models/MsPacman-v0.tfmodel"),
                input_names=['state'],
                output_names=['policy']))

    student = student_dqn(env, teacher=pred)
    episodes = 1000000
    scores = []
    teacher_step_nums = []
    step_nums = []
    ep_avgs = [0]
    ep_avg = []

    for ee in range(episodes):
        ob = env.reset()
        score_ = 0
        steps = 0
        t_steps = 0
        
        while True:
            steps += 1
            action = student.act(ob)
        
            next_ob, reward, done, _ = env.step(action)
            score_ += reward
            student.build_memory(ob, action, reward, next_ob, done)
            
            ob = next_ob
                    
            if done:
Пример #2
0
# Build the wrapped environment in a single expression. Evaluation order is
# innermost first: gym.make('MsPacman-v0') -> FireResetEnv -> MapState (with a
# callable that resizes its argument to (84, 84) via cv2.resize) ->
# FrameStack(…, 4).
# NOTE(review): the exact semantics of FireResetEnv / MapState / FrameStack
# are defined outside this excerpt; the summary above only restates the
# arguments passed here.
env = FrameStack(
    MapState(
        FireResetEnv(gym.make('MsPacman-v0')),
        lambda frame: cv2.resize(frame, (84, 84)),
    ),
    4,
)

# Agent that interacts with the wrapped environment.
student = student_dqn(env)

# Run configuration.
episodes = 10000000  # number of iterations of the episode loop below
save_every = 1000    # presumably a checkpoint interval; its use is not visible here
record_scores = 50   # interval (in episodes) tested by the loop's recording check

# Score bookkeeping: the loop below appends each finished episode's score to
# mini_score; how `scores` is filled is not visible in this excerpt.
scores, mini_score = [], []

for ee in range(episodes):
    ob_big = env.reset()
    ob = ob_big[None, :, :, 6:] / 255.
    score_ = 0
    while True:
        action = student.act(ob)
        next_ob_big, reward, done, _ = env.step(action)
        next_ob = next_ob_big[None, :, :, 6:] / 255.
        score_ += reward
        student.build_memory(ob, action, reward, next_ob, done)

        ob = next_ob

        if done:
            mini_score.append(score_)

            if (ee + 1) % record_scores == 0 or ee == 0: