Пример #1
0
    # Bookkeeping for the training run. The enclosing function header is not
    # part of this excerpt; `episodes`, `env` and `student` come from there.
    # teacher_step_nums and step_nums are not used in the visible lines.
    teacher_step_nums = []
    step_nums = []
    # History of reported averages. Seeded with 0 so that the first
    # ep_avgs[-1] - ep_avgs[-2] delta has a previous entry to subtract.
    ep_avgs = [0]
    # Values averaged at each report, then reset.
    # NOTE(review): nothing appends to ep_avg in the visible excerpt --
    # presumably that happens in code cut off below; confirm, since the mean
    # of an empty list is nan.
    ep_avg = []

    # One iteration per episode.
    for ee in range(episodes):
        ob = env.reset()
        score_ = 0  # sum of rewards collected in this episode
        steps = 0  # number of env.step calls in this episode
        t_steps = 0  # not used in the visible excerpt
        
        # Run the episode until the environment reports `done`.
        # NOTE(review): the loop exit is not visible here (excerpt is cut off).
        while True:
            steps += 1
            action = student.act(ob)
        
            next_ob, reward, done, _ = env.step(action)
            score_ += reward
            # Hand the full transition to the student.
            student.build_memory(ob, action, reward, next_ob, done)
            
            ob = next_ob
                    
            if done:
                # Save weights and report on the first episode and on every
                # 20th episode after that.
                if (ee+1) % 20 == 0 or ee == 0:
                    student.model.save_weights("teacher_student_weights.h5")
                    if ee == 0:
                        # First episode: no average to report yet.
                        print "Completed {}/{} with a score of {}.".format(ee+1, episodes, score_)
                    else:
                        # Record the mean of the current window, reset the
                        # window, and print the mean plus its change versus
                        # the previously recorded mean.
                        ep_avgs.append(np.mean(ep_avg))
                        ep_avg = []
                        print "Completed {}/{} with a score of {}. Average over the last 20 epochs was {} ({}).".format(ee+1, episodes, score_, ep_avgs[-1], ep_avgs[-1]-ep_avgs[-2])
                
Пример #2
0
# Runs 5000 iterations. Each one plays a single episode driven by the
# student's actions, records the observations where the student disagreed
# with the teacher, and then trains the student on the recorded samples.
# `env`, `pred`, `student`, `observations_` and `teacher_actions` are defined
# outside this excerpt.
for i in range(5000):
    print "===== {} =====".format(str(i))
    ob = env.reset()
    sum_r = 0  # total reward for this episode

    while True:
        # Teacher's choice for the current observation. ob[None, :, :, :]
        # adds a leading axis of length 1 to the 3-D observation.
        # NOTE(review): the teacher gets the raw observation while the
        # student gets it scaled by 1/255 -- confirm this is intended.
        act_t = pred(ob[None, :, :, :])[0][0].argmax(
        )  # Record Teacher Action for Training
        act_s = student.predict(np.array(ob[None, :, :, :]) /
                                255.)[0].argmax()  # Students Action

        # Only store samples where the student disagrees with the teacher;
        # the stored label is the teacher's action.
        if act_t != act_s:
            observations_.append(ob / 255.)  # Append Observation
            teacher_actions.append(act_t)

        # The environment is advanced with the student's action, not the
        # teacher's.
        ob, r, isOver, info = env.step(act_s)  # Take Step
        sum_r += r

        if isOver:
            print "Total Score \t {}".format(str(sum_r))
            break

    # Draw every index exactly once in random order (choice without
    # replacement, size equal to the population), i.e. a shuffle of all
    # samples stored so far. The lists are not cleared in the visible code,
    # so samples accumulate across iterations.
    # NOTE(review): np.array(observations_) is built twice here only to take
    # its length; len(observations_) presumably gives the same value.
    scrambled_idx = np.random.choice(len(np.array(observations_)),
                                     size=(len(np.array(observations_)), ),
                                     replace=False)
    print "Training Count \t {}".format(str(len(scrambled_idx)))
    student.fit(np.array(observations_)[scrambled_idx],
                np_utils.to_categorical(teacher_actions,
                                        num_classes=9)[scrambled_idx],
                epochs=5,
                batch_size=8,