Exemplo n.º 1
0
def test():
    """Run one distillation pass with fixed settings, then evaluate.

    The hyper-parameters below are local to this function; ROOT_PATH, args
    and TARGET_NET_SIZE are taken from the enclosing module. After the
    student has been distilled its weights are saved through the logger,
    and the teacher plus every log directory under ``./result_DT`` are
    evaluated into a new ``result_EVAL/<yymmdd_HHMMSS>`` folder.
    """
    # Fixed settings for this run.
    game = 'BreakoutNoFrameskip-v4'
    teacher_model = './model/teacher/breakout-1.h5f'
    loss_name = 'mse'
    learning_rate = 0.0001
    batch_size = 32
    epsilon = 0.05
    mem_size = 50000
    add_mem_num = 3000
    update_num = 200
    epochs = 1
    eval_iteration = 3000

    def ensure_folder(path):
        # Create the folder (and report it) only when it is missing.
        if not os.path.exists(path):
            os.mkdir(path)
            print('*** Create folder: {} ***'.format(path))

    log_writer = LogWriter(ROOT_PATH, batch_size)
    log_writer.save_setting(args)

    env = wrap_deepmind(make_atari(game), frame_stack=True, scale=True)

    teacher = Teacher(teacher_model, env, epsilon, mem_size, eval_iteration)
    student = SingleDtStudent(
        env, learning_rate, log_writer, batch_size, epsilon, teacher,
        add_mem_num, update_num, epochs, loss_name, TARGET_NET_SIZE)
    student.distill()

    log_writer.save_weights(student, 'student_{}'.format(loss_name))
    log_writer.log_total_time_cost()

    # Evaluation output folder; the timestamp is taken only now, i.e. after
    # distillation has finished.
    root = 'result_EVAL'
    ensure_folder(root)
    stamp = time.strftime('%y%m%d_%H%M%S', time.localtime())
    save_path = os.path.join(root, stamp).replace('\\', '/')
    ensure_folder(save_path)

    # Teacher first, then one evaluator per student log directory.
    teacher.evaluate(save_path)
    for log_path in glob.glob('./result_DT/*'):
        evaluator = Evaluator_deprecate(env,
                                        log_path,
                                        save_path,
                                        eval_iteration=eval_iteration)
        evaluator.evaluate()
Exemplo n.º 2
0
def SingleDistillation_main():
    """Distill one teacher into one student using the module settings.

    Every setting (ROOT_PATH, GAME, MODEL_PATH, LEARNING_RATE, ...) is read
    from names defined outside this function. Once ``distill()`` returns,
    the logger's ``save_weights``, ``save_model_arch`` and
    ``log_total_time_cost`` are called in that order.
    """
    log_writer = LogWriter(ROOT_PATH, BATCH_SIZE)
    log_writer.save_setting(args)

    # Raw Atari environment wrapped with frame stacking and scaling enabled.
    atari_env = wrap_deepmind(make_atari(GAME), frame_stack=True, scale=True)

    teacher_agent = Teacher(MODEL_PATH, atari_env, EPSILON, MEM_SIZE)
    student_agent = SingleDtStudent(
        atari_env, LEARNING_RATE, log_writer, BATCH_SIZE, EPSILON,
        teacher_agent, ADD_MEM_NUM, UPDATE_NUM, EPOCH,
        LOSS_FUC, TARGET_NET_SIZE)
    student_agent.distill()

    log_writer.save_weights(student_agent)
    log_writer.save_model_arch(student_agent)
    log_writer.log_total_time_cost()
Exemplo n.º 3
0
def Evaluation_deprecate():
    """Evaluate the teacher and every student (single-target distillation).

    Expected layout, relative to the working directory:

    * ``./model/teacher`` should contain only one ``*.h5f`` model file and
      one ``*.csv`` settings file (the first glob match of each is used);
    * ``./result_DT`` contains one log directory per student run.

    Results are written to a new ``result_EVAL/<yymmdd_HHMMSS>`` folder.

    Raises:
        IndexError: if no ``*.csv`` or ``*.h5f`` file matches under
            ``./model/teacher``.
        KeyError: if the settings file has no ``game`` entry.
    """
    # Get the game name from the settings CSV, read as key/value rows.
    # The file is opened with newline='' as the csv module documentation
    # asks, and rows with fewer than two fields (e.g. the blank lines found
    # in CSVs written without newline='' on Windows) are skipped instead of
    # raising IndexError.
    with open(glob.glob('./model/teacher/*.csv')[0], newline='') as f:
        settings_dict = {row[0]: row[1]
                         for row in csv.reader(f) if len(row) >= 2}
    game_name = settings_dict['game']
    print("*** GAME of teacher:{} ***".format(game_name))

    # Make the environment.
    # NOTE(review): the environment is built from the module-level GAME, not
    # from game_name read above (which is only printed) -- confirm the two
    # are meant to agree.
    env = make_atari(GAME)
    env = wrap_deepmind(env, frame_stack=True, scale=True)

    # Create the output folder result_EVAL/<yymmdd_HHMMSS>, reporting each
    # folder that had to be created.
    root = 'result_EVAL'
    if not os.path.exists(root):
        os.mkdir(root)
        print('*** Create folder: {} ***'.format(root))
    now_time = time.strftime('%y%m%d_%H%M%S', time.localtime())
    save_path = os.path.join(root, now_time).replace('\\', '/')
    if not os.path.exists(save_path):
        os.mkdir(save_path)
        print('*** Create folder: {} ***'.format(save_path))

    # Evaluate the teacher (paths are normalised to forward slashes).
    Teacher(glob.glob('./model/teacher/*.h5f')[0].replace('\\', '/'),
            env,
            eval_iteration=EVAL_ITERATION,
            is_small=True).evaluate(save_path)

    # Evaluate each student log directory.
    for log_path in glob.glob('./result_DT/*'):
        Evaluator_deprecate(env,
                            log_path.replace('\\', '/'),
                            save_path,
                            eval_iteration=EVAL_ITERATION).evaluate()
Exemplo n.º 4
0
# threads_no is passed as the first positional argument of EnvWrapper and
# also scales buffer_size below; sim_args is forwarded as keyword arguments.
threads_no = 1
env = EnvWrapper(threads_no, **sim_args)

#%%
# Reset the environment, then read the spaces it exposes.
env.reset()
ob_space = env.observation_space
ac_space = env.action_space

print("Observation space shape:", ob_space)
print("Action space shape:", ac_space)

# Stop here if the environment did not provide an observation space.
assert ob_space is not None

#%%
teacher = Teacher(env, 1, Preprocessor(False))

# Agent configuration; buffer_size is 3 * steps_per_ep * threads_no.
lr = 4e-4
config = Config(buffer_size=3 * steps_per_ep * threads_no,
                batch_size=32,
                gamma=0.7,
                tau=1e-3,
                lr=lr,
                update_every=1)
agent = Agent(QNetworkTf, history_length, action_size=7, config=config)
# NOTE(review): the meaning of the three set_epsilon arguments is not visible
# here -- confirm against Agent.set_epsilon.
agent.set_epsilon(0.9, 0.001, EPISODE_COUNT - 2)

# Test the model
# hyperparams merges the Config instance attributes with sim_args; on a
# duplicate key the sim_args value wins because it is unpacked last.
hyperparams = {**config.__dict__, **sim_args}
tags = [
    "Rew: normalized speed", "Final", f"{Agent.NAME}", sim_args['scenario'],
Exemplo n.º 5
0
#%%
# Reset the environment, then read the spaces it exposes.
env.reset()
ob_space = env.observation_space
ac_space = env.action_space

print("Observation space shape:", ob_space)
print("Action space shape:", ac_space)

# Stop here if the environment did not provide an observation space.
assert ob_space is not None

#%%
class Agent:
    """Stub agent whose actions are uniformly random samples.

    ``action_space`` is handed unchanged to ``np.random.random_sample`` as
    its ``size`` argument, so ``act`` returns floats drawn from [0, 1) with
    that shape. ``step`` and ``reset`` do nothing, and this class never
    changes ``actor_loss`` or ``critic_loss`` from their initial 0.
    """

    def __init__(self, action_space):
        self.actor_loss = 0
        self.critic_loss = 0
        self.action_space = action_space

    def act(self, *args):
        """Return a random sample sized by ``action_space``; ignore ``args``."""
        size = self.action_space
        return np.random.random_sample(size)

    def step(self, *args):
        """Accept and ignore any arguments."""
        return None

    def reset(self):
        """Do nothing; there is nothing to reset."""
        return None

#%%
# Build the teacher and an Agent that receives the environment's action
# space, then call dry_run with simTime / stepTime truncated to an int.
teacher = Teacher(env, 1, Preprocessor(True))
agent = Agent(env.action_space)
logger = teacher.dry_run(agent, int(simTime/stepTime))
Exemplo n.º 6
0
        }
        # NOTE(review): rng is incremented once each time this code runs; what
        # it drives is defined outside the visible fragment -- confirm.
        rng += 1
        # When the beb flag is set, set the "dryRun" entry of sim_args.
        if args.beb:
            sim_args["dryRun"] = True

        print("Steps per episode:", steps_per_ep)

        # threads_no is passed as the first positional argument of EnvWrapper;
        # sim_args is forwarded as keyword arguments.
        threads_no = 1
        env = EnvWrapper(threads_no, **sim_args)

        #%%
        # Reset the environment, then read the spaces it exposes.
        env.reset()
        ob_space = env.observation_space
        ac_space = env.action_space

        print("Observation space shape:", ob_space)
        print("Action space shape:", ac_space)

        # Stop here if the environment did not provide an observation space.
        assert ob_space is not None

        # Tags: agent name, "Final", the scenario, the station count, and a
        # "key: value" string for each of the first three keys of sim_args.
        tags = [
            f"{agent_name}", "Final", sim_args['scenario'],
            f"Station count: {sim_args['nWifi']}",
            *[f"{key}: {sim_args[key]}" for key in list(sim_args)[:3]]
        ]

        #%%
        # Build the teacher and an Agent that receives the environment's
        # action space, then call eval with the timing settings, history
        # length and tags.
        teacher = Teacher(env, 1, Preprocessor(False))
        agent = Agent(env.action_space)
        logger = teacher.eval(agent, simTime, stepTime, history_length, tags)