Example #1
import glob

import numpy as np

# loadConfigAndSetup and printYellow are assumed to come from the surrounding
# project's utility modules.


def allPolicyFiles(log_dir):
    """
    List every policy checkpoint saved under a log directory, sorted by episode number.

    :param log_dir: (str) path to the experiment log directory
    :return: (np.ndarray, np.ndarray) episode numbers and the matching model directories
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    printYellow(log_dir)
    # Checkpoint directories are saved as <log_dir>/model_<episode>
    files = glob.glob(log_dir + '/model_*')

    files_list = []
    for file in files:
        # The episode number is the suffix after the last underscore
        eps = int(file.split('_')[-1])
        files_list.append((eps, file + '/'))

    # Sort the checkpoints by episode number
    files_list.sort(key=lambda val: val[0])
    res = np.array(files_list)
    # np.array coerces the (int, str) tuples to strings, so both returned arrays hold str values
    return res[:, 0], res[:, 1]
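A minimal usage sketch (the log directory below is a placeholder, and the checkpoints are assumed to live in <log_dir>/model_<episode>/ directories, as the glob above expects):

# Hypothetical usage: list every saved checkpoint of an experiment.
episodes, model_dirs = allPolicyFiles("logs/my_experiment")
for ep, path in zip(episodes, model_dirs):
    # ep comes back as a string because of the np.array coercion noted above
    print("episode {}: {}".format(ep, path))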
Example #2
def policyCrossEval(log_dir,
                    task,
                    episode,
                    model_path,
                    num_timesteps=2000,
                    num_cpu=1,
                    seed=0):
    """
    Cross-evaluate a saved policy on a given task.

    :param log_dir: (str) path to the experiment log directory
    :param task: the task to evaluate the policy on
    :param episode: (int) episode number of the checkpoint being evaluated
    :param model_path: (str) path to the saved model
    :param num_timesteps: (int) how many timesteps to evaluate the policy for
    :param num_cpu: (int) number of parallel environments
    :param seed: (int) random seed
    :return: (np.ndarray, bool) the episode number followed by the evaluation rewards, and a success flag
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    env_kwargs = EnvsKwargs(task, env_kwargs)

    printGreen(
        "Evaluating the model saved at: {} for {} evaluation timesteps".format(
            model_path, num_timesteps))

    log_dir, environment, algo_args = createEnv(log_dir,
                                                train_args,
                                                algo_name,
                                                algo_class,
                                                env_kwargs,
                                                num_cpu=num_cpu,
                                                seed=seed)

    reward = policyEval(environment, model_path, log_dir, algo_class,
                        algo_args, num_timesteps, num_cpu)

    # Prepend the episode number to the reward array so both are stored together
    reward = np.append(episode, reward)
    return reward, True
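A hedged driver sketch that combines allPolicyFiles with this function to evaluate every saved checkpoint on another task; the log directory and the task identifier are placeholders, and the task values accepted by EnvsKwargs depend on the project:

# Hypothetical cross-evaluation loop over all saved checkpoints.
episodes, model_dirs = allPolicyFiles("logs/my_experiment")
rewards = []
for ep, model_path in zip(episodes, model_dirs):
    reward, ok = policyCrossEval("logs/my_experiment", "target_task", int(ep),
                                 model_path, num_timesteps=2000, num_cpu=1, seed=0)
    if ok:
        rewards.append(reward)
# Each row holds the episode number followed by that checkpoint's rewards
rewards = np.array(rewards)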
Example #3
def newPolicy(episodes, file_path):
    """
    Check whether a policy checkpoint that has not been evaluated yet is available.

    :param episodes: (list) episode numbers that have already been evaluated
    :param file_path: (str) path to the experiment log directory
    :return: (int, str, bool) the new episode number, its model path, and whether a new checkpoint was found
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        file_path)
    episode, model_path, OK = latestPolicy(file_path, algo_name)
    if episode in episodes:
        # The latest checkpoint has already been evaluated
        return -1, '', False
    else:
        return episode, model_path, True
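A small polling sketch, assuming the caller keeps a list of episode numbers it has already evaluated (the log path is a placeholder):

# Hypothetical polling loop: only pick up checkpoints that have not been evaluated yet.
already_evaluated = []
episode, model_path, is_new = newPolicy(already_evaluated, "logs/my_experiment")
if is_new:
    already_evaluated.append(episode)
    # evaluate the checkpoint stored at model_path here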
Example #4
def policyCrossEval(log_dir,
                    task,
                    episode,
                    model_path,
                    num_timesteps=2000,
                    num_cpu=1):
    """
    To do a cross evaluation for a certain policy for different tasks
    A version of real time evaluation but with some bugs to fix
    :param log_dir:
    :param task:
    :param episode:
    :param model_path:
    :param num_timesteps: How many timesteps to evaluate the policy
    :param num_cpu:
    :return:
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    env_kwargs = EnvsKwargs(task, env_kwargs)

    printGreen(
        "Evaluating the model saved at: {} for {} evaluation timesteps".format(
            model_path, num_timesteps))

    log_dir, environment, algo_args = createEnv(log_dir,
                                                train_args,
                                                algo_name,
                                                algo_class,
                                                env_kwargs,
                                                num_cpu=num_cpu)

    reward = policyEval(environment, model_path, log_dir, algo_class,
                        algo_args, num_timesteps, num_cpu)

    # Prepend the episode number to the reward array so both are stored together
    reward = np.append(episode, reward)
    return reward, True
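Because the episode number is prepended to the reward array (the np.append step above), callers have to split it back out; a minimal sketch for this seed-less variant, with placeholder paths and task name:

# Hypothetical single-checkpoint evaluation.
result, ok = policyCrossEval("logs/my_experiment", "target_task", 500,
                             "logs/my_experiment/model_500/", num_timesteps=2000)
if ok:
    episode_number, returns = result[0], result[1:]  # undo the np.append packing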