Exemplo n.º 1
0
from src.utils import load_qnet, error_info
from collections import deque

# Pick the tensor constructors once: the torch.cuda variants when
# torch.cuda.is_available() reports a usable GPU, the CPU variants otherwise.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
# Tensor is simply an alias for the selected FloatTensor type.
Tensor = FloatTensor


if __name__ == "__main__":
    # Script entry point (only the set-up part is visible in this fragment).
    # Create the CartPole gym environment and select its configuration object.
    env = gym.make("CartPole-v0")
    config = cartpole_config
    # Build a Q-network sized from the config, hand it to load_qnet together
    # with a checkpoint file name (presumably restores saved weights — confirm
    # in src.utils), then call .eval() on it.
    eval_qnet = QNet(config.state_dim,config.dqn_hidden_dims,config.action_size)
    load_qnet(eval_qnet,filename='qnet_cp_short.pth.tar')
    eval_qnet.eval()

    # Names of the estimators being compared. Presumably the order must match
    # the order in which results are produced later — TODO confirm.
    methods = ['Model', 'DR', 'WDR', 'Soft DR', 'Soft WDR',
               'Model Bsl', 'DR Bsl', 'WDR Bsl', 'Soft DR Bsl', 'Soft WDR Bsl',
               'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE', 'Soft WDR MSE',
               'MRDR Q', 'MRDR', 'WMRDR', 'Soft MRDR', 'Soft WMRDR',
               'MRDR-w Q', 'MRDR-w', 'WMRDR-w', 'Soft MRDR-w', 'Soft WMRDR-w',
               'IS', 'WIS', 'Soft IS', 'Soft WIS', 'PDIS', 'WPDIS', 'Soft PDIS', 'Soft WPDIS']
    num_method = len(methods)
    # Length of the longest method name.
    max_name_length = len(max(methods,key=len))

    # One empty deque per method for each of the two error collections.
    mse = [deque() for method in methods]
    ind_mse = [deque() for method in methods]
    for i_run in range(config.N):
Exemplo n.º 2
0
from src.utils import load_qnet, error_info
from collections import deque

# GPU use is switched off here: the torch.cuda.is_available() probe is
# disabled and use_cuda is fixed to False, so the CPU tensor types are chosen.
use_cuda = False
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
# Tensor is simply an alias for the selected FloatTensor type.
Tensor = FloatTensor


if __name__ == "__main__":
    # Script entry point (only the set-up part is visible in this fragment).
    # Create the MountainCar gym environment and select its configuration
    # object.
    env = gym.make("MountainCar-v0")
    config = mountaincar_config
    # Build a Q-network sized from the config, hand it to load_qnet together
    # with a checkpoint file name (presumably restores saved weights — confirm
    # in src.utils), then call .eval() on it.
    eval_qnet = QNet(config.state_dim,config.dqn_hidden_dims,config.action_size)
    load_qnet(eval_qnet,filename='qnet_mc.pth.tar')
    eval_qnet.eval()

    # Names of the estimators being compared. Presumably the order must match
    # the order in which results are produced later — TODO confirm.
    methods = ['Model', 'DR', 'WDR', 'Soft DR', 'Soft WDR',
               'Model Bsl', 'DR Bsl', 'WDR Bsl', 'Soft DR Bsl', 'Soft WDR Bsl',
               'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE', 'Soft WDR MSE',
               'MRDR Q', 'MRDR', 'WMRDR', 'Soft MRDR', 'Soft WMRDR',
               'MRDR-w Q', 'MRDR-w', 'WMRDR-w', 'Soft MRDR-w', 'Soft WMRDR-w',
               'IS', 'WIS', 'Soft IS', 'Soft WIS', 'PDIS', 'WPDIS', 'Soft PDIS', 'Soft WPDIS']
    num_method = len(methods)
    # Length of the longest method name.
    max_name_length = len(max(methods,key=len))

    # One empty deque per method for each of the two error collections.
    mse = [deque() for method in methods]
    ind_mse = [deque() for method in methods]
    for i_run in range(config.N):
Exemplo n.º 3
0
        mse_1, mse_2, mse_3 = error_info(
            results[i_method], target,
            methods[i_method].ljust(max_name_length))
        mse[i_method] = mse_1
        ind_mse[i_method] = mse_2
        mse_w[i_method] = mse_3

    return (mse, ind_mse, mse_w)


if __name__ == "__main__":
    # Script entry point (only the set-up part is visible in this fragment).
    # Create the MountainCar gym environment and select its configuration
    # object.
    env = gym.make("MountainCar-v0")
    config = mountaincar_config
    # First Q-network: built from the config sizes, passed to load_qnet with a
    # checkpoint file name, then .eval() is called on it.
    eval_qnet = QNet(config.state_dim, config.dqn_hidden_dims,
                     config.action_size)
    load_qnet(eval_qnet, filename='qnet_mc.pth.tar')
    eval_qnet.eval()

    # Second Q-network (bhv_qnet, presumably the behavior policy — confirm),
    # set up exactly like eval_qnet.
    bhv_qnet = QNet(config.state_dim, config.dqn_hidden_dims,
                    config.action_size)
    load_qnet(bhv_qnet, filename='qnet_mc.pth.tar')  # NOTE(review): same checkpoint as eval_qnet; the original comment said "target policy" — confirm intent
    bhv_qnet.eval()  # switch the mode of the loaded model via .eval()

    methods = [
        'Model', 'DR', 'DML_RepBM', 'DML_RepBM_estpz', 'DML_RepBM_estpz_wis',
        'DML_RepBM_estpz_sis', 'DML_RepBM_estpz_swis', 'DML-DR-CROSS-K-ND',
        'dml_dr_cross_k_estpz_nd', 'dml_dr_cross_k_estpz_wis_nd',
        'dml_dr_cross_k_estpz_sis_nd', 'dml_dr_cross_k_estpz_swis_nd',
        'dml_dr_cross_k_chunk_nd', 'WDR', 'Soft DR', 'Soft WDR', 'Model Bsl',
        'DR Bsl', 'DR EstPz Bsl', 'WDR EstPz Bsl', 'WDR Bsl', 'Soft DR Bsl',
        'Soft WDR Bsl', 'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE',
Exemplo n.º 4
0
    for i_method in range(num_method):
        mse_1, mse_2, mse_3 = error_info(results[i_method], target, methods[i_method].ljust(max_name_length))
        mse[i_method] = mse_1
        ind_mse[i_method] = mse_2
        mse_w[i_method] = mse_3

    return(mse, ind_mse, mse_w)


if __name__ == "__main__":
    # Script entry point (only the set-up part is visible in this fragment).
    # Create the CartPole gym environment and select its configuration object.
    env = gym.make("CartPole-v0")
    config = cartpole_config
    # Network input size is the configured state size plus the configured
    # noise size.
    state_dim = config.state_dim + config.noise_dim
    eval_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size)
    load_qnet(eval_qnet, filename='cp_s.pth.tar') # target policy
    eval_qnet.eval() # switch the mode of the loaded model via .eval()

    # Second Q-network (bhv_qnet, presumably the behavior policy — confirm),
    # set up exactly like eval_qnet.
    bhv_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size)
    load_qnet(bhv_qnet, filename='cp_s.pth.tar') # NOTE(review): same checkpoint as eval_qnet; the original comment said "target policy" — confirm intent
    bhv_qnet.eval() # switch the mode of the loaded model via .eval()

    methods = ['DML-DR-CROSS-K-ND',
               'dml_dr_cross_k_estpz_nd',
               'dml_dr_cross_k_estpz_wis_nd',
               'dml_dr_cross_k_estpz_sis_nd',
               'dml_dr_cross_k_estpz_swis_nd',
               'dml_dr_cross_k_chunk_nd',
               'Model Bsl',
               'DR Bsl',
               'DR EstPz Bsl',
Exemplo n.º 5
0
        mse[i_method] = mse_1
        ind_mse[i_method] = mse_2
        mse_w[i_method] = mse_3

    return(mse, ind_mse, mse_w)


if __name__ == "__main__":
    # Script entry point (only the set-up part is visible in this fragment).
    # Create the Acrobot gym environment and select its configuration object.
    env = gym.make("Acrobot-v1")
    config = acrobot_config

    # Network input size is the configured state size plus the configured
    # noise size.
    noise_dim = config.noise_dim
    state_dim = config.state_dim + noise_dim

    # NOTE(review): the checkpoint name is spelled 'acrobat' (environment is
    # Acrobot) — confirm it matches the file on disk.
    eval_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size)
    load_qnet(eval_qnet, filename='acrobat.pth.tar') # target policy
    eval_qnet.eval() # switch the mode of the loaded model via .eval()

    # Second Q-network (bhv_qnet, presumably the behavior policy — confirm),
    # set up exactly like eval_qnet.
    bhv_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size)
    load_qnet(bhv_qnet, filename='acrobat.pth.tar') # NOTE(review): same checkpoint as eval_qnet; the original comment said "target policy" — confirm intent
    bhv_qnet.eval() # switch the mode of the loaded model via .eval()


    methods = ['DML-DR-CROSS-K-ND',
               'dml_dr_cross_k_estpz_nd',
               'dml_dr_cross_k_estpz_wis_nd',
               'dml_dr_cross_k_estpz_sis_nd',
               'dml_dr_cross_k_estpz_swis_nd',
               'dml_dr_cross_k_chunk_nd',
               'Model Bsl',
               'DR Bsl',