Example #1
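All snippets below assume the same module-level setup. A minimal sketch of those globals, inferred from usage in the code (the values of Tensor, discount, and path are assumptions, and opts is assumed to come from the project's argument parser):

import gym
import numpy as np
import torch
from torch.autograd import Variable  # no-op wrapper, deprecated since PyTorch 0.4
from torch.utils.data import DataLoader

Tensor = torch.cuda.FloatTensor  # tensor type used by .type(Tensor) below (assumed)
discount = 0.99                  # Bellman discount factor (assumed value)
path = './save'                  # checkpoint root directory (assumed value)
# `opts` is assumed to be an argparse-style namespace with at least
# `env_name` and `ood_test`; the model classes (FlattenMlp_Dropout, RaPP,
# RegNetBase, SWAG, MC_Dropout_Model) and datasets come from the project code.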
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    print(model)

    ## Choose the optimizer to train
    # optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.95, weight_decay=0.) # default
    # optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            next_obs_act = Variable(data['next_obs_act'].type(Tensor))
            rewards = Variable(data['rewards'].type(Tensor))
            terminals = Variable(data['terminals'].type(Tensor))

            # loss, output, stats = criterion(model, input_, target_) # default

            target_q_values = model(next_obs_act).detach()
            y_target = rewards + (1. - terminals) * discount * target_q_values
            y_target = y_target.detach()
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())
        # Note: loss_buffer is never cleared, so this prints a running mean
        # over all epochs so far, not just the current epoch.
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(loss_buffer)))

        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))

    test()
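The TD target above, y = rewards + (1 - terminals) * discount * Q(next_obs_act), is bootstrapped from the same network that is being trained; .detach() stops gradients from flowing through the target. A common stabilization, not in the source, is a separate target network that is synced periodically; a hedged sketch:

# Hypothetical variant with a frozen target network (assumed addition).
target_model = FlattenMlp_Dropout(
    input_size=input_size,
    output_size=1,
    hidden_sizes=[256, 256],
).cuda()
target_model.load_state_dict(model.state_dict())

# Inside the batch loop, compute the target from the frozen copy:
#     target_q_values = target_model(next_obs_act).detach()
# and re-sync every few epochs:
#     target_model.load_state_dict(model.state_dict())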
Example #2
def test():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    ## Choose the trained model
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    model.load_state_dict(
        torch.load("{}/{}/model_100.pt".format(
            path, opts.env_name)))  # if not handling ensemble

    for i, data in enumerate(dataloader):
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        # if i == 0 :
        with torch.no_grad():
            ## Collect multiple stochastic forward passes (MC dropout)
            id_trajectories, ood_trajectories = [], []
            for _ in range(10):  # `_` avoids shadowing the outer loop index `i`
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)

            # id_sigma = np.std(id_trajectories, axis=0)
            # ood_sigma = np.std(ood_trajectories, axis=0)
            # Predictive variance across the 10 dropout passes, via
            # Var[X] = E[X^2] - (E[X])^2 (equivalent to np.var(..., axis=0)).
            id_sigma = np.mean(id_trajectories**2, axis=0) - np.mean(
                id_trajectories, axis=0)**2
            ood_sigma = np.mean(ood_trajectories**2, axis=0) - np.mean(
                ood_trajectories, axis=0)**2

            print('id_sigma : {}, ood_sigma : {}'.format(
                np.mean(id_sigma), np.mean(ood_sigma)))
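Despite the name, id_sigma / ood_sigma hold the predictive variance (sigma squared) over the 10 dropout passes, via the identity Var[X] = E[X^2] - (E[X])^2. np.var computes the same quantity directly:

# Equivalent, more direct formulation (population variance, ddof=0):
id_sigma = np.var(id_trajectories, axis=0)
ood_sigma = np.var(ood_trajectories, axis=0)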
Example #3
def test():
    ## Choose the trained model
    # model = RegNetBase(*args, **kwargs).type(Tensor) # default
    # model = FCN().type(Tensor)
    model = MC_Dropout_Model(input_dim=1,
                             output_dim=1,
                             num_units=200,
                             drop_prob=0.5).type(Tensor)
    # model = SWAG(RegNetBase, subspace_type="pca", *args, **kwargs,
    #              subspace_kwargs={"max_rank": 10, "pca_rank": 10}).type(Tensor)

    with torch.no_grad():
        ## Load testing dataset
        data = np.load('reg_data/test_data.npy')
        z = np.reshape(np.linspace(-3, 3, 100), [-1, 1])
        input_ = torch.from_numpy(z.astype(np.float32)).type(Tensor)

        trajectories = []
        ## Iterative test for each model
        for i in range(10):
            # model.load_state_dict(torch.load("./save/ensemble_" + str(i) + ".pt")) # default
            model.load_state_dict(
                torch.load("dropout_" + str(0) +
                           ".pt"))  # if not handling ensemble
            # model.load_state_dict(torch.load("test_ensemble_" + str(i) + ".pt")) # if not handling ensemble
            # model.load_state_dict(torch.load("ckpts/swag_checkpoint0-300.pt")["state_dict"]) # if not handling ensemble
            # print(model.subspace.cov_mat_sqrt)
            # model.sample(scale=10.)
            output_ = model(input_).cpu().numpy().T
            # trajectories.append(output_) # default
            trajectories.append(output_[:1, :])
        trajectories = np.vstack(trajectories)
        plot_predictive(data, trajectories, z, title="Swag_Confidence 95%")
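MC dropout only yields a spread if dropout stays stochastic at inference. The snippet relies on the module never being switched to eval mode (PyTorch modules start in train mode). A more explicit way to pin down that intent, as a sketch:

model.eval()  # deterministic everywhere else...
for m in model.modules():
    if isinstance(m, torch.nn.Dropout):
        m.train()  # ...but keep dropout sampling masks for MC estimates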
Example #4
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = RaPP(input_size).cuda()

    print(model)

    ## Choose the optimizer to train
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            y_pred = model(obs_act)
            # Autoencoder objective: reconstruct the input itself
            # (qf_criterion is just an MSELoss despite its name).
            loss = qf_criterion(y_pred, obs_act)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))
def get_diffs(x, model, batch_size=256):
    model.eval()
    with torch.no_grad():
        batchified = x.split(batch_size)
        stacked = []
        for _x in batchified:
            diffs = []
            _x = _x.to(next(model.parameters()).device).float()
            x_tilde = model(_x)  # full forward pass: reconstruction of _x
            diffs.append((x_tilde - _x).cpu())

            # Propagate both the input and its reconstruction through the
            # encoder, recording the gap at every hidden layer (the RaPP pathway).
            for layer in model.enc_layer_list:
                _x = layer(_x)
                x_tilde = layer(x_tilde)
                diffs.append((x_tilde - _x).cpu())

            stacked.append(diffs)

        stacked = list(zip(*stacked))
        diffs = [torch.cat(s, dim=0).numpy() for s in stacked]

    return diffs
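get_diffs returns one array per stage: the input-space reconstruction error first, then the gap at each hidden encoder layer. In RaPP these per-layer gaps are aggregated into a novelty score; a minimal sketch of a SAP-style (simple aggregation along pathway) score, assuming the list returned above:

def sap_score(diffs):
    # Sum of squared per-layer reconstruction gaps -> one novelty score per sample.
    return np.sum([np.square(d).sum(axis=1) for d in diffs], axis=0)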
Example #6
def train():
    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        Point_Dataset(),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = RaPP(4).cuda()

    print(model)

    ## Choose the optimizer to train
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, obs_act)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))
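After training, the autoencoder can be scored with the get_diffs helper above. A hedged usage sketch (the checkpoint name model_1980.pt, the last epoch divisible by 20, and the 'obs_act' field of Point_Dataset are assumptions carried over from the training loop):

model = RaPP(4).cuda()
model.load_state_dict(
    torch.load('{}/{}/model_1980.pt'.format(path, opts.env_name)))
batch = next(iter(DataLoader(Point_Dataset(), batch_size=400)))
diffs = get_diffs(batch['obs_act'].float(), model)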
Example #7
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    kwargs = {
        "dimensions": [200, 50, 50, 50],
        "output_dim": 1,
        "input_dim": input_size
    }
    args = list()

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = RegNetBase(*args, **kwargs).type(
        Tensor)  # Simple 5-layer fully-connected network
    # model.load_state_dict(torch.load('{}/{}/model_180.pt'.format(path, opts.env_name)))
    # swag part
    swag_model = SWAG(RegNetBase,
                      subspace_type="pca",
                      *args,
                      **kwargs,
                      subspace_kwargs={
                          "max_rank": 10,
                          "pca_rank": 10
                      }).type(Tensor)
    print(swag_model)
    swag_start = 50

    ## Choose the optimizer to train
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            next_obs_act = Variable(data['next_obs_act'].type(Tensor))
            rewards = Variable(data['rewards'].type(Tensor))
            terminals = Variable(data['terminals'].type(Tensor))

            # loss, output, stats = criterion(model, input_, target_) # default

            target_q_values = model(next_obs_act).detach()
            y_target = rewards + (1. - terminals) * discount * target_q_values
            y_target = y_target.detach()
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()

            if ep > swag_start:
                # After the burn-in epoch, snapshot weights into the SWAG
                # posterior (collected once per batch here).
                swag_model.collect_model(model)

            loss_buffer.append(loss.item())
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(swag_model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))

    test()
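At test time SWAG is typically used by sampling weight vectors from the fitted posterior and averaging predictions; the commented-out lines in Example #3 hint at the same sample() API. A minimal sketch under that assumption, reusing the last obs_act batch from the training loop:

# Hedged sketch: draw several weight samples and estimate mean/variance.
preds = []
with torch.no_grad():
    for _ in range(10):
        swag_model.sample(scale=1.0)  # one posterior weight sample
        preds.append(swag_model(obs_act))
preds = torch.stack(preds)
q_mean, q_var = preds.mean(dim=0), preds.var(dim=0)  # epistemic spread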